diff --git a/build.xml b/build.xml index 9db8312f383..78e0295c20d 100755 --- a/build.xml +++ b/build.xml @@ -145,6 +145,7 @@ (~$/(?i)\bno(n|)commit\b/$) : 'nocommit', (~$/\bTOOD:/$) : 'TOOD instead TODO', (~$/\t/$) : 'tabs instead spaces', + (~$/\Q/**\E((?:\s)|(?:\*))*\Q{@inheritDoc}\E((?:\s)|(?:\*))*\Q*/\E/$) : '{@inheritDoc} on its own is unnecessary', (~$/\$$(?:LastChanged)?Date\b/$) : 'svn keyword', (~$/\$$(?:(?:LastChanged)?Revision|Rev)\b/$) : 'svn keyword', (~$/\$$(?:LastChangedBy|Author)\b/$) : 'svn keyword', diff --git a/dev-tools/doap/lucene.rdf b/dev-tools/doap/lucene.rdf index ade660df8de..12caed36f6e 100644 --- a/dev-tools/doap/lucene.rdf +++ b/dev-tools/doap/lucene.rdf @@ -67,6 +67,13 @@ + + + lucene-7.2.1 + 2018-01-15 + 7.2.1 + + lucene-7.2.0 diff --git a/dev-tools/doap/solr.rdf b/dev-tools/doap/solr.rdf index fd2f96abf76..105e711c490 100644 --- a/dev-tools/doap/solr.rdf +++ b/dev-tools/doap/solr.rdf @@ -67,6 +67,13 @@ + + + solr-7.2.1 + 2018-01-15 + 7.2.1 + + solr-7.2.0 diff --git a/dev-tools/idea/solr/contrib/langid/langid.iml b/dev-tools/idea/solr/contrib/langid/langid.iml index 28223bd1352..afeb1255819 100644 --- a/dev-tools/idea/solr/contrib/langid/langid.iml +++ b/dev-tools/idea/solr/contrib/langid/langid.iml @@ -31,5 +31,6 @@ + diff --git a/dev-tools/scripts/reproduceJenkinsFailures.py b/dev-tools/scripts/reproduceJenkinsFailures.py new file mode 100644 index 00000000000..bb3212376fc --- /dev/null +++ b/dev-tools/scripts/reproduceJenkinsFailures.py @@ -0,0 +1,215 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import subprocess +import sys +import urllib.error +import urllib.request +from textwrap import dedent + +# Number of iterations per test suite +testIters = 5 + +usage = dedent('''\ + Usage:\n + python3 -u %s URL\n + Must be run from a Lucene/Solr git workspace. Downloads the Jenkins + log pointed to by the given URL, parses it for Git revision and failed + Lucene/Solr tests, checks out the Git revision in the local workspace, + groups the failed tests by module, then runs + 'ant test -Dtest.dups=%d -Dtests.class="*.test1[|*.test2[...]]" ...' + in each module of interest, failing at the end if any of the runs fails. + To control the maximum number of concurrent JVMs used for each module's + test run, set 'tests.jvms', e.g. 
in ~/lucene.build.properties
+  ''' % (sys.argv[0], testIters))
+
+reHelpArg = re.compile(r'-{1,2}(?:\?|h(?:elp)?)')
+
+# Example: Checking out Revision e441a99009a557f82ea17ee9f9c3e9b89c75cee6 (refs/remotes/origin/master)
+reGitRev = re.compile(r'Checking out Revision (\S+)')
+
+# Method example: NOTE: reproduce with: ant test -Dtestcase=ZkSolrClientTest -Dtests.method=testMultipleWatchesAsync -Dtests.seed=6EF5AB70F0032849 -Dtests.slow=true -Dtests.locale=he-IL -Dtests.timezone=NST -Dtests.asserts=true -Dtests.file.encoding=UTF-8
+# Suite example: NOTE: reproduce with: ant test -Dtestcase=CloudSolrClientTest -Dtests.seed=DB2DF2D8228BAF27 -Dtests.multiplier=3 -Dtests.slow=true -Dtests.locale=es-AR -Dtests.timezone=America/Argentina/Cordoba -Dtests.asserts=true -Dtests.file.encoding=US-ASCII
+reReproLine = re.compile(r'NOTE:\s+reproduce\s+with:(\s+ant\s+test\s+-Dtestcase=(\S+)\s+(?:-Dtests.method=\S+\s+)?(.*))')
+
+# Example: https://jenkins.thetaphi.de/job/Lucene-Solr-master-Linux/21108/
+reJenkinsURLWithoutConsoleText = re.compile(r'https?://.*/\d+/?\Z', re.IGNORECASE)
+
+reJavaFile = re.compile(r'(.*)\.java\Z')
+reModule = re.compile(r'\./(.*)/src/')
+reTestOutputFile = re.compile(r'TEST-(.*\.([^-.]+))(?:-\d+)?\.xml\Z')
+reErrorFailure = re.compile(r'(?:errors|failures)="[^0]')
+
+# consoleText from Policeman Jenkins's Windows jobs fails to decode as UTF-8
+encoding = 'iso-8859-1'
+
+tests = {}
+modules = {}
+
+lastFailureCode = 0
+gitCheckoutSucceeded = False
+
+def runOutput(cmd):
+  print('[repro] %s' % cmd)
+  try:
+    return subprocess.check_output(cmd.split(' '), universal_newlines=True).strip()
+  except subprocess.CalledProcessError as e:
+    raise RuntimeError("ERROR: Cmd '%s' failed with exit code %d and the following output:\n%s"
+                       % (cmd, e.returncode, e.output))
+
+# Remembers non-zero exit code in lastFailureCode unless rememberFailure==False
+def run(cmd, rememberFailure=True):
+  global lastFailureCode
+  print('[repro] %s' % cmd)
+  code = os.system(cmd)
+  if 0 != code and rememberFailure:
+    print('\n[repro] Setting last failure code to %d\n' % code)
+    lastFailureCode = code
+  return code
+
+def fetchAndParseJenkinsLog(url):
+  global revision
+  revision = None
+  print('[repro] Jenkins log URL: %s\n' % url)
+  try:
+    with urllib.request.urlopen(url) as consoleText:
+      for rawLine in consoleText:
+        line = rawLine.decode(encoding)
+        match = reGitRev.match(line)
+        if match is not None:
+          revision = match.group(1)
+          print('[repro] Revision: %s\n' % revision)
+        else:
+          match = reReproLine.search(line)
+          if match is not None:
+            print('[repro] Repro line: %s\n' % match.group(1))
+            testcase = match.group(2)
+            reproLineWithoutMethod = match.group(3).strip()
+            tests[testcase] = reproLineWithoutMethod
+  except urllib.error.URLError as e:
+    raise RuntimeError('ERROR: fetching %s : %s' % (url, e))
+
+  if revision is None:
+    if reJenkinsURLWithoutConsoleText.match(url):
+      print('[repro] Not a Jenkins log. Appending "/consoleText" and retrying ...\n')
+      fetchAndParseJenkinsLog(url + '/consoleText')
+    else:
+      raise RuntimeError('ERROR: %s does not appear to be a Jenkins log.' % url)
+  if 0 == len(tests):
+    print('[repro] No "reproduce with" lines found; exiting.')
+    sys.exit(0)
+
+def prepareWorkspace():
+  global gitCheckoutSucceeded
+  code = run('git checkout %s' % revision)
+  if 0 != code:
+    raise RuntimeError('ERROR: "git checkout %s" failed. See above. Maybe try "git pull"?' % revision)
+  gitCheckoutSucceeded = True
+  code = run('ant clean')
+  if 0 != code:
+    raise RuntimeError('ERROR: "ant clean" failed. 
See above.') + +def groupTestsByModule(): + for (dir, _, files) in os.walk('.'): + for file in files: + match = reJavaFile.search(file) + if match is not None: + test = match.group(1) + if test in tests: + match = reModule.match(dir) + module = match.group(1) + if module not in modules: + modules[module] = set() + modules[module].add(test) + print('[repro] Test suites by module:') + for module in modules: + print('[repro] %s' % module) + for test in modules[module]: + print('[repro] %s' % test) + +def runTests(): + global lastFailureCode + cwd = os.getcwd() + testCmdline = 'ant test-nocompile -Dtests.dups=%d -Dtests.maxfailures=%d -Dtests.class="%s" -Dtests.showOutput=onerror %s' + for module in modules: + moduleTests = list(modules[module]) + testList = '|'.join(map(lambda t: '*.%s' % t, moduleTests)) + numTests = len(moduleTests) + params = tests[moduleTests[0]] # Assumption: all tests in this module have the same cmdline params + os.chdir(module) + code = run('ant compile-test') + try: + if (0 != code): + raise RuntimeError("ERROR: Compile failed in %s/ with code %d. See above." % (module, code)) + run(testCmdline % (testIters, testIters * numTests, testList, params)) + finally: + os.chdir(cwd) + +def printReport(): + failures = {} + for start in ('lucene/build', 'solr/build'): + for (dir, _, files) in os.walk(start): + for file in files: + testOutputFileMatch = reTestOutputFile.search(file) + if testOutputFileMatch is not None: + testcase = testOutputFileMatch.group(1) + if testcase not in failures: + failures[testcase] = 0 + with open(os.path.join(dir, file), encoding='UTF-8') as testOutputFile: + for line in testOutputFile: + errorFailureMatch = reErrorFailure.search(line) + if errorFailureMatch is not None: + failures[testcase] += 1 + break + print("[repro] Failures:") + for testcase in sorted(failures): + print("[repro] %d/%d failed: %s" % (failures[testcase], testIters, testcase)) + +def rememberGitBranch(): + global origGitBranch + origGitBranch = runOutput('git rev-parse --abbrev-ref HEAD') + if (origGitBranch == 'HEAD'): # In detached HEAD state + origGitBranch = runOutput('git rev-parse HEAD') # Use the SHA when not on a branch + print('[repro] Initial local git branch/revision: %s' % origGitBranch) + +def main(): + if 2 != len(sys.argv) or reHelpArg.match(sys.argv[1]): + print(usage) + sys.exit(0) + fetchAndParseJenkinsLog(sys.argv[1]) + rememberGitBranch() + + try: + prepareWorkspace() + groupTestsByModule() + runTests() + printReport() + except Exception as e: + print('[repro] %s' % e) + sys.exit(1) + finally: + if gitCheckoutSucceeded: + run('git checkout %s' % origGitBranch, rememberFailure=False) # Restore original git branch/sha + + print('[repro] Exiting with code %d' % lastFailureCode) + sys.exit(lastFailureCode) + +if __name__ == '__main__': + try: + main() + except KeyboardInterrupt: + print('[repro] Keyboard interrupt...exiting') diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ff948094c88..e95d066a7f3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -32,6 +32,12 @@ API Changes * LUCENE-8012: Explanation now takes Number rather than float (Alan Woodward, Robert Muir) +* LUCENE-8116: SimScorer now only takes a frequency and a norm as per-document + scoring factors. 
(Adrien Grand) + +* LUCENE-8113: TermContext has been renamed to TermStates, and can now be + constructed lazily if term statistics are not required (Alan Woodward) + Changes in Runtime Behavior * LUCENE-7837: Indices that were created before the previous major version @@ -46,6 +52,9 @@ Changes in Runtime Behavior * LUCENE-7996: FunctionQuery and FunctionScoreQuery now return a score of 0 when the function produces a negative value. (Adrien Grand) +* LUCENE-8116: Similarities now score fields that omit norms as if the norm was + 1. This might change score values on fields that omit norms. (Adrien Grand) + Improvements * LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities. @@ -110,16 +119,55 @@ Improvements * LUCENE-8094: TermInSetQuery.toString now returns "field:(A B C)" (Mike McCandless) +* LUCENE-8121: UnifiedHighlighter passage relevancy is improved for terms that are + position sensitive (e.g. part of a phrase) by having an accurate freq. + (David Smiley) + +* LUCENE-8125: ICUTokenizer support for emoji/emoji sequence tokens. (Robert Muir) + +* LUCENE-8129: A Unicode set filter can now be specified when using ICUFoldingFilter. + (Ere Maijala) + Bug Fixes * LUCENE-8077: Fixed bug in how CheckIndex verifies doc-value iterators. (Xiaoshan Sun via Adrien Grand) +* SOLR-11758: Fixed FloatDocValues.boolVal to correctly return true for all values != 0.0F + (Munendra S N via hossman) + +* LUCENE-8121: The UnifiedHighlighter would highlight some terms within some nested + SpanNearQueries at positions where it should not have. It's fixed in the UH by + switching to the SpanCollector API. The original Highlighter still has this + problem (LUCENE-2287, LUCENE-5455, LUCENE-6796). Some public but internal parts of + the UH were refactored. (David Smiley, Steve Davids) + +* LUCENE-8120: Fix LatLonBoundingBox's toString() method (Martijn van Groningen, Adrien Grand) + +* LUCENE-8130: Fix NullPointerException from TermStates.toString() (Mike McCandless) + +* LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to handle correctly + hyphenation patterns with indicator >= 7. (Holger Bruch via Adrien Grand) + Other * LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name. (Kai Chan via Adrien Grand) +* LUCENE-8122: Upgrade analysis/icu to ICU 60.2. (Robert Muir) + +* LUCENE-8106: Add script (reproduceJenkinsFailures.py) to attempt to reproduce + failing tests from a Jenkins log. (Steve Rowe) + +* LUCENE-8075: Removed unnecessary null check in IntersectTermsEnum. + (Pulak Ghosh via Adrien Grand) + +======================= Lucene 7.2.1 ======================= + +Bug Fixes + +* LUCENE-8117: Fix advanceExact on SortedNumericDocValues produced by Lucene54DocValues. (Jim Ferenczi). + ======================= Lucene 7.2.0 ======================= API Changes diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 9d02db82f4d..d486a2141a1 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -19,12 +19,14 @@ FunctionScoreQuery maps negative values to 0. ## CustomScoreQuery, BoostedQuery and BoostingQuery removed (LUCENE-8099) ## -Instead use FunctionScoreQuery and a DoubleValuesSource implementation. For example, -to replace the functionality of BoostedQuery, you could do the following, using -the lucene-expressions module: +Instead use FunctionScoreQuery and a DoubleValuesSource implementation. BoostedQuery +and BoostingQuery may be replaced by calls to FunctionScoreQuery.boostByValue() and +FunctionScoreQuery.boostByQuery(). 
To replace more complex calculations in +CustomScoreQuery, use the lucene-expressions module: SimpleBindings bindings = new SimpleBindings(); bindings.add("score", DoubleValuesSource.SCORES); -bindings.add("boost", DoubleValuesSource.fromIntField("myboostfield")); -Expression expr = JavascriptCompiler.compile("score * boost"); +bindings.add("boost1", DoubleValuesSource.fromIntField("myboostfield")); +bindings.add("boost2", DoubleValuesSource.fromIntField("myotherboostfield")); +Expression expr = JavascriptCompiler.compile("score * (boost1 + ln(boost2))"); FunctionScoreQuery q = new FunctionScoreQuery(inputQuery, expr.getDoubleValuesSource(bindings)); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java index c01e2638042..f32b8c0801c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java @@ -123,9 +123,6 @@ public final class CommonGramsFilter extends TokenFilter { return true; } - /** - * {@inheritDoc} - */ @Override public void reset() throws IOException { super.reset(); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java index 9307e7b74aa..e8c98b7b5dd 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java @@ -62,9 +62,6 @@ public final class CommonGramsQueryFilter extends TokenFilter { super(input); } - /** - * {@inheritDoc} - */ @Override public void reset() throws IOException { super.reset(); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java index 0f7dd2b5c48..3c72b4fe045 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java @@ -89,7 +89,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer { StringBuilder buf = new StringBuilder(); byte v = vspace.get(k++); while (v != 0) { - char c = (char) ((v >>> 4) - 1 + '0'); + char c = (char) (((v & 0xf0 )>>> 4) - 1 + '0'); buf.append(c); c = (char) (v & 0x0f); if (c == 0) { @@ -151,7 +151,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer { StringBuilder buf = new StringBuilder(); byte v = vspace.get(k++); while (v != 0) { - char c = (char) ((v >>> 4) - 1); + char c = (char) (((v & 0xf0 )>>> 4) - 1); buf.append(c); c = (char) (v & 0x0f); if (c == 0) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java index 7cbd6f8a0c8..dfe06c88fbf 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java @@ -204,9 +204,6 @@ public class FingerprintFilter 
extends TokenFilter { } } - /** - * {@inheritDoc} - */ @Override public void reset() throws IOException { super.reset(); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java index 6c53aa3eef8..ff5d311754e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java @@ -71,9 +71,6 @@ public final class HyphenatedWordsFilter extends TokenFilter { super(in); } - /** - * {@inheritDoc} - */ @Override public boolean incrementToken() throws IOException { while (!exhausted && input.incrementToken()) { @@ -112,9 +109,6 @@ public final class HyphenatedWordsFilter extends TokenFilter { return false; } - /** - * {@inheritDoc} - */ @Override public void reset() throws IOException { super.reset(); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java index 457087c9041..69a7759c341 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java @@ -43,9 +43,6 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter { super(in); } - /** - * {@inheritDoc} - */ @Override public boolean incrementToken() throws IOException { while (input.incrementToken()) { @@ -71,9 +68,6 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter { return false; } - /** - * {@inheritDoc} - */ @Override public void reset() throws IOException { super.reset(); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java index 00ee311a4b0..254977f1423 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java @@ -1,58 +1,58 @@ -// DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate. - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.analysis.util; - -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.SparseFixedBitSet; - -/** - * This file contains unicode properties used by various {@link CharTokenizer}s. - * The data was created using ICU4J v59.1.0.0 - *

- * Unicode version: 9.0.0.0 - */ -public final class UnicodeProps { - private UnicodeProps() {} - - /** Unicode version that was used to generate this file: {@value} */ - public static final String UNICODE_VERSION = "9.0.0.0"; - - /** Bitset with Unicode WHITESPACE code points. */ - public static final Bits WHITESPACE = createBits( - 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x0085, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, - 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000); - - private static Bits createBits(final int... codepoints) { - final int len = codepoints[codepoints.length - 1] + 1; - final SparseFixedBitSet bitset = new SparseFixedBitSet(len); - for (int i : codepoints) bitset.set(i); - return new Bits() { - @Override - public boolean get(int index) { - return index < len && bitset.get(index); - } - - @Override - public int length() { - return 0x10FFFF + 1; - } - }; - } -} +// DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate. + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis.util; + +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.SparseFixedBitSet; + +/** + * This file contains unicode properties used by various {@link CharTokenizer}s. + * The data was created using ICU4J v60.2.0.0 + *

+ * Unicode version: 10.0.0.0 + */ +public final class UnicodeProps { + private UnicodeProps() {} + + /** Unicode version that was used to generate this file: {@value} */ + public static final String UNICODE_VERSION = "10.0.0.0"; + + /** Bitset with Unicode WHITESPACE code points. */ + public static final Bits WHITESPACE = createBits( + 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x0085, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, + 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000); + + private static Bits createBits(final int... codepoints) { + final int len = codepoints[codepoints.length - 1] + 1; + final SparseFixedBitSet bitset = new SparseFixedBitSet(len); + for (int i : codepoints) bitset.set(i); + return new Bits() { + @Override + public boolean get(int index) { + return index < len && bitset.get(index); + } + + @Override + public int length() { + return 0x10FFFF + 1; + } + }; + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java index ed3abe45b54..67a1bb42920 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java @@ -262,6 +262,21 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { } } + + public void testLucene8124() throws Exception { + InputSource is = new InputSource(getClass().getResource("hyphenation-LUCENE-8124.xml").toExternalForm()); + HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter + .getHyphenationTree(is); + + HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter( + whitespaceMockTokenizer( + "Rindfleisch"), + hyphenator); + + // TODO Rindfleisch returned twice is another issue of the HyphenationCompoundTokenFilter + assertTokenStreamContents(tf, new String[] { "Rindfleisch", "Rind", "Rindfleisch", "fleisch"}); + } + public static interface MockRetainAttribute extends Attribute { void setRetain(boolean attr); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation-LUCENE-8124.xml b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation-LUCENE-8124.xml new file mode 100644 index 00000000000..8710eab0872 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation-LUCENE-8124.xml @@ -0,0 +1,61 @@ + + + + + + + + + + +aA +bB +cC +dD +eE +fF +gG +hH +iI +jJ +kK +lL +mM +nN +oO +pP +qQ +rR +sS +tT +uU +vV +wW +xX +yY +zZ +æÆ +øØ +åÅ + + +d7f + + diff --git a/lucene/analysis/icu/src/data/uax29/Default.rbbi b/lucene/analysis/icu/src/data/uax29/Default.rbbi index 6c6d1f9ef23..afda68f47b5 100644 --- a/lucene/analysis/icu/src/data/uax29/Default.rbbi +++ b/lucene/analysis/icu/src/data/uax29/Default.rbbi @@ -14,16 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# This file is from ICU (with some small modifications, to avoid CJK dictionary break) +# This file is from ICU (with some small modifications, to avoid CJK dictionary break, +# and status code change related to that) # -# Copyright (C) 2002-2013, International Business Machines Corporation +# Copyright (C) 2016 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (C) 2002-2016, International Business Machines Corporation # and others. All Rights Reserved. # # file: word.txt # # ICU Word Break Rules # See Unicode Standard Annex #29. -# These rules are based on UAX #29 Revision 22 for Unicode Version 6.3 +# These rules are based on UAX #29 Revision 29 for Unicode Version 9.0 +# with additions for Emoji Sequences from https://goo.gl/cluFCn +# Plus additional characters introduces with Emoji 5, http://www.unicode.org/reports/tr51/proposed.html # # Note: Updates to word.txt will usually need to be merged into # word_POSIX.txt also. @@ -35,6 +40,7 @@ ############################################################################## !!chain; +!!quoted_literals_only; # @@ -43,8 +49,9 @@ $CR = [\p{Word_Break = CR}]; $LF = [\p{Word_Break = LF}]; -$Newline = [\p{Word_Break = Newline}]; +$Newline = [\p{Word_Break = Newline} ]; $Extend = [\p{Word_Break = Extend}]; +$ZWJ = [\p{Word_Break = ZWJ}]; $Regional_Indicator = [\p{Word_Break = Regional_Indicator}]; $Format = [\p{Word_Break = Format}]; $Katakana = [\p{Word_Break = Katakana}]; @@ -57,6 +64,13 @@ $MidLetter = [\p{Word_Break = MidLetter}]; $MidNum = [\p{Word_Break = MidNum}]; $Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]]; $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; +$E_Base = [\p{Word_Break = EB}]; +$E_Modifier = [\p{Word_Break = EM}]; + +# Data for Extended Pictographic scraped from CLDR common/properties/ExtendedPictographic.txt, r13267 +$Extended_Pict = [\U0001F774-\U0001F77F\U00002700-\U00002701\U00002703-\U00002704\U0000270E\U00002710-\U00002711\U00002765-\U00002767\U0001F030-\U0001F093\U0001F094-\U0001F09F\U0001F10D-\U0001F10F\U0001F12F\U0001F16C-\U0001F16F\U0001F1AD-\U0001F1E5\U0001F260-\U0001F265\U0001F203-\U0001F20F\U0001F23C-\U0001F23F\U0001F249-\U0001F24F\U0001F252-\U0001F25F\U0001F266-\U0001F2FF\U0001F7D5-\U0001F7FF\U0001F000-\U0001F003\U0001F005-\U0001F02B\U0001F02C-\U0001F02F\U0001F322-\U0001F323\U0001F394-\U0001F395\U0001F398\U0001F39C-\U0001F39D\U0001F3F1-\U0001F3F2\U0001F3F6\U0001F4FE\U0001F53E-\U0001F548\U0001F54F\U0001F568-\U0001F56E\U0001F571-\U0001F572\U0001F57B-\U0001F586\U0001F588-\U0001F589\U0001F58E-\U0001F58F\U0001F591-\U0001F594\U0001F597-\U0001F5A3\U0001F5A6-\U0001F5A7\U0001F5A9-\U0001F5B0\U0001F5B3-\U0001F5BB\U0001F5BD-\U0001F5C1\U0001F5C5-\U0001F5D0\U0001F5D4-\U0001F5DB\U0001F5DF-\U0001F5E0\U0001F5E2\U0001F5E4-\U0001F5E7\U0001F5E9-\U0001F5EE\U0001F5F0-\U0001F5F2\U0001F5F4-\U0001F5F9\U00002605\U00002607-\U0000260D\U0000260F-\U00002610\U00002612\U00002616-\U00002617\U00002619-\U0000261C\U0000261E-\U0000261F\U00002621\U00002624-\U00002625\U00002627-\U00002629\U0000262B-\U0000262D\U00002630-\U00002637\U0000263B-\U00002647\U00002654-\U0000265F\U00002661-\U00002662\U00002664\U00002667\U00002669-\U0000267A\U0000267C-\U0000267E\U00002680-\U00002691\U00002695\U00002698\U0000269A\U0000269D-\U0000269F\U000026A2-\U000026A9\U000026AC-\U000026AF\U000026B2-\U000026BC\U000026BF-\U000026C3\U000026C6-\U000026C7\U000026C9-\U000026CD\U000026D0\U000026D2\U000026D5-\U000026E8\U000026EB-\U000026EF\U000026F6\U000026FB-\U000026FC\U000026FE-\U000026FF\U00002388\U0001FA00-\U0001FFFD\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F0AF-\U0001F0B0\U0001F0C0\U0001F0D0\U0001F0F6-\U0001F0FF\U0001F80C-\U0001F80F\U0001F848-\U0001F84F\U0001F85A-\U0001F85F\U0001F888-\U0001F88F\U0001F8AE-\U0001F8FF\U0001F900-\U0001F90B\U0001F91F\U0001F928-\U0
001F92F\U0001F931-\U0001F932\U0001F94C\U0001F95F-\U0001F96B\U0001F992-\U0001F997\U0001F9D0-\U0001F9E6\U0001F90C-\U0001F90F\U0001F93F\U0001F94D-\U0001F94F\U0001F96C-\U0001F97F\U0001F998-\U0001F9BF\U0001F9C1-\U0001F9CF\U0001F9E7-\U0001F9FF\U0001F6C6-\U0001F6CA\U0001F6D3-\U0001F6D4\U0001F6E6-\U0001F6E8\U0001F6EA\U0001F6F1-\U0001F6F2\U0001F6F7-\U0001F6F8\U0001F6D5-\U0001F6DF\U0001F6ED-\U0001F6EF\U0001F6F9-\U0001F6FF]; +$EBG = [\p{Word_Break = EBG}]; +$EmojiNRK = [[\p{Emoji}] - [\p{Word_Break = Regional_Indicator}\u002a\u00230-9©®™〰〽]]; $Han = [:Han:]; $Hiragana = [:Hiragana:]; @@ -83,21 +97,21 @@ $ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; # except when they appear at the beginning of a region of text. # # TODO: check if handling of katakana in dictionary makes rules incorrect/void -$KatakanaEx = $Katakana ($Extend | $Format)*; -$Hebrew_LetterEx = $Hebrew_Letter ($Extend | $Format)*; -$ALetterEx = $ALetterPlus ($Extend | $Format)*; -$Single_QuoteEx = $Single_Quote ($Extend | $Format)*; -$Double_QuoteEx = $Double_Quote ($Extend | $Format)*; -$MidNumLetEx = $MidNumLet ($Extend | $Format)*; -$MidLetterEx = $MidLetter ($Extend | $Format)*; -$MidNumEx = $MidNum ($Extend | $Format)*; -$NumericEx = $Numeric ($Extend | $Format)*; -$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; -$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format)*; +$KatakanaEx = $Katakana ($Extend | $Format | $ZWJ)*; +$Hebrew_LetterEx = $Hebrew_Letter ($Extend | $Format | $ZWJ)*; +$ALetterEx = $ALetterPlus ($Extend | $Format | $ZWJ)*; +$Single_QuoteEx = $Single_Quote ($Extend | $Format | $ZWJ)*; +$Double_QuoteEx = $Double_Quote ($Extend | $Format | $ZWJ)*; +$MidNumLetEx = $MidNumLet ($Extend | $Format | $ZWJ)*; +$MidLetterEx = $MidLetter ($Extend | $Format | $ZWJ)*; +$MidNumEx = $MidNum ($Extend | $Format | $ZWJ)*; +$NumericEx = $Numeric ($Extend | $Format | $ZWJ)*; +$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format | $ZWJ)*; +$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format | $ZWJ)*; $Ideographic = [\p{Ideographic}]; -$HiraganaEx = $Hiragana ($Extend | $Format)*; -$IdeographicEx = $Ideographic ($Extend | $Format)*; +$HiraganaEx = $Hiragana ($Extend | $Format | $ZWJ)*; +$IdeographicEx = $Ideographic ($Extend | $Format | $ZWJ)*; ## ------------------------------------------------- @@ -108,12 +122,17 @@ $IdeographicEx = $Ideographic ($Extend | $Format)*; # $CR $LF; +# Rule 3c ZWJ x (Extended_Pict | EmojiNRK). Precedes WB4, so no intervening Extend chars allowed. +# +$ZWJ ($Extended_Pict | $EmojiNRK); + + # Rule 4 - ignore Format and Extend characters, except when they appear at the beginning # of a region of Text. The rule here comes into play when the start of text # begins with a group of Format chars, or with a "word" consisting of a single # char that is not in any of the listed word break categories followed by # format char(s), or is not a CJK dictionary character. -[^$CR $LF $Newline]? ($Extend | $Format)+; +[^$CR $LF $Newline]? ($Extend | $Format | $ZWJ)+; $NumericEx {100}; $ALetterEx {200}; @@ -123,6 +142,10 @@ $KatakanaEx {300}; # note: these status values override those from rule 5 $HiraganaEx {300}; # by virtue of being numerically larger. $IdeographicEx {400}; # +$E_Base ($Extend | $Format | $ZWJ)*; +$E_Modifier ($Extend | $Format | $ZWJ)*; +$Extended_Pict ($Extend | $Format | $ZWJ)*; + # # rule 5 # Do not break between most letters. 
@@ -170,9 +193,42 @@ $ExtendNumLetEx $Hebrew_Letter {200}; # (13b) $ExtendNumLetEx $NumericEx {100}; # (13b) $ExtendNumLetEx $KatakanaEx {300}; # (13b) -# rule 13c +# rule 14 +# Do not break within emoji modifier sequences -$Regional_IndicatorEx $Regional_IndicatorEx; +($E_Base | $EBG) ($Format | $Extend | $ZWJ)* $E_Modifier; + +# rules 15 - 17 +# Pairs of Regional Indicators stay together. +# With rule chaining disabled by ^, this rule will match exactly two of them. +# No other rule begins with a Regional_Indicator, so chaining cannot extend the match. +# +^$Regional_IndicatorEx $Regional_IndicatorEx; # special handling for CJK characters: chain for later dictionary segmentation $HangulSyllable $HangulSyllable {200}; + +# Rule 999 +# Match a single code point if no other rule applies. +.; + + +## ------------------------------------------------- + +!!safe_reverse; + +# rule 3 +($Extend | $Format | $ZWJ)+ .?; + +# rule 6 +($MidLetter | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* ($Hebrew_Letter | $ALetterPlus); + +# rule 7b +$Double_Quote ($Format | $Extend | $ZWJ)* $Hebrew_Letter; + + +# rule 11 +($MidNum | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* $Numeric; + +# rule 13c +$Regional_Indicator ($Format | $Extend | $ZWJ)* $Regional_Indicator; diff --git a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt index eb5b78e0ea2..806a4f9baf8 100644 --- a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt +++ b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt @@ -73,12 +73,14 @@ 0A4D> 0ABC> 0ACD> +0AFD..0AFF> 0B3C> 0B4D> 0BCD> 0C4D> 0CBC> 0CCD> +0D3B..0D3C> 0D4D> 0DCA> 0E47..0E4C> @@ -112,10 +114,10 @@ 1CD0..1CE8> 1CED> 1CF4> -1CF8..1CF9> +1CF7..1CF9> 1D2C..1D6A> 1DC4..1DCF> -1DF5> +1DF5..1DF9> 1DFD..1DFF> 1FBD> 1FBF..1FC1> @@ -175,7 +177,12 @@ FFE3> 1163F> 116B6..116B7> 1172B> +11A34> +11A47> +11A99> 11C3F> +11D42> +11D44..11D45> 16AF0..16AF4> 16F8F..16F9F> 1D167..1D169> diff --git a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt index fb8cf1ac66b..707674e299d 100644 --- a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt +++ b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt @@ -580,6 +580,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 11C57>0037 # BHAIKSUKI DIGIT SEVEN 11C58>0038 # BHAIKSUKI DIGIT EIGHT 11C59>0039 # BHAIKSUKI DIGIT NINE +11D50>0030 # MASARAM GONDI DIGIT ZERO +11D51>0031 # MASARAM GONDI DIGIT ONE +11D52>0032 # MASARAM GONDI DIGIT TWO +11D53>0033 # MASARAM GONDI DIGIT THREE +11D54>0034 # MASARAM GONDI DIGIT FOUR +11D55>0035 # MASARAM GONDI DIGIT FIVE +11D56>0036 # MASARAM GONDI DIGIT SIX +11D57>0037 # MASARAM GONDI DIGIT SEVEN +11D58>0038 # MASARAM GONDI DIGIT EIGHT +11D59>0039 # MASARAM GONDI DIGIT NINE 16A60>0030 # MRO DIGIT ZERO 16A61>0031 # MRO DIGIT ONE 16A62>0032 # MRO DIGIT TWO diff --git a/lucene/analysis/icu/src/data/utr30/nfc.txt b/lucene/analysis/icu/src/data/utr30/nfc.txt index 5f9b1821760..b41056d8203 100644 --- a/lucene/analysis/icu/src/data/utr30/nfc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfc.txt @@ -1,3 +1,5 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html # Copyright (C) 1999-2016, International Business Machines # Corporation and others. All Rights Reserved. # @@ -7,7 +9,7 @@ # # Complete data for Unicode NFC normalization. 
-* Unicode 9.0.0 +* Unicode 10.0.0 # Canonical_Combining_Class (ccc) values 0300..0314:230 @@ -164,6 +166,7 @@ 0C56:91 0CBC:7 0CCD:9 +0D3B..0D3C:9 0D4D:9 0DCA:9 0E38..0E39:103 @@ -234,6 +237,9 @@ 1DCF:220 1DD0:202 1DD1..1DF5:230 +1DF6:232 +1DF7..1DF8:228 +1DF9:220 1DFB:230 1DFC:233 1DFD:220 @@ -322,7 +328,12 @@ FE2E..FE2F:230 116B6:9 116B7:7 1172B:9 +11A34:9 +11A47:9 +11A99:9 11C3F:9 +11D42:7 +11D44..11D45:9 16AF0..16AF4:1 16B30..16B36:230 1BC9E:1 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc.txt b/lucene/analysis/icu/src/data/utr30/nfkc.txt index f51fa5db4b7..8b71727f89f 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc.txt @@ -1,3 +1,5 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html # Copyright (C) 1999-2016, International Business Machines # Corporation and others. All Rights Reserved. # @@ -11,7 +13,7 @@ # to NFKC one-way mappings. # Use this file as the second gennorm2 input file after nfc.txt. -* Unicode 9.0.0 +* Unicode 10.0.0 00A0>0020 00A8>0020 0308 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt index 7f33df58c84..726c5b5adce 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt @@ -1,7 +1,7 @@ -# Unicode Character Database -# Copyright (c) 1991-2016 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (C) 1999-2016, International Business Machines +# Corporation and others. All Rights Reserved. # # file name: nfkc_cf.txt # @@ -12,7 +12,7 @@ # and reformatted into syntax for the gennorm2 Normalizer2 data generator tool. # Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt. -* Unicode 9.0.0 +* Unicode 10.0.0 0041>0061 0042>0062 diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java index 0895b47438a..9c3770c26a9 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java @@ -59,18 +59,34 @@ import com.ibm.icu.text.Normalizer2; * All foldings, case folding, and normalization mappings are applied recursively * to ensure a fully folded and normalized result. *

+ * <p>
+ * A normalizer with additional settings such as a filter that lists characters not
+ * to be normalized can be passed in the constructor.
+ * </p>
*/ public final class ICUFoldingFilter extends ICUNormalizer2Filter { - // TODO: if the wrong version of the ICU jar is used, loading these data files may give a strange error. - // maybe add an explicit check? http://icu-project.org/apiref/icu4j/com/ibm/icu/util/VersionInfo.html - private static final Normalizer2 normalizer = Normalizer2.getInstance( - ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"), - "utr30", Normalizer2.Mode.COMPOSE); - + /** + * A normalizer for search term folding to Unicode text, + * applying foldings from UTR#30 Character Foldings. + */ + public static final Normalizer2 NORMALIZER = Normalizer2.getInstance( + // TODO: if the wrong version of the ICU jar is used, loading these data files may give a strange error. + // maybe add an explicit check? http://icu-project.org/apiref/icu4j/com/ibm/icu/util/VersionInfo.html + ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"), + "utr30", Normalizer2.Mode.COMPOSE); + /** * Create a new ICUFoldingFilter on the specified input */ public ICUFoldingFilter(TokenStream input) { + super(input, NORMALIZER); + } + + /** + * Create a new ICUFoldingFilter on the specified input with the specified + * normalizer + */ + public ICUFoldingFilter(TokenStream input, Normalizer2 normalizer) { super(input, normalizer); } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java index 036874ac9ff..1065cbfac81 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java @@ -25,7 +25,11 @@ import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs import org.apache.lucene.analysis.util.MultiTermAwareComponent; import org.apache.lucene.analysis.util.TokenFilterFactory; -/** +import com.ibm.icu.text.FilteredNormalizer2; +import com.ibm.icu.text.Normalizer2; +import com.ibm.icu.text.UnicodeSet; + +/** * Factory for {@link ICUFoldingFilter}. *
 * <fieldType name="text_folded" class="solr.TextField" positionIncrementGap="100">
 *   <analyzer>
 *     <tokenizer class="solr.ICUTokenizerFactory"/>
 *     <filter class="solr.ICUFoldingFilterFactory"/>
 *   </analyzer>
 * </fieldType>
@@ -37,18 +41,30 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  * @since 3.1.0
  */
 public class ICUFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  private final Normalizer2 normalizer;
 
   /** Creates a new ICUFoldingFilterFactory */
  public ICUFoldingFilterFactory(Map<String,String> args) {
     super(args);
+
+    Normalizer2 normalizer = ICUFoldingFilter.NORMALIZER;
+    String filter = get(args, "filter");
+    if (filter != null) {
+      UnicodeSet set = new UnicodeSet(filter);
+      if (!set.isEmpty()) {
+        set.freeze();
+        normalizer = new FilteredNormalizer2(normalizer, set);
+      }
+    }
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
+    this.normalizer = normalizer;
   }
 
   @Override
   public TokenStream create(TokenStream input) {
-    return new ICUFoldingFilter(input);
+    return new ICUFoldingFilter(input, normalizer);
   }
 
   @Override
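
The "filter" argument added above takes a UnicodeSet pattern; when it is present and non-empty, the folding normalizer is wrapped in a FilteredNormalizer2 so that characters outside the set pass through unfolded. A minimal usage sketch, not part of this patch: the class name and the whitespace tokenizer are illustrative assumptions, while the [^ö] set and the sample words mirror the testFilter case added further down.

import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.icu.ICUFoldingFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class FilteredFoldingDemo {
  public static void main(String[] args) throws Exception {
    // "filter" is a UnicodeSet pattern; characters outside the set are left unfolded.
    Map<String,String> factoryArgs = new HashMap<>();
    factoryArgs.put("filter", "[^ö]"); // fold everything except o-umlaut
    ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory(factoryArgs);

    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("Fönster Résumé"));
    TokenStream stream = factory.create(tokenizer);
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term); // expected: "fönster" (ö kept), then "resume" (é folded)
    }
    stream.end();
    stream.close();
  }
}
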
diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java
index d8ecb77d401..9e5050d55b8 100644
--- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java
+++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java
@@ -16,152 +16,84 @@
  */
 package org.apache.lucene.analysis.icu.segmentation;
 
-
-import java.text.CharacterIterator;
-
 import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.text.BreakIterator;
 import com.ibm.icu.text.RuleBasedBreakIterator;
 import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
 
 /**
- * Contain all the issues surrounding BreakIterators in ICU in one place.
- * Basically this boils down to the fact that they aren't very friendly to any
- * sort of OO design.
- * <p>
- * http://bugs.icu-project.org/trac/ticket/5901: RBBI.getRuleStatus(), hoist to
- * BreakIterator from RuleBasedBreakIterator
- * <p>
- * DictionaryBasedBreakIterator is a subclass of RuleBasedBreakIterator, but - * doesn't actually behave as a subclass: it always returns 0 for - * getRuleStatus(): - * http://bugs.icu-project.org/trac/ticket/4730: Thai RBBI, no boundary type - * tags + * Wraps RuleBasedBreakIterator, making object reuse convenient and + * emitting a rule status for emoji sequences. * @lucene.experimental */ -abstract class BreakIteratorWrapper { - protected final CharArrayIterator textIterator = new CharArrayIterator(); - protected char text[]; - protected int start; - protected int length; +final class BreakIteratorWrapper { + private final CharArrayIterator textIterator = new CharArrayIterator(); + private final RuleBasedBreakIterator rbbi; + private char text[]; + private int start; + private int status; + + BreakIteratorWrapper(RuleBasedBreakIterator rbbi) { + this.rbbi = rbbi; + } + + int current() { + return rbbi.current(); + } - abstract int next(); - abstract int current(); - abstract int getRuleStatus(); - abstract void setText(CharacterIterator text); + int getRuleStatus() { + return status; + } + + int next() { + int current = rbbi.current(); + int next = rbbi.next(); + status = calcStatus(current, next); + return next; + } + + /** Returns current rule status for the text between breaks. (determines token type) */ + private int calcStatus(int current, int next) { + // to support presentation selectors, we need to handle alphanum, num, and none at least, so currently not worth optimizing. + // https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AEmoji%3A%5D-%5B%3AEmoji_Presentation%3A%5D&g=Word_Break&i= + if (next != BreakIterator.DONE && isEmoji(current, next)) { + return ICUTokenizerConfig.EMOJI_SEQUENCE_STATUS; + } else { + return rbbi.getRuleStatus(); + } + } + + // See unicode doc L2/16-315 and also the RBBI rules for rationale. + // we don't include regional indicators here, because they aren't ambiguous for tagging, + // they need only be treated special for segmentation. + static final UnicodeSet EMOJI_RK = new UnicodeSet("[\u002a\u00230-9©®™〰〽]").freeze(); + + /** Returns true if the current text represents emoji character or sequence */ + private boolean isEmoji(int current, int next) { + int begin = start + current; + int end = start + next; + int codepoint = UTF16.charAt(text, 0, end, begin); + // TODO: this can be made more aggressive and future-proof if it uses [:Extended_Pictographic:] + if (UCharacter.hasBinaryProperty(codepoint, UProperty.EMOJI)) { + if (EMOJI_RK.contains(codepoint)) { + // if its in EmojiRK, we don't treat it as emoji unless there is evidence it forms emoji sequence, + // an emoji presentation selector or keycap follows. + int trailer = begin + Character.charCount(codepoint); + return trailer < end && (text[trailer] == 0xFE0F || text[trailer] == 0x20E3); + } else { + return true; + } + } + return false; + } void setText(char text[], int start, int length) { this.text = text; this.start = start; - this.length = length; textIterator.setText(text, start, length); - setText(textIterator); - } - - /** - * If it's a RuleBasedBreakIterator, the rule status can be used for token type. If it's - * any other BreakIterator, the rulestatus method is not available, so treat - * it like a generic BreakIterator. 
- */ - static BreakIteratorWrapper wrap(BreakIterator breakIterator) { - if (breakIterator instanceof RuleBasedBreakIterator) - return new RBBIWrapper((RuleBasedBreakIterator) breakIterator); - else - return new BIWrapper(breakIterator); - } - - /** - * RuleBasedBreakIterator wrapper: RuleBasedBreakIterator (as long as it's not - * a DictionaryBasedBreakIterator) behaves correctly. - */ - static final class RBBIWrapper extends BreakIteratorWrapper { - private final RuleBasedBreakIterator rbbi; - - RBBIWrapper(RuleBasedBreakIterator rbbi) { - this.rbbi = rbbi; - } - - @Override - int current() { - return rbbi.current(); - } - - @Override - int getRuleStatus() { - return rbbi.getRuleStatus(); - } - - @Override - int next() { - return rbbi.next(); - } - - @Override - void setText(CharacterIterator text) { - rbbi.setText(text); - } - } - - /** - * Generic BreakIterator wrapper: Either the rulestatus method is not - * available or always returns 0. Calculate a rulestatus here so it behaves - * like RuleBasedBreakIterator. - * - * Note: This is slower than RuleBasedBreakIterator. - */ - static final class BIWrapper extends BreakIteratorWrapper { - private final BreakIterator bi; - private int status; - - BIWrapper(BreakIterator bi) { - this.bi = bi; - } - - @Override - int current() { - return bi.current(); - } - - @Override - int getRuleStatus() { - return status; - } - - @Override - int next() { - int current = bi.current(); - int next = bi.next(); - status = calcStatus(current, next); - return next; - } - - private int calcStatus(int current, int next) { - if (current == BreakIterator.DONE || next == BreakIterator.DONE) - return RuleBasedBreakIterator.WORD_NONE; - - int begin = start + current; - int end = start + next; - - int codepoint; - for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) { - codepoint = UTF16.charAt(text, 0, end, begin); - - if (UCharacter.isDigit(codepoint)) - return RuleBasedBreakIterator.WORD_NUMBER; - else if (UCharacter.isLetter(codepoint)) { - // TODO: try to separately specify ideographic, kana? 
- // [currently all bundled as letter for this case] - return RuleBasedBreakIterator.WORD_LETTER; - } - } - - return RuleBasedBreakIterator.WORD_NONE; - } - - @Override - void setText(CharacterIterator text) { - bi.setText(text); - status = RuleBasedBreakIterator.WORD_NONE; - } + rbbi.setText(textIterator); + status = RuleBasedBreakIterator.WORD_NONE; } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java index 096eada2de3..3cb39edb92d 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java @@ -123,7 +123,7 @@ final class CompositeBreakIterator { private BreakIteratorWrapper getBreakIterator(int scriptCode) { if (wordBreakers[scriptCode] == null) - wordBreakers[scriptCode] = BreakIteratorWrapper.wrap(config.getBreakIterator(scriptCode)); + wordBreakers[scriptCode] = new BreakIteratorWrapper(config.getBreakIterator(scriptCode)); return wordBreakers[scriptCode]; } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java index 50a6b4c71d8..10e6c671817 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java @@ -52,6 +52,8 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig { public static final String WORD_LETTER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM]; /** Token type for words that appear to be numbers */ public static final String WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM]; + /** Token type for words that appear to be emoji sequences */ + public static final String WORD_EMOJI = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI]; /* * the default breakiterators in use. these can be expensive to @@ -65,9 +67,9 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig { // maybe add an explicit check? http://icu-project.org/apiref/icu4j/com/ibm/icu/util/VersionInfo.html // the same as ROOT, except no dictionary segmentation for cjk - private static final BreakIterator defaultBreakIterator = + private static final RuleBasedBreakIterator defaultBreakIterator = readBreakIterator("Default.brk"); - private static final BreakIterator myanmarSyllableIterator = + private static final RuleBasedBreakIterator myanmarSyllableIterator = readBreakIterator("MyanmarSyllable.brk"); // TODO: deprecate this boolean? you only care if you are doing super-expert stuff... 
@@ -95,16 +97,16 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig { } @Override - public BreakIterator getBreakIterator(int script) { + public RuleBasedBreakIterator getBreakIterator(int script) { switch(script) { - case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone(); + case UScript.JAPANESE: return (RuleBasedBreakIterator)cjkBreakIterator.clone(); case UScript.MYANMAR: if (myanmarAsWords) { - return (BreakIterator)defaultBreakIterator.clone(); + return (RuleBasedBreakIterator)defaultBreakIterator.clone(); } else { - return (BreakIterator)myanmarSyllableIterator.clone(); + return (RuleBasedBreakIterator)myanmarSyllableIterator.clone(); } - default: return (BreakIterator)defaultBreakIterator.clone(); + default: return (RuleBasedBreakIterator)defaultBreakIterator.clone(); } } @@ -119,6 +121,8 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig { return script == UScript.HANGUL ? WORD_HANGUL : WORD_LETTER; case RuleBasedBreakIterator.WORD_NUMBER: return WORD_NUMBER; + case EMOJI_SEQUENCE_STATUS: + return WORD_EMOJI; default: /* some other custom code */ return ""; } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java index 09415516479..8b62ddbea67 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java @@ -200,18 +200,18 @@ public final class ICUTokenizer extends Tokenizer { */ private boolean incrementTokenBuffer() { int start = breaker.current(); - if (start == BreakIterator.DONE) - return false; // BreakIterator exhausted + assert start != BreakIterator.DONE; // find the next set of boundaries, skipping over non-tokens (rule status 0) int end = breaker.next(); - while (start != BreakIterator.DONE && breaker.getRuleStatus() == 0) { + while (end != BreakIterator.DONE && breaker.getRuleStatus() == 0) { start = end; end = breaker.next(); } - if (start == BreakIterator.DONE) + if (end == BreakIterator.DONE) { return false; // BreakIterator exhausted + } termAtt.copyBuffer(buffer, start, end - start); offsetAtt.setOffset(correctOffset(offset + start), correctOffset(offset + end)); diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java index 69694fc0780..e2d3dae3d75 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java @@ -16,8 +16,7 @@ */ package org.apache.lucene.analysis.icu.segmentation; - -import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.RuleBasedBreakIterator; /** * Class that allows for tailored Unicode Text Segmentation on @@ -25,14 +24,16 @@ import com.ibm.icu.text.BreakIterator; * @lucene.experimental */ public abstract class ICUTokenizerConfig { - + /** Rule status for emoji sequences */ + public static final int EMOJI_SEQUENCE_STATUS = 299; + /** * Sole constructor. (For invocation by subclass * constructors, typically implicit.) */ public ICUTokenizerConfig() {} /** Return a breakiterator capable of processing a given script. 
*/ - public abstract BreakIterator getBreakIterator(int script); + public abstract RuleBasedBreakIterator getBreakIterator(int script); /** Return a token type value for a given script and BreakIterator * rule status. */ public abstract String getType(int script, int ruleStatus); diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java index 4d29b0c36bc..0cd4cf28e5f 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java @@ -116,9 +116,9 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) { @Override - public BreakIterator getBreakIterator(int script) { + public RuleBasedBreakIterator getBreakIterator(int script) { if (breakers[script] != null) { - return (BreakIterator) breakers[script].clone(); + return (RuleBasedBreakIterator) breakers[script].clone(); } else { return super.getBreakIterator(script); } diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html index bdace97b4c6..6fa5821c242 100644 --- a/lucene/analysis/icu/src/java/overview.html +++ b/lucene/analysis/icu/src/java/overview.html @@ -353,7 +353,7 @@ and

Backwards Compatibility

This module exists to provide up-to-date Unicode functionality that supports -the most recent version of Unicode (currently 8.0). However, some users who wish +the most recent version of Unicode (currently 10.0). However, some users who wish for stronger backwards compatibility can restrict {@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}. diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk index c94a023c2ce..4a9df159935 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk index c3357efa7ce..a9d0673aa8d 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm index 1a16f3eb182..1c3de121cad 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java index 3782216d38c..3e3c5235791 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; /** basic tests for {@link ICUFoldingFilterFactory} */ public class TestICUFoldingFilterFactory extends BaseTokenStreamTestCase { - + /** basic tests to ensure the folding is working */ public void test() throws Exception { Reader reader = new StringReader("Résumé"); @@ -35,7 +35,24 @@ public class TestICUFoldingFilterFactory extends BaseTokenStreamTestCase { stream = factory.create(stream); assertTokenStreamContents(stream, new String[] { "resume" }); } - + + /** test to ensure the filter parameter is working */ + public void testFilter() throws Exception { + HashMap args = new HashMap(); + args.put("filter", "[^ö]"); + ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory(args); + + Reader reader = new StringReader("Résumé"); + TokenStream stream = whitespaceMockTokenizer(reader); + stream = factory.create(stream); + assertTokenStreamContents(stream, new String[] { "resume" }); + + reader = new StringReader("Fönster"); + stream = whitespaceMockTokenizer(reader); + stream = factory.create(stream); + assertTokenStreamContents(stream, new String[] { "fönster" }); + } + /** Test that bogus arguments result in exception */ public void testBogusArguments() throws Exception { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { diff --git 
a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java index 027baa35705..98939752cbe 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java @@ -16,13 +16,10 @@ */ package org.apache.lucene.analysis.icu.segmentation; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.icu.ICUNormalizer2Filter; import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute; import com.ibm.icu.lang.UScript; @@ -76,8 +73,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true)); - TokenFilter filter = new ICUNormalizer2Filter(tokenizer); - return new TokenStreamComponents(tokenizer, filter); + return new TokenStreamComponents(tokenizer); } }; } @@ -90,8 +86,8 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { public void testArmenian() throws Exception { assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։", - new String[] { "վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", - "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "վիքիպեդիայի", "կայքը" } ); + new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", + "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } ); } public void testAmharic() throws Exception { @@ -102,12 +98,12 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { public void testArabic() throws Exception { assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.", new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", - "بالإنجليزية", "truth", "in", "numbers", "the", "wikipedia", "story", "سيتم", "إطلاقه", "في", "2008" } ); + "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } ); } public void testAramaic() throws Exception { assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀", - new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", + new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"}); } @@ -125,7 +121,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { public void testGreek() throws Exception { 
assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.", - new String[] { "γράφεται", "σε", "συνεργασία", "από", "εθελοντέσ", "με", "το", "λογισμικό", "wiki", "κάτι", "που", + new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που", "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" }); } @@ -156,7 +152,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { */ public void testChinese() throws Exception { assertAnalyzesTo(a, "我是中国人。 1234 Tests ", - new String[] { "我", "是", "中", "国", "人", "1234", "tests"}); + new String[] { "我", "是", "中", "国", "人", "1234", "Tests"}); } public void testHebrew() throws Exception { @@ -186,8 +182,8 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { /* Tests from StandardAnalyzer, just to show behavior is similar */ public void testAlphanumericSA() throws Exception { // alphanumeric tokens - assertAnalyzesTo(a, "B2B", new String[]{"b2b"}); - assertAnalyzesTo(a, "2B", new String[]{"2b"}); + assertAnalyzesTo(a, "B2B", new String[]{"B2B"}); + assertAnalyzesTo(a, "2B", new String[]{"2B"}); } public void testDelimitersSA() throws Exception { @@ -199,34 +195,34 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { public void testApostrophesSA() throws Exception { // internal apostrophes: O'Reilly, you're, O'Reilly's - assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"}); + assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"}); assertAnalyzesTo(a, "you're", new String[]{"you're"}); assertAnalyzesTo(a, "she's", new String[]{"she's"}); - assertAnalyzesTo(a, "Jim's", new String[]{"jim's"}); + assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"}); assertAnalyzesTo(a, "don't", new String[]{"don't"}); - assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly's"}); + assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"}); } public void testNumericSA() throws Exception { // floating point, serial, model numbers, ip addresses, etc. // every other segment must have at least one digit assertAnalyzesTo(a, "21.35", new String[]{"21.35"}); - assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"}); + assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"}); assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); } public void testTextWithNumbersSA() throws Exception { // numbers - assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"}); + assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"}); } public void testVariousTextSA() throws Exception { // various - assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"}); - assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"}); - assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"}); - assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"}); + assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"}); + assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"}); + assertAnalyzesTo(a, "foo bar . 
FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"}); + assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"}); } public void testKoreanSA() throws Exception { @@ -242,14 +238,14 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { public void testOffsets() throws Exception { assertAnalyzesTo(a, "David has 5000 bones", - new String[] {"david", "has", "5000", "bones"}, + new String[] {"David", "has", "5000", "bones"}, new int[] {0, 6, 10, 15}, new int[] {5, 9, 14, 20}); } public void testTypes() throws Exception { assertAnalyzesTo(a, "David has 5000 bones", - new String[] {"david", "has", "5000", "bones"}, + new String[] {"David", "has", "5000", "bones"}, new String[] { "", "", "", "" }); } @@ -265,6 +261,61 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { new String[] { "", "", "", "", "" }); } + /** simple emoji */ + public void testEmoji() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩 💩💩", + new String[] { "💩", "💩", "💩" }, + new String[] { "", "", "" }); + } + + /** emoji zwj sequence */ + public void testEmojiSequence() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "👩‍❤️‍👩", + new String[] { "👩‍❤️‍👩" }, + new String[] { "" }); + } + + /** emoji zwj sequence with fitzpatrick modifier */ + public void testEmojiSequenceWithModifier() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "👨🏼‍⚕️", + new String[] { "👨🏼‍⚕️" }, + new String[] { "" }); + } + + /** regional indicator */ + public void testEmojiRegionalIndicator() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "🇺🇸🇺🇸", + new String[] { "🇺🇸", "🇺🇸" }, + new String[] { "", "" }); + } + + /** variation sequence */ + public void testEmojiVariationSequence() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣", + new String[] { "#️⃣" }, + new String[] { "" }); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣", + new String[] { "3️⃣",}, + new String[] { "" }); + } + + public void testEmojiTagSequence() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "🏴󠁧󠁢󠁥󠁮󠁧󠁿", + new String[] { "🏴󠁧󠁢󠁥󠁮󠁧󠁿" }, + new String[] { "" }); + } + + public void testEmojiTokenization() throws Exception { + // simple emoji around latin + BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo💩poo", + new String[] { "poo", "💩", "poo" }, + new String[] { "", "", "" }); + // simple emoji around non-latin + BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩中國💩", + new String[] { "💩", "中", "國", "💩" }, + new String[] { "", "", "", "" }); + } + /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java index 75481f1924c..d93a8104891 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java @@ -78,6 +78,15 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase { ); } + /** + * dictionary segmentation with emoji + */ + public void testSimpleJapaneseWithEmoji() throws Exception { + assertAnalyzesTo(a, "それはまだ実験段階にあります💩", + new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます", "💩" } + ); + } + public void testJapaneseTypes() throws Exception { 
assertAnalyzesTo(a, "仮名遣い カタカナ", new String[] { "仮名遣い", "カタカナ" }, diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java index 0f2bffecfb0..042fa37a2f4 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java @@ -62,9 +62,9 @@ import java.util.regex.Pattern; */ public class GenerateUTR30DataFiles { private static final String ICU_SVN_TAG_URL - = "http://source.icu-project.org/repos/icu/icu/tags"; - private static final String ICU_RELEASE_TAG = "release-58-1"; - private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2"; + = "http://source.icu-project.org/repos/icu/tags"; + private static final String ICU_RELEASE_TAG = "release-60-2"; + private static final String ICU_DATA_NORM2_PATH = "icu4c/source/data/unidata/norm2"; private static final String NFC_TXT = "nfc.txt"; private static final String NFKC_TXT = "nfkc.txt"; private static final String NFKC_CF_TXT = "nfkc_cf.txt"; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java index 4f768b732f9..7e1d7a11529 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java @@ -166,9 +166,6 @@ public class JapaneseIterationMarkCharFilter extends CharFilter { buffer.reset(input); } - /** - * {@inheritDoc} - */ @Override public int read(char[] buffer, int offset, int length) throws IOException { int read = 0; @@ -185,9 +182,6 @@ public class JapaneseIterationMarkCharFilter extends CharFilter { return read == 0 ? 
-1 : read; } - /** - * {@inheritDoc} - */ @Override public int read() throws IOException { int ic = buffer.get(bufferPosition); diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 98e7aafcacf..edeb0ee6c1c 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -293,7 +293,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase { "7.1.0-cfs", "7.1.0-nocfs", "7.2.0-cfs", - "7.2.0-nocfs" + "7.2.0-nocfs", + "7.2.1-cfs", + "7.2.1-nocfs" }; public static String[] getOldNames() { @@ -304,7 +306,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase { "sorted.7.0.0", "sorted.7.0.1", "sorted.7.1.0", - "sorted.7.2.0" + "sorted.7.2.0", + "sorted.7.2.1" }; public static String[] getOldSortedNames() { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/index.7.2.1-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.7.2.1-cfs.zip new file mode 100644 index 00000000000..e579dabbb48 Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.7.2.1-cfs.zip differ diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/index.7.2.1-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.7.2.1-nocfs.zip new file mode 100644 index 00000000000..68f14a4e095 Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.7.2.1-nocfs.zip differ diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted.7.2.1.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted.7.2.1.zip new file mode 100644 index 00000000000..80e676a5a5f Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted.7.2.1.zip differ diff --git a/lucene/classification/src/java/org/apache/lucene/classification/BM25NBClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/BM25NBClassifier.java index 1a74416f33c..f03fc5300f0 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/BM25NBClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/BM25NBClassifier.java @@ -99,17 +99,11 @@ public class BM25NBClassifier implements Classifier { this.query = query; } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(String inputDocument) throws IOException { return assignClassNormalizedList(inputDocument).get(0); } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text) throws IOException { List> assignedClasses = assignClassNormalizedList(text); @@ -117,9 +111,6 @@ public class BM25NBClassifier implements Classifier { return assignedClasses; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text, int max) throws IOException { List> assignedClasses = assignClassNormalizedList(text); diff --git a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java index 928c0366770..394d15f777d 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java 
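// Illustrative sketch (not from this patch): the doc comments removed in the
// surrounding hunks were no-ops, because an overriding method that carries no
// comment of its own inherits the overridden method's javadoc automatically.
// Hypothetical example:
interface Greeter {
  /** Returns a greeting for the given name. */
  String greet(String name);
}
final class PlainGreeter implements Greeter {
  @Override // inherits the interface javadoc; a bare {@inheritDoc} adds nothing
  public String greet(String name) { return "hello " + name; }
}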
@@ -195,9 +195,6 @@ public class BooleanPerceptronClassifier implements Classifier { } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(String text) throws IOException { @@ -220,18 +217,12 @@ public class BooleanPerceptronClassifier implements Classifier { return new ClassificationResult<>(output >= bias, score); } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text) throws IOException { return null; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text, int max) throws IOException { diff --git a/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java index cbd241b4bb4..941d881a3ef 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java @@ -103,9 +103,6 @@ public class KNearestFuzzyClassifier implements Classifier { } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(String text) throws IOException { TopDocs knnResults = knnSearch(text); @@ -121,9 +118,6 @@ public class KNearestFuzzyClassifier implements Classifier { return assignedClass; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text) throws IOException { TopDocs knnResults = knnSearch(text); @@ -132,9 +126,6 @@ public class KNearestFuzzyClassifier implements Classifier { return assignedClasses; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text, int max) throws IOException { TopDocs knnResults = knnSearch(text); @@ -213,7 +204,7 @@ public class KNearestFuzzyClassifier implements Classifier { ", classFieldName='" + classFieldName + '\'' + ", k=" + k + ", query=" + query + - ", similarity=" + indexSearcher.getSimilarity(true) + + ", similarity=" + indexSearcher.getSimilarity() + '}'; } } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java index f0391f4471d..1bc53b0202c 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java @@ -119,9 +119,6 @@ public class KNearestNeighborClassifier implements Classifier { } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(String text) throws IOException { return classifyFromTopDocs(knnSearch(text)); @@ -143,9 +140,6 @@ public class KNearestNeighborClassifier implements Classifier { return assignedClass; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text) throws IOException { TopDocs knnResults = knnSearch(text); @@ -154,9 +148,6 @@ public class KNearestNeighborClassifier implements Classifier { return assignedClasses; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text, int max) throws IOException { TopDocs knnResults = knnSearch(text); @@ -251,7 +242,7 @@ public class KNearestNeighborClassifier implements Classifier { ", classFieldName='" + classFieldName + '\'' + ", k=" + k + ", query=" + query + - ", similarity=" + indexSearcher.getSimilarity(true) + + ", similarity=" + indexSearcher.getSimilarity() + '}'; } } diff --git 
a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java index 3509df58511..a1546498de6 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java @@ -98,9 +98,6 @@ public class SimpleNaiveBayesClassifier implements Classifier { this.query = query; } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(String inputDocument) throws IOException { List> assignedClasses = assignClassNormalizedList(inputDocument); @@ -115,9 +112,6 @@ public class SimpleNaiveBayesClassifier implements Classifier { return assignedClass; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text) throws IOException { List> assignedClasses = assignClassNormalizedList(text); @@ -125,9 +119,6 @@ public class SimpleNaiveBayesClassifier implements Classifier { return assignedClasses; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(String text, int max) throws IOException { List> assignedClasses = assignClassNormalizedList(text); diff --git a/lucene/classification/src/java/org/apache/lucene/classification/document/KNearestNeighborDocumentClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/document/KNearestNeighborDocumentClassifier.java index d687722c87c..39684ee25e7 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/document/KNearestNeighborDocumentClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/document/KNearestNeighborDocumentClassifier.java @@ -72,17 +72,11 @@ public class KNearestNeighborDocumentClassifier extends KNearestNeighborClassifi this.field2analyzer = field2analyzer; } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(Document document) throws IOException { return classifyFromTopDocs(knnSearch(document)); } - /** - * {@inheritDoc} - */ @Override public List> getClasses(Document document) throws IOException { TopDocs knnResults = knnSearch(document); @@ -91,9 +85,6 @@ public class KNearestNeighborDocumentClassifier extends KNearestNeighborClassifi return assignedClasses; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(Document document, int max) throws IOException { TopDocs knnResults = knnSearch(document); diff --git a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java index 6bc8573c094..f6405901384 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java @@ -71,9 +71,6 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi this.field2analyzer = field2analyzer; } - /** - * {@inheritDoc} - */ @Override public ClassificationResult assignClass(Document document) throws IOException { List> assignedClasses = assignNormClasses(document); @@ -88,9 +85,6 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi return assignedClass; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(Document document) 
throws IOException { List> assignedClasses = assignNormClasses(document); @@ -98,9 +92,6 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi return assignedClasses; } - /** - * {@inheritDoc} - */ @Override public List> getClasses(Document document, int max) throws IOException { List> assignedClasses = assignNormClasses(document); diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/NearestFuzzyQuery.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/NearestFuzzyQuery.java index d4a26341560..308dcdc84d3 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/utils/NearestFuzzyQuery.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/NearestFuzzyQuery.java @@ -29,7 +29,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause; @@ -210,20 +210,20 @@ public class NearestFuzzyQuery extends Query { } private Query newTermQuery(IndexReader reader, Term term) throws IOException { - // we build an artificial TermContext that will give an overall df and ttf + // we build an artificial TermStates that will give an overall df and ttf // equal to 1 - TermContext context = new TermContext(reader.getContext()); + TermStates termStates = new TermStates(reader.getContext()); for (LeafReaderContext leafContext : reader.leaves()) { Terms terms = leafContext.reader().terms(term.field()); if (terms != null) { TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(term.bytes())) { - int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1 - context.register(termsEnum.termState(), leafContext.ord, freq, freq); + int freq = 1 - termStates.docFreq(); // we want the total df and ttf to be 1 + termStates.register(termsEnum.termState(), leafContext.ord, freq, freq); } } } - return new TermQuery(term, context); + return new TermQuery(term, termStates); } @Override diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 663e733f6b0..f6f4da3d769 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -1309,7 +1309,8 @@ ant test "-Dtests.method=*esi*" ant test -Dtests.seed=DEADBEEF # Repeats _all_ tests of ClassName N times. Every test repetition -# will have a different seed. +# will have a different seed. NOTE: does not reinitialize +# between repetitions, use only for idempotent tests. ant test -Dtests.iters=N -Dtestcase=ClassName # Repeats _all_ tests of ClassName N times. Every test repetition diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index 04101246460..50d1f9fb5d0 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -54,6 +54,8 @@ public final class StandardTokenizer extends Tokenizer { public static final int KATAKANA = 5; /** Hangul token type */ public static final int HANGUL = 6; + /** Emoji token type. 
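 * Illustrative sketch (not from this patch): the constant added below indexes
 * into TOKEN_TYPES, so the int and string forms of the new type map to each
 * other like this (the "<EMOJI>" literal is inferred; the angle-bracketed
 * type strings were stripped from the hunk below):
 * <pre>
 * String name = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI]; // "<EMOJI>"
 * </pre>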
*/ + public static final int EMOJI = 7; /** String token types that correspond to token type int constants */ public static final String [] TOKEN_TYPES = new String [] { @@ -63,7 +65,8 @@ public final class StandardTokenizer extends Tokenizer { "", "", "", - "" + "", + "" }; /** Absolute maximum sized token */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java index 19e56a40a04..7521763f330 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java @@ -103,11 +103,8 @@ final class IntersectTermsEnum extends TermsEnum { arcs[arcIdx] = new FST.Arc<>(); } - if (fr.index == null) { - fstReader = null; - } else { - fstReader = fr.index.getBytesReader(); - } + + fstReader = fr.index.getBytesReader(); // TODO: if the automaton is "smallish" we really // should use the terms index to seek at least to diff --git a/lucene/core/src/java/org/apache/lucene/index/TermContext.java b/lucene/core/src/java/org/apache/lucene/index/TermStates.java similarity index 63% rename from lucene/core/src/java/org/apache/lucene/index/TermContext.java rename to lucene/core/src/java/org/apache/lucene/index/TermStates.java index 3ba8dd9d848..4bb83fe4e8f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermContext.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermStates.java @@ -17,34 +17,37 @@ package org.apache.lucene.index; -import org.apache.lucene.util.BytesRef; - import java.io.IOException; import java.util.Arrays; /** * Maintains a {@link IndexReader} {@link TermState} view over * {@link IndexReader} instances containing a single term. The - * {@link TermContext} doesn't track if the given {@link TermState} + * {@link TermStates} doesn't track if the given {@link TermState} * objects are valid, neither if the {@link TermState} instances refer to the * same terms in the associated readers. * * @lucene.experimental */ -public final class TermContext { +public final class TermStates { + + private static final TermState EMPTY_TERMSTATE = new TermState() { + @Override + public void copyFrom(TermState other) { + + } + }; // Important: do NOT keep hard references to index readers private final Object topReaderContextIdentity; private final TermState[] states; + private final Term term; // null if stats are to be used private int docFreq; private long totalTermFreq; //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG; - /** - * Creates an empty {@link TermContext} from a {@link IndexReaderContext} - */ - public TermContext(IndexReaderContext context) { + private TermStates(Term term, IndexReaderContext context) { assert context != null && context.isTopLevel; topReaderContextIdentity = context.identity; docFreq = 0; @@ -56,10 +59,18 @@ public final class TermContext { len = context.leaves().size(); } states = new TermState[len]; + this.term = term; } /** - * Expert: Return whether this {@link TermContext} was built for the given + * Creates an empty {@link TermStates} from a {@link IndexReaderContext} + */ + public TermStates(IndexReaderContext context) { + this(null, context); + } + + /** + * Expert: Return whether this {@link TermStates} was built for the given * {@link IndexReaderContext}. This is typically used for assertions. 
* @lucene.internal */ @@ -68,35 +79,35 @@ public final class TermContext { } /** - * Creates a {@link TermContext} with an initial {@link TermState}, + * Creates a {@link TermStates} with an initial {@link TermState}, * {@link IndexReader} pair. */ - public TermContext(IndexReaderContext context, TermState state, int ord, int docFreq, long totalTermFreq) { - this(context); + public TermStates(IndexReaderContext context, TermState state, int ord, int docFreq, long totalTermFreq) { + this(null, context); register(state, ord, docFreq, totalTermFreq); } /** - * Creates a {@link TermContext} from a top-level {@link IndexReaderContext} and the + * Creates a {@link TermStates} from a top-level {@link IndexReaderContext} and the * given {@link Term}. This method will lookup the given term in all context's leaf readers - * and register each of the readers containing the term in the returned {@link TermContext} + * and register each of the readers containing the term in the returned {@link TermStates} * using the leaf reader's ordinal. *

* Note: the given context must be a top-level context. + * + * @param needsStats if {@code true} then all leaf contexts will be visited up-front to + * collect term statistics. Otherwise, the {@link TermState} objects + * will be built only when requested */ - public static TermContext build(IndexReaderContext context, Term term) + public static TermStates build(IndexReaderContext context, Term term, boolean needsStats) throws IOException { assert context != null && context.isTopLevel; - final String field = term.field(); - final BytesRef bytes = term.bytes(); - final TermContext perReaderTermState = new TermContext(context); - //if (DEBUG) System.out.println("prts.build term=" + term); - for (final LeafReaderContext ctx : context.leaves()) { - //if (DEBUG) System.out.println(" r=" + leaves[i].reader); - final Terms terms = ctx.reader().terms(field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - if (termsEnum.seekExact(bytes)) { + final TermStates perReaderTermState = new TermStates(needsStats ? null : term, context); + if (needsStats) { + for (final LeafReaderContext ctx : context.leaves()) { + //if (DEBUG) System.out.println(" r=" + leaves[i].reader); + TermsEnum termsEnum = loadTermsEnum(ctx, term); + if (termsEnum != null) { final TermState termState = termsEnum.termState(); //if (DEBUG) System.out.println(" found"); perReaderTermState.register(termState, ctx.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); @@ -106,8 +117,19 @@ public final class TermContext { return perReaderTermState; } + private static TermsEnum loadTermsEnum(LeafReaderContext ctx, Term term) throws IOException { + final Terms terms = ctx.reader().terms(term.field()); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(); + if (termsEnum.seekExact(term.bytes())) { + return termsEnum; + } + } + return null; + } + /** - * Clears the {@link TermContext} internal state and removes all + * Clears the {@link TermStates} internal state and removes all * registered {@link TermState}s */ public void clear() { @@ -149,17 +171,25 @@ public final class TermContext { } /** - * Returns the {@link TermState} for an leaf ordinal or null if no - * {@link TermState} for the ordinal was registered. + * Returns the {@link TermState} for a leaf reader context or null if no + * {@link TermState} for the context was registered. * - * @param ord - * the readers leaf ordinal to get the {@link TermState} for. + * @param ctx + * the {@link LeafReaderContext} to get the {@link TermState} for. * @return the {@link TermState} for the given readers ord or null if no * {@link TermState} for the reader was registered */ - public TermState get(int ord) { - assert ord >= 0 && ord < states.length; - return states[ord]; + public TermState get(LeafReaderContext ctx) throws IOException { + assert ctx.ord >= 0 && ctx.ord < states.length; + if (term == null) + return states[ctx.ord]; + if (this.states[ctx.ord] == null) { + TermsEnum te = loadTermsEnum(ctx, term); + this.states[ctx.ord] = te == null ? EMPTY_TERMSTATE : te.termState(); + } + if (this.states[ctx.ord] == EMPTY_TERMSTATE) + return null; + return this.states[ctx.ord]; } /** @@ -169,6 +199,9 @@ public final class TermContext { * instances passed to {@link #register(TermState, int, int, long)}. 
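 * Illustrative sketch (not from this patch): built with needsStats=false,
 * per-leaf states are resolved lazily on get(ctx), and the aggregate
 * accessors below fail fast:
 * <pre>
 * TermStates ts = TermStates.build(searcher.getTopReaderContext(), term, false);
 * for (LeafReaderContext ctx : searcher.getTopReaderContext().leaves()) {
 *   TermState state = ts.get(ctx); // looked up on first access for this leaf
 * }
 * // ts.docFreq() here would throw IllegalStateException
 * </pre>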
*/ public int docFreq() { + if (term != null) { + throw new IllegalStateException("Cannot call docFreq() when needsStats=false"); + } return docFreq; } @@ -179,19 +212,23 @@ public final class TermContext { * instances passed to {@link #register(TermState, int, int, long)}. */ public long totalTermFreq() { + if (term != null) { + throw new IllegalStateException("Cannot call totalTermFreq() when needsStats=false"); + } return totalTermFreq; } @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("TermContext\n"); + sb.append("TermStates\n"); for(TermState termState : states) { sb.append(" state="); - sb.append(termState.toString()); + sb.append(termState); sb.append('\n'); } return sb.toString(); } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java index 219d4535827..cca667575a4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java @@ -25,7 +25,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.util.ArrayUtil; @@ -53,7 +53,7 @@ public final class BlendedTermQuery extends Query { private int numTerms = 0; private Term[] terms = new Term[0]; private float[] boosts = new float[0]; - private TermContext[] contexts = new TermContext[0]; + private TermStates[] contexts = new TermStates[0]; private RewriteMethod rewriteMethod = DISJUNCTION_MAX_REWRITE; /** Sole constructor. */ @@ -82,10 +82,10 @@ public final class BlendedTermQuery extends Query { /** * Expert: Add a {@link Term} with the provided boost and context. - * This method is useful if you already have a {@link TermContext} + * This method is useful if you already have a {@link TermStates} * object constructed for the given term. 
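 * Illustrative sketch (not from this patch): typical Builder usage; at
 * rewrite time each term's statistics are replaced by blended ones
 * (df = max over terms, ttf = sum over terms), so the variants score as if
 * they were a single term:
 * <pre>
 * BlendedTermQuery q = new BlendedTermQuery.Builder()
 *     .add(new Term("body", "quick"))
 *     .add(new Term("body", "fast"))
 *     .build();
 * </pre>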
*/ - public Builder add(Term term, float boost, TermContext context) { + public Builder add(Term term, float boost, TermStates context) { if (numTerms >= BooleanQuery.getMaxClauseCount()) { throw new BooleanQuery.TooManyClauses(); } @@ -184,10 +184,10 @@ public final class BlendedTermQuery extends Query { private final Term[] terms; private final float[] boosts; - private final TermContext[] contexts; + private final TermStates[] contexts; private final RewriteMethod rewriteMethod; - private BlendedTermQuery(Term[] terms, float[] boosts, TermContext[] contexts, + private BlendedTermQuery(Term[] terms, float[] boosts, TermStates[] contexts, RewriteMethod rewriteMethod) { assert terms.length == boosts.length; assert terms.length == contexts.length; @@ -205,7 +205,7 @@ public final class BlendedTermQuery extends Query { terms[i] = terms[j]; terms[j] = tmpTerm; - TermContext tmpContext = contexts[i]; + TermStates tmpContext = contexts[i]; contexts[i] = contexts[j]; contexts[j] = tmpContext; @@ -263,10 +263,10 @@ public final class BlendedTermQuery extends Query { @Override public final Query rewrite(IndexReader reader) throws IOException { - final TermContext[] contexts = Arrays.copyOf(this.contexts, this.contexts.length); + final TermStates[] contexts = Arrays.copyOf(this.contexts, this.contexts.length); for (int i = 0; i < contexts.length; ++i) { if (contexts[i] == null || contexts[i].wasBuiltFor(reader.getContext()) == false) { - contexts[i] = TermContext.build(reader.getContext(), terms[i]); + contexts[i] = TermStates.build(reader.getContext(), terms[i], true); } } @@ -275,7 +275,7 @@ public final class BlendedTermQuery extends Query { // ttf will be the sum of all total term freqs int df = 0; long ttf = 0; - for (TermContext ctx : contexts) { + for (TermStates ctx : contexts) { df = Math.max(df, ctx.docFreq()); ttf += ctx.totalTermFreq(); } @@ -294,8 +294,8 @@ public final class BlendedTermQuery extends Query { return rewriteMethod.rewrite(termQueries); } - private static TermContext adjustFrequencies(IndexReaderContext readerContext, - TermContext ctx, int artificialDf, long artificialTtf) { + private static TermStates adjustFrequencies(IndexReaderContext readerContext, + TermStates ctx, int artificialDf, long artificialTtf) throws IOException { List leaves = readerContext.leaves(); final int len; if (leaves == null) { @@ -303,9 +303,9 @@ public final class BlendedTermQuery extends Query { } else { len = leaves.size(); } - TermContext newCtx = new TermContext(readerContext); + TermStates newCtx = new TermStates(readerContext); for (int i = 0; i < len; ++i) { - TermState termState = ctx.get(i); + TermState termState = ctx.get(leaves.get(i)); if (termState == null) { continue; } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 900a77f076f..fffdd09093f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -48,7 +48,7 @@ final class BooleanWeight extends Weight { super(query); this.query = query; this.scoreMode = scoreMode; - this.similarity = searcher.getSimilarity(scoreMode.needsScores()); + this.similarity = searcher.getSimilarity(); weights = new ArrayList<>(); for (BooleanClause c : query) { Weight w = searcher.createWeight(c.getQuery(), c.isScoring() ? 
scoreMode : ScoreMode.COMPLETE_NO_SCORES, boost); diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index f4a7ca7be10..e2d6d8047f3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.search.similarities.Similarity; final class ExactPhraseScorer extends Scorer { @@ -42,13 +41,13 @@ final class ExactPhraseScorer extends Scorer { private int freq; - private final Similarity.SimScorer docScorer; + private final LeafSimScorer docScorer; private final boolean needsScores, needsTotalHitCount; private float matchCost; private float minCompetitiveScore; ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SimScorer docScorer, ScoreMode scoreMode, + LeafSimScorer docScorer, ScoreMode scoreMode, float matchCost) throws IOException { super(weight); this.docScorer = docScorer; @@ -123,7 +122,7 @@ final class ExactPhraseScorer extends Scorer { @Override public float maxScore() { - return docScorer.maxScore(Integer.MAX_VALUE); + return docScorer.maxScore(); } /** Advance the given pos enum to the first doc on or after {@code target}. diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 5ee815cb888..da5ed036ddc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -32,7 +32,6 @@ import java.util.concurrent.Future; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; @@ -40,7 +39,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; @@ -75,36 +74,6 @@ import org.apache.lucene.util.ThreadInterruptedException; */ public class IndexSearcher { - /** A search-time {@link Similarity} that does not make use of scoring factors - * and may be used when scores are not needed. */ - private static final Similarity NON_SCORING_SIMILARITY = new Similarity() { - - @Override - public long computeNorm(FieldInvertState state) { - throw new UnsupportedOperationException("This Similarity may only be used for searching, not indexing"); - } - - @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { - return new SimWeight() {}; - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return new SimScorer() { - @Override - public float score(int doc, float freq) { - return 0f; - } - @Override - public float maxScore(float maxFreq) { - return 0f; - } - }; - } - - }; - private static QueryCache DEFAULT_QUERY_CACHE; private static QueryCachingPolicy DEFAULT_CACHING_POLICY = new UsageTrackingQueryCachingPolicy(); static { @@ -136,7 +105,7 @@ public class IndexSearcher { * Expert: returns a default Similarity instance. * In general, this method is only called to initialize searchers and writers. * User code and query implementations should respect - * {@link IndexSearcher#getSimilarity(boolean)}. + * {@link IndexSearcher#getSimilarity()}. * @lucene.internal */ public static Similarity getDefaultSimilarity() { @@ -329,15 +298,11 @@ public class IndexSearcher { this.similarity = similarity; } - /** Expert: Get the {@link Similarity} to use to compute scores. When - * {@code needsScores} is {@code false}, this method will return a simple - * {@link Similarity} that does not leverage scoring factors such as norms. - * When {@code needsScores} is {@code true}, this returns the + /** Expert: Get the {@link Similarity} to use to compute scores. This returns the * {@link Similarity} that has been set through {@link #setSimilarity(Similarity)} - * or the {@link #getDefaultSimilarity()} default {@link Similarity} if none - * has been set explicitly. */ - public Similarity getSimilarity(boolean needsScores) { - return needsScores ? similarity : NON_SCORING_SIMILARITY; + * or the default {@link Similarity} if none has been set explicitly. */ + public Similarity getSimilarity() { + return similarity; } /** @@ -774,7 +739,7 @@ public class IndexSearcher { * across a distributed collection. * @lucene.experimental */ - public TermStatistics termStatistics(Term term, TermContext context) throws IOException { + public TermStatistics termStatistics(Term term, TermStates context) throws IOException { if (context.docFreq() == 0) { return null; } else { diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java new file mode 100644 index 00000000000..5de82951d22 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.similarities.Similarity.SimScorer; + +/** + * {@link SimScorer} on a specific {@link LeafReader}. 
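 * Illustrative sketch (not from this patch): how a Scorer is expected to use
 * this class. The per-field SimScorer computes score(freq, norm); this
 * wrapper resolves the norm for a concrete doc id from the leaf's norm doc
 * values:
 * <pre>
 * SimScorer sim = similarity.scorer(boost, collectionStats, termStats);
 * LeafSimScorer leaf = new LeafSimScorer(sim, context.reader(), true, Integer.MAX_VALUE);
 * float score = leaf.score(docID, freq); // doc ids must be non-decreasing
 * </pre>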
+ */ +public final class LeafSimScorer { + + private final SimScorer scorer; + private final NumericDocValues norms; + private final float maxScore; + + /** + * Sole constructor: Score documents of {@code reader} with {@code scorer}. + */ + public LeafSimScorer(SimScorer scorer, LeafReader reader, boolean needsScores, float maxFreq) throws IOException { + this.scorer = scorer; + norms = needsScores ? reader.getNormValues(scorer.getField()) : null; + maxScore = needsScores ? scorer.score(maxFreq, 1) : Float.MAX_VALUE; + } + + private long getNormValue(int doc) throws IOException { + if (norms != null) { + boolean found = norms.advanceExact(doc); + assert found; + return norms.longValue(); + } else { + return 1L; // default norm + } + } + + /** Score the provided document assuming the given term document frequency. + * This method must be called on non-decreasing sequences of doc ids. + * @see SimScorer#score(float, long) */ + public float score(int doc, float freq) throws IOException { + return scorer.score(freq, getNormValue(doc)); + } + + /** Explain the score for the provided document assuming the given term document frequency. + * This method must be called on non-decreasing sequences of doc ids. + * @see SimScorer#explain(Explanation, long) */ + public Explanation explain(int doc, Explanation freqExpl) throws IOException { + return scorer.explain(freqExpl, getNormValue(doc)); + } + + /** + * Return an upper bound of the score. + */ + public float maxScore() { + return maxScore; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 34361a728cd..65d6631e9a7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -18,19 +18,26 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -183,36 +190,38 @@ public class MultiPhraseQuery extends Query { private class MultiPhraseWeight extends Weight { private final Similarity similarity; - private final Similarity.SimWeight stats; - private final Map termContexts = new HashMap<>(); + private final Similarity.SimScorer stats; + private final Map termStates = new HashMap<>(); private final ScoreMode scoreMode; public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { super(MultiPhraseQuery.this); this.scoreMode = 
scoreMode; - this.similarity = searcher.getSimilarity(scoreMode.needsScores()); + this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); // compute idf ArrayList allTermStats = new ArrayList<>(); for(final Term[] terms: termArrays) { for (Term term: terms) { - TermContext termContext = termContexts.get(term); - if (termContext == null) { - termContext = TermContext.build(context, term); - termContexts.put(term, termContext); + TermStates ts = termStates.get(term); + if (ts == null) { + ts = TermStates.build(context, term, scoreMode.needsScores()); + termStates.put(term, ts); } - TermStatistics termStatistics = searcher.termStatistics(term, termContext); - if (termStatistics != null) { - allTermStats.add(termStatistics); + if (scoreMode.needsScores()) { + TermStatistics termStatistics = searcher.termStatistics(term, ts); + if (termStatistics != null) { + allTermStats.add(termStatistics); + } } } } if (allTermStats.isEmpty()) { stats = null; // none of the terms were found, we won't use sim at all } else { - stats = similarity.computeWeight( + stats = similarity.scorer( boost, searcher.collectionStatistics(field), allTermStats.toArray(new TermStatistics[allTermStats.size()])); @@ -253,7 +262,7 @@ public class MultiPhraseQuery extends Query { List postings = new ArrayList<>(); for (Term term : terms) { - TermState termState = termContexts.get(term).get(context.ord); + TermState termState = termStates.get(term).get(context); if (termState != null) { termsEnum.seekExact(term.bytes(), termState); postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS)); @@ -282,11 +291,11 @@ public class MultiPhraseQuery extends Query { if (slop == 0) { return new ExactPhraseScorer(this, postingsFreqs, - similarity.simScorer(stats, context), + new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { return new SloppyPhraseScorer(this, postingsFreqs, slop, - similarity.simScorer(stats, context), + new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } } @@ -303,7 +312,7 @@ public class MultiPhraseQuery extends Query { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { float freq = slop == 0 ? 
((ExactPhraseScorer)scorer).freq() : ((SloppyPhraseScorer)scorer).sloppyFreq(); - SimScorer docScorer = similarity.simScorer(stats, context); + LeafSimScorer docScorer = new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match( diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java index d0869d61b7d..636a7d6757a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -24,7 +24,7 @@ import org.apache.lucene.index.FilteredTermsEnum; // javadocs import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.SingleTermsEnum; // javadocs import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanQuery.Builder; @@ -166,7 +166,7 @@ public abstract class MultiTermQuery extends Query { } @Override - protected void addClause(BooleanQuery.Builder topLevel, Term term, int docCount, float boost, TermContext states) { + protected void addClause(BooleanQuery.Builder topLevel, Term term, int docCount, float boost, TermStates states) { final TermQuery tq = new TermQuery(term, states); topLevel.add(new BoostQuery(tq, boost), BooleanClause.Occur.SHOULD); } @@ -218,7 +218,7 @@ public abstract class MultiTermQuery extends Query { @Override protected void addClause(BlendedTermQuery.Builder topLevel, Term term, int docCount, - float boost, TermContext states) { + float boost, TermStates states) { topLevel.add(term, boost, states); } } @@ -262,7 +262,7 @@ public abstract class MultiTermQuery extends Query { } @Override - protected void addClause(BooleanQuery.Builder topLevel, Term term, int docFreq, float boost, TermContext states) { + protected void addClause(BooleanQuery.Builder topLevel, Term term, int docFreq, float boost, TermStates states) { final Query q = new ConstantScoreQuery(new TermQuery(term, states)); topLevel.add(new BoostQuery(q, boost), BooleanClause.Occur.SHOULD); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index f82316d9255..3a46b96411c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -25,7 +25,7 @@ import java.util.Objects; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -148,9 +148,9 @@ final class MultiTermQueryConstantScoreWrapper extends // build a boolean query BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (TermAndState t : collectedTerms) { - final TermContext termContext = new TermContext(searcher.getTopReaderContext()); - termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq); - bq.add(new 
TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD); + final TermStates termStates = new TermStates(searcher.getTopReaderContext()); + termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq); + bq.add(new TermQuery(new Term(query.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 3d359b4f5b3..ff1538820d6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -32,12 +32,11 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -352,9 +351,9 @@ public class PhraseQuery extends Query { private class PhraseWeight extends Weight { private final Similarity similarity; - private final Similarity.SimWeight stats; + private final Similarity.SimScorer stats; private final ScoreMode scoreMode; - private transient TermContext states[]; + private transient TermStates states[]; public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { @@ -366,21 +365,23 @@ public class PhraseQuery extends Query { throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first"); } this.scoreMode = scoreMode; - this.similarity = searcher.getSimilarity(scoreMode.needsScores()); + this.similarity = searcher.getSimilarity(); final IndexReaderContext context = searcher.getTopReaderContext(); - states = new TermContext[terms.length]; + states = new TermStates[terms.length]; TermStatistics termStats[] = new TermStatistics[terms.length]; int termUpTo = 0; for (int i = 0; i < terms.length; i++) { final Term term = terms[i]; - states[i] = TermContext.build(context, term); - TermStatistics termStatistics = searcher.termStatistics(term, states[i]); - if (termStatistics != null) { - termStats[termUpTo++] = termStatistics; + states[i] = TermStates.build(context, term, scoreMode.needsScores()); + if (scoreMode.needsScores()) { + TermStatistics termStatistics = searcher.termStatistics(term, states[i]); + if (termStatistics != null) { + termStats[termUpTo++] = termStatistics; + } } } if (termUpTo > 0) { - stats = similarity.computeWeight(boost, searcher.collectionStatistics(field), Arrays.copyOf(termStats, termUpTo)); + stats = similarity.scorer(boost, searcher.collectionStatistics(field), Arrays.copyOf(termStats, termUpTo)); } else { stats = null; // no terms at all, we won't use similarity } @@ -415,7 +416,7 @@ public class PhraseQuery extends Query { for (int i = 0; i < terms.length; i++) { final Term t = terms[i]; - final TermState state = states[i].get(context.ord); + final TermState state = states[i].get(context); if (state == null) { /* term doesnt exist in this segment */ assert termNotInReader(reader, t): "no termstate found 
but term exists in reader"; return null; @@ -433,11 +434,11 @@ public class PhraseQuery extends Query { if (slop == 0) { // optimize exact case return new ExactPhraseScorer(this, postingsFreqs, - similarity.simScorer(stats, context), + new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE), scoreMode, totalMatchCost); } else { return new SloppyPhraseScorer(this, postingsFreqs, slop, - similarity.simScorer(stats, context), + new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE), scoreMode.needsScores(), totalMatchCost); } } @@ -459,7 +460,7 @@ public class PhraseQuery extends Query { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { float freq = slop == 0 ? ((ExactPhraseScorer)scorer).freq() : ((SloppyPhraseScorer)scorer).sloppyFreq(); - SimScorer docScorer = similarity.simScorer(stats, context); + LeafSimScorer docScorer = new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match( diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java index 6f54a866b1d..9d02b35e961 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java @@ -20,7 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; @@ -64,7 +64,7 @@ public abstract class ScoringRewrite extends TermCollectingRewrite { @Override protected void addClause(BooleanQuery.Builder topLevel, Term term, int docCount, - float boost, TermContext states) { + float boost, TermStates states) { final TermQuery tq = new TermQuery(term, states); topLevel.add(new BoostQuery(tq, boost), BooleanClause.Occur.SHOULD); } @@ -109,7 +109,7 @@ public abstract class ScoringRewrite extends TermCollectingRewrite { if (size > 0) { final int sort[] = col.terms.sort(); final float[] boost = col.array.boost; - final TermContext[] termStates = col.array.termState; + final TermStates[] termStates = col.array.termState; for (int i = 0; i < size; i++) { final int pos = sort[i]; final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef())); @@ -146,7 +146,7 @@ public abstract class ScoringRewrite extends TermCollectingRewrite { } else { // new entry: we populate the entry initially array.boost[e] = boostAtt.getBoost(); - array.termState[e] = new TermContext(topReaderContext, state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); + array.termState[e] = new TermStates(topReaderContext, state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); ScoringRewrite.this.checkMaxClauseCount(terms.size()); } return true; @@ -156,7 +156,7 @@ public abstract class ScoringRewrite extends TermCollectingRewrite { /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ static final class TermFreqBoostByteStart extends DirectBytesStartArray { float[] boost; - TermContext[] termState; + TermStates[] termState; public TermFreqBoostByteStart(int 
initSize) { super(initSize); @@ -166,7 +166,7 @@ public abstract class ScoringRewrite extends TermCollectingRewrite { public int[] init() { final int[] ord = super.init(); boost = new float[ArrayUtil.oversize(ord.length, Float.BYTES)]; - termState = new TermContext[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + termState = new TermStates[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @@ -176,7 +176,7 @@ public abstract class ScoringRewrite extends TermCollectingRewrite { final int[] ord = super.grow(); boost = ArrayUtil.grow(boost, ord.length); if (termState.length < ord.length) { - TermContext[] tmpTermState = new TermContext[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + TermStates[] tmpTermState = new TermStates[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; System.arraycopy(termState, 0, tmpTermState, 0, termState.length); termState = tmpTermState; } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index dc5490a5342..60b77c5c4ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -26,7 +26,6 @@ import java.util.HashSet; import java.util.LinkedHashMap; import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.FixedBitSet; final class SloppyPhraseScorer extends Scorer { @@ -36,7 +35,7 @@ final class SloppyPhraseScorer extends Scorer { private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). 
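// The type swap below follows the pattern applied throughout this patch: the Weight
// now builds a single, segment-independent Similarity.SimScorer up front, and each
// per-leaf scorer wraps it in a LeafSimScorer that resolves norms from the segment
// reader. A minimal sketch, reusing the calls introduced in the hunks above:
//
//   SimScorer stats = similarity.scorer(boost, collectionStats, termStats);
//   LeafSimScorer docScorer =
//       new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE);
//   float score = docScorer.score(docID, freq); // looks up the norm for docID internally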
- private final Similarity.SimScorer docScorer; + private final LeafSimScorer docScorer; private final int slop; private final int numPostings; @@ -55,7 +54,7 @@ final class SloppyPhraseScorer extends Scorer { private final float matchCost; SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - int slop, Similarity.SimScorer docScorer, boolean needsScores, + int slop, LeafSimScorer docScorer, boolean needsScores, float matchCost) { super(weight); this.docScorer = docScorer; @@ -558,7 +557,7 @@ final class SloppyPhraseScorer extends Scorer { @Override public float maxScore() { - return docScorer.maxScore(Float.POSITIVE_INFINITY); + return docScorer.maxScore(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index ce9d6e073b2..d9335cfe28c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -31,11 +31,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.BytesRef; /** @@ -127,28 +126,28 @@ public final class SynonymQuery extends Query { } class SynonymWeight extends Weight { - private final TermContext termContexts[]; + private final TermStates termStates[]; private final Similarity similarity; - private final Similarity.SimWeight simWeight; + private final Similarity.SimScorer simWeight; SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException { super(query); CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field()); long docFreq = 0; long totalTermFreq = 0; - termContexts = new TermContext[terms.length]; - for (int i = 0; i < termContexts.length; i++) { - termContexts[i] = TermContext.build(searcher.getTopReaderContext(), terms[i]); - TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]); + termStates = new TermStates[terms.length]; + for (int i = 0; i < termStates.length; i++) { + termStates[i] = TermStates.build(searcher.getTopReaderContext(), terms[i], true); + TermStatistics termStats = searcher.termStatistics(terms[i], termStates[i]); if (termStats != null) { docFreq = Math.max(termStats.docFreq(), docFreq); totalTermFreq += termStats.totalTermFreq(); } } - this.similarity = searcher.getSimilarity(true); + this.similarity = searcher.getSimilarity(); if (docFreq > 0) { TermStatistics pseudoStats = new TermStatistics(new BytesRef("synonym pseudo-term"), docFreq, totalTermFreq); - this.simWeight = similarity.computeWeight(boost, collectionStats, pseudoStats); + this.simWeight = similarity.scorer(boost, collectionStats, pseudoStats); } else { this.simWeight = null; // no terms exist at all, we won't use similarity } @@ -175,7 +174,7 @@ public final class SynonymQuery extends Query { assert scorer instanceof TermScorer; freq = ((TermScorer)scorer).freq(); } - SimScorer docScorer = similarity.simScorer(simWeight, context); + LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), true, Float.MAX_VALUE); Explanation 
freqExplanation = Explanation.match(freq, "termFreq=" + freq); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match( @@ -190,7 +189,6 @@ public final class SynonymQuery extends Query { @Override public Scorer scorer(LeafReaderContext context) throws IOException { - Similarity.SimScorer simScorer = null; IndexOptions indexOptions = IndexOptions.NONE; if (terms.length > 0) { FieldInfo info = context.reader() @@ -202,21 +200,17 @@ public final class SynonymQuery extends Query { } // we use termscorers + disjunction as an impl detail List subScorers = new ArrayList<>(); - long maxFreq = 0; + long totalMaxFreq = 0; for (int i = 0; i < terms.length; i++) { - TermState state = termContexts[i].get(context.ord); + TermState state = termStates[i].get(context); if (state != null) { TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator(); termsEnum.seekExact(terms[i].bytes(), state); - - maxFreq += getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); - + long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); + totalMaxFreq += termMaxFreq; PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS); - // lazy init sim, in case no terms exist - if (simScorer == null) { - simScorer = similarity.simScorer(simWeight, context); - } - subScorers.add(new TermScorer(this, postings, simScorer, Float.POSITIVE_INFINITY)); + LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); + subScorers.add(new TermScorer(this, postings, simScorer)); } } if (subScorers.isEmpty()) { @@ -225,7 +219,8 @@ public final class SynonymQuery extends Query { // we must optimize this case (term not in segment), disjunctionscorer requires >= 2 subs return subScorers.get(0); } else { - return new SynonymScorer(simScorer, this, subScorers, maxFreq); + LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, totalMaxFreq); + return new SynonymScorer(simScorer, this, subScorers); } } @@ -248,13 +243,11 @@ public final class SynonymQuery extends Query { } static class SynonymScorer extends DisjunctionScorer { - private final Similarity.SimScorer similarity; - private final float maxFreq; + private final LeafSimScorer similarity; - SynonymScorer(Similarity.SimScorer similarity, Weight weight, List subScorers, float maxFreq) { + SynonymScorer(LeafSimScorer similarity, Weight weight, List subScorers) { super(weight, subScorers, true); this.similarity = similarity; - this.maxFreq = maxFreq; } @Override @@ -264,7 +257,7 @@ public final class SynonymQuery extends Query { @Override public float maxScore() { - return similarity.maxScore(maxFreq); + return similarity.maxScore(); } /** combines TF of all subs. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/TermCollectingRewrite.java b/lucene/core/src/java/org/apache/lucene/search/TermCollectingRewrite.java index fffa5a84fca..86bf34f02fc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermCollectingRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermCollectingRewrite.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.AttributeSource; @@ -43,7 +43,7 @@ abstract class TermCollectingRewrite extends MultiTermQuery.RewriteMethod { addClause(topLevel, term, docCount, boost, null); } - protected abstract void addClause(B topLevel, Term term, int docCount, float boost, TermContext states) throws IOException; + protected abstract void addClause(B topLevel, Term term, int docCount, float boost, TermStates states) throws IOException; final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index 4049e1052c4..a8bf5b0679c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -33,7 +33,7 @@ import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.PrefixCodedTerms.TermIterator; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -268,9 +268,9 @@ public class TermInSetQuery extends Query implements Accountable { assert builder == null; BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (TermAndState t : matchingTerms) { - final TermContext termContext = new TermContext(searcher.getTopReaderContext()); - termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq); - bq.add(new TermQuery(new Term(t.field, t.term), termContext), Occur.SHOULD); + final TermStates termStates = new TermStates(searcher.getTopReaderContext()); + termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq); + bq.add(new TermQuery(new Term(t.field, t.term), termStates), Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 925fe93f3c5..d629acd89a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -28,12 +28,10 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.Terms; import 
org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SimScorer; /** * A Query that matches documents containing a term. This may be combined with @@ -42,23 +40,23 @@ import org.apache.lucene.search.similarities.Similarity.SimScorer; public class TermQuery extends Query { private final Term term; - private final TermContext perReaderTermState; + private final TermStates perReaderTermState; final class TermWeight extends Weight { private final Similarity similarity; - private final Similarity.SimWeight stats; - private final TermContext termStates; + private final Similarity.SimScorer simScorer; + private final TermStates termStates; private final boolean needsScores; public TermWeight(IndexSearcher searcher, boolean needsScores, - float boost, TermContext termStates) throws IOException { + float boost, TermStates termStates) throws IOException { super(TermQuery.this); if (needsScores && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); } this.needsScores = needsScores; this.termStates = termStates; - this.similarity = searcher.getSimilarity(needsScores); + this.similarity = searcher.getSimilarity(); final CollectionStatistics collectionStats; final TermStatistics termStats; @@ -72,9 +70,9 @@ public class TermQuery extends Query { } if (termStats == null) { - this.stats = null; // term doesn't exist in any segment, we won't use similarity at all + this.simScorer = null; // term doesn't exist in any segment, we won't use similarity at all } else { - this.stats = similarity.computeWeight(boost, collectionStats, termStats); + this.simScorer = similarity.scorer(boost, collectionStats, termStats); } } @@ -101,8 +99,8 @@ public class TermQuery extends Query { .getIndexOptions(); PostingsEnum docs = termsEnum.postings(null, needsScores ? 
PostingsEnum.FREQS : PostingsEnum.NONE); assert docs != null; - return new TermScorer(this, docs, similarity.simScorer(stats, context), - getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq())); + float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); + return new TermScorer(this, docs, new LeafSimScorer(simScorer, context.reader(), needsScores, maxFreq)); } private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { @@ -126,30 +124,17 @@ public class TermQuery extends Query { * the term does not exist in the given context */ private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException { - if (termStates != null) { - // TermQuery either used as a Query or the term states have been provided at construction time - assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); - final TermState state = termStates.get(context.ord); - if (state == null) { // term is not present in that reader - assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; - return null; - } - final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(); - termsEnum.seekExact(term.bytes(), state); - return termsEnum; - } else { - // TermQuery used as a filter, so the term states have not been built up front - Terms terms = context.reader().terms(term.field()); - if (terms == null) { - return null; - } - final TermsEnum termsEnum = terms.iterator(); - if (termsEnum.seekExact(term.bytes())) { - return termsEnum; - } else { - return null; - } + assert termStates != null; + assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : + "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + final TermState state = termStates.get(context); + if (state == null) { // term is not present in that reader + assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; + return null; } + final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(); + termsEnum.seekExact(term.bytes(), state); + return termsEnum; } private boolean termNotInReader(LeafReader reader, Term term) throws IOException { @@ -166,7 +151,7 @@ public class TermQuery extends Query { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - SimScorer docScorer = similarity.simScorer(stats, context); + LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), true, Integer.MAX_VALUE); Explanation freqExplanation = Explanation.match(freq, "freq, occurrences of term within document"); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match( @@ -190,7 +175,7 @@ public class TermQuery extends Query { * Expert: constructs a TermQuery that will use the provided docFreq instead * of looking up the docFreq against the searcher. 
*/ - public TermQuery(Term t, TermContext states) { + public TermQuery(Term t, TermStates states) { assert states != null; term = Objects.requireNonNull(t); perReaderTermState = Objects.requireNonNull(states); @@ -204,18 +189,10 @@ public class TermQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); - final TermContext termState; + final TermStates termState; if (perReaderTermState == null || perReaderTermState.wasBuiltFor(context) == false) { - if (scoreMode.needsScores()) { - // make TermQuery single-pass if we don't have a PRTS or if the context - // differs! - termState = TermContext.build(context, term); - } else { - // do not compute the term state, this will help save seeks in the terms - // dict on segments that have a cache entry for this query - termState = null; - } + termState = TermStates.build(context, term, scoreMode.needsScores()); } else { // PRTS was pre-build for this IS termState = this.perReaderTermState; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index a4aeb04eefb..653a60edc9f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -20,14 +20,12 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.search.similarities.Similarity; /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { private final PostingsEnum postingsEnum; - private final Similarity.SimScorer docScorer; - private final float maxFreq; + private final LeafSimScorer docScorer; /** * Construct a TermScorer. @@ -39,14 +37,11 @@ final class TermScorer extends Scorer { * @param docScorer * The Similarity.SimScorer implementation * to be used for score computations. - * @param maxFreq - * An upper bound of the term frequency of the searched term in any document. */ - TermScorer(Weight weight, PostingsEnum td, Similarity.SimScorer docScorer, float maxFreq) { + TermScorer(Weight weight, PostingsEnum td, LeafSimScorer docScorer) { super(weight); this.docScorer = docScorer; this.postingsEnum = td; - this.maxFreq = maxFreq; } @Override @@ -71,7 +66,7 @@ final class TermScorer extends Scorer { @Override public float maxScore() { - return docScorer.maxScore(maxFreq); + return docScorer.maxScore(); } /** Returns a string representation of this TermScorer. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java b/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java index b75836e16b7..dea4b0e4cbf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java @@ -25,7 +25,7 @@ import java.util.PriorityQueue; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ArrayUtil; @@ -82,7 +82,7 @@ public abstract class TopTermsRewrite extends TermCollectingRewrite { // lazy init the initial ScoreTerm because comparator is not known on ctor: if (st == null) - st = new ScoreTerm(new TermContext(topReaderContext)); + st = new ScoreTerm(new TermStates(topReaderContext)); boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); } @@ -139,7 +139,7 @@ public abstract class TopTermsRewrite extends TermCollectingRewrite { visitedTerms.remove(st.bytes.get()); st.termState.clear(); // reset the termstate! } else { - st = new ScoreTerm(new TermContext(topReaderContext)); + st = new ScoreTerm(new TermStates(topReaderContext)); } assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize"; // set maxBoostAtt with values to help FuzzyTermsEnum to optimize @@ -193,8 +193,8 @@ public abstract class TopTermsRewrite extends TermCollectingRewrite { static final class ScoreTerm implements Comparable { public final BytesRefBuilder bytes = new BytesRefBuilder(); public float boost; - public final TermContext termState; - public ScoreTerm(TermContext termState) { + public final TermStates termState; + public ScoreTerm(TermStates termState) { this.termState = termState; } diff --git a/lucene/core/src/java/org/apache/lucene/search/package-info.java b/lucene/core/src/java/org/apache/lucene/search/package-info.java index 69c5c2a053e..7e53da46620 100644 --- a/lucene/core/src/java/org/apache/lucene/search/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/search/package-info.java @@ -378,7 +378,7 @@ * scored the way it was. * Typically a weight such as TermWeight * that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will make use of the Similarity's implementation: - * {@link org.apache.lucene.search.similarities.Similarity.SimScorer#explain(int, Explanation) SimScorer#explain(int doc, Explanation freq)}. + * {@link org.apache.lucene.search.similarities.Similarity.SimScorer#explain(Explanation, long) SimScorer#explain(Explanation freq, long norm)}. * * * @@ -402,7 +402,7 @@ * {@link org.apache.lucene.search.Scorer#score score()} — Return the score of the * current document. This value can be determined in any appropriate way for an application. For instance, the * {@link org.apache.lucene.search.TermScorer TermScorer} simply defers to the configured Similarity: - * {@link org.apache.lucene.search.similarities.Similarity.SimScorer#score(int, float) SimScorer.score(int doc, float freq)}. + * {@link org.apache.lucene.search.similarities.Similarity.SimScorer#score(float, long) SimScorer.score(float freq, long norm)}. * *
</li> <li>
  • * {@link org.apache.lucene.search.Scorer#getChildren getChildren()} — Returns any child subscorers diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java index 1522e5dc3c5..527c2fdb480 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java @@ -112,18 +112,12 @@ public abstract class Axiomatic extends SimilarityBase { return Math.max(0, score); } - @Override - protected double maxScore(BasicStats stats, double maxFreq) { - // TODO: can we compute a better upper bound on the produced scores - return Double.POSITIVE_INFINITY; - } - @Override protected Explanation explain( - BasicStats stats, int doc, Explanation freq, double docLen) { + BasicStats stats, Explanation freq, double docLen) { List subs = new ArrayList<>(); double f = freq.getValue().doubleValue(); - explain(subs, stats, doc, f, docLen); + explain(subs, stats, f, docLen); double score = tf(stats, f, docLen) * ln(stats, f, docLen) @@ -132,7 +126,7 @@ public abstract class Axiomatic extends SimilarityBase { - gamma(stats, f, docLen); Explanation explanation = Explanation.match((float) score, - "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:", + "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed from:", subs); if (stats.boost != 1f) { explanation = Explanation.match((float) (score * stats.boost), "Boosted score, computed as (score * boost) from:", @@ -148,7 +142,7 @@ public abstract class Axiomatic extends SimilarityBase { } @Override - protected void explain(List subs, BasicStats stats, int doc, + protected void explain(List subs, BasicStats stats, double freq, double docLen) { if (stats.getBoost() != 1.0d) { subs.add(Explanation.match((float) stats.getBoost(), @@ -165,7 +159,7 @@ public abstract class Axiomatic extends SimilarityBase { subs.add(tflnExplain(stats, freq, docLen)); subs.add(idfExplain(stats, freq, docLen)); subs.add(Explanation.match((float) gamma(stats, freq, docLen), "gamma")); - super.explain(subs, stats, doc, freq, docLen); + super.explain(subs, stats, freq, docLen); } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java index dce156bc060..19ab0d29ca6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java @@ -17,13 +17,10 @@ package org.apache.lucene.search.similarities; -import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; @@ -176,7 +173,7 @@ public class BM25Similarity extends Similarity { } @Override - public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { Explanation idf = termStats.length == 1 ? 
idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); float avgdl = avgFieldLength(collectionStats); @@ -184,100 +181,17 @@ public class BM25Similarity extends Similarity { for (int i = 0; i < cache.length; i++) { cache[i] = k1 * ((1 - b) + b * LENGTH_TABLE[i] / avgdl); } - return new BM25Stats(collectionStats.field(), boost, k1, idf, avgdl, cache); - } - - @Override - public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException { - BM25Stats bm25stats = (BM25Stats) stats; - return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field)); - } - - private class BM25DocScorer extends SimScorer { - private final BM25Stats stats; - private final float weightValue; // boost * idf * (k1 + 1) - private final NumericDocValues norms; - /** precomputed cache for all length values */ - private final float[] lengthCache; - /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */ - private final float[] cache; - - BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException { - this.stats = stats; - this.weightValue = stats.weight; - this.norms = norms; - lengthCache = LENGTH_TABLE; - cache = stats.cache; - } - - @Override - public float score(int doc, float freq) throws IOException { - // if there are no norms, we act as if b=0 - double norm; - if (norms == null) { - norm = k1; - } else { - boolean found = norms.advanceExact(doc); - assert found; - norm = cache[((byte) norms.longValue()) & 0xFF]; - } - return weightValue * (float) (freq / (freq + norm)); - } - - @Override - public float maxScore(float maxFreq) { - // TODO: leverage maxFreq and the min norm from the cache - return weightValue; - } - - @Override - public Explanation explain(int doc, Explanation freq) throws IOException { - List subs = new ArrayList<>(); - subs.addAll(stats.explain()); - Explanation tfExpl = explainTF(doc, freq); - subs.add(tfExpl); - return Explanation.match(stats.weight * tfExpl.getValue().floatValue(), - "score(doc="+doc+",freq="+freq.getValue()+"), product of:", subs); - } - - private Explanation explainTF(int doc, Explanation freq) throws IOException { - List subs = new ArrayList<>(); - subs.add(freq); - subs.add(Explanation.match(k1, "k1, term saturation parameter")); - if (norms == null) { - subs.add(Explanation.match(0, "b, field omits length norms")); - return Explanation.match( - (float) (freq.getValue().floatValue() / (freq.getValue().floatValue() + (double) k1)), - "tf, computed as freq / (freq + k1) from:", subs); - } else { - boolean found = norms.advanceExact(doc); - assert found; - byte norm = (byte) norms.longValue(); - float doclen = lengthCache[norm & 0xff]; - subs.add(Explanation.match(b, "b, length normalization parameter")); - if ((norm & 0xFF) > 39) { - subs.add(Explanation.match(doclen, "dl, length of field (approximate)")); - } else { - subs.add(Explanation.match(doclen, "dl, length of field")); - } - subs.add(Explanation.match(stats.avgdl, "avgdl, average length of field")); - float normValue = k1 * ((1 - b) + b * doclen / stats.avgdl); - return Explanation.match( - (float) (freq.getValue().floatValue() / (freq.getValue().floatValue() + (double) normValue)), - "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:", subs); - } - } - + return new BM25Scorer(collectionStats.field(), boost, k1, b, idf, avgdl, cache); } /** Collection statistics for the BM25 model. 
*/ - private static class BM25Stats extends SimWeight { - /** field name, for pulling norms */ - private final String field; + private static class BM25Scorer extends SimScorer { /** query boost */ private final float boost; /** k1 value for scale factor */ private final float k1; + /** b value for length normalization impact */ + private final float b; /** BM25's idf */ private final Explanation idf; /** The average document length. */ @@ -287,17 +201,51 @@ public class BM25Similarity extends Similarity { /** weight (idf * boost) */ private final float weight; - BM25Stats(String field, float boost, float k1, Explanation idf, float avgdl, float[] cache) { - this.field = field; + BM25Scorer(String field, float boost, float k1, float b, Explanation idf, float avgdl, float[] cache) { + super(field); this.boost = boost; this.idf = idf; this.avgdl = avgdl; this.k1 = k1; + this.b = b; this.cache = cache; this.weight = (k1 + 1) * boost * idf.getValue().floatValue(); } - private List explain() { + @Override + public float score(float freq, long encodedNorm) { + double norm = cache[((byte) encodedNorm) & 0xFF]; + return weight * (float) (freq / (freq + norm)); + } + + @Override + public Explanation explain(Explanation freq, long encodedNorm) { + List subs = new ArrayList<>(explainConstantFactors()); + Explanation tfExpl = explainTF(freq, encodedNorm); + subs.add(tfExpl); + return Explanation.match(weight * tfExpl.getValue().floatValue(), + "score(freq="+freq.getValue()+"), product of:", subs); + } + + private Explanation explainTF(Explanation freq, long norm) { + List subs = new ArrayList<>(); + subs.add(freq); + subs.add(Explanation.match(k1, "k1, term saturation parameter")); + float doclen = LENGTH_TABLE[((byte) norm) & 0xff]; + subs.add(Explanation.match(b, "b, length normalization parameter")); + if ((norm & 0xFF) > 39) { + subs.add(Explanation.match(doclen, "dl, length of field (approximate)")); + } else { + subs.add(Explanation.match(doclen, "dl, length of field")); + } + subs.add(Explanation.match(avgdl, "avgdl, average length of field")); + float normValue = k1 * ((1 - b) + b * doclen / avgdl); + return Explanation.match( + (float) (freq.getValue().floatValue() / (freq.getValue().floatValue() + (double) normValue)), + "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:", subs); + } + + private List explainConstantFactors() { List subs = new ArrayList<>(); // scale factor subs.add(Explanation.match(k1 + 1, "scaling factor, k1 + 1")); @@ -311,7 +259,6 @@ public class BM25Similarity extends Similarity { } } - @Override public String toString() { return "BM25(k1=" + k1 + ",b=" + b + ")"; diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicStats.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicStats.java index cc3cab452fb..dc9356f1504 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicStats.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicStats.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.Terms; * Stores all statistics commonly used ranking methods. * @lucene.experimental */ -public class BasicStats extends Similarity.SimWeight { +public class BasicStats { final String field; /** The number of documents. 
*/ protected long numberOfDocuments; diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java index 713417233bc..3c9206d5b68 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java @@ -16,10 +16,7 @@ */ package org.apache.lucene.search.similarities; -import java.io.IOException; - import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; @@ -47,44 +44,31 @@ public class BooleanSimilarity extends Similarity { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new BooleanWeight(boost); + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return new BooleanWeight(collectionStats.field(), boost); } - private static class BooleanWeight extends SimWeight { + private static class BooleanWeight extends SimScorer { final float boost; - BooleanWeight(float boost) { + BooleanWeight(String field, float boost) { + super(field); this.boost = boost; } + + @Override + public float score(float freq, long norm) { + return boost; + } + + @Override + public Explanation explain(Explanation freq, long norm) { + Explanation queryBoostExpl = Explanation.match(boost, "boost, query boost"); + return Explanation.match( + queryBoostExpl.getValue(), + "score(" + getClass().getSimpleName() + "), computed from:", + queryBoostExpl); + } } - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - final float boost = ((BooleanWeight) weight).boost; - - return new SimScorer() { - - @Override - public float score(int doc, float freq) throws IOException { - return boost; - } - - @Override - public float maxScore(float maxFreq) { - return boost; - } - - @Override - public Explanation explain(int doc, Explanation freq) throws IOException { - Explanation queryBoostExpl = Explanation.match(boost, "boost, query boost"); - return Explanation.match( - queryBoostExpl.getValue(), - "score(" + getClass().getSimpleName() + ", doc=" + doc + "), computed from:", - queryBoostExpl); - } - - }; - } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java index 66f22be0aea..f7f3d523c14 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java @@ -64,12 +64,6 @@ public class DFISimilarity extends SimilarityBase { return stats.getBoost() * log2(measure + 1); } - @Override - protected double maxScore(BasicStats stats, double maxFreq) { - // TODO: can we compute a better upper bound on the produced scores - return Double.POSITIVE_INFINITY; - } - /** * Returns the measure of independence */ @@ -79,12 +73,12 @@ public class DFISimilarity extends SimilarityBase { @Override protected Explanation explain( - BasicStats stats, int doc, Explanation freq, double docLen) { + BasicStats stats, Explanation freq, double docLen) { final double expected = (stats.getTotalTermFreq() + 1) * docLen / 
(stats.getNumberOfFieldTokens() + 1); if (freq.getValue().doubleValue() <= expected){ return Explanation.match((float) 0, "score(" + - getClass().getSimpleName() + ", doc=" + doc + ", freq=" + + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), equals to 0"); } Explanation explExpected = Explanation.match((float) expected, @@ -103,7 +97,7 @@ public class DFISimilarity extends SimilarityBase { return Explanation.match( (float) score(stats, freq.getValue().doubleValue(), docLen), - "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + + "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed as boost * log2(measure + 1) from:", Explanation.match( (float)stats.getBoost(), "boost, query boost"), explMeasure); diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java index a41e35cbcf9..cbe6773361f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java @@ -113,15 +113,9 @@ public class DFRSimilarity extends SimilarityBase { return stats.getBoost() * basicModel.score(stats, tfn, aeTimes1pTfn); } - @Override - protected double maxScore(BasicStats stats, double maxFreq) { - // TODO: can we compute a better upper bound on the produced scores - return Double.POSITIVE_INFINITY; - } - @Override protected void explain(List subs, - BasicStats stats, int doc, double freq, double docLen) { + BasicStats stats, double freq, double docLen) { if (stats.getBoost() != 1.0d) { subs.add(Explanation.match( (float)stats.getBoost(), "boost, query boost")); } @@ -136,13 +130,13 @@ public class DFRSimilarity extends SimilarityBase { @Override protected Explanation explain( - BasicStats stats, int doc, Explanation freq, double docLen) { + BasicStats stats, Explanation freq, double docLen) { List subs = new ArrayList<>(); - explain(subs, stats, doc, freq.getValue().doubleValue(), docLen); + explain(subs, stats, freq.getValue().doubleValue(), docLen); return Explanation.match( (float) score(stats, freq.getValue().doubleValue(), docLen), - "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + + "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed as boost * " + "basicModel.score(stats, tfn) * afterEffect.score(stats, tfn) from:", subs); diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java index 9a576085f67..d08bdabf1d9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java @@ -104,15 +104,9 @@ public class IBSimilarity extends SimilarityBase { lambda.lambda(stats)); } - @Override - protected double maxScore(BasicStats stats, double maxFreq) { - // TODO: can we compute a better upper bound on the produced scores - return Double.POSITIVE_INFINITY; - } - @Override protected void explain( - List subs, BasicStats stats, int doc, double freq, double docLen) { + List subs, BasicStats stats, double freq, double docLen) { if (stats.getBoost() != 1.0d) { subs.add(Explanation.match((float)stats.getBoost(), "boost, query boost")); } @@ -125,13 +119,13 @@ public class IBSimilarity extends SimilarityBase { @Override protected Explanation explain( - BasicStats stats, int doc, 
Explanation freq, double docLen) { + BasicStats stats, Explanation freq, double docLen) { List subs = new ArrayList<>(); - explain(subs, stats, doc, freq.getValue().doubleValue(), docLen); + explain(subs, stats, freq.getValue().doubleValue(), docLen); return Explanation.match( (float) score(stats, freq.getValue().doubleValue(), docLen), - "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + + "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed as boost * " + "distribution.score(stats, normalization.tfn(stats, freq," + " docLen), lambda.lambda(stats)) from:", diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java index c12cba451dc..a66871cfcb2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java @@ -78,13 +78,7 @@ public class LMDirichletSimilarity extends LMSimilarity { } @Override - protected double maxScore(BasicStats stats, double maxFreq) { - // TODO: can we compute a better upper bound on the produced scores - return Double.POSITIVE_INFINITY; - } - - @Override - protected void explain(List subs, BasicStats stats, int doc, + protected void explain(List subs, BasicStats stats, double freq, double docLen) { if (stats.getBoost() != 1.0d) { subs.add(Explanation.match((float) stats.getBoost(), "query boost")); @@ -107,18 +101,18 @@ public class LMDirichletSimilarity extends LMSimilarity { (float)Math.log(mu / (docLen + mu)), "document norm, computed as log(mu / (dl + mu))")); subs.add(Explanation.match((float) docLen,"dl, length of field")); - super.explain(subs, stats, doc, freq, docLen); + super.explain(subs, stats, freq, docLen); } @Override protected Explanation explain( - BasicStats stats, int doc, Explanation freq, double docLen) { + BasicStats stats, Explanation freq, double docLen) { List subs = new ArrayList<>(); - explain(subs, stats, doc, freq.getValue().doubleValue(), docLen); + explain(subs, stats, freq.getValue().doubleValue(), docLen); return Explanation.match( (float) score(stats, freq.getValue().doubleValue(), docLen), - "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + + "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed as boost * " + "(term weight + document norm) from:", subs); diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java index 42e5a7bc319..3f4f41abc3f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java @@ -68,13 +68,7 @@ public class LMJelinekMercerSimilarity extends LMSimilarity { } @Override - protected double maxScore(BasicStats stats, double maxFreq) { - // TODO: can we compute a better upper bound on the produced scores - return Double.POSITIVE_INFINITY; - } - - @Override - protected void explain(List subs, BasicStats stats, int doc, + protected void explain(List subs, BasicStats stats, double freq, double docLen) { if (stats.getBoost() != 1.0d) { subs.add(Explanation.match((float) stats.getBoost(), "boost")); @@ -88,18 +82,18 @@ public class LMJelinekMercerSimilarity extends LMSimilarity { "freq, number of 
occurrences of term in the document"); subs.add(explFreq); subs.add(Explanation.match((float) docLen,"dl, length of field")); - super.explain(subs, stats, doc, freq, docLen); + super.explain(subs, stats, freq, docLen); } @Override protected Explanation explain( - BasicStats stats, int doc, Explanation freq, double docLen) { + BasicStats stats, Explanation freq, double docLen) { List subs = new ArrayList<>(); - explain(subs, stats, doc, freq.getValue().doubleValue(), docLen); + explain(subs, stats, freq.getValue().doubleValue(), docLen); return Explanation.match( (float) score(stats, freq.getValue().doubleValue(), docLen), - "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + + "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed as boost * " + "log(1 + ((1 - lambda) * freq / dl) /(lambda * P)) from:", subs); diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java index 81548061e5c..73a1276501d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java @@ -70,7 +70,7 @@ public abstract class LMSimilarity extends SimilarityBase { } @Override - protected void explain(List subExpls, BasicStats stats, int doc, + protected void explain(List subExpls, BasicStats stats, double freq, double docLen) { subExpls.add(Explanation.match((float) collectionModel.computeProbability(stats), "collection probability")); diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java index 2f48cc69333..e558c6ec463 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java @@ -17,12 +17,10 @@ package org.apache.lucene.search.similarities; -import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; @@ -49,64 +47,39 @@ public class MultiSimilarity extends Similarity { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - SimWeight subStats[] = new SimWeight[sims.length]; - for (int i = 0; i < subStats.length; i++) { - subStats[i] = sims[i].computeWeight(boost, collectionStats, termStats); - } - return new MultiStats(subStats); - } - - @Override - public SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { SimScorer subScorers[] = new SimScorer[sims.length]; for (int i = 0; i < subScorers.length; i++) { - subScorers[i] = sims[i].simScorer(((MultiStats)stats).subStats[i], context); + subScorers[i] = sims[i].scorer(boost, collectionStats, termStats); } - return new MultiSimScorer(subScorers); + return new MultiSimScorer(collectionStats.field(), subScorers); } static class MultiSimScorer extends SimScorer { private final SimScorer subScorers[]; - MultiSimScorer(SimScorer subScorers[]) { + MultiSimScorer(String field, SimScorer subScorers[]) { + super(field); this.subScorers = subScorers; } @Override - public float score(int doc, float freq) throws IOException { + public float score(float freq, long norm) { float sum = 0.0f; for (SimScorer subScorer : subScorers) { - sum += subScorer.score(doc, freq); + sum += subScorer.score(freq, norm); } return sum; } @Override - public float maxScore(float freq) { - float sumMaxScore = 0; - for (SimScorer subScorer : subScorers) { - sumMaxScore += subScorer.maxScore(freq); - } - return sumMaxScore; - } - - @Override - public Explanation explain(int doc, Explanation freq) throws IOException { + public Explanation explain(Explanation freq, long norm) { List subs = new ArrayList<>(); for (SimScorer subScorer : subScorers) { - subs.add(subScorer.explain(doc, freq)); + subs.add(subScorer.explain(freq, norm)); } - return Explanation.match(score(doc, freq.getValue().floatValue()), "sum of:", subs); + return Explanation.match(score(freq.getValue().floatValue(), norm), "sum of:", subs); } } - - static class MultiStats extends SimWeight { - final SimWeight subStats[]; - - MultiStats(SimWeight subStats[]) { - this.subStats = subStats; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java b/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java index 6c05616485c..ee2381f6cda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java @@ -17,9 +17,6 @@ package org.apache.lucene.search.similarities; -import java.io.IOException; - -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; @@ -46,26 +43,13 @@ public abstract class PerFieldSimilarityWrapper extends Similarity { } @Override - public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - PerFieldSimWeight weight = new PerFieldSimWeight(); - weight.delegate = get(collectionStats.field()); - weight.delegateWeight = weight.delegate.computeWeight(boost, collectionStats, termStats); - return weight; - } - - @Override - public final SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight; - return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context); + public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return get(collectionStats.field()).scorer(boost, collectionStats, termStats); } /** * Returns a {@link Similarity} for scoring a field. 
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
index 5f0bcd0bcb2..f296c02b523 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
@@ -17,18 +17,15 @@
 package org.apache.lucene.search.similarities;
 
-import java.io.IOException;
 import java.util.Collections;
+import java.util.Objects;
 
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.SmallFloat;
 
 /**
@@ -38,9 +35,9 @@ import org.apache.lucene.util.SmallFloat;
  * <p>
  * This is a low-level API, you should only extend this API if you want to implement
  * an information retrieval model.  If you are instead looking for a convenient way
- * to alter Lucene's scoring, consider extending a higher-level implementation
- * such as {@link TFIDFSimilarity}, which implements the vector space model with this API, or
- * just tweaking the default implementation: {@link BM25Similarity}.
+ * to alter Lucene's scoring, consider just tweaking the default implementation
+ * ({@link BM25Similarity}) or extending {@link SimilarityBase}, which makes it easy
+ * to compute a score from index statistics.
  * <p>
  * Similarity determines how Lucene weights terms, and Lucene interacts with
  * this class at both index-time and
@@ -49,23 +46,22 @@ import org.apache.lucene.util.SmallFloat;
  * Indexing Time
  * At indexing time, the indexer calls {@link #computeNorm(FieldInvertState)}, allowing
  * the Similarity implementation to set a per-document value for the field that will
- * be later accessible via {@link org.apache.lucene.index.LeafReader#getNormValues(String)}.  Lucene makes no assumption
- * about what is in this norm, but it is most useful for encoding length normalization
- * information.
+ * be later accessible via {@link org.apache.lucene.index.LeafReader#getNormValues(String)}.
+ * Lucene makes no assumption about what is in this norm, but it is most useful for
+ * encoding length normalization information.
  * <p>
 * Implementations should carefully consider how the normalization is encoded: while
- * Lucene's {@link BM25Similarity} encodes a combination of index-time boost
- * and length normalization information with {@link SmallFloat} into a single byte, this
- * might not be suitable for all purposes.
+ * Lucene's {@link BM25Similarity} encodes length normalization information with
+ * {@link SmallFloat} into a single byte, this might not be suitable for all purposes.
 * <p>
 * Many formulas require the use of average document length, which can be computed via a
 * combination of {@link CollectionStatistics#sumTotalTermFreq()} and
- * {@link CollectionStatistics#maxDoc()} or {@link CollectionStatistics#docCount()},
- * depending upon whether the average should reflect field sparsity.
+ * {@link CollectionStatistics#docCount()}.
 * <p>
- * Additional scoring factors can be stored in named
- * NumericDocValuesFields and accessed
- * at query-time with {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}.
+ * Additional scoring factors can be stored in named {@link NumericDocValuesField}s and
+ * accessed at query-time with {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}.
+ * However, this should not be done in the {@link Similarity} but externally, for instance
+ * by using FunctionScoreQuery.
 * <p>
 * Finally, using index-time boosts (either via folding into the normalization byte or
 * via DocValues), is an inefficient way to boost the scores of different fields if the
@@ -76,14 +72,13 @@ import org.apache.lucene.util.SmallFloat;
 * Query time
 * At query-time, Queries interact with the Similarity via these steps:
 * <ol>
- *   <li>The {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} method is called a single time,
+ *   <li>The {@link #scorer(float, CollectionStatistics, TermStatistics...)} method is called a single time,
 *       allowing the implementation to compute any statistics (such as IDF, average document length, etc)
 *       across the entire collection. The {@link TermStatistics} and {@link CollectionStatistics} passed in
 *       already contain all of the raw statistics involved, so a Similarity can freely use any combination
 *       of statistics without causing any additional I/O. Lucene makes no assumption about what is
- *       stored in the returned {@link Similarity.SimWeight} object.
- *   <li>For each segment in the index, the Query creates a {@link #simScorer(SimWeight, org.apache.lucene.index.LeafReaderContext)}
- *       The score() method is called for each matching document.
+ *       stored in the returned {@link Similarity.SimScorer} object.
+ *   <li>Then {@link SimScorer#score(float, long)} is called for every matching document to compute its score.
 * </ol>
 * <p>
 * Explanations
@@ -110,7 +105,17 @@ public abstract class Similarity {
   * <p>
   * Matches in longer fields are less precise, so implementations of this
   * method usually set smaller values when state.getLength() is large,
   * and larger values when state.getLength() is small.
-  *
+  *
+  * <p>
+  * Note that for a given term-document frequency, greater unsigned norms
+  * must produce scores that are lower or equal, i.e. if two encoded norms
+  * {@code n1} and {@code n2} are such that
+  * {@code Long.compareUnsigned(n1, n2) > 0}, then
+  * {@code SimScorer.score(freq, n1) <= SimScorer.score(freq, n2)}
+  * for any legal {@code freq}.
+  *
+  * <p>
+  * {@code 0} is not a legal norm, so {@code 1} is the norm that produces
+  * the highest scores.
+  *
   * @lucene.experimental
   *
   * @param state current processing state for this field
@@ -126,71 +131,68 @@ public abstract class Similarity {
    * @param termStats term-level statistics, such as the document frequency of a term across the collection.
    * @return SimWeight object with the information this Similarity needs to score a query.
    */
-  public abstract SimWeight computeWeight(float boost,
+  public abstract SimScorer scorer(float boost,
       CollectionStatistics collectionStats, TermStatistics... termStats);
-
-  /**
-   * Creates a new {@link Similarity.SimScorer} to score matching documents from a segment of the inverted index.
-   * @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
-   * @param context segment of the inverted index to be scored.
-   * @return SloppySimScorer for scoring documents across context
-   * @throws IOException if there is a low-level I/O error
-   */
-  public abstract SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException;
-
-  /**
-   * API for scoring "sloppy" queries such as {@link TermQuery},
-   * {@link SpanQuery}, and {@link PhraseQuery}.
-   */
-  public static abstract class SimScorer {
-
-    /**
-     * Sole constructor. (For invocation by subclass
-     * constructors, typically implicit.)
-     */
-    public SimScorer() {}
-
-    /**
-     * Score a single document
-     * @param doc document id within the inverted index segment
-     * @param freq sloppy term frequency
-     * @return document's score
-     */
-    public abstract float score(int doc, float freq) throws IOException;
-
-    /**
-     * Return the maximum score that this scorer may produce for freqs in {@code ]0, maxFreq]}.
-     * {@code Float.POSITIVE_INFINITY} is a fine return value if scores are not bounded.
-     * @param maxFreq the maximum frequency
-     */
-    public abstract float maxScore(float maxFreq);
-
-    /**
-     * Explain the score for a single document
-     * @param doc document id within the inverted index segment
-     * @param freq Explanation of how the sloppy term frequency was computed
-     * @return document's score
-     */
-    public Explanation explain(int doc, Explanation freq) throws IOException {
-      return Explanation.match(
-          score(doc, freq.getValue().floatValue()),
-          "score(doc=" + doc + ",freq=" + freq.getValue() +"), with freq of:",
-          Collections.singleton(freq));
-    }
-  }
 
   /** Stores the weight for a query across the indexed collection. This abstract
    * implementation is empty; descendants of {@code Similarity} should
    * subclass {@code SimWeight} and define the statistics they require in the
    * subclass. Examples include idf, average field length, etc. */
-  public static abstract class SimWeight {
-
+  public static abstract class SimScorer {
+
+    private final String field;
+
     /**
      * Sole constructor. (For invocation by subclass
-     * constructors, typically implicit.)
+     * constructors.)
      */
-    public SimWeight() {}
+    public SimScorer(String field) {
+      this.field = Objects.requireNonNull(field);
+    }
+
+    /** Return the field that this {@link SimScorer} operates on. */
+    public final String getField() {
+      return field;
+    }
+
+    /**
+     * Score a single document. {@code freq} is the document-term sloppy
+     * frequency and must be finite and positive. {@code norm} is the
+     * encoded normalization factor as computed by
+     * {@link Similarity#computeNorm(FieldInvertState)} at index time, or
+     * {@code 1} if norms are disabled. {@code norm} is never {@code 0}.
+     * <p>
+     * Score must not decrease when {@code freq} increases, i.e. if
+     * {@code freq1 > freq2}, then {@code score(freq1, norm) >=
+     * score(freq2, norm)} for any value of {@code norm} that may be produced
+     * by {@link Similarity#computeNorm(FieldInvertState)}.
+     * <p>
+     * Score must not increase when the unsigned {@code norm} increases, i.e. if
+     * {@code Long.compareUnsigned(norm1, norm2) > 0} then
+     * {@code score(freq, norm1) <= score(freq, norm2)} for any legal
+     * {@code freq}.
+     * <p>
+     * As a consequence, the maximum score that this scorer can produce is bound
+     * by {@code score(Float.MAX_VALUE, 1)}.
+     * @param freq sloppy term frequency, must be finite and positive
+     * @param norm encoded normalization factor or {@code 1} if norms are disabled
+     * @return document's score
+     */
+    public abstract float score(float freq, long norm);
+
+    /**
+     * Explain the score for a single document
+     * @param freq Explanation of how the sloppy term frequency was computed
+     * @param norm encoded normalization factor, as returned by {@link Similarity#computeNorm}, or {@code 1} if norms are disabled
+     * @return document's score
+     */
+    public Explanation explain(Explanation freq, long norm) {
+      return Explanation.match(
+          score(freq.getValue().floatValue(), norm),
+          "score(freq=" + freq.getValue() +"), with freq of:",
+          Collections.singleton(freq));
+    }
   }
 }
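The contract spelled out above (scores must not decrease as freq grows, must not increase as the unsigned norm grows, hence score(Float.MAX_VALUE, 1) bounds everything) is easiest to check on a toy scorer. A sketch, assuming the norm packs a SmallFloat-encoded length into its low byte the way SimilarityBase's LENGTH_TABLE does; the formula itself is invented:

import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.SmallFloat;

class LengthNormScorer extends SimScorer {
  LengthNormScorer(String field) {
    super(field);
  }

  @Override
  public float score(float freq, long norm) {
    // Decode the field length from the low byte of the norm.
    int length = SmallFloat.byte4ToInt((byte) norm);
    // freq / (freq + length) grows with freq and shrinks with length,
    // satisfying both monotonicity requirements.
    return freq / (freq + length);
  }
}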
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
index f227f38fd36..1aefaed7c8f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
@@ -17,13 +17,10 @@
 package org.apache.lucene.search.similarities;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.TermStatistics;
@@ -33,7 +30,7 @@ import org.apache.lucene.util.SmallFloat;
 * A subclass of {@code Similarity} that provides a simplified API for its
 * descendants. Subclasses are only required to implement the {@link #score}
 * and {@link #toString()} methods. Implementing
- * {@link #explain(List, BasicStats, int, double, double)} is optional,
+ * {@link #explain(List, BasicStats, double, double)} is optional,
 * inasmuch as SimilarityBase already provides a basic explanation of the score
 * and the term frequency. However, implementers of a subclass are encouraged to
 * include as much detail about the scoring method as possible.
@@ -82,13 +79,18 @@ public abstract class SimilarityBase extends Similarity {
   }
 
   @Override
-  public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
-    BasicStats stats[] = new BasicStats[termStats.length];
+  public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+    SimScorer weights[] = new SimScorer[termStats.length];
     for (int i = 0; i < termStats.length; i++) {
-      stats[i] = newStats(collectionStats.field(), boost);
-      fillBasicStats(stats[i], collectionStats, termStats[i]);
+      BasicStats stats = newStats(collectionStats.field(), boost);
+      fillBasicStats(stats, collectionStats, termStats[i]);
+      weights[i] = new BasicSimScorer(stats);
+    }
+    if (weights.length == 1) {
+      return weights[0];
+    } else {
+      return new MultiSimilarity.MultiSimScorer(collectionStats.field(), weights);
     }
-    return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
   }
 
   /** Factory method to return a custom stats object */
@@ -121,13 +123,6 @@ public abstract class SimilarityBase extends Similarity {
    */
   protected abstract double score(BasicStats stats, double freq, double docLen);
 
-  /**
-   * Return the maximum value that may be returned by {@link #score(BasicStats, double, double)}
-   * for the given stats.
-   * @see org.apache.lucene.search.similarities.Similarity.SimScorer#maxScore(float)
-   */
-  protected abstract double maxScore(BasicStats stats, double maxFreq);
-
   /**
    * Subclasses should implement this method to explain the score. {@code expl}
    * already contains the score, the name of the class and the doc id, as well
@@ -137,12 +132,11 @@ public abstract class SimilarityBase extends Similarity {
    *
    * @param subExpls the list of details of the explanation to extend
    * @param stats the corpus level statistics.
-   * @param doc the document id.
    * @param freq the term frequency.
    * @param docLen the document length.
    */
   protected void explain(
-      List<Explanation> subExpls, BasicStats stats, int doc, double freq, double docLen) {}
+      List<Explanation> subExpls, BasicStats stats, double freq, double docLen) {}
 
   /**
    * Explains the score. The implementation here provides a basic explanation
@@ -151,43 +145,24 @@ public abstract class SimilarityBase extends Similarity {
    * attaches the score (computed via the {@link #score(BasicStats, double, double)}
    * method) and the explanation for the term frequency. Subclasses content with
    * this format may add additional details in
-   * {@link #explain(List, BasicStats, int, double, double)}.
+   * {@link #explain(List, BasicStats, double, double)}.
    *
    * @param stats the corpus level statistics.
-   * @param doc the document id.
    * @param freq the term frequency and its explanation.
    * @param docLen the document length.
    * @return the explanation.
    */
   protected Explanation explain(
-      BasicStats stats, int doc, Explanation freq, double docLen) {
+      BasicStats stats, Explanation freq, double docLen) {
     List<Explanation> subs = new ArrayList<>();
-    explain(subs, stats, doc, freq.getValue().floatValue(), docLen);
+    explain(subs, stats, freq.getValue().floatValue(), docLen);
 
     return Explanation.match(
         (float) score(stats, freq.getValue().floatValue(), docLen),
-        "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:",
+        "score(" + getClass().getSimpleName() + ", freq=" + freq.getValue() +"), computed from:",
         subs);
   }
 
-  @Override
-  public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
-    if (stats instanceof MultiSimilarity.MultiStats) {
-      // a multi term query (e.g. phrase). return the summation,
-      // scoring almost as if it were boolean query
-      SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
-      SimScorer subScorers[] = new SimScorer[subStats.length];
-      for (int i = 0; i < subScorers.length; i++) {
-        BasicStats basicstats = (BasicStats) subStats[i];
-        subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
-      }
-      return new MultiSimilarity.MultiSimScorer(subScorers);
-    } else {
-      BasicStats basicstats = (BasicStats) stats;
-      return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
-    }
-  }
-
   /**
    * Subclasses must override this method to return the name of the Similarity
    * and preferably the values of parameters (if any) as well.
@@ -227,43 +202,32 @@ public abstract class SimilarityBase extends Similarity {
 
   // --------------------------------- Classes ---------------------------------
 
-  /** Delegates the {@link #score(int, float)} and
-   * {@link #explain(int, Explanation)} methods to
+  /** Delegates the {@link #score(float, long)} and
+   * {@link #explain(Explanation, long)} methods to
    * {@link SimilarityBase#score(BasicStats, double, double)} and
-   * {@link SimilarityBase#explain(BasicStats, int, Explanation, double)},
+   * {@link SimilarityBase#explain(BasicStats, Explanation, double)},
    * respectively.
    */
   final class BasicSimScorer extends SimScorer {
-    private final BasicStats stats;
-    private final NumericDocValues norms;
+    final BasicStats stats;
 
-    BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
+    BasicSimScorer(BasicStats stats) {
+      super(stats.field);
       this.stats = stats;
-      this.norms = norms;
     }
 
-    double getLengthValue(int doc) throws IOException {
-      if (norms == null) {
-        return 1D;
-      }
-      boolean found = norms.advanceExact(doc);
-      assert found;
-      return LENGTH_TABLE[Byte.toUnsignedInt((byte) norms.longValue())];
+    double getLengthValue(long norm) {
+      return LENGTH_TABLE[Byte.toUnsignedInt((byte) norm)];
     }
 
     @Override
-    public float score(int doc, float freq) throws IOException {
-      return (float) SimilarityBase.this.score(stats, freq, getLengthValue(doc));
+    public float score(float freq, long norm) {
+      return (float) SimilarityBase.this.score(stats, freq, getLengthValue(norm));
    }
 
    @Override
-    public float maxScore(float maxFreq) {
-      return (float) SimilarityBase.this.maxScore(stats, maxFreq);
-    }
-
-    @Override
-    public Explanation explain(int doc, Explanation freq) throws IOException {
-      return SimilarityBase.this.explain(stats, doc, freq, getLengthValue(doc));
+    public Explanation explain(Explanation freq, long norm) {
+      return SimilarityBase.this.explain(stats, freq, getLengthValue(norm));
     }
   }
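For custom SimilarityBase subclasses the migration is mostly subtractive: the maxScore(BasicStats, double) override disappears and the explain overloads lose their doc parameter. A toy subclass against the new surface (the class name and formula are invented):

import org.apache.lucene.search.similarities.BasicStats;
import org.apache.lucene.search.similarities.SimilarityBase;

public class RawTFSimilarity extends SimilarityBase {
  @Override
  protected double score(BasicStats stats, double freq, double docLen) {
    // boost * freq: non-decreasing in freq and independent of docLen.
    return stats.getBoost() * freq;
  }

  @Override
  public String toString() {
    return "RawTF";
  }
}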
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
index 0452371abdc..d3224abb3d9 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
@@ -17,13 +17,10 @@
 package org.apache.lucene.search.similarities;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
@@ -511,7 +508,7 @@ public abstract class TFIDFSimilarity extends Similarity {
   }
 
   @Override
-  public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+  public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
     final Explanation idf = termStats.length == 1 ?
idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); @@ -522,110 +519,59 @@ public abstract class TFIDFSimilarity extends Similarity { normTable[i] = norm; } normTable[0] = 1f / normTable[255]; - return new IDFStats(collectionStats.field(), boost, idf, normTable); + return new TFIDFScorer(collectionStats.field(), boost, idf, normTable); } - @Override - public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException { - IDFStats idfstats = (IDFStats) stats; - // the norms only encode the length, we need a translation table that depends on how lengthNorm is implemented - final float[] normTable = idfstats.normTable; - return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field), normTable); - } - - private final class TFIDFSimScorer extends SimScorer { - private final IDFStats stats; - private final float weightValue; - private final NumericDocValues norms; - private final float[] normTable; - - TFIDFSimScorer(IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException { - this.stats = stats; - this.weightValue = stats.queryWeight; - this.norms = norms; - this.normTable = normTable; - } - - @Override - public float score(int doc, float freq) throws IOException { - final float raw = tf(freq) * weightValue; // compute tf(f)*weight - - if (norms == null) { - return raw; - } else { - boolean found = norms.advanceExact(doc); - assert found; - float normValue = normTable[(int) (norms.longValue() & 0xFF)]; - return raw * normValue; // normalize for field - } - } - - @Override - public float maxScore(float maxFreq) { - final float raw = tf(maxFreq) * weightValue; - if (norms == null) { - return raw; - } else { - float maxNormValue = Float.NEGATIVE_INFINITY; - for (float norm : normTable) { - maxNormValue = Math.max(maxNormValue, norm); - } - return raw * maxNormValue; - } - } - - @Override - public Explanation explain(int doc, Explanation freq) throws IOException { - return explainScore(doc, freq, stats, norms, normTable); - } - } /** Collection statistics for the TF-IDF model. The only statistic of interest * to this model is idf. */ - static class IDFStats extends SimWeight { - private final String field; + class TFIDFScorer extends SimScorer { /** The idf and its explanation */ private final Explanation idf; private final float boost; private final float queryWeight; final float[] normTable; - public IDFStats(String field, float boost, Explanation idf, float[] normTable) { + public TFIDFScorer(String field, float boost, Explanation idf, float[] normTable) { + super(field); // TODO: Validate? 
- this.field = field; this.idf = idf; this.boost = boost; this.queryWeight = boost * idf.getValue().floatValue(); this.normTable = normTable; } + + @Override + public float score(float freq, long norm) { + final float raw = tf(freq) * queryWeight; // compute tf(f)*weight + float normValue = normTable[(int) (norm & 0xFF)]; + return raw * normValue; // normalize for field + } + + @Override + public Explanation explain(Explanation freq, long norm) { + return explainScore(freq, norm, normTable); + } + + private Explanation explainScore(Explanation freq, long encodedNorm, float[] normTable) { + List subs = new ArrayList(); + if (boost != 1F) { + subs.add(Explanation.match(boost, "boost")); + } + subs.add(idf); + Explanation tf = Explanation.match(tf(freq.getValue().floatValue()), "tf(freq="+freq.getValue()+"), with freq of:", freq); + subs.add(tf); + + float norm = normTable[(int) (encodedNorm & 0xFF)]; + + Explanation fieldNorm = Explanation.match(norm, "fieldNorm"); + subs.add(fieldNorm); + + return Explanation.match( + queryWeight * tf.getValue().floatValue() * norm, + "score(freq="+freq.getValue()+"), product of:", + subs); + } } - private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException { - List subs = new ArrayList(); - if (stats.boost != 1F) { - subs.add(Explanation.match(stats.boost, "boost")); - } - subs.add(stats.idf); - Explanation tf = Explanation.match(tf(freq.getValue().floatValue()), "tf(freq="+freq.getValue()+"), with freq of:", freq); - subs.add(tf); - - float norm; - if (norms == null) { - norm = 1f; - } else { - boolean found = norms.advanceExact(doc); - assert found; - norm = normTable[(int) (norms.longValue() & 0xFF)]; - } - - Explanation fieldNorm = Explanation.match( - norm, - "fieldNorm(doc=" + doc + ")"); - subs.add(fieldNorm); - - return Explanation.match( - stats.queryWeight * tf.getValue().floatValue() * norm, - "score(doc="+doc+",freq="+freq.getValue()+"), product of:", - subs); - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java index 8bb263338ca..23c1e2b8292 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java @@ -20,7 +20,7 @@ package org.apache.lucene.search.spans; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -61,7 +61,7 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable { final SpanWeight bigWeight; final SpanWeight littleWeight; - public SpanContainWeight(IndexSearcher searcher, Map terms, + public SpanContainWeight(IndexSearcher searcher, Map terms, SpanWeight bigWeight, SpanWeight littleWeight, float boost) throws IOException { super(SpanContainQuery.this, searcher, terms, boost); this.bigWeight = bigWeight; @@ -93,9 +93,9 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable { } @Override - public void extractTermContexts(Map contexts) { - bigWeight.extractTermContexts(contexts); - littleWeight.extractTermContexts(contexts); + public void extractTermStates(Map contexts) { + bigWeight.extractTermStates(contexts); + 
littleWeight.extractTermStates(contexts); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index 0d62f749fb0..63662994bf1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -23,7 +23,7 @@ import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreMode; @@ -45,15 +45,15 @@ public final class SpanContainingQuery extends SpanContainQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - SpanWeight littleWeight = little.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - return new SpanContainingWeight(searcher, scoreMode.needsScores() ? getTermContexts(bigWeight, littleWeight) : null, + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); + return new SpanContainingWeight(searcher, scoreMode.needsScores() ? getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } public class SpanContainingWeight extends SpanContainWeight { - public SpanContainingWeight(IndexSearcher searcher, Map terms, + public SpanContainingWeight(IndexSearcher searcher, Map terms, SpanWeight bigWeight, SpanWeight littleWeight, float boost) throws IOException { super(searcher, terms, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index ee3f5deda3d..088e73092de 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -24,7 +24,7 @@ import java.util.Objects; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiTermQuery; @@ -163,7 +163,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - protected void addClause(List topLevel, Term term, int docCount, float boost, TermContext states) { + protected void addClause(List topLevel, Term term, int docCount, float boost, TermStates states) { final SpanTermQuery q = new SpanTermQuery(term, states); topLevel.add(q); } @@ -211,7 +211,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - protected void addClause(List topLevel, Term term, int docFreq, float boost, TermContext states) { + protected void addClause(List topLevel, Term term, int docFreq, float boost, TermStates states) { final SpanTermQuery q = new SpanTermQuery(term, states); topLevel.add(q); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 24a047fce51..17b9e515130 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -29,7 +29,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -181,24 +181,24 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, boost)); } - return new SpanNearWeight(subWeights, searcher, scoreMode.needsScores() ? getTermContexts(subWeights) : null, boost); + return new SpanNearWeight(subWeights, searcher, scoreMode.needsScores() ? getTermStates(subWeights) : null, boost); } public class SpanNearWeight extends SpanWeight { final List subWeights; - public SpanNearWeight(List subWeights, IndexSearcher searcher, Map terms, float boost) throws IOException { + public SpanNearWeight(List subWeights, IndexSearcher searcher, Map terms, float boost) throws IOException { super(SpanNearQuery.this, searcher, terms, boost); this.subWeights = subWeights; } @Override - public void extractTermContexts(Map contexts) { + public void extractTermStates(Map contexts) { for (SpanWeight w : subWeights) { - w.extractTermContexts(contexts); + w.extractTermStates(contexts); } } @@ -318,7 +318,7 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } @Override - public void extractTermContexts(Map contexts) { + public void extractTermStates(Map contexts) { } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 5b97f8da178..6c56df3abee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -25,7 +25,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -99,9 +99,9 @@ public final class SpanNotQuery extends SpanQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight includeWeight = include.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + SpanWeight includeWeight = include.createWeight(searcher, scoreMode, boost); SpanWeight excludeWeight = exclude.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - return new SpanNotWeight(searcher, scoreMode.needsScores() ? getTermContexts(includeWeight, excludeWeight) : null, + return new SpanNotWeight(searcher, scoreMode.needsScores() ? 
getTermStates(includeWeight) : null, includeWeight, excludeWeight, boost); } @@ -110,7 +110,7 @@ public final class SpanNotQuery extends SpanQuery { final SpanWeight includeWeight; final SpanWeight excludeWeight; - public SpanNotWeight(IndexSearcher searcher, Map terms, + public SpanNotWeight(IndexSearcher searcher, Map terms, SpanWeight includeWeight, SpanWeight excludeWeight, float boost) throws IOException { super(SpanNotQuery.this, searcher, terms, boost); this.includeWeight = includeWeight; @@ -118,8 +118,8 @@ public final class SpanNotQuery extends SpanQuery { } @Override - public void extractTermContexts(Map contexts) { - includeWeight.extractTermContexts(contexts); + public void extractTermStates(Map contexts) { + includeWeight.extractTermStates(contexts); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 2e15c92f29e..849edaa30e6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -27,7 +27,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.DisiPriorityQueue; import org.apache.lucene.search.DisiWrapper; import org.apache.lucene.search.DisjunctionDISIApproximation; @@ -119,16 +119,16 @@ public final class SpanOrQuery extends SpanQuery { public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { List subWeights = new ArrayList<>(clauses.size()); for (SpanQuery q : clauses) { - subWeights.add(q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost)); + subWeights.add(q.createWeight(searcher, scoreMode, boost)); } - return new SpanOrWeight(searcher, scoreMode.needsScores() ? getTermContexts(subWeights) : null, subWeights, boost); + return new SpanOrWeight(searcher, scoreMode.needsScores() ? 
getTermStates(subWeights) : null, subWeights, boost); } public class SpanOrWeight extends SpanWeight { final List subWeights; - public SpanOrWeight(IndexSearcher searcher, Map terms, List subWeights, float boost) throws IOException { + public SpanOrWeight(IndexSearcher searcher, Map terms, List subWeights, float boost) throws IOException { super(SpanOrQuery.this, searcher, terms, boost); this.subWeights = subWeights; } @@ -150,9 +150,9 @@ public final class SpanOrQuery extends SpanQuery { } @Override - public void extractTermContexts(Map contexts) { + public void extractTermStates(Map contexts) { for (SpanWeight w : subWeights) { - w.extractTermContexts(contexts); + w.extractTermStates(contexts); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index f9b76972026..099b627e1ee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -25,7 +25,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -69,15 +69,15 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea @Override public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight matchWeight = match.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - return new SpanPositionCheckWeight(matchWeight, searcher, scoreMode.needsScores() ? getTermContexts(matchWeight) : null, boost); + SpanWeight matchWeight = match.createWeight(searcher, scoreMode, boost); + return new SpanPositionCheckWeight(matchWeight, searcher, scoreMode.needsScores() ? 
getTermStates(matchWeight) : null, boost); } public class SpanPositionCheckWeight extends SpanWeight { final SpanWeight matchWeight; - public SpanPositionCheckWeight(SpanWeight matchWeight, IndexSearcher searcher, Map terms, float boost) throws IOException { + public SpanPositionCheckWeight(SpanWeight matchWeight, IndexSearcher searcher, Map terms, float boost) throws IOException { super(SpanPositionCheckQuery.this, searcher, terms, boost); this.matchWeight = matchWeight; } @@ -93,8 +93,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea } @Override - public void extractTermContexts(Map contexts) { - matchWeight.extractTermContexts(contexts); + public void extractTermStates(Map contexts) { + matchWeight.extractTermStates(contexts); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java index 607a3755513..ca657b6cff1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -23,7 +23,7 @@ import java.util.Map; import java.util.TreeMap; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -40,25 +40,25 @@ public abstract class SpanQuery extends Query { public abstract SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException; /** - * Build a map of terms to termcontexts, for use in constructing SpanWeights + * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights * @lucene.internal */ - public static Map getTermContexts(SpanWeight... weights) { - Map terms = new TreeMap<>(); + public static Map getTermStates(SpanWeight... weights) { + Map terms = new TreeMap<>(); for (SpanWeight w : weights) { - w.extractTermContexts(terms); + w.extractTermStates(terms); } return terms; } /** - * Build a map of terms to termcontexts, for use in constructing SpanWeights + * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights * @lucene.internal */ - public static Map getTermContexts(Collection weights) { - Map terms = new TreeMap<>(); + public static Map getTermStates(Collection weights) { + Map terms = new TreeMap<>(); for (SpanWeight w : weights) { - w.extractTermContexts(terms); + w.extractTermStates(terms); } return terms; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 57a68e493a8..044ac7a5960 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -21,9 +21,9 @@ import java.io.IOException; import java.util.Objects; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.similarities.Similarity; /** * A basic {@link Scorer} over {@link Spans}. 
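The TermContext-to-TermStates rename running through these span queries also changes construction: as SpanTermQuery.createWeight shows below, TermStates.build now takes a needsScores flag so that term statistics are only accumulated when scoring requires them. A usage sketch (the helper class and method name are invented):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.spans.SpanTermQuery;

class TermStatesDemo {
  // Pre-build the per-term state and hand it to the query, mirroring what
  // SpanTermQuery.createWeight does when no TermStates was supplied.
  static SpanTermQuery cachedSpanQuery(IndexReader reader, Term term, boolean needsScores) throws IOException {
    TermStates states = TermStates.build(reader.getContext(), term, needsScores);
    return new SpanTermQuery(term, states);
  }
}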
@@ -32,7 +32,7 @@ import org.apache.lucene.search.similarities.Similarity; public class SpanScorer extends Scorer { protected final Spans spans; - protected final Similarity.SimScorer docScorer; + protected final LeafSimScorer docScorer; /** accumulated sloppy freq (computed in setFreqCurrentDoc) */ private float freq; @@ -41,7 +41,7 @@ public class SpanScorer extends Scorer { private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for /** Sole constructor. */ - public SpanScorer(SpanWeight weight, Spans spans, Similarity.SimScorer docScorer) { + public SpanScorer(SpanWeight weight, Spans spans, LeafSimScorer docScorer) { super(weight); this.spans = Objects.requireNonNull(spans); this.docScorer = docScorer; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 9eea3aac177..9ac7afb81ee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -28,7 +28,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -41,21 +41,21 @@ import org.apache.lucene.search.ScoreMode; public class SpanTermQuery extends SpanQuery { protected final Term term; - protected final TermContext termContext; + protected final TermStates termStates; /** Construct a SpanTermQuery matching the named term's spans. */ public SpanTermQuery(Term term) { this.term = Objects.requireNonNull(term); - this.termContext = null; + this.termStates = null; } /** * Expert: Construct a SpanTermQuery matching the named term's spans, using - * the provided TermContext + * the provided TermStates */ - public SpanTermQuery(Term term, TermContext context) { + public SpanTermQuery(Term term, TermStates termStates) { this.term = Objects.requireNonNull(term); - this.termContext = context; + this.termStates = termStates; } /** Return the term whose spans are matched. */ @@ -66,25 +66,25 @@ public class SpanTermQuery extends SpanQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - final TermContext context; + final TermStates context; final IndexReaderContext topContext = searcher.getTopReaderContext(); - if (termContext == null || termContext.wasBuiltFor(topContext) == false) { - context = TermContext.build(topContext, term); + if (termStates == null || termStates.wasBuiltFor(topContext) == false) { + context = TermStates.build(topContext, term, scoreMode.needsScores()); } else { - context = termContext; + context = termStates; } return new SpanTermWeight(context, searcher, scoreMode.needsScores() ? 
Collections.singletonMap(term, context) : null, boost); } public class SpanTermWeight extends SpanWeight { - final TermContext termContext; + final TermStates termStates; - public SpanTermWeight(TermContext termContext, IndexSearcher searcher, Map terms, float boost) throws IOException { + public SpanTermWeight(TermStates termStates, IndexSearcher searcher, Map terms, float boost) throws IOException { super(SpanTermQuery.this, searcher, terms, boost); - this.termContext = termContext; - assert termContext != null : "TermContext must not be null"; + this.termStates = termStates; + assert termStates != null : "TermStates must not be null"; } @Override @@ -98,16 +98,16 @@ public class SpanTermQuery extends SpanQuery { } @Override - public void extractTermContexts(Map contexts) { - contexts.put(term, termContext); + public void extractTermStates(Map contexts) { + contexts.put(term, termStates); } @Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { - assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); - final TermState state = termContext.get(context.ord); + final TermState state = termStates.get(context); if (state == null) { // term is not present in that reader assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term; return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 0dad614bdda..25b58fdc39a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -24,14 +24,14 @@ import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SimScorer; /** * Expert-only. Public for use by other weight implementations @@ -72,48 +72,48 @@ public abstract class SpanWeight extends Weight { } protected final Similarity similarity; - protected final Similarity.SimWeight simWeight; + protected final Similarity.SimScorer simScorer; protected final String field; /** * Create a new SpanWeight * @param query the parent query * @param searcher the IndexSearcher to query against - * @param termContexts a map of terms to termcontexts for use in building the similarity. May + * @param termStates a map of terms to {@link TermStates} for use in building the similarity. 
May * be null if scores are not required * @throws IOException on error */ - public SpanWeight(SpanQuery query, IndexSearcher searcher, Map termContexts, float boost) throws IOException { + public SpanWeight(SpanQuery query, IndexSearcher searcher, Map termStates, float boost) throws IOException { super(query); this.field = query.getField(); - this.similarity = searcher.getSimilarity(termContexts != null); - this.simWeight = buildSimWeight(query, searcher, termContexts, boost); + this.similarity = searcher.getSimilarity(); + this.simScorer = buildSimWeight(query, searcher, termStates, boost); } - private Similarity.SimWeight buildSimWeight(SpanQuery query, IndexSearcher searcher, Map termContexts, float boost) throws IOException { - if (termContexts == null || termContexts.size() == 0 || query.getField() == null) + private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map termStates, float boost) throws IOException { + if (termStates == null || termStates.size() == 0 || query.getField() == null) return null; - TermStatistics[] termStats = new TermStatistics[termContexts.size()]; + TermStatistics[] termStats = new TermStatistics[termStates.size()]; int termUpTo = 0; - for (Term term : termContexts.keySet()) { - TermStatistics termStatistics = searcher.termStatistics(term, termContexts.get(term)); + for (Term term : termStates.keySet()) { + TermStatistics termStatistics = searcher.termStatistics(term, termStates.get(term)); if (termStatistics != null) { termStats[termUpTo++] = termStatistics; } } CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField()); if (termUpTo > 0) { - return similarity.computeWeight(boost, collectionStats, Arrays.copyOf(termStats, termUpTo)); + return similarity.scorer(boost, collectionStats, Arrays.copyOf(termStats, termUpTo)); } else { return null; // no terms at all exist, we won't use similarity } } /** - * Collect all TermContexts used by this Weight - * @param contexts a map to add the TermContexts to + * Collect all TermStates used by this Weight + * @param contexts a map to add the TermStates to */ - public abstract void extractTermContexts(Map contexts); + public abstract void extractTermStates(Map contexts); /** * Expert: Return a Spans object iterating over matches from this Weight @@ -129,18 +129,18 @@ public abstract class SpanWeight extends Weight { if (spans == null) { return null; } - final Similarity.SimScorer docScorer = getSimScorer(context); + final LeafSimScorer docScorer = getSimScorer(context); return new SpanScorer(this, spans, docScorer); } /** - * Return a SimScorer for this context + * Return a LeafSimScorer for this context * @param context the LeafReaderContext * @return a SimWeight * @throws IOException on error */ - public Similarity.SimScorer getSimScorer(LeafReaderContext context) throws IOException { - return simWeight == null ? null : similarity.simScorer(simWeight, context); + public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException { + return simScorer == null ? 
null : new LeafSimScorer(simScorer, context.reader(), true, Float.MAX_VALUE); } @Override @@ -150,7 +150,7 @@ public abstract class SpanWeight extends Weight { int newDoc = scorer.iterator().advance(doc); if (newDoc == doc) { float freq = scorer.sloppyFreq(); - SimScorer docScorer = similarity.simScorer(simWeight, context); + LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), true, Float.MAX_VALUE); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); return Explanation.match(scoreExplanation.getValue(), diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index 9c618dd2e4c..fba85fe6e86 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -23,7 +23,7 @@ import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreMode; @@ -46,15 +46,15 @@ public final class SpanWithinQuery extends SpanContainQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight bigWeight = big.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - SpanWeight littleWeight = little.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - return new SpanWithinWeight(searcher, scoreMode.needsScores() ? getTermContexts(bigWeight, littleWeight) : null, + SpanWeight bigWeight = big.createWeight(searcher, scoreMode, boost); + SpanWeight littleWeight = little.createWeight(searcher, scoreMode, boost); + return new SpanWithinWeight(searcher, scoreMode.needsScores() ? 
getTermStates(bigWeight, littleWeight) : null, bigWeight, littleWeight, boost); } public class SpanWithinWeight extends SpanContainWeight { - public SpanWithinWeight(IndexSearcher searcher, Map terms, + public SpanWithinWeight(IndexSearcher searcher, Map terms, SpanWeight bigWeight, SpanWeight littleWeight, float boost) throws IOException { super(searcher, terms, bigWeight, littleWeight, boost); } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java index f1e1aed6557..625bb0e7010 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java @@ -22,7 +22,7 @@ import java.util.Objects; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.LeafSimScorer; /** * Expert: @@ -39,7 +39,7 @@ public class TermSpans extends Spans { protected boolean readPayload; private final float positionsCost; - public TermSpans(Similarity.SimScorer scorer, + public TermSpans(LeafSimScorer scorer, PostingsEnum postings, Term term, float positionsCost) { this.postings = Objects.requireNonNull(postings); this.term = Objects.requireNonNull(term); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java b/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java index a8111921ff6..7737de18276 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java @@ -103,12 +103,7 @@ public class TestCustomNorms extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCustomTermFreq.java b/lucene/core/src/test/org/apache/lucene/index/TestCustomTermFreq.java index d2eff257648..8bb81d2409c 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCustomTermFreq.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCustomTermFreq.java @@ -17,8 +17,6 @@ package org.apache.lucene.index; -import java.io.IOException; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -432,12 +430,7 @@ public class TestCustomTermFreq extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInvertState.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInvertState.java index f78b7fa92c5..08635fc79f9 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInvertState.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInvertState.java @@ -17,7 +17,6 @@ package org.apache.lucene.index; -import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -54,12 +53,7 @@ public class TestFieldInvertState extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 0a8799d3a29..6b43c162ec7 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -1947,13 +1947,8 @@ public class TestIndexSorting extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return in.computeWeight(boost, collectionStats, termStats); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return in.simScorer(weight, context); + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return in.scorer(boost, collectionStats, termStats); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMaxTermFrequency.java b/lucene/core/src/test/org/apache/lucene/index/TestMaxTermFrequency.java index f391c5a2af2..216dc211d3e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMaxTermFrequency.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMaxTermFrequency.java @@ -17,7 +17,6 @@ package org.apache.lucene.index; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -109,24 +108,14 @@ public class TestMaxTermFrequency extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new SimWeight() {}; - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - - return new SimScorer() { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { + return new SimScorer(collectionStats.field()) { @Override - public float score(int doc, float freq) throws IOException { + public float score(float freq, long norm) { return 0; } - @Override - public float maxScore(float maxFreq) { - return 0; - } }; } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestNorms.java b/lucene/core/src/test/org/apache/lucene/index/TestNorms.java index 70c7a3237e6..805c7e58474 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestNorms.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestNorms.java @@ -116,12 +116,7 @@ public class TestNorms extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermStates.java b/lucene/core/src/test/org/apache/lucene/index/TestTermStates.java new file mode 100644 index 00000000000..a89fe7bb04a --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermStates.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.index; + +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; + +public class TestTermStates extends LuceneTestCase { + + public void testToStringOnNullTermState() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + w.addDocument(new Document()); + IndexReader r = w.getReader(); + TermStates states = TermStates.build(r.getContext(), new Term("foo", "bar"), random().nextBoolean()); + assertEquals("TermStates\n state=null\n", states.toString()); + IOUtils.close(r, w, dir); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java b/lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java index a0fca4c62b4..2de02346a27 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java @@ -17,7 +17,6 @@ package org.apache.lucene.index; -import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; @@ -106,12 +105,7 @@ public class TestUniqueTermCount extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 0523e2c04aa..c85732ec047 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -194,12 +194,7 @@ final class JustCompileSearch { static final class JustCompileSimilarity extends Similarity { @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public SimScorer simScorer(SimWeight stats, LeafReaderContext context) { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java index 95562819aa0..a9e2891140a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java @@ -148,7 +148,7 @@ public class TestBoolean2 extends LuceneTestCase { } singleSegmentReader = DirectoryReader.open(singleSegmentDirectory); singleSegmentSearcher = newSearcher(singleSegmentReader); - singleSegmentSearcher.setSimilarity(searcher.getSimilarity(true)); + singleSegmentSearcher.setSimilarity(searcher.getSimilarity()); // Make big index dir2 = copyOf(directory); @@ -379,7 +379,7 @@ public class TestBoolean2 extends LuceneTestCase { QueryUtils.check(random(), q1,searcher); // baseline sim try { // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop. 
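A note on the API shape driving the test churn in these hunks: the old two-step Similarity protocol (computeWeight(boost, stats...) producing a SimWeight, then simScorer(weight, context)) is collapsed into a single scorer(boost, collectionStats, termStats...) call whose SimScorer scores (freq, norm) pairs instead of (doc, freq), and IndexSearcher.getSimilarity() loses its boolean needsScores argument. A minimal custom similarity under the new API might look like the sketch below, which mirrors the test hunks; the names countingSim and otherSearcher are illustrative only:

    Similarity countingSim = new Similarity() {
      @Override
      public long computeNorm(FieldInvertState state) {
        return 1; // a norm must still be encoded per field/document
      }
      @Override
      public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
        // One scorer per (query, field); no separate SimWeight object anymore.
        return new SimScorer(collectionStats.field()) {
          @Override
          public float score(float freq, long norm) {
            return freq; // no doc id here; the norm arrives as the long argument
          }
        };
      }
    };
    // The boolean needsScores argument is gone from getSimilarity():
    otherSearcher.setSimilarity(searcher.getSimilarity());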
- searcher.setSimilarity(bigSearcher.getSimilarity(true)); // random sim + searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim QueryUtils.check(random(), q1, searcher); } finally { searcher.setSimilarity(new ClassicSimilarity()); // restore diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java index 19f45f81bb0..de061a2f8d3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java @@ -38,7 +38,6 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.Scorer.ChildScorer; -import org.apache.lucene.search.similarities.BasicStats; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; @@ -329,21 +328,12 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new BasicStats("", boost); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return new SimScorer() { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return new SimScorer(collectionStats.field()) { @Override - public float score(int doc, float freq) throws IOException { + public float score(float freq, long norm) { return freq; } - @Override - public float maxScore(float maxFreq) { - return maxFreq; - } }; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index d21f373e045..292dfa9dabf 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -357,7 +357,7 @@ public class TestBooleanRewrites extends LuceneTestCase { return original; } }; - searcher2.setSimilarity(searcher1.getSimilarity(true)); + searcher2.setSimilarity(searcher1.getSimilarity()); final int iters = atLeast(1000); for (int i = 0; i < iters; ++i) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java index a4e959619e5..4cfa4d3f5a5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java @@ -34,7 +34,6 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; @@ -100,23 +99,13 @@ public class TestConjunctions extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { - return new SimWeight() {}; - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return new SimScorer() { + return new SimScorer(collectionStats.field()) { @Override - public float score(int doc, float freq) { + public float score(float freq, long norm) { return freq; } - - @Override - public float maxScore(float maxFreq) { - return maxFreq; - } }; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java deleted file mode 100644 index 88564314fab..00000000000 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.search; - - -import java.io.IOException; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FloatDocValuesField; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; - -/** - * Tests the use of indexdocvalues in scoring. 
- * - * In the example, a docvalues field is used as a per-document boost (separate from the norm) - * @lucene.experimental - */ -public class TestDocValuesScoring extends LuceneTestCase { - private static final float SCORE_EPSILON = 0.001f; /* for comparing floats */ - - public void testSimple() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter iw = new RandomIndexWriter(random(), dir); - Document doc = new Document(); - Field field = newTextField("foo", "", Field.Store.NO); - doc.add(field); - Field dvField = new FloatDocValuesField("foo_boost", 0.0F); - doc.add(dvField); - Field field2 = newTextField("bar", "", Field.Store.NO); - doc.add(field2); - - field.setStringValue("quick brown fox"); - field2.setStringValue("quick brown fox"); - dvField.setFloatValue(2f); // boost x2 - iw.addDocument(doc); - field.setStringValue("jumps over lazy brown dog"); - field2.setStringValue("jumps over lazy brown dog"); - dvField.setFloatValue(4f); // boost x4 - iw.addDocument(doc); - IndexReader ir = iw.getReader(); - iw.close(); - - // no boosting - IndexSearcher searcher1 = newSearcher(ir, false); - final Similarity base = searcher1.getSimilarity(true); - // boosting - IndexSearcher searcher2 = newSearcher(ir, false); - searcher2.setSimilarity(new PerFieldSimilarityWrapper() { - final Similarity fooSim = new BoostingSimilarity(base, "foo_boost"); - - @Override - public Similarity get(String field) { - return "foo".equals(field) ? fooSim : base; - } - }); - - // in this case, we searched on field "foo". first document should have 2x the score. - TermQuery tq = new TermQuery(new Term("foo", "quick")); - QueryUtils.check(random(), tq, searcher1); - QueryUtils.check(random(), tq, searcher2); - - TopDocs noboost = searcher1.search(tq, 10); - TopDocs boost = searcher2.search(tq, 10); - assertEquals(1, noboost.totalHits); - assertEquals(1, boost.totalHits); - - //System.out.println(searcher2.explain(tq, boost.scoreDocs[0].doc)); - assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score*2f, SCORE_EPSILON); - - // this query matches only the second document, which should have 4x the score. - tq = new TermQuery(new Term("foo", "jumps")); - QueryUtils.check(random(), tq, searcher1); - QueryUtils.check(random(), tq, searcher2); - - noboost = searcher1.search(tq, 10); - boost = searcher2.search(tq, 10); - assertEquals(1, noboost.totalHits); - assertEquals(1, boost.totalHits); - - assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score*4f, SCORE_EPSILON); - - // search on on field bar just for kicks, nothing should happen, since we setup - // our sim provider to only use foo_boost for field foo. - tq = new TermQuery(new Term("bar", "quick")); - QueryUtils.check(random(), tq, searcher1); - QueryUtils.check(random(), tq, searcher2); - - noboost = searcher1.search(tq, 10); - boost = searcher2.search(tq, 10); - assertEquals(1, noboost.totalHits); - assertEquals(1, boost.totalHits); - - assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score, SCORE_EPSILON); - - ir.close(); - dir.close(); - } - - /** - * Similarity that wraps another similarity and boosts the final score - * according to whats in a docvalues field. 
- * - * @lucene.experimental - */ - static class BoostingSimilarity extends Similarity { - private final Similarity sim; - private final String boostField; - - public BoostingSimilarity(Similarity sim, String boostField) { - this.sim = sim; - this.boostField = boostField; - } - - @Override - public long computeNorm(FieldInvertState state) { - return sim.computeNorm(state); - } - - @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return sim.computeWeight(boost, collectionStats, termStats); - } - - @Override - public SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException { - final SimScorer sub = sim.simScorer(stats, context); - final NumericDocValues values = DocValues.getNumeric(context.reader(), boostField); - - return new SimScorer() { - - private float getValueForDoc(int doc) throws IOException { - int curDocID = values.docID(); - if (doc < curDocID) { - throw new IllegalArgumentException("doc=" + doc + " is before curDocID=" + curDocID); - } - if (doc > curDocID) { - curDocID = values.advance(doc); - } - if (curDocID == doc) { - return Float.intBitsToFloat((int)values.longValue()); - } else { - return 0f; - } - } - - @Override - public float score(int doc, float freq) throws IOException { - return getValueForDoc(doc) * sub.score(doc, freq); - } - - @Override - public float maxScore(float maxFreq) { - return Float.POSITIVE_INFINITY; - } - - @Override - public Explanation explain(int doc, Explanation freq) throws IOException { - Explanation boostExplanation = Explanation.match(getValueForDoc(doc), "indexDocValue(" + boostField + ")"); - Explanation simExplanation = sub.explain(doc, freq); - return Explanation.match( - boostExplanation.getValue().doubleValue() * simExplanation.getValue().doubleValue(), - "product of:", boostExplanation, simExplanation); - } - }; - } - } -} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index 924a1af0e87..30b03ac9f55 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -34,10 +34,9 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; -import org.apache.lucene.search.similarities.Similarity.SimWeight; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -310,7 +309,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { final int maxDoc; final Set ords = new HashSet<>(); - final SimScorer[] sims; + final LeafSimScorer[] sims; final int minNrShouldMatch; double score = Float.NaN; @@ -321,7 +320,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { this.maxDoc = reader.maxDoc(); BooleanQuery bq = (BooleanQuery) weight.getQuery(); this.minNrShouldMatch = bq.getMinimumNumberShouldMatch(); - this.sims = new SimScorer[(int)dv.getValueCount()]; + this.sims = new LeafSimScorer[(int)dv.getValueCount()]; for (BooleanClause clause : bq.clauses()) { assert !clause.isProhibited(); assert !clause.isRequired(); @@ 
-330,11 +329,11 @@ public class TestMinShouldMatch2 extends LuceneTestCase { if (ord >= 0) { boolean success = ords.add(ord); assert success; // no dups - TermContext context = TermContext.build(reader.getContext(), term); - SimWeight w = weight.similarity.computeWeight(1f, + TermStates context = TermStates.build(reader.getContext(), term, true); + SimScorer w = weight.similarity.scorer(1f, searcher.collectionStatistics("field"), searcher.termStatistics(term, context)); - sims[(int)ord] = weight.similarity.simScorer(w, reader.getContext()); + sims[(int)ord] = new LeafSimScorer(w, reader, true, 1); } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java b/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java index a6970f974ad..f360bedd31a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSimilarityProvider.java @@ -17,15 +17,12 @@ package org.apache.lucene.search; -import java.io.IOException; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; @@ -113,21 +110,11 @@ public class TestSimilarityProvider extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new SimWeight() {}; - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return new SimScorer() { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return new SimScorer(collectionStats.field()) { @Override - public float score(int doc, float freq) throws IOException { - return 1; - } - - @Override - public float maxScore(float maxFreq) { + public float score(float freq, long norm) { return 1; } }; @@ -143,21 +130,10 @@ public class TestSimilarityProvider extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new SimWeight() {}; - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return new SimScorer() { - + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { + return new SimScorer(collectionStats.field()) { @Override - public float score(int doc, float freq) throws IOException { - return 10; - } - - @Override - public float maxScore(float maxFreq) { + public float score(float freq, long norm) { return 10; } }; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java b/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java index 7278a3b2516..f45e304c2fc 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java @@ -34,7 +34,6 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.Scorer.ChildScorer; -import org.apache.lucene.search.similarities.BasicStats; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -231,22 +230,12 @@ public class TestSubScorerFreqs extends LuceneTestCase { } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new BasicStats("", boost); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return new SimScorer() { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return new SimScorer(collectionStats.field()) { @Override - public float score(int doc, float freq) throws IOException { + public float score(float freq, long norm) { return freq; } - - @Override - public float maxScore(float maxFreq) { - return maxFreq; - } }; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java index f65c54eac0f..dd85c62f663 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java @@ -29,7 +29,7 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -49,7 +49,7 @@ public class TestTermQuery extends LuceneTestCase { new TermQuery(new Term("foo", "baz"))); QueryUtils.checkEqual( new TermQuery(new Term("foo", "bar")), - new TermQuery(new Term("foo", "bar"), TermContext.build(new MultiReader().getContext(), new Term("foo", "bar")))); + new TermQuery(new Term("foo", "bar"), TermStates.build(new MultiReader().getContext(), new Term("foo", "bar"), true))); } public void testCreateWeightDoesNotSeekIfScoresAreNotNeeded() throws IOException { @@ -84,7 +84,7 @@ public class TestTermQuery extends LuceneTestCase { searcher.search(query, collector); assertEquals(1, collector.getTotalHits()); TermQuery queryWithContext = new TermQuery(new Term("foo", "bar"), - TermContext.build(reader.getContext(), new Term("foo", "bar"))); + TermStates.build(reader.getContext(), new Term("foo", "bar"), true)); collector = new TotalHitCountCollector(); searcher.search(queryWithContext, collector); assertEquals(1, collector.getTotalHits()); diff --git 
a/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java index a0fa0f371cb..eb7a590bc16 100644 --- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java +++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java @@ -36,7 +36,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.similarities.TFIDFSimilarity.IDFStats; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.TestUtil; @@ -157,7 +156,7 @@ public class TestClassicSimilarity extends BaseSimilarityTestCase { public void testSaneNormValues() throws IOException { ClassicSimilarity sim = new ClassicSimilarity(); - TFIDFSimilarity.IDFStats stats = (IDFStats) sim.computeWeight(1f, indexSearcher.collectionStatistics("test")); + TFIDFSimilarity.TFIDFScorer stats = (TFIDFSimilarity.TFIDFScorer) sim.scorer(1f, indexSearcher.collectionStatistics("test")); for (int i = 0; i < 256; i++) { float boost = stats.normTable[i]; assertFalse("negative boost: " + boost + ", byte=" + i, boost < 0.0f); diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java index 279e30ccc0b..b26358251f5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java +++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java @@ -37,6 +37,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.similarities.SimilarityBase.BasicSimScorer; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -207,13 +208,13 @@ public class TestSimilarityBase extends LuceneTestCase { */ private void unitTestCore(BasicStats stats, float freq, int docLen) { for (SimilarityBase sim : sims) { - BasicStats realStats = (BasicStats) sim.computeWeight( + BasicStats realStats = ((BasicSimScorer) sim.scorer( (float)stats.getBoost(), toCollectionStats(stats), - toTermStats(stats)); + toTermStats(stats))).stats; float score = (float)sim.score(realStats, freq, docLen); float explScore = sim.explain( - realStats, 1, Explanation.match(freq, "freq"), docLen).getValue().floatValue(); + realStats, Explanation.match(freq, "freq"), docLen).getValue().floatValue(); assertFalse("Score infinite: " + sim.toString(), Float.isInfinite(score)); assertFalse("Score NaN: " + sim.toString(), Float.isNaN(score)); assertTrue("Score negative: " + sim.toString(), score >= 0); @@ -489,10 +490,10 @@ public class TestSimilarityBase extends LuceneTestCase { */ private void correctnessTestCore(SimilarityBase sim, float gold) { BasicStats stats = createStats(); - BasicStats realStats = (BasicStats) sim.computeWeight( + BasicStats realStats = ((BasicSimScorer) sim.scorer( (float)stats.getBoost(), toCollectionStats(stats), - toTermStats(stats)); + toTermStats(stats))).stats; float score = (float) sim.score(realStats, FREQ, DOC_LEN); assertEquals( sim.toString() + " score not correct.", gold, score, FLOAT_EPSILON); diff --git 
a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index 8ed0462c3af..f72ea664b93 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -235,7 +235,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { public void testSimple2() throws Exception { assumeTrue("Broken scoring: LUCENE-3723", - searcher.getSimilarity(true) instanceof TFIDFSimilarity); + searcher.getSimilarity() instanceof TFIDFSimilarity); SpanQuery q1 = new SpanTermQuery(new Term("gender", "female")); SpanQuery q2 = new SpanTermQuery(new Term("last", "smith")); SpanQuery q = new SpanNearQuery(new SpanQuery[] @@ -291,7 +291,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { public void testSpans2() throws Exception { assumeTrue("Broken scoring: LUCENE-3723", - searcher.getSimilarity(true) instanceof TFIDFSimilarity); + searcher.getSimilarity() instanceof TFIDFSimilarity); SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female")); SpanQuery qA2 = new SpanTermQuery(new Term("first", "james")); SpanQuery qA = new SpanOrQuery(qA1, new FieldMaskingSpanQuery(qA2, "gender")); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 004c06ed5a2..fbb59e3d9bf 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -148,7 +148,7 @@ public class WeightedSpanTermExtractor { } } else if (query instanceof CommonTermsQuery) { // specialized since rewriting would change the result query - // this query is TermContext sensitive. + // this query is index sensitive. 
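The comment tweak here reflects the patch-wide rename of TermContext to TermStates; note that the builder also gains a needsStats boolean, visible in the TestTermQuery and TestMinShouldMatch2 hunks above. A rough usage sketch, with reader standing in for any IndexReader:

    Term term = new Term("foo", "bar");
    // Pass true when term/collection statistics are needed (i.e. the query will be scored).
    TermStates states = TermStates.build(reader.getContext(), term, true);
    TermQuery query = new TermQuery(term, states); // reuses the pre-built per-segment states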
extractWeightedTerms(terms, query, boost); } else if (query instanceof DisjunctionMaxQuery) { for (Query clause : ((DisjunctionMaxQuery) query)) { diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java index cc9f3186304..a0e6d0a9662 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.text.BreakIterator; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.PriorityQueue; @@ -136,13 +137,15 @@ public class FieldHighlighter { BreakIterator breakIterator = this.breakIterator; final int contentLength = breakIterator.getText().getEndIndex(); + //TODO consider moving this part to an aggregate OffsetsEnum subclass so we have one enum that already has its weight PriorityQueue offsetsEnumQueue = new PriorityQueue<>(offsetsEnums.size() + 1); for (OffsetsEnum off : offsetsEnums) { off.setWeight(scorer.weight(contentLength, off.freq())); - off.nextPosition(); // go to first position - offsetsEnumQueue.add(off); + if (off.nextPosition()) {// go to first position + offsetsEnumQueue.add(off); + } } - offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination + offsetsEnumQueue.add(new OffsetsEnum.OfPostings(new BytesRef(), EMPTY)); // a sentinel for termination PriorityQueue passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> { if (left.getScore() < right.getScore()) { @@ -203,10 +206,9 @@ public class FieldHighlighter { assert term != null; passage.addMatch(start, end, term); // see if there are multiple occurrences of this term in this passage. If so, add them. - if (!off.hasMorePositions()) { + if (!off.nextPosition()) { break; // No more in the entire text. 
Already removed from pq; move on } - off.nextPosition(); start = off.startOffset(); end = off.endOffset(); if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage @@ -222,7 +224,7 @@ public class FieldHighlighter { p.sort(); } // sort in ascending order - Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset()); + Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset)); return passages; } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java index 155f0a76fb9..faef1062208 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java @@ -20,14 +20,12 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Map; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.automaton.CharacterRunAutomaton; @@ -41,9 +39,9 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton; public abstract class FieldOffsetStrategy { protected final String field; - protected final PhraseHelper phraseHelper; // Query: position-sensitive information TODO: rename - protected final BytesRef[] terms; // Query: free-standing terms - protected final CharacterRunAutomaton[] automata; // Query: free-standing wildcards (multi-term query) + protected final PhraseHelper phraseHelper; // Query: position-sensitive information + protected final BytesRef[] terms; // Query: all terms we extracted (some may be position sensitive) + protected final CharacterRunAutomaton[] automata; // Query: wildcards (i.e. 
multi-term query), not position sensitive public FieldOffsetStrategy(String field, BytesRef[] queryTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) { this.field = field; @@ -70,47 +68,50 @@ public abstract class FieldOffsetStrategy { return Collections.emptyList(); } - // For strict positions, get a Map of term to Spans: - // note: ScriptPhraseHelper.NONE does the right thing for these method calls - final Map strictPhrasesTermToSpans = - phraseHelper.getTermToSpans(leafReader, doc); - // Usually simply wraps terms in a List; but if willRewrite() then can be expanded - final List sourceTerms = - phraseHelper.expandTermsIfRewrite(terms, strictPhrasesTermToSpans); + final List offsetsEnums = new ArrayList<>(terms.length + automata.length); - final List offsetsEnums = new ArrayList<>(sourceTerms.size() + automata.length); + // Handle position insensitive terms (a subset of this.terms field): + final BytesRef[] insensitiveTerms; + if (phraseHelper.hasPositionSensitivity()) { + insensitiveTerms = phraseHelper.getAllPositionInsensitiveTerms(); + assert insensitiveTerms.length <= terms.length : "insensitive terms should be smaller set of all terms"; + } else { + insensitiveTerms = terms; + } + if (insensitiveTerms.length > 0) { + createOffsetsEnumsForTerms(insensitiveTerms, termsIndex, doc, offsetsEnums); + } - // Handle sourceTerms: - if (!sourceTerms.isEmpty()) { - TermsEnum termsEnum = termsIndex.iterator();//does not return null - for (BytesRef term : sourceTerms) { - if (termsEnum.seekExact(term)) { - PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); - - if (postingsEnum == null) { - // no offsets or positions available - throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); - } - - if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted - postingsEnum = phraseHelper.filterPostings(term, postingsEnum, strictPhrasesTermToSpans.get(term)); - if (postingsEnum != null) { - offsetsEnums.add(new OffsetsEnum(term, postingsEnum)); - } - } - } - } + // Handle spans + if (phraseHelper.hasPositionSensitivity()) { + phraseHelper.createOffsetsEnumsForSpans(leafReader, doc, offsetsEnums); } // Handle automata if (automata.length > 0) { - offsetsEnums.addAll(createAutomataOffsetsFromTerms(termsIndex, doc)); + createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums); } return offsetsEnums; } - protected List createAutomataOffsetsFromTerms(Terms termsIndex, int doc) throws IOException { + protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List results) throws IOException { + TermsEnum termsEnum = termsIndex.iterator();//does not return null + for (BytesRef term : sourceTerms) { + if (termsEnum.seekExact(term)) { + PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); + if (postingsEnum == null) { + // no offsets or positions available + throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); + } + if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted + results.add(new OffsetsEnum.OfPostings(term, postingsEnum)); + } + } + } + } + + protected void createOffsetsEnumsForAutomata(Terms termsIndex, int doc, List results) throws IOException { List> automataPostings = new ArrayList<>(automata.length); for (int i = 0; i < automata.length; i++) { automataPostings.add(new ArrayList<>()); @@ -118,6 +119,7 @@ public abstract class 
FieldOffsetStrategy { TermsEnum termsEnum = termsIndex.iterator(); BytesRef term; + CharsRefBuilder refBuilder = new CharsRefBuilder(); while ((term = termsEnum.next()) != null) { for (int i = 0; i < automata.length; i++) { @@ -132,7 +134,6 @@ public abstract class FieldOffsetStrategy { } } - List offsetsEnums = new ArrayList<>(automata.length); //will be at most this long for (int i = 0; i < automata.length; i++) { CharacterRunAutomaton automaton = automata[i]; List postingsEnums = automataPostings.get(i); @@ -140,14 +141,13 @@ public abstract class FieldOffsetStrategy { if (size > 0) { //only add if we have offsets BytesRef wildcardTerm = new BytesRef(automaton.toString()); if (size == 1) { //don't wrap in a composite if there's only one OffsetsEnum - offsetsEnums.add(new OffsetsEnum(wildcardTerm, postingsEnums.get(0))); + results.add(new OffsetsEnum.OfPostings(wildcardTerm, postingsEnums.get(0))); } else { - offsetsEnums.add(new OffsetsEnum(wildcardTerm, new CompositeOffsetsPostingsEnum(postingsEnums))); + results.add(new OffsetsEnum.OfPostings(wildcardTerm, new CompositeOffsetsPostingsEnum(postingsEnums))); } } } - return offsetsEnums; } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java index 708f5c33520..f0a46a5d838 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.lucene.search.uhighlight; import java.io.Closeable; @@ -25,25 +26,19 @@ import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.util.BytesRef; /** - * Holds the term ({@link BytesRef}), {@link PostingsEnum}, offset iteration tracking. - * It is advanced with the underlying postings and is placed in a priority queue by + * An enumeration/iterator of a term and its offsets for use by {@link FieldHighlighter}. + * It is advanced and is placed in a priority queue by * {@link FieldHighlighter#highlightOffsetsEnums(List)} based on the start offset. * * @lucene.internal */ -public class OffsetsEnum implements Comparable, Closeable { - private final BytesRef term; - private final PostingsEnum postingsEnum; // with offsets +public abstract class OffsetsEnum implements Comparable, Closeable { private float weight; // set once in highlightOffsetsEnums - private int posCounter = 0; // the occurrence counter of this term within the text being highlighted. - - public OffsetsEnum(BytesRef term, PostingsEnum postingsEnum) throws IOException { - this.term = term; // can be null - this.postingsEnum = Objects.requireNonNull(postingsEnum); - } // note: the ordering clearly changes as the postings enum advances + // note: would be neat to use some Comparator utilities with method + // references but our methods throw IOException @Override public int compareTo(OffsetsEnum other) { try { @@ -51,53 +46,41 @@ public class OffsetsEnum implements Comparable, Closeable { if (cmp != 0) { return cmp; // vast majority of the time we return here. 
} - if (this.term == null || other.term == null) { - if (this.term == null && other.term == null) { + final BytesRef thisTerm = this.getTerm(); + final BytesRef otherTerm = other.getTerm(); + if (thisTerm == null || otherTerm == null) { + if (thisTerm == null && otherTerm == null) { return 0; - } else if (this.term == null) { + } else if (thisTerm == null) { return 1; // put "this" (wildcard mtq enum) last } else { return -1; } } - return term.compareTo(other.term); + return thisTerm.compareTo(otherTerm); } catch (IOException e) { throw new RuntimeException(e); } } - /** The term at this position; usually always the same. This term is a reference that is safe to continue to refer to, - * even after we move to next position. */ - public BytesRef getTerm() throws IOException { - // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here - return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack! - } + /** + * Advances to the next position and returns true, or returns false if it can't. + * Note that the initial state of this class is not positioned. + */ + public abstract boolean nextPosition() throws IOException; - public PostingsEnum getPostingsEnum() { - return postingsEnum; - } + /** An estimate of the number of occurrences of this term/OffsetsEnum. */ + public abstract int freq() throws IOException; - public int freq() throws IOException { - return postingsEnum.freq(); - } + /** + * The term at this position; usually always the same. + * This BytesRef is safe to continue to refer to, even after we move to the next position. + */ + public abstract BytesRef getTerm() throws IOException; - public boolean hasMorePositions() throws IOException { - return posCounter < postingsEnum.freq(); - } + public abstract int startOffset() throws IOException; - public void nextPosition() throws IOException { - assert hasMorePositions(); - posCounter++; - postingsEnum.nextPosition(); - } - - public int startOffset() throws IOException { - return postingsEnum.startOffset(); - } - - public int endOffset() throws IOException { - return postingsEnum.endOffset(); - } + public abstract int endOffset() throws IOException; public float getWeight() { return weight; @@ -109,9 +92,66 @@ public class OffsetsEnum implements Comparable, Closeable { @Override public void close() throws IOException { - // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op. - if (postingsEnum instanceof Closeable) { - ((Closeable) postingsEnum).close(); + } + + @Override + public String toString() { + final String name = getClass().getSimpleName(); + try { + return name + "(term:" + getTerm().utf8ToString() +")"; + } catch (Exception e) { + return name; } } + + /** + * Based on a {@link PostingsEnum} -- the typical/standard OE impl. + */ + public static class OfPostings extends OffsetsEnum { + private final BytesRef term; + private final PostingsEnum postingsEnum; // with offsets + + private int posCounter = 0; // the occurrence counter of this term within the text being highlighted. 
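Under the new contract in the javadoc above, nextPosition() both advances and reports whether another position exists, replacing the old hasMorePositions()/nextPosition() pair. A typical consumer loop, with term and postingsEnum as placeholders, might look like:

    OffsetsEnum oe = new OffsetsEnum.OfPostings(term, postingsEnum);
    while (oe.nextPosition()) { // starts unpositioned; returns false once exhausted
      int start = oe.startOffset();
      int end = oe.endOffset();
      // ... record a highlight covering [start, end) ...
    }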
+ + public OfPostings(BytesRef term, PostingsEnum postingsEnum) throws IOException { + this.term = Objects.requireNonNull(term); + this.postingsEnum = Objects.requireNonNull(postingsEnum); + } + + public PostingsEnum getPostingsEnum() { + return postingsEnum; + } + + @Override + public boolean nextPosition() throws IOException { + if (posCounter < postingsEnum.freq()) { + posCounter++; + postingsEnum.nextPosition(); // note: we don't need to save the position + return true; + } else { + return false; + } + } + + @Override + public int freq() throws IOException { + return postingsEnum.freq(); + } + + @Override + public BytesRef getTerm() throws IOException { + return term; + } + + @Override + public int startOffset() throws IOException { + return postingsEnum.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return postingsEnum.endOffset(); + } + + } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java index 3efb694f9e7..24b1015d104 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java @@ -98,6 +98,24 @@ public class Passage { numMatches = 0; } + /** For debugging. ex: Passage[0-22]{yin[0-3],yang[4-8],yin[10-13]}score=2.4964213 */ + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("Passage[").append(startOffset).append('-').append(endOffset).append(']'); + buf.append('{'); + for (int i = 0; i < numMatches; i++) { + if (i != 0) { + buf.append(','); + } + buf.append(matchTerms[i].utf8ToString()); + buf.append('[').append(matchStarts[i] - startOffset).append('-').append(matchEnds[i] - startOffset).append(']'); + } + buf.append('}'); + buf.append("score=").append(score); + return buf.toString(); + } + /** * Start offset of this passage. 
* diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java index cfb65708df8..2edb19244c6 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java @@ -17,82 +17,58 @@ package org.apache.lucene.search.uhighlight; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.PriorityQueue; import java.util.Set; import java.util.TreeSet; import java.util.function.Function; import java.util.function.Predicate; -import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; -import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; import org.apache.lucene.search.highlight.WeightedSpanTerm; import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; import org.apache.lucene.search.spans.SpanCollector; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanWeight; +import org.apache.lucene.search.spans.SpanScorer; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PriorityQueue; /** - * Helps the {@link FieldOffsetStrategy} with strict position highlighting (e.g. highlight phrases correctly). + * Helps the {@link FieldOffsetStrategy} with position sensitive queries (e.g. highlight phrases correctly). * This is a stateful class holding information about the query, but it can (and is) re-used across highlighting - * documents. Despite this state; it's immutable after construction. The approach taken in this class is very similar - * to the standard Highlighter's {@link WeightedSpanTermExtractor} which is in fact re-used here. However, we ought to - * completely rewrite it to use the SpanCollector interface to collect offsets directly. We'll get better - * phrase accuracy. + * documents. Despite this state, it's immutable after construction. * * @lucene.internal */ +// TODO rename to SpanHighlighting ? public class PhraseHelper { public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_", (s) -> false, spanQuery -> null, query -> null, true); - //TODO it seems this ought to be a general thing on Spans? 
- private static final Comparator SPANS_COMPARATOR = (o1, o2) -> { - int cmp = Integer.compare(o1.docID(), o2.docID()); - if (cmp != 0) { - return cmp; - } - if (o1.docID() == DocIdSetIterator.NO_MORE_DOCS) { - return 0; // don't ask for start/end position; not sure if we can even call those methods - } - cmp = Integer.compare(o1.startPosition(), o2.startPosition()); - if (cmp != 0) { - return cmp; - } else { - return Integer.compare(o1.endPosition(), o2.endPosition()); - } - }; - private final String fieldName; - private final Set positionInsensitiveTerms; // (TermQuery terms) + private final Set positionInsensitiveTerms; // (TermQuery terms) private final Set spanQueries; private final boolean willRewrite; private final Predicate fieldMatcher; @@ -114,13 +90,27 @@ public class PhraseHelper { this.fieldName = field; this.fieldMatcher = fieldMatcher; // filter terms to those we want - positionInsensitiveTerms = new FieldFilteringTermSet(); + positionInsensitiveTerms = new HashSet<>(); spanQueries = new HashSet<>(); // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls boolean[] mustRewriteHolder = {false}; // boolean wrapped in 1-ary array so it's mutable from inner class + // When we call Weight.extractTerms, we do it on clauses that are NOT position sensitive. + // We only want to track a Set of bytes for the Term, not the Term class with its field part. + Set extractPosInsensitiveTermsTarget = new TreeSet() { + @Override + public boolean add(Term term) { + // don't call super.add; we don't actually use the superclass + if (fieldMatcher.test(term.field())) { + return positionInsensitiveTerms.add(term.bytes()); + } else { + return false; + } + } + }; + // For TermQueries or other position insensitive queries, collect the Terms. // For other Query types, WSTE will convert to an equivalent SpanQuery. NOT extracting position spans here. new WeightedSpanTermExtractor(field) { @@ -155,13 +145,15 @@ return true; //TODO set to false and provide a hook to customize certain queries. } + // called on Query types that are NOT position sensitive, e.g. TermQuery @Override protected void extractWeightedTerms(Map terms, Query query, float boost) throws IOException { query.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER, ScoreMode.COMPLETE_NO_SCORES, boost) - .extractTerms(positionInsensitiveTerms); + .extractTerms(extractPosInsensitiveTermsTarget); } + // called on SpanQueries. Some other position-sensitive queries like PhraseQuery are converted beforehand @Override protected void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery, float boost) throws IOException { @@ -174,7 +166,6 @@ } } - // TODO allow users to override the answer to mustRewriteQuery boolean mustRewriteQuery = mustRewriteQuery(spanQuery); if (ignoreQueriesNeedingRewrite && mustRewriteQuery) { return;// ignore this query @@ -194,14 +185,14 @@ willRewrite = mustRewriteHolder[0]; } - Set getSpanQueries() { + public Set getSpanQueries() { return spanQueries; } /** * If there is no position sensitivity then use of the instance of this class can be ignored. */ - boolean hasPositionSensitivity() { + public boolean hasPositionSensitivity() { return spanQueries.isEmpty() == false; } @@ -210,335 +201,85 @@ * custom things. When true, the resulting term list will probably be different than what it was known * to be initially. 
*/ - boolean willRewrite() { + public boolean willRewrite() { return willRewrite; } - /** - * Collect a list of pre-positioned {@link Spans} for each term, given a reader that has just one document. - * It returns no mapping for query terms that occurs in a position insensitive way which therefore don't - * need to be filtered. - */ - Map getTermToSpans(LeafReader leafReader, int doc) - throws IOException { - if (spanQueries.isEmpty()) { - return Collections.emptyMap(); - } - final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName); - // for each SpanQuery, collect the member spans into a map. - Map result = new HashMap<>(); - for (SpanQuery spanQuery : spanQueries) { - getTermToSpans(spanQuery, filteredReader.getContext(), doc, result); - } + /** Returns the terms that are position-insensitive (sorted). */ + public BytesRef[] getAllPositionInsensitiveTerms() { + BytesRef[] result = positionInsensitiveTerms.toArray(new BytesRef[positionInsensitiveTerms.size()]); + Arrays.sort(result); return result; } - // code extracted & refactored from WSTE.extractWeightedSpanTerms() - private void getTermToSpans(SpanQuery spanQuery, LeafReaderContext readerContext, - int doc, Map result) - throws IOException { - // note: in WSTE there was some field specific looping that seemed pointless so that isn't here. - final IndexSearcher searcher = new IndexSearcher(readerContext.reader()); + /** Given the internal SpanQueries, produce a number of OffsetsEnum into the {@code results} param. */ + public void createOffsetsEnumsForSpans(LeafReader leafReader, int docId, List results) throws IOException { + leafReader = new SingleFieldWithOffsetsFilterLeafReader(leafReader, fieldName); + //TODO avoid searcher and do what it does to rewrite & get weight? + IndexSearcher searcher = new IndexSearcher(leafReader); searcher.setQueryCache(null); - if (willRewrite) { - spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done - } - - // Get the underlying query terms - TreeSet termSet = new FieldFilteringTermSet(); // sorted so we can loop over results in order shortly... - searcher.createWeight(spanQuery, ScoreMode.COMPLETE_NO_SCORES, 1.0f).extractTerms(termSet);//needsScores==false - - // Get Spans by running the query against the reader - // TODO it might make sense to re-use/cache the Spans instance, to advance forward between docs - SpanWeight spanWeight = (SpanWeight) searcher.createNormalizedWeight(spanQuery, ScoreMode.COMPLETE_NO_SCORES); - Spans spans = spanWeight.getSpans(readerContext, SpanWeight.Postings.POSITIONS); - if (spans == null) { - return; - } - TwoPhaseIterator twoPhaseIterator = spans.asTwoPhaseIterator(); - if (twoPhaseIterator != null) { - if (twoPhaseIterator.approximation().advance(doc) != doc || !twoPhaseIterator.matches()) { - return; - } - } else if (spans.advance(doc) != doc) { // preposition, and return doing nothing if find none - return; - } - - // Consume the Spans into a cache. This instance is used as a source for multiple cloned copies. - // It's important we do this and not re-use the same original Spans instance since these will be iterated - // independently later on; sometimes in ways that prevents sharing the original Spans. 
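The deleted CachedSpans machinery here replayed each term's Spans independently; the new createOffsetsEnumsForSpans (its body continues below) instead drives all span queries through a single org.apache.lucene.util.PriorityQueue ordered by start position. The core of that pattern is roughly the following sketch, where the queued Spans are assumed to be already positioned on the target document and numSpans is an illustrative name:

    PriorityQueue<Spans> queue = new PriorityQueue<Spans>(numSpans) {
      @Override
      protected boolean lessThan(Spans a, Spans b) {
        return a.startPosition() <= b.startPosition(); // min-heap on start position
      }
    };
    // after add()-ing each positioned Spans:
    while (queue.size() > 0) {
      Spans top = queue.top();
      // ... collect offsets at top's current position ...
      if (top.nextStartPosition() == Spans.NO_MORE_POSITIONS) {
        queue.pop();
      } else {
        queue.updateTop(); // start position advanced; restore heap order
      }
    }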
- CachedSpans cachedSpansSource = new CachedSpans(spans); // consumes spans for this doc only and caches - spans = null;// we don't use it below - - // Map terms to a Spans instance (aggregate if necessary) - for (final Term queryTerm : termSet) { - // note: we expect that at least one query term will pass these filters. This is because the collected - // spanQuery list were already filtered by these conditions. - if (positionInsensitiveTerms.contains(queryTerm)) { - continue; - } - // copy-constructor refers to same data (shallow) but has iteration state from the beginning - CachedSpans cachedSpans = new CachedSpans(cachedSpansSource); - // Add the span to whatever span may or may not exist - Spans existingSpans = result.get(queryTerm.bytes()); - if (existingSpans != null) { - if (existingSpans instanceof MultiSpans) { - ((MultiSpans) existingSpans).addSpans(cachedSpans); - } else { // upgrade to MultiSpans - MultiSpans multiSpans = new MultiSpans(); - multiSpans.addSpans(existingSpans); - multiSpans.addSpans(cachedSpans); - result.put(queryTerm.bytes(), multiSpans); - } - } else { - result.put(queryTerm.bytes(), cachedSpans); - } - } - } - - /** - * Returns terms as a List, but expanded to any terms in phraseHelper' keySet if present. That can only - * happen if willRewrite() is true. - */ - List expandTermsIfRewrite(BytesRef[] terms, Map strictPhrasesTermToSpans) { - if (willRewrite()) { - Set allTermSet = new LinkedHashSet<>(terms.length + strictPhrasesTermToSpans.size()); - Collections.addAll(allTermSet, terms);//FYI already sorted; will keep order - if (allTermSet.addAll(strictPhrasesTermToSpans.keySet())) { // true if any were added - List sourceTerms = Arrays.asList(allTermSet.toArray(new BytesRef[allTermSet.size()])); - sourceTerms.sort(Comparator.naturalOrder()); - return sourceTerms; - } - } - return Arrays.asList(terms); // no rewrite; use original terms - } - - /** - * Returns a filtered postings where the position must be in the given Spans. - * The Spans must be in a positioned state (not initial) and should not be shared between other terms. - * {@code postingsEnum} should be positioned at the - * document (the same one as the spans) but it hasn't iterated the positions yet. - * The Spans should be the result of a simple - * lookup from {@link #getTermToSpans(LeafReader, int)}, and so it could be null which could mean - * either it's completely filtered or that there should be no filtering; this class knows what to do. - *
- * Due to limitations in filtering, the {@link PostingsEnum#freq()} is un-changed even if some positions - * get filtered. So when {@link PostingsEnum#nextPosition()} is called or {@code startOffset} or {@code - * endOffset} beyond the "real" positions, these methods returns {@link Integer#MAX_VALUE}. - *
- * This will return null if it's completely filtered out (i.e. effectively has no postings). - */ - PostingsEnum filterPostings(BytesRef term, PostingsEnum postingsEnum, Spans spans) - throws IOException { - if (spans == null) { - if (hasPositionSensitivity() == false || positionInsensitiveTerms.contains(new Term(fieldName, term))) { - return postingsEnum; // no filtering - } else { - return null; // completely filtered out - } - } - if (postingsEnum.docID() != spans.docID()) { - throw new IllegalStateException("Spans & Postings doc ID misaligned or not positioned"); - } - - return new FilterLeafReader.FilterPostingsEnum(postingsEnum) { - // freq() is max times nextPosition can be called. We'll set this var to -1 when exhausted. - int remainingPositions = postingsEnum.freq(); + // for each SpanQuery, grab its Spans and put it into a PriorityQueue + PriorityQueue<Spans> spansPriorityQueue = new PriorityQueue<Spans>(spanQueries.size()) { @Override - public String toString() { - String where; - try { - where = "[" + startOffset() + ":" + endOffset() + "]"; - } catch (IOException e) { - where = "[" + e + "]"; - } - return "'" + term.utf8ToString() + "'@" + where + " filtered by " + spans; - } - - @Override - public int nextDoc() throws IOException { - throw new IllegalStateException("not expected"); // don't need to implement; just used on one doc - } - - @Override - public int advance(int target) throws IOException { - throw new IllegalStateException("not expected"); // don't need to implement; just used on one doc - } - - @Override - public int nextPosition() throws IOException { - // loop over posting positions... - NEXT_POS_LOOP: - while (remainingPositions > 0) { - final int thisPos = super.nextPosition(); - remainingPositions--; - - // loop spans forward (if necessary) while the span end is behind thisPos - while (spans.endPosition() <= thisPos) { - if (spans.nextStartPosition() == Spans.NO_MORE_POSITIONS) { // advance - break NEXT_POS_LOOP; - } - assert spans.docID() == postingsEnum.docID(); - } - - // is this position within the span? - if (thisPos >= spans.startPosition()) { - assert thisPos < spans.endPosition(); // guaranteed by previous loop - return thisPos; // yay! - } - // else continue and try the next position - } - remainingPositions = -1; // signify done - return Integer.MAX_VALUE; - } - - @Override - public int startOffset() throws IOException { - return remainingPositions >= 0 ? super.startOffset() : Integer.MAX_VALUE; - } - - @Override - public int endOffset() throws IOException { - return remainingPositions >= 0 ? super.endOffset() : Integer.MAX_VALUE; + protected boolean lessThan(Spans a, Spans b) { + return a.startPosition() <= b.startPosition(); } }; - } - - /** - * Simple TreeSet that filters out Terms not matching the provided predicate on {@code add()}.
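// ---------------------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the queue above is ordered by Spans.startPosition(), which
// turns several per-query position streams into one globally ascending stream -- a classic k-way merge.
// A self-contained stand-in using plain Java (no Lucene types; the stream contents are made up):

import java.util.PriorityQueue;

public class SpanMergeSketch {
  private static final class Cursor {
    final int[] starts; // ascending match start positions, like repeated Spans.nextStartPosition()
    int idx = 0;
    Cursor(int[] starts) { this.starts = starts; }
    int current() { return starts[idx]; }
    boolean advance() { return ++idx < starts.length; }
  }

  public static void main(String[] args) {
    int[][] streams = { {0, 7, 12}, {3, 8}, {1, 15} }; // three hypothetical span queries, one doc
    PriorityQueue<Cursor> pq = new PriorityQueue<>((a, b) -> Integer.compare(a.current(), b.current()));
    for (int[] s : streams) {
      if (s.length > 0) {
        pq.add(new Cursor(s)); // mirrors adding a Spans only after its first nextStartPosition()
      }
    }
    while (!pq.isEmpty()) {
      Cursor top = pq.poll();
      System.out.println("collect start=" + top.current()); // analogous to spans.collect(spanCollector)
      if (top.advance()) {
        pq.add(top); // analogous to updateTop(); an exhausted cursor is simply dropped, like pop()
      }
    }
    // prints starts in ascending order: 0 1 3 7 8 12 15 -- so a collector mostly appends at the tail
  }
}
// ---------------------------------------------------------------------------------------------------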
- */ - private class FieldFilteringTermSet extends TreeSet<Term> { - @Override - public boolean add(Term term) { - if (fieldMatcher.test(term.field())) { - if (term.field().equals(fieldName)) { - return super.add(term); - } else { - return super.add(new Term(fieldName, term.bytes())); + for (Query query : spanQueries) { + Weight weight = searcher.createNormalizedWeight(query, ScoreMode.COMPLETE_NO_SCORES); + Scorer scorer = weight.scorer(leafReader.getContext()); + if (scorer == null) { + continue; + } + TwoPhaseIterator twoPhaseIterator = scorer.twoPhaseIterator(); + if (twoPhaseIterator != null) { + if (twoPhaseIterator.approximation().advance(docId) != docId || !twoPhaseIterator.matches()) { + continue; } + } else if (scorer.iterator().advance(docId) != docId) { // pre-position; if this doc doesn't match, move on to the next query + continue; + } + + Spans spans = ((SpanScorer) scorer).getSpans(); + assert spans.docID() == docId; + if (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { + spansPriorityQueue.add(spans); + } + } + + // Iterate the Spans in the PriorityQueue, collecting as we go. By using a PriorityQueue ordered by position, + // the underlying offsets in our collector will be mostly appended to the end of arrays (efficient). + // note: alternatively it'd be interesting if we produced one OffsetsEnum that internally advanced + // this PriorityQueue when nextPosition is called; it would cap what we have to cache for large docs and + // exiting early (due to maxLen) is easy. + // But at least we have an accurate "freq" and it shouldn't be too much data to collect. Even SpanScorer + // navigates the spans fully to compute a good freq (and thus score)! + OffsetSpanCollector spanCollector = new OffsetSpanCollector(); + while (spansPriorityQueue.size() > 0) { + Spans spans = spansPriorityQueue.top(); + //TODO limit to a capped endOffset length somehow so we can break this loop early + spans.collect(spanCollector); + + if (spans.nextStartPosition() == Spans.NO_MORE_POSITIONS) { + spansPriorityQueue.pop(); } else { - return false; + spansPriorityQueue.updateTop(); } } + results.addAll(spanCollector.termToOffsetsEnums.values()); } - /** - * A single {@link Spans} view over multiple spans. At least one span is mandatory, but you should probably - * supply more than one. Furthermore, the given spans are expected to be positioned to a document already - * via a call to next or advance). - */ // TODO move to Lucene core as a Spans utility class? - static class MultiSpans extends Spans { - final PriorityQueue<Spans> spansQueue = new PriorityQueue<>(SPANS_COMPARATOR); - long cost; - - void addSpans(Spans spans) { - if (spans.docID() < 0 || spans.docID() == NO_MORE_DOCS) { - throw new IllegalArgumentException("Expecting given spans to be in a positioned state."); - } - spansQueue.add(spans); - cost = Math.max(cost, spans.cost()); - } - - // DocIdSetIterator methods: - - @Override - public int nextDoc() throws IOException { - if (spansQueue.isEmpty()) { - return NO_MORE_DOCS; - } - return advance(spansQueue.peek().docID() + 1); - } - - @Override - public int advance(int target) throws IOException { - if (spansQueue.isEmpty()) { - return NO_MORE_DOCS; - } - while (true) { - Spans spans = spansQueue.peek(); - if (spans.docID() >= target) { - return spans.docID(); - } - spansQueue.remove(); // must remove before modify state - if (spans.advance(target) != NO_MORE_DOCS) { // ...
otherwise it's not re-added - spansQueue.add(spans); - } else if (spansQueue.isEmpty()) { - return NO_MORE_DOCS; - } - } - } - - @Override - public int docID() { - if (spansQueue.isEmpty()) { - return NO_MORE_DOCS; - } - return spansQueue.peek().docID(); - } - - @Override - public long cost() { - return cost; - } - - // Spans methods: - - @Override - public int nextStartPosition() throws IOException { - // advance any spans at the initial position per document - boolean atDocStart = false; - while (spansQueue.peek().startPosition() == -1) { - atDocStart = true; - Spans headSpans = spansQueue.remove(); // remove because we will change state - headSpans.nextStartPosition(); - spansQueue.add(headSpans); - } - if (!atDocStart) { - Spans headSpans = spansQueue.remove(); // remove because we will change state - headSpans.nextStartPosition(); - spansQueue.add(headSpans); - } - return startPosition(); - } - - @Override - public int startPosition() { - return spansQueue.peek().startPosition(); - } - - @Override - public int endPosition() { - return spansQueue.peek().endPosition(); - } - - @Override - public int width() { - return spansQueue.peek().width(); - } - - @Override - public void collect(SpanCollector collector) throws IOException { - spansQueue.peek().collect(collector); - } - - @Override - public float positionsCost() { - return 100f;// no idea; and we can't delegate due to not allowing to call it dependent on TwoPhaseIterator - } - } - - //TODO move up; it's currently inbetween other inner classes that are related /** * Needed to support the ability to highlight a query irrespective of the field a query refers to * (aka requireFieldMatch=false). * This reader will just delegate every call to a single field in the wrapped * LeafReader. This way we ensure that all queries going through this reader target the same field. */ - static final class SingleFieldFilterLeafReader extends FilterLeafReader { + private static final class SingleFieldWithOffsetsFilterLeafReader extends FilterLeafReader { final String fieldName; - SingleFieldFilterLeafReader(LeafReader in, String fieldName) { + SingleFieldWithOffsetsFilterLeafReader(LeafReader in, String fieldName) { super(in); this.fieldName = fieldName; } @@ -550,22 +291,18 @@ public class PhraseHelper { @Override public Terms terms(String field) throws IOException { - return super.terms(fieldName); - } - - @Override - public NumericDocValues getNumericDocValues(String field) throws IOException { - return super.getNumericDocValues(fieldName); - } - - @Override - public BinaryDocValues getBinaryDocValues(String field) throws IOException { - return super.getBinaryDocValues(fieldName); - } - - @Override - public SortedDocValues getSortedDocValues(String field) throws IOException { - return super.getSortedDocValues(fieldName); + // ensure the underlying PostingsEnum returns offsets. It's sad we have to do this to use the SpanCollector. + return new FilterTerms(super.terms(fieldName)) { + @Override + public TermsEnum iterator() throws IOException { + return new FilterTermsEnum(in.iterator()) { + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + return super.postings(reuse, flags | PostingsEnum.OFFSETS); + } + }; + } + }; } @Override @@ -584,99 +321,102 @@ public class PhraseHelper { } } + private class OffsetSpanCollector implements SpanCollector { + Map termToOffsetsEnums = new HashMap<>(); - /** - * A Spans based on a list of cached spans for one doc. It is pre-positioned to this doc. 
- */ - private static class CachedSpans extends Spans { - - private static class CachedSpan { - final int start; - final int end; - - CachedSpan(int start, int end) { - this.start = start; - this.end = end; + @Override + public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { + if (!fieldMatcher.test(term.field())) { + return; } - } - final int docId; - final ArrayList cachedSpanList; - int index = -1; - - CachedSpans(Spans spans) throws IOException { - this.docId = spans.docID(); - assert this.docId != -1; - // Consume the spans for this doc into a list. There's always at least one; the first/current one. - cachedSpanList = new ArrayList<>(); - while (spans.nextStartPosition() != NO_MORE_POSITIONS) { - cachedSpanList.add(new CachedSpan(spans.startPosition(), spans.endPosition())); + SpanCollectedOffsetsEnum offsetsEnum = termToOffsetsEnums.get(term.bytes()); + if (offsetsEnum == null) { + // If it's pos insensitive we handle it outside of PhraseHelper. term.field() is from the Query. + if (positionInsensitiveTerms.contains(term.bytes())) { + return; + } + offsetsEnum = new SpanCollectedOffsetsEnum(term.bytes(), postings.freq()); + termToOffsetsEnums.put(term.bytes(), offsetsEnum); } - assert !cachedSpanList.isEmpty(); // bad Span impl? - } - - /** - * Clone; reset iteration state. - */ - CachedSpans(CachedSpans cloneMe) { - docId = cloneMe.docId; - cachedSpanList = cloneMe.cachedSpanList; + offsetsEnum.add(postings.startOffset(), postings.endOffset()); } @Override - public int nextDoc() throws IOException { - throw new UnsupportedOperationException("Not expected"); + public void reset() { // called when at a new position. We don't care. + } + } + + private static class SpanCollectedOffsetsEnum extends OffsetsEnum { + // TODO perhaps optionally collect (and expose) payloads? + private final BytesRef term; + private final int[] startOffsets; + private final int[] endOffsets; + private int numPairs = 0; + private int enumIdx = -1; + + private SpanCollectedOffsetsEnum(BytesRef term, int postingsFreq) { + this.term = term; + this.startOffsets = new int[postingsFreq]; // hopefully not wasteful? At least we needn't resize it. + this.endOffsets = new int[postingsFreq]; + } + + // called from collector before it's navigated + void add(int startOffset, int endOffset) { + assert enumIdx == -1 : "bad state"; + + // loop backwards since we expect a match at the end or close to it. We expect O(1) not O(N). 
+ int pairIdx = numPairs - 1; + for (; pairIdx >= 0; pairIdx--) { + int iStartOffset = startOffsets[pairIdx]; + int iEndOffset = endOffsets[pairIdx]; + int cmp = Integer.compare(iStartOffset, startOffset); + if (cmp == 0) { + cmp = Integer.compare(iEndOffset, endOffset); + } + if (cmp == 0) { + return; // we already have this offset-pair for this term + } else if (cmp < 0) { + break; //we will insert offsetPair to the right of pairIdx + } + } + // pairIdx is now one position to the left of where we insert the new pair + // shift right any pairs by one to make room: the pairs at [pairIdx+1, numPairs) move up one slot + final int shiftLen = numPairs - (pairIdx + 1); + if (shiftLen > 0) { + System.arraycopy(startOffsets, pairIdx + 1, startOffsets, pairIdx + 2, shiftLen); + System.arraycopy(endOffsets, pairIdx + 1, endOffsets, pairIdx + 2, shiftLen); + } + // now we can place the offset pair + startOffsets[pairIdx + 1] = startOffset; + endOffsets[pairIdx + 1] = endOffset; + numPairs++; + } @Override - public int advance(int target) throws IOException { - throw new UnsupportedOperationException("Not expected"); + public boolean nextPosition() throws IOException { + return ++enumIdx < numPairs; } @Override - public int docID() { - return docId; + public int freq() throws IOException { + return numPairs; } @Override - public long cost() { - return 1; + public BytesRef getTerm() throws IOException { + return term; } @Override - public int nextStartPosition() throws IOException { - index++; - return startPosition(); + public int startOffset() throws IOException { + return startOffsets[enumIdx]; } @Override - public int startPosition() { - return index < 0 ? - -1 : index >= cachedSpanList.size() ? - NO_MORE_POSITIONS : cachedSpanList.get(index).start; + public int endOffset() throws IOException { + return endOffsets[enumIdx]; } + } - @Override - public int endPosition() { - return index < 0 ? - -1 : index >= cachedSpanList.size() ?
- NO_MORE_POSITIONS : cachedSpanList.get(index).end; - } - - @Override - public int width() { - return endPosition() - startPosition(); - } - - @Override - public void collect(SpanCollector collector) throws IOException { - throw new UnsupportedOperationException("Not expected"); - } - - @Override - public float positionsCost() { - return 1f; - } - - } // class CachedSpans } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java index 28eb6b1a613..5f47a5daac7 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.search.uhighlight; -import java.io.Closeable; import java.io.IOException; import java.util.Collections; import java.util.List; @@ -26,7 +25,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; @@ -63,29 +61,20 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy { @Override public List getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException { - TokenStream tokenStream = tokenStream(content); - PostingsEnum mtqPostingsEnum = new TokenStreamPostingsEnum(tokenStream, automata); - mtqPostingsEnum.advance(docId); - return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum)); + return Collections.singletonList(new TokenStreamOffsetsEnum(tokenStream(content), automata)); } - // See class javadocs. - // TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum. 
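// ---------------------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the removed TODO above was evidently acted on -- OffsetsEnum
// is now an abstract base class that both TokenStreamOffsetsEnum (below) and PhraseHelper's
// SpanCollectedOffsetsEnum extend. Inferring the contract solely from the overrides visible in this
// diff, the base presumably looks roughly like:
//
//   public abstract class OffsetsEnum implements Closeable {
//     public abstract boolean nextPosition() throws IOException; // false once exhausted
//     public abstract int freq() throws IOException;
//     public abstract BytesRef getTerm() throws IOException;
//     public abstract int startOffset() throws IOException;
//     public abstract int endOffset() throws IOException;
//     @Override
//     public void close() throws IOException {}
//     // plus a concrete OfPostings subclass wrapping a PostingsEnum; see the try-with-resources use
//     // of OffsetsEnum.OfPostings in TestUnifiedHighlighterExtensibility further down
//   }
// ---------------------------------------------------------------------------------------------------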
- private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable { + private static class TokenStreamOffsetsEnum extends OffsetsEnum { TokenStream stream; // becomes null when closed final CharacterRunAutomaton[] matchers; final CharTermAttribute charTermAtt; final OffsetAttribute offsetAtt; - int currentDoc = -1; int currentMatch = -1; - int currentStartOffset = -1; - - int currentEndOffset = -1; final BytesRef matchDescriptions[]; - TokenStreamPostingsEnum(TokenStream ts, CharacterRunAutomaton[] matchers) throws IOException { + TokenStreamOffsetsEnum(TokenStream ts, CharacterRunAutomaton[] matchers) throws IOException { this.stream = ts; this.matchers = matchers; matchDescriptions = new BytesRef[matchers.length]; @@ -95,15 +84,13 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy { } @Override - public int nextPosition() throws IOException { + public boolean nextPosition() throws IOException { if (stream != null) { while (stream.incrementToken()) { for (int i = 0; i < matchers.length; i++) { if (matchers[i].run(charTermAtt.buffer(), 0, charTermAtt.length())) { - currentStartOffset = offsetAtt.startOffset(); - currentEndOffset = offsetAtt.endOffset(); currentMatch = i; - return 0; + return true; } } } @@ -111,8 +98,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy { close(); } // exhausted - currentStartOffset = currentEndOffset = Integer.MAX_VALUE; - return Integer.MAX_VALUE; + return false; } @Override @@ -122,45 +108,23 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy { @Override public int startOffset() throws IOException { - assert currentStartOffset >= 0; - return currentStartOffset; + return offsetAtt.startOffset(); } @Override public int endOffset() throws IOException { - assert currentEndOffset >= 0; - return currentEndOffset; + return offsetAtt.endOffset(); } - // TOTAL HACK; used in OffsetsEnum.getTerm() @Override - public BytesRef getPayload() throws IOException { + public BytesRef getTerm() throws IOException { if (matchDescriptions[currentMatch] == null) { + // these CharRunAutomata are subclassed so that toString() returns the query matchDescriptions[currentMatch] = new BytesRef(matchers[currentMatch].toString()); } return matchDescriptions[currentMatch]; } - @Override - public int docID() { - return currentDoc; - } - - @Override - public int nextDoc() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int advance(int target) throws IOException { - return currentDoc = target; - } - - @Override - public long cost() { - return 0; - } - @Override public void close() throws IOException { if (stream != null) { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java index 96ec15501ff..086d7a03cfb 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java @@ -23,11 +23,14 @@ import java.nio.charset.StandardCharsets; import java.text.BreakIterator; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Predicate; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.apache.lucene.analysis.Analyzer; import 
org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; @@ -49,6 +52,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.junit.After; @@ -81,6 +85,36 @@ public class TestUnifiedHighlighter extends LuceneTestCase { dir.close(); } + static UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer) { + return randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class)); + } + + static UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer, + EnumSet mandatoryFlags) { + if (random().nextBoolean()) { + return new UnifiedHighlighter(searcher, indexAnalyzer); + } else { + final UnifiedHighlighter uh = new UnifiedHighlighter(searcher, indexAnalyzer) { + @Override + protected Set getFlags(String field) { + final EnumSet result = EnumSet.copyOf(mandatoryFlags); + int r = random().nextInt(); + for (HighlightFlag highlightFlag : HighlightFlag.values()) { + if (((1 << highlightFlag.ordinal()) & r) == 0) { + result.add(highlightFlag); + } + } + return result; + } + }; + uh.setCacheFieldValCharsThreshold(random().nextInt(100)); + if (random().nextBoolean()) { + uh.setFieldMatcher(f -> true); // requireFieldMatch==false + } + return uh; + } + } + // // Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer tests. // @@ -101,7 +135,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "highlighting")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -167,7 +201,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxLength(maxLength); String snippets[] = highlighter.highlight("body", query, topDocs); @@ -191,7 +225,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits); @@ -219,7 +253,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, 
topDocs.totalHits); @@ -248,7 +282,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxLength(value.length() * 2 + 1); Query query = new TermQuery(new Term("body", "field")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); @@ -281,7 +315,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); BooleanQuery query = new BooleanQuery.Builder() .add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD) @@ -313,7 +347,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); BooleanQuery query = new BooleanQuery.Builder() .add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD) @@ -345,7 +379,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -382,7 +416,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { .build(); TopDocs topDocs = searcher.search(query, 10); assertEquals(1, topDocs.totalHits); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setHighlightPhrasesStrictly(false); String snippets[] = highlighter.highlight("body", query, topDocs, 2); assertEquals(1, snippets.length); @@ -410,7 +444,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { .build(); TopDocs topDocs = searcher.search(query, 10); assertEquals(1, topDocs.totalHits); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setHighlightPhrasesStrictly(false); String snippets[] = highlighter.highlight("body", query, topDocs, 2); assertEquals(1, snippets.length); @@ -438,7 +472,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { .build(); TopDocs topDocs = searcher.search(query, 10); assertEquals(1, topDocs.totalHits); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxLength(Integer.MAX_VALUE - 1); String snippets[] = highlighter.highlight("body", query, topDocs, 2); assertEquals(1, snippets.length); @@ -461,7 +495,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); 
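// ---------------------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): randomUnifiedHighlighter (added above) draws a single
// random int and uses bit i to decide whether the HighlightFlag with ordinal i is enabled, i.e. it
// picks a uniformly random subset of the enum with one nextInt() call. A standalone illustration
// (the Flag enum here is hypothetical):

import java.util.EnumSet;
import java.util.Random;

public class RandomEnumSubsetSketch {
  enum Flag { PHRASES, MULTI_TERM_QUERY, PASSAGE_RELEVANCY }

  public static void main(String[] args) {
    int r = new Random().nextInt();
    EnumSet<Flag> result = EnumSet.noneOf(Flag.class);
    for (Flag flag : Flag.values()) {
      if (((1 << flag.ordinal()) & r) == 0) { // bit clear -> include, matching the test's convention
        result.add(flag);
      }
    }
    System.out.println(result); // each flag is present independently with probability 1/2
  }
}
// ---------------------------------------------------------------------------------------------------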
IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits); @@ -494,7 +528,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { TopDocs topDocs = searcher.search(query, 10); assertEquals(1, topDocs.totalHits); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxLength(Integer.MAX_VALUE - 1); String snippets[] = highlighter.highlight("body", query, topDocs, 2); assertEquals(1, snippets.length); @@ -549,7 +583,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "highlighting")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -623,7 +657,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("body", "highlighting")); int[] docIDs = new int[]{0}; String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body"); @@ -652,7 +686,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc; Query query = new TermQuery(new Term("body", "highlighting")); @@ -683,7 +717,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxNoHighlightPassages(0);// don't want any default summary Query query = new TermQuery(new Term("body", "highlighting")); int[] docIDs = new int[]{0}; @@ -743,7 +777,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new TermQuery(new Term("bogus", "highlighting")); int[] docIDs = new int[]{0}; String snippets[] = highlighter.highlightFields(new String[]{"bogus"}, query, docIDs, new int[]{2}).get("bogus"); @@ -769,7 +803,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, 
indexAnalyzer); int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc; Query query = new TermQuery(new Term("body", "highlighting")); @@ -798,7 +832,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc; Query query = new TermQuery(new Term("body", "highlighting")); @@ -834,7 +868,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setCacheFieldValCharsThreshold(random().nextInt(10) * 10);// 0 thru 90 intervals of 10 Query query = new TermQuery(new Term("body", "answer")); TopDocs hits = searcher.search(query, numDocs); @@ -872,7 +906,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); BooleanQuery query = new BooleanQuery.Builder() .add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD) @@ -995,7 +1029,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase { return (qf) -> true; } }; - UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer); + highlighterFieldMatch.setFieldMatcher(null);//default BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder() .add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD) @@ -1078,7 +1113,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase { return (qf) -> true; } }; - UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY)); + highlighterFieldMatch.setFieldMatcher(null);//default BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder() .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD) @@ -1161,7 +1197,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase { return (qf) -> true; } }; - UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES)); + highlighterFieldMatch.setFieldMatcher(null);//default BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder() .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD) diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java index 8791b76e7e7..a9fadc0175f 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java +++ 
b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.uhighlight; import java.io.IOException; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.List; import java.util.Objects; @@ -65,6 +66,7 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanWeight; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag; import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -150,6 +152,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { ir.close(); } + private UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer) { + return TestUnifiedHighlighter.randomUnifiedHighlighter(searcher, indexAnalyzer, + EnumSet.of(HighlightFlag.MULTI_TERM_QUERY)); + } + public void testOnePrefix() throws Exception { RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); @@ -166,7 +173,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); // wrap in a BoostQuery to also show we see inside it Query query = new BoostQuery(new PrefixQuery(new Term("body", "te")), 2.0f); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); @@ -177,6 +184,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { assertEquals("Test a one sentence document.", snippets[1]); // wrong field + highlighter.setFieldMatcher(null);//default BooleanQuery bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(new PrefixQuery(new Term("bogus", "te")), BooleanClause.Occur.SHOULD) @@ -207,7 +215,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new RegexpQuery(new Term("body", "te.*")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -217,6 +225,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { assertEquals("Test a one sentence document.", snippets[1]); // wrong field + highlighter.setFieldMatcher(null);//default BooleanQuery bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD) @@ -247,7 +256,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new FuzzyQuery(new Term("body", "tets"), 1); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -266,6 +275,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { assertEquals("Test a one sentence document.", 
snippets[1]); // wrong field + highlighter.setFieldMatcher(null);//default BooleanQuery bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(new FuzzyQuery(new Term("bogus", "tets"), 1), BooleanClause.Occur.SHOULD) @@ -296,7 +306,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -366,6 +376,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { assertEquals("Test a one sentence document.", snippets[1]); // wrong field + highlighter.setFieldMatcher(null);//default bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD) @@ -396,7 +407,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); BooleanQuery query = new BooleanQuery.Builder() .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD) .build(); @@ -438,7 +449,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); BooleanQuery query = new BooleanQuery.Builder() .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.MUST) .add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.FILTER) @@ -469,7 +480,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*"))); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); @@ -497,7 +508,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); DisjunctionMaxQuery query = new DisjunctionMaxQuery( Collections.singleton(new WildcardQuery(new Term("body", "te*"))), 0); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); @@ -526,7 +537,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); // wrap in a SpanBoostQuery to also show we see inside it Query query = new SpanBoostQuery( new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))), 
2.0f); @@ -556,7 +567,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))); Query query = new SpanOrQuery(new SpanQuery[]{childQuery}); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); @@ -585,7 +596,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))); Query query = new SpanNearQuery(new SpanQuery[]{childQuery, childQuery}, 0, false); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); @@ -614,7 +625,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); SpanQuery include = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))); SpanQuery exclude = new SpanTermQuery(new Term("body", "bogus")); Query query = new SpanNotQuery(include, exclude); @@ -644,7 +655,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))); Query query = new SpanFirstQuery(childQuery, 1000000); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); @@ -675,7 +686,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); // use a variety of common MTQ types BooleanQuery query = new BooleanQuery.Builder() .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD) @@ -765,7 +776,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxLength(25);//a little past first sentence BooleanQuery query = new BooleanQuery.Builder() @@ -798,7 +809,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); highlighter.setMaxLength(32);//a little past first sentence BooleanQuery query = new BooleanQuery.Builder() @@ -846,7 +857,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { }; IndexSearcher searcher = 
newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, buggyAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, buggyAnalyzer); highlighter.setHandleMultiTermQuery(true); if (rarely()) { highlighter.setMaxLength(25);//a little past first sentence @@ -903,7 +914,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc; Query query = new PrefixQuery(new Term("body", "nonexistent")); @@ -934,7 +945,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = new PrefixQuery(new Term("body", "ab")); TopDocs topDocs = searcher.search(query, 10); @@ -956,7 +967,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.close(); IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc; PhraseQuery pq = new PhraseQuery.Builder() @@ -1076,7 +1087,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { IndexSearcher searcher = newSearcher(ir); Query query = new PrefixQuery(new Term(field, "я")); TopDocs topDocs = searcher.search(query, 1); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, analyzer); String[] snippets = highlighter.highlight(field, query, topDocs); assertEquals("[я]", Arrays.toString(snippets)); ir.close(); @@ -1100,7 +1111,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { iw.commit(); try (IndexReader ir = iw.getReader()) { IndexSearcher searcher = newSearcher(ir); - UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer); + UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, analyzer); highlighter.setBreakIterator(WholeBreakIterator::new); // Test PrefixQuery diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java index acc4bd733ca..08820aa543c 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java @@ -17,6 +17,7 @@ package org.apache.lucene.search.uhighlight; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -38,6 +39,7 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Sort; import 
org.apache.lucene.search.TermQuery; @@ -46,6 +48,7 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; @@ -55,6 +58,7 @@ import org.apache.lucene.util.QueryBuilder; import org.junit.After; import org.junit.Before; +//TODO rename to reflect position sensitivity public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase { final FieldType fieldType; @@ -151,6 +155,16 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase { String[] snippets = highlighter.highlight("body", query, topDocs); assertArrayEquals(new String[]{"Yin yang, yin gap yang"}, snippets); + + // test the Passage only has 3 matches. We don't want duplicates from "Yin" being in TermQuery & PhraseQuery. + highlighter.setFormatter(new PassageFormatter() { + @Override + public Object format(Passage[] passages, String content) { + return Arrays.toString(passages); + } + }); + assertArrayEquals(new String[]{"[Passage[0-22]{yin[0-3],yang[4-8],yin[10-13]}score=2.4964213]"}, + highlighter.highlight("body", query, topDocs)); } public void testPhraseNotInDoc() throws IOException { @@ -185,6 +199,16 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase { String[] snippets = highlighter.highlight("body", query, topDocs); assertArrayEquals(new String[]{"alpha bravo charlie - charlie bravo alpha"}, snippets); + + // test the Passage only has 3 matches. We don't want duplicates from both PhraseQuery + highlighter.setFormatter(new PassageFormatter() { + @Override + public Object format(Passage[] passages, String content) { + return Arrays.toString(passages); + } + }); + assertArrayEquals(new String[]{"[Passage[0-41]{alpha[0-5],bravo[6-11],charlie[12-19]}score=3.931102]"}, + highlighter.highlight("body", query, topDocs)); } public void testSynonyms() throws IOException { @@ -477,4 +501,68 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase { return wrapped.hashCode(); } } + + // Ported from LUCENE-5455 (fixed in LUCENE-8121). Also see LUCENE-2287. + public void testNestedSpanQueryHighlight() throws Exception { + // For a long time, the highlighters used to assume all query terms within the SpanQuery were valid at the Spans' + // position range. This would highlight occurrences of terms that were actually not matched by the query. + // But now using the SpanCollector API we don't make this kind of mistake. 
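// ---------------------------------------------------------------------------------------------------
// Editorial note (not part of the patch): in the assertions below, the expected strings should carry
// <b>...</b> markup around only the terms that fall inside an actual span match; that markup appears
// to have been stripped by the formatting of this copy of the diff, which is why "expected" looks
// identical to the indexed text. For the first query, SpanNear([SpanNear([x, y, z], 0, true), a], 10,
// false) over "x y z x z x a", only the leading "x y z" and the trailing "a" participate in a match,
// so the later x/z occurrences must remain unhighlighted -- exactly the over-highlighting bug that the
// comment above describes for the older term-based approach.
// ---------------------------------------------------------------------------------------------------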
+ final String FIELD_NAME = "body"; + final String indexedText = "x y z x z x a"; + indexWriter.addDocument(newDoc(indexedText)); + initReaderSearcherHighlighter(); + TopDocs topDocs = new TopDocs(1, new ScoreDoc[]{new ScoreDoc(0, 1f)}, 1f); + + String expected = "x y z x z x a"; + Query q = new SpanNearQuery(new SpanQuery[] { + new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term(FIELD_NAME, "x")), + new SpanTermQuery(new Term(FIELD_NAME, "y")), + new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true), + new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 10, false); + String observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0]; + if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); + assertEquals("Nested SpanNear query not properly highlighted.", expected, observed); + + expected = "x y z x z x a"; + q = new SpanNearQuery(new SpanQuery[] { + new SpanOrQuery( + new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term(FIELD_NAME, "x")), + new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true), + new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term(FIELD_NAME, "y")), + new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true)), + new SpanOrQuery( + new SpanTermQuery(new Term(FIELD_NAME, "a")), + new SpanTermQuery(new Term(FIELD_NAME, "b")))}, 10, false); + observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0]; + if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); + assertEquals("Nested SpanNear query within SpanOr not properly highlighted.", expected, observed); + + expected = "x y z x z x a"; + q = new SpanNearQuery(new SpanQuery[] { + new SpanNearQuery(new SpanQuery[] { + new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(FIELD_NAME, "*"))), + new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true), + new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 10, false); + observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0]; + if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); + assertEquals("Nested SpanNear query with wildcard not properly highlighted.", expected, observed); + + expected = "x y z x z x a"; + q = new SpanNearQuery(new SpanQuery[] { + new SpanOrQuery( + new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term(FIELD_NAME, "x")), + new SpanTermQuery(new Term(FIELD_NAME, "y"))}, 0, true), + new SpanNearQuery(new SpanQuery[] { //No hit span query + new SpanTermQuery(new Term(FIELD_NAME, "z")), + new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 0, true)), + new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 10, false); + observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0]; + if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); + assertEquals("Nested SpanNear query within SpanOr not properly highlighted.", expected, observed); + } + } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java index 738a0b9b4b0..e60b17be766 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java @@ -218,11 +218,9 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase { // this 
code never runs; just for compilation Passage p; - try (OffsetsEnum oe = new OffsetsEnum(null, EMPTY)) { + try (OffsetsEnum oe = new OffsetsEnum.OfPostings(null, EMPTY)) { oe.getTerm(); - oe.getPostingsEnum(); oe.freq(); - oe.hasMorePositions(); oe.nextPosition(); oe.startOffset(); oe.endOffset(); diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 72265a743f0..bd2e05257de 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -31,7 +31,7 @@ com.fasterxml.jackson.core.version = 2.5.4 /com.googlecode.mp4parser/isoparser = 1.1.18 /com.healthmarketscience.jackcess/jackcess = 2.1.8 /com.healthmarketscience.jackcess/jackcess-encrypt = 2.1.4 -/com.ibm.icu/icu4j = 59.1 +/com.ibm.icu/icu4j = 60.2 /com.pff/java-libpst = 0.8.1 com.rometools.version = 1.5.1 @@ -230,7 +230,7 @@ org.codehaus.janino.version = 2.7.6 /org.codehaus.woodstox/stax2-api = 3.1.4 /org.codehaus.woodstox/woodstox-core-asl = 4.4.1 -org.eclipse.jetty.version = 9.3.20.v20170531 +org.eclipse.jetty.version = 9.4.8.v20171121 /org.eclipse.jetty/jetty-continuation = ${org.eclipse.jetty.version} /org.eclipse.jetty/jetty-deploy = ${org.eclipse.jetty.version} /org.eclipse.jetty/jetty-http = ${org.eclipse.jetty.version} diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index d70beaf2929..c90bfdc08f3 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -1489,11 +1489,6 @@ public class TestBlockJoin extends LuceneTestCase { protected double score(BasicStats stats, double freq, double docLen) { return freq; } - - @Override - protected double maxScore(BasicStats stats, double maxFreq) { - return maxFreq; - } }; Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig().setSimilarity(sim)); diff --git a/lucene/licenses/icu4j-59.1.jar.sha1 b/lucene/licenses/icu4j-59.1.jar.sha1 deleted file mode 100644 index f3f0018f053..00000000000 --- a/lucene/licenses/icu4j-59.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6f06e820cf4c8968bbbaae66ae0b33f6a256b57f diff --git a/lucene/licenses/icu4j-60.2.jar.sha1 b/lucene/licenses/icu4j-60.2.jar.sha1 new file mode 100644 index 00000000000..e6131111834 --- /dev/null +++ b/lucene/licenses/icu4j-60.2.jar.sha1 @@ -0,0 +1 @@ +e452cba3caaf93b997ff543c7246a6da74ed70f1 diff --git a/lucene/licenses/jetty-continuation-9.3.20.v20170531.jar.sha1 b/lucene/licenses/jetty-continuation-9.3.20.v20170531.jar.sha1 deleted file mode 100644 index 4e086fcf849..00000000000 --- a/lucene/licenses/jetty-continuation-9.3.20.v20170531.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0176f1ef8366257e7b6214c3bbd710cf47593135 diff --git a/lucene/licenses/jetty-continuation-9.4.8.v20171121.jar.sha1 b/lucene/licenses/jetty-continuation-9.4.8.v20171121.jar.sha1 new file mode 100644 index 00000000000..f519f05ae20 --- /dev/null +++ b/lucene/licenses/jetty-continuation-9.4.8.v20171121.jar.sha1 @@ -0,0 +1 @@ +34b64138f6589d3d32d02058fe73ec788cb981bf diff --git a/lucene/licenses/jetty-http-9.3.20.v20170531.jar.sha1 b/lucene/licenses/jetty-http-9.3.20.v20170531.jar.sha1 deleted file mode 100644 index 1936a2e298f..00000000000 --- a/lucene/licenses/jetty-http-9.3.20.v20170531.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -32f5fe22ed468a49df1ffcbb27c39c1b53f261aa diff --git a/lucene/licenses/jetty-http-9.4.8.v20171121.jar.sha1 
b/lucene/licenses/jetty-http-9.4.8.v20171121.jar.sha1 new file mode 100644 index 00000000000..1e97da0d5de --- /dev/null +++ b/lucene/licenses/jetty-http-9.4.8.v20171121.jar.sha1 @@ -0,0 +1 @@ +9879d6c4e37400bf43f0cd4b3c6e34a3ba409864 diff --git a/lucene/licenses/jetty-io-9.3.20.v20170531.jar.sha1 b/lucene/licenses/jetty-io-9.3.20.v20170531.jar.sha1 deleted file mode 100644 index 5d47c215049..00000000000 --- a/lucene/licenses/jetty-io-9.3.20.v20170531.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5b68e7761fcacefcf26ad9ab50943db65fda2c3d diff --git a/lucene/licenses/jetty-io-9.4.8.v20171121.jar.sha1 b/lucene/licenses/jetty-io-9.4.8.v20171121.jar.sha1 new file mode 100644 index 00000000000..2396010ffb2 --- /dev/null +++ b/lucene/licenses/jetty-io-9.4.8.v20171121.jar.sha1 @@ -0,0 +1 @@ +d3fe2dfa62f52ee91ff07cb359f63387e0e30b40 diff --git a/lucene/licenses/jetty-server-9.3.20.v20170531.jar.sha1 b/lucene/licenses/jetty-server-9.3.20.v20170531.jar.sha1 deleted file mode 100644 index 0c9d4357680..00000000000 --- a/lucene/licenses/jetty-server-9.3.20.v20170531.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6a1523d44ebb527eed068a5c8bfd22edd6a20530 diff --git a/lucene/licenses/jetty-server-9.4.8.v20171121.jar.sha1 b/lucene/licenses/jetty-server-9.4.8.v20171121.jar.sha1 new file mode 100644 index 00000000000..1369bae0d7b --- /dev/null +++ b/lucene/licenses/jetty-server-9.4.8.v20171121.jar.sha1 @@ -0,0 +1 @@ +34614bd9a29de57ef28ca31f1f2b49a412af196d diff --git a/lucene/licenses/jetty-servlet-9.3.20.v20170531.jar.sha1 b/lucene/licenses/jetty-servlet-9.3.20.v20170531.jar.sha1 deleted file mode 100644 index 452932d2baf..00000000000 --- a/lucene/licenses/jetty-servlet-9.3.20.v20170531.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -21a698f9d58d03cdf58bf2a40f93de58c2eab138 diff --git a/lucene/licenses/jetty-servlet-9.4.8.v20171121.jar.sha1 b/lucene/licenses/jetty-servlet-9.4.8.v20171121.jar.sha1 new file mode 100644 index 00000000000..5632347596a --- /dev/null +++ b/lucene/licenses/jetty-servlet-9.4.8.v20171121.jar.sha1 @@ -0,0 +1 @@ +bbbb9b5de08f468c7b9b3de6aea0b098d2c679b6 diff --git a/lucene/licenses/jetty-util-9.3.20.v20170531.jar.sha1 b/lucene/licenses/jetty-util-9.3.20.v20170531.jar.sha1 deleted file mode 100644 index 7d020a4a546..00000000000 --- a/lucene/licenses/jetty-util-9.3.20.v20170531.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -19ce4203809da37f8ea7a5632704fa71b6f0ccc2 diff --git a/lucene/licenses/jetty-util-9.4.8.v20171121.jar.sha1 b/lucene/licenses/jetty-util-9.4.8.v20171121.jar.sha1 new file mode 100644 index 00000000000..7a3c6ad69b4 --- /dev/null +++ b/lucene/licenses/jetty-util-9.4.8.v20171121.jar.sha1 @@ -0,0 +1 @@ +d6ec1a1613c7fa72aa6bf5d8c204750afbc3df3b diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java index a4b9d71e545..94cf97448f6 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java @@ -50,7 +50,6 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SortedDocValues; @@ -200,12 +199,7 @@ public class TestMemoryIndex extends LuceneTestCase { } @Override - public SimWeight computeWeight(float 
boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(); } diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java index 2e8491d2f01..5e98349fcb7 100644 --- a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java +++ b/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -27,7 +28,6 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; @@ -36,7 +36,6 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -358,7 +357,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase { Occur.SHOULD)); testQuery.add(new BooleanClause(new TermQuery(new Term("year", "1969")), Occur.SHOULD)); - return testQuery.build(); + return new DocValueScoreQuery(testQuery.build(), "weeksAtNumberOne"); } @Override @@ -411,10 +410,6 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase { writer.close(); searcher = newSearcher(reader); artistDocValues = MultiDocValues.getSortedValues(reader, "artist"); - - // All searches sort by song popularity - final Similarity base = searcher.getSimilarity(true); - searcher.setSimilarity(new DocValueSimilarity(base, "weeksAtNumberOne")); } @Override @@ -442,61 +437,108 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase { return result; } - /** - * Similarity that wraps another similarity and replaces the final score - * according to whats in a docvalues field. 
- * - * @lucene.experimental - */ - static class DocValueSimilarity extends Similarity { - private final Similarity sim; - private final String scoreValueField; + private static final class DocValueScoreQuery extends Query { - public DocValueSimilarity(Similarity sim, String scoreValueField) { - this.sim = sim; - this.scoreValueField = scoreValueField; + private final Query query; + private final String scoreField; + + DocValueScoreQuery(Query query, String scoreField) { + this.query = query; + this.scoreField = scoreField; + } + + @Override + public String toString(String field) { + return "DocValueScore(" + query.toString(field) + ")"; } @Override - public long computeNorm(FieldInvertState state) { - return sim.computeNorm(state); + public boolean equals(Object obj) { + if (obj instanceof DocValueScoreQuery == false) { + return false; + } + return query.equals(((DocValueScoreQuery) obj).query); } @Override - public SimWeight computeWeight(float boost, - CollectionStatistics collectionStats, TermStatistics... termStats) { - return sim.computeWeight(boost, collectionStats, termStats); + public int hashCode() { + int h = getClass().hashCode(); + h = 31 * h + query.hashCode(); + h = 31 * h + scoreField.hashCode(); + return h; } @Override - public SimScorer simScorer(SimWeight stats, LeafReaderContext context) - throws IOException { + public Query rewrite(IndexReader reader) throws IOException { + Query rewritten = query.rewrite(reader); + if (rewritten != query) { + return new DocValueScoreQuery(rewritten, scoreField); + } + return super.rewrite(reader); + } - final NumericDocValues values = DocValues.getNumeric(context.reader(), scoreValueField); - - return new SimScorer() { + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + if (scoreMode.needsScores() == false) { + return query.createWeight(searcher, scoreMode, boost); + } + Weight inner = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + return new Weight(this) { + @Override - public float score(int doc, float freq) throws IOException { - if (doc != values.docID()) { - values.advance(doc); - } - if (doc == values.docID()) { - return Float.intBitsToFloat((int) values.longValue()); - } else { - return 0f; - } + public boolean isCacheable(LeafReaderContext ctx) { + return true; } - + @Override - public float maxScore(float maxFreq) { - return Float.MAX_VALUE; + public Scorer scorer(LeafReaderContext context) throws IOException { + Scorer innerScorer = inner.scorer(context); + NumericDocValues scoreFactors = DocValues.getNumeric(context.reader(), scoreField); + return new Scorer(this) { + + @Override + public float score() throws IOException { + if (scoreFactors.advanceExact(docID())) { + return Float.intBitsToFloat((int) scoreFactors.longValue()); + } + return 0; + } + + @Override + public float maxScore() { + return Float.POSITIVE_INFINITY; + } + + @Override + public DocIdSetIterator iterator() { + return innerScorer.iterator(); + } + + @Override + public int docID() { + return innerScorer.docID(); + } + }; } - + @Override - public Explanation explain(int doc, Explanation freq) throws IOException { - return Explanation.match(score(doc, 0f), "indexDocValue(" + scoreValueField + ")"); + public void extractTerms(Set terms) { + inner.extractTerms(terms); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + Scorer s = scorer(context); + if (s != null) { + int advanced = s.iterator().advance(doc); + if 
(doc != advanced) { + return Explanation.match(s.score(), "match"); + } + } + return Explanation.noMatch("no match"); } }; } } + } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java index 2fdeaa797a9..10c232ed453 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java @@ -25,7 +25,7 @@ import java.util.Objects; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause.Occur; @@ -124,9 +124,9 @@ public class CommonTermsQuery extends Query { } final List leaves = reader.leaves(); final int maxDoc = reader.maxDoc(); - final TermContext[] contextArray = new TermContext[terms.size()]; + final TermStates[] contextArray = new TermStates[terms.size()]; final Term[] queryTerms = this.terms.toArray(new Term[0]); - collectTermContext(reader, leaves, contextArray, queryTerms); + collectTermStates(reader, leaves, contextArray, queryTerms); return buildQuery(maxDoc, contextArray, queryTerms); } @@ -146,21 +146,21 @@ public class CommonTermsQuery extends Query { } protected Query buildQuery(final int maxDoc, - final TermContext[] contextArray, final Term[] queryTerms) { + final TermStates[] contextArray, final Term[] queryTerms) { List lowFreqQueries = new ArrayList<>(); List highFreqQueries = new ArrayList<>(); for (int i = 0; i < queryTerms.length; i++) { - TermContext termContext = contextArray[i]; - if (termContext == null) { + TermStates termStates = contextArray[i]; + if (termStates == null) { lowFreqQueries.add(newTermQuery(queryTerms[i], null)); } else { - if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency) - || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency + if ((maxTermFrequency >= 1f && termStates.docFreq() > maxTermFrequency) + || (termStates.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) { highFreqQueries - .add(newTermQuery(queryTerms[i], termContext)); + .add(newTermQuery(queryTerms[i], termStates)); } else { - lowFreqQueries.add(newTermQuery(queryTerms[i], termContext)); + lowFreqQueries.add(newTermQuery(queryTerms[i], termStates)); } } } @@ -208,14 +208,14 @@ public class CommonTermsQuery extends Query { return builder.build(); } - public void collectTermContext(IndexReader reader, - List leaves, TermContext[] contextArray, - Term[] queryTerms) throws IOException { + public void collectTermStates(IndexReader reader, + List leaves, TermStates[] contextArray, + Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : leaves) { for (int i = 0; i < queryTerms.length; i++) { Term term = queryTerms[i]; - TermContext termContext = contextArray[i]; + TermStates termStates = contextArray[i]; final Terms terms = context.reader().terms(term.field()); if (terms == null) { // field does not exist @@ -226,12 +226,12 @@ public class CommonTermsQuery extends Query { if (termsEnum == TermsEnum.EMPTY) continue; if (termsEnum.seekExact(term.bytes())) { - if (termContext == null) { - contextArray[i] = new TermContext(reader.getContext(), + if (termStates == null) { + contextArray[i] = new TermStates(reader.getContext(), 
termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } else { - termContext.register(termsEnum.termState(), context.ord, + termStates.register(termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } @@ -402,10 +402,10 @@ public class CommonTermsQuery extends Query { * Builds a new TermQuery instance. *

<p>This is intended for subclasses that wish to customize the generated queries.</p>
    * @param term term - * @param context the TermContext to be used to create the low level term query. Can be null. + * @param termStates the TermStates to be used to create the low level term query. Can be null. * @return new TermQuery instance */ - protected Query newTermQuery(Term term, TermContext context) { - return context == null ? new TermQuery(term) : new TermQuery(term, context); + protected Query newTermQuery(Term term, TermStates termStates) { + return termStates == null ? new TermQuery(term) : new TermQuery(term, termStates); } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java index d264267270f..09a592b61fe 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java @@ -63,6 +63,38 @@ public final class FunctionScoreQuery extends Query { return in; } + /** + * Returns a FunctionScoreQuery where the scores of a wrapped query are multiplied by + * the value of a DoubleValuesSource. + * + * If the source has no value for a particular document, the score for that document + * is preserved as-is. + * + * @param in the query to boost + * @param boost a {@link DoubleValuesSource} containing the boost values + */ + public static FunctionScoreQuery boostByValue(Query in, DoubleValuesSource boost) { + return new FunctionScoreQuery(in, new MultiplicativeBoostValuesSource(boost)); + } + + /** + * Returns a FunctionScoreQuery where the scores of a wrapped query are multiplied by + * a boost factor if the document being scored also matches a separate boosting query. + * + * Documents that do not match the boosting query have their scores preserved. + * + * This may be used to 'demote' documents that match the boosting query, by passing in + * a boostValue between 0 and 1. + * + * @param in the query to boost + * @param boostMatch the boosting query + * @param boostValue the amount to boost documents which match the boosting query + */ + public static FunctionScoreQuery boostByQuery(Query in, Query boostMatch, float boostValue) { + return new FunctionScoreQuery(in, + new MultiplicativeBoostValuesSource(new QueryBoostValuesSource(DoubleValuesSource.fromQuery(boostMatch), boostValue))); + } + @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { Weight inner = in.createWeight(searcher, scoreMode.needsScores() && source.needsScores() ? 
scoreMode : ScoreMode.COMPLETE_NO_SCORES, 1f); @@ -189,4 +221,123 @@ public final class FunctionScoreQuery extends Query { } } + + private static class MultiplicativeBoostValuesSource extends DoubleValuesSource { + + private final DoubleValuesSource boost; + + private MultiplicativeBoostValuesSource(DoubleValuesSource boost) { + this.boost = boost; + } + + @Override + public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + DoubleValues in = DoubleValues.withDefault(boost.getValues(ctx, scores), 1); + return new DoubleValues() { + @Override + public double doubleValue() throws IOException { + return scores.doubleValue() * in.doubleValue(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + return in.advanceExact(doc); + } + }; + } + + @Override + public boolean needsScores() { + return true; + } + + @Override + public DoubleValuesSource rewrite(IndexSearcher reader) throws IOException { + return new MultiplicativeBoostValuesSource(boost.rewrite(reader)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MultiplicativeBoostValuesSource that = (MultiplicativeBoostValuesSource) o; + return Objects.equals(boost, that.boost); + } + + @Override + public int hashCode() { + return Objects.hash(boost); + } + + @Override + public String toString() { + return "boost(" + boost.toString() + ")"; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return boost.isCacheable(ctx); + } + } + + private static class QueryBoostValuesSource extends DoubleValuesSource { + + private final DoubleValuesSource query; + private final float boost; + + QueryBoostValuesSource(DoubleValuesSource query, float boost) { + this.query = query; + this.boost = boost; + } + + @Override + public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + DoubleValues in = query.getValues(ctx, null); + return DoubleValues.withDefault(new DoubleValues() { + @Override + public double doubleValue() { + return boost; + } + + @Override + public boolean advanceExact(int doc) throws IOException { + return in.advanceExact(doc); + } + }, 1); + } + + @Override + public boolean needsScores() { + return false; + } + + @Override + public DoubleValuesSource rewrite(IndexSearcher reader) throws IOException { + return new QueryBoostValuesSource(query.rewrite(reader), boost); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + QueryBoostValuesSource that = (QueryBoostValuesSource) o; + return Float.compare(that.boost, boost) == 0 && + Objects.equals(query, that.query); + } + + @Override + public int hashCode() { + return Objects.hash(query, boost); + } + + @Override + public String toString() { + return "queryboost(" + query + ")^" + boost; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return query.isCacheable(ctx); + } + } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/FloatDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/FloatDocValues.java index 8b9e9427b53..72798d65811 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/FloatDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/FloatDocValues.java @@ -57,6 +57,11 @@ public abstract class FloatDocValues extends 
FunctionValues { return (long)floatVal(doc); } + @Override + public boolean boolVal(int doc) throws IOException { + return floatVal(doc) != 0.0f; + } + @Override public double doubleVal(int doc) throws IOException { return (double)floatVal(doc); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java index 34e56975479..4192f2d183f 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java @@ -47,7 +47,7 @@ public class IDFValueSource extends DocFreqValueSource { @Override public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher)context.get("searcher"); - TFIDFSimilarity sim = asTFIDF(searcher.getSimilarity(true), field); + TFIDFSimilarity sim = asTFIDF(searcher.getSimilarity(), field); if (sim == null) { throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)"); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java index ea63de9345e..662f80daa13 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java @@ -25,11 +25,11 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.FloatDocValues; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.util.BytesRef; import org.apache.lucene.search.similarities.Similarity.SimScorer; -import org.apache.lucene.search.similarities.Similarity.SimWeight; /** * Function that returns the decoded norm for every document. 
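A minimal usage sketch (not part of the patch itself) for the two FunctionScoreQuery factories added above, boostByValue and boostByQuery; the field names, terms, and boost factor below are hypothetical, chosen only for illustration:

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class FunctionScoreQueryExamples {
  static Query demoteArchived(Query base) {
    // Scale each hit's score by a per-document numeric docvalues field;
    // documents with no value for "popularity" keep their original score.
    Query byValue = FunctionScoreQuery.boostByValue(
        base, DoubleValuesSource.fromLongField("popularity"));
    // Halve the score of hits that also match the boosting query: a
    // boostValue between 0 and 1 demotes, a value above 1 promotes.
    return FunctionScoreQuery.boostByQuery(
        byValue, new TermQuery(new Term("category", "archived")), 0.5f);
  }
}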
@@ -62,16 +62,16 @@ public class NormValueSource extends ValueSource { @Override public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher)context.get("searcher"); - final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), field); + final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field); if (similarity == null) { throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)"); } // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf // is 1 when docCount == docFreq == 1 - final SimWeight simWeight = similarity.computeWeight(1f, + final SimScorer simScorer = similarity.scorer(1f, new CollectionStatistics(field, 1, 1, 1, 1), new TermStatistics(new BytesRef("bogus"), 1, 1)); - final SimScorer simScorer = similarity.simScorer(simWeight, readerContext); + final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, readerContext.reader(), true, Float.MAX_VALUE); return new FloatDocValues(this) { int lastDocID = -1; @@ -81,7 +81,7 @@ public class NormValueSource extends ValueSource { throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID); } lastDocID = docID; - return simScorer.score(docID, 1f); + return leafSimScorer.score(docID, 1f); } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java index baed0ffd410..731ab1fee24 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java @@ -51,7 +51,7 @@ public class TFValueSource extends TermFreqValueSource { public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { final Terms terms = readerContext.reader().terms(indexedField); IndexSearcher searcher = (IndexSearcher)context.get("searcher"); - final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), indexedField); + final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), indexedField); if (similarity == null) { throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)"); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index 6704ba1525b..bd5d927c627 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -25,13 +25,12 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.spans.FilterSpans; 
import org.apache.lucene.search.spans.SpanCollector; import org.apache.lucene.search.spans.SpanQuery; @@ -136,8 +135,8 @@ public class PayloadScoreQuery extends SpanQuery { } @Override - public void extractTermContexts(Map contexts) { - innerWeight.extractTermContexts(contexts); + public void extractTermStates(Map contexts) { + innerWeight.extractTermStates(contexts); } @Override @@ -150,7 +149,7 @@ public class PayloadScoreQuery extends SpanQuery { Spans spans = getSpans(context, Postings.PAYLOADS); if (spans == null) return null; - SimScorer docScorer = innerWeight.getSimScorer(context); + LeafSimScorer docScorer = innerWeight.getSimScorer(context); PayloadSpans payloadSpans = new PayloadSpans(spans, decoder); return new PayloadSpanScorer(this, payloadSpans, docScorer); } @@ -228,7 +227,7 @@ public class PayloadScoreQuery extends SpanQuery { private final PayloadSpans spans; - private PayloadSpanScorer(SpanWeight weight, PayloadSpans spans, Similarity.SimScorer docScorer) throws IOException { + private PayloadSpanScorer(SpanWeight weight, PayloadSpans spans, LeafSimScorer docScorer) throws IOException { super(weight, spans, docScorer); this.spans = spans; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java index 8b23122e546..a9d3bfb2da9 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java @@ -25,12 +25,12 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafSimScorer; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.spans.FilterSpans; import org.apache.lucene.search.spans.FilterSpans.AcceptStatus; import org.apache.lucene.search.spans.SpanCollector; @@ -64,8 +64,8 @@ public class SpanPayloadCheckQuery extends SpanQuery { @Override public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - SpanWeight matchWeight = match.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); - return new SpanPayloadCheckWeight(searcher, scoreMode.needsScores() ? getTermContexts(matchWeight) : null, matchWeight, boost); + SpanWeight matchWeight = match.createWeight(searcher, scoreMode, boost); + return new SpanPayloadCheckWeight(searcher, scoreMode.needsScores() ? 
getTermStates(matchWeight) : null, matchWeight, boost); } @Override @@ -84,8 +84,8 @@ public class SpanPayloadCheckQuery extends SpanQuery { final SpanWeight matchWeight; - public SpanPayloadCheckWeight(IndexSearcher searcher, Map termContexts, SpanWeight matchWeight, float boost) throws IOException { - super(SpanPayloadCheckQuery.this, searcher, termContexts, boost); + public SpanPayloadCheckWeight(IndexSearcher searcher, Map termStates, SpanWeight matchWeight, float boost) throws IOException { + super(SpanPayloadCheckQuery.this, searcher, termStates, boost); this.matchWeight = matchWeight; } @@ -95,8 +95,8 @@ public class SpanPayloadCheckQuery extends SpanQuery { } @Override - public void extractTermContexts(Map contexts) { - matchWeight.extractTermContexts(contexts); + public void extractTermStates(Map contexts) { + matchWeight.extractTermStates(contexts); } @Override @@ -127,7 +127,7 @@ public class SpanPayloadCheckQuery extends SpanQuery { if (spans == null) { return null; } - final Similarity.SimScorer docScorer = getSimScorer(context); + final LeafSimScorer docScorer = getSimScorer(context); return new SpanScorer(this, spans, docScorer); } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java b/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java index 716e2fb19aa..684344e00e0 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java @@ -33,7 +33,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause.Occur; @@ -541,8 +541,8 @@ public class CommonTermsQueryTest extends LuceneTestCase { } @Override - protected Query newTermQuery(Term term, TermContext context) { - Query query = super.newTermQuery(term, context); + protected Query newTermQuery(Term term, TermStates termStates) { + Query query = super.newTermQuery(term, termStates); if (term.text().equals("universe")) { query = new BoostQuery(query, 100f); } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java index c0560bc0313..b865cb71238 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java @@ -60,6 +60,45 @@ public class TestFunctionScoreQuery extends FunctionTestSetup { reader.close(); } + public void testEqualities() { + + Query q1 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "a")), DoubleValuesSource.constant(1)); + Query q2 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "b")), DoubleValuesSource.constant(1)); + Query q3 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "b")), DoubleValuesSource.constant(2)); + Query q4 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "b")), DoubleValuesSource.constant(2)); + + QueryUtils.check(q1); + QueryUtils.checkUnequal(q1, q3); + QueryUtils.checkUnequal(q1, q2); + QueryUtils.checkUnequal(q2, q3); + QueryUtils.checkEqual(q3, q4); + + Query bq1 = 
FunctionScoreQuery.boostByValue(new TermQuery(new Term(TEXT_FIELD, "a")), DoubleValuesSource.constant(2)); + QueryUtils.check(bq1); + Query bq2 = FunctionScoreQuery.boostByValue(new TermQuery(new Term(TEXT_FIELD, "a")), DoubleValuesSource.constant(4)); + QueryUtils.checkUnequal(bq1, bq2); + Query bq3 = FunctionScoreQuery.boostByValue(new TermQuery(new Term(TEXT_FIELD, "b")), DoubleValuesSource.constant(4)); + QueryUtils.checkUnequal(bq1, bq3); + QueryUtils.checkUnequal(bq2, bq3); + Query bq4 = FunctionScoreQuery.boostByValue(new TermQuery(new Term(TEXT_FIELD, "b")), DoubleValuesSource.constant(4)); + QueryUtils.checkEqual(bq3, bq4); + + Query qq1 = FunctionScoreQuery.boostByQuery(new TermQuery(new Term(TEXT_FIELD, "a")), new TermQuery(new Term(TEXT_FIELD, "z")), 0.1f); + QueryUtils.check(qq1); + Query qq2 = FunctionScoreQuery.boostByQuery(new TermQuery(new Term(TEXT_FIELD, "a")), new TermQuery(new Term(TEXT_FIELD, "z")), 0.2f); + QueryUtils.checkUnequal(qq1, qq2); + Query qq3 = FunctionScoreQuery.boostByQuery(new TermQuery(new Term(TEXT_FIELD, "b")), new TermQuery(new Term(TEXT_FIELD, "z")), 0.1f); + QueryUtils.checkUnequal(qq1, qq3); + QueryUtils.checkUnequal(qq2, qq3); + Query qq4 = FunctionScoreQuery.boostByQuery(new TermQuery(new Term(TEXT_FIELD, "a")), new TermQuery(new Term(TEXT_FIELD, "zz")), 0.1f); + QueryUtils.checkUnequal(qq1, qq4); + QueryUtils.checkUnequal(qq2, qq4); + QueryUtils.checkUnequal(qq3, qq4); + Query qq5 = FunctionScoreQuery.boostByQuery(new TermQuery(new Term(TEXT_FIELD, "a")), new TermQuery(new Term(TEXT_FIELD, "z")), 0.1f); + QueryUtils.checkEqual(qq1, qq5); + + } + // FunctionQuery equivalent public void testSimpleSourceScore() throws Exception { @@ -80,18 +119,13 @@ public class TestFunctionScoreQuery extends FunctionTestSetup { // CustomScoreQuery and BoostedQuery equivalent public void testScoreModifyingSource() throws Exception { - SimpleBindings bindings = new SimpleBindings(); - bindings.add("score", DoubleValuesSource.SCORES); - bindings.add("iii", DoubleValuesSource.fromIntField("iii")); - Expression expr = JavascriptCompiler.compile("score * iii"); - BooleanQuery bq = new BooleanQuery.Builder() .add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD) .build(); TopDocs plain = searcher.search(bq, 1); - FunctionScoreQuery fq = new FunctionScoreQuery(bq, expr.getDoubleValuesSource(bindings)); + FunctionScoreQuery fq = FunctionScoreQuery.boostByValue(bq, DoubleValuesSource.fromIntField("iii")); QueryUtils.check(random(), fq, searcher, rarely()); @@ -108,20 +142,16 @@ public class TestFunctionScoreQuery extends FunctionTestSetup { // BoostingQuery equivalent public void testCombiningMultipleQueryScores() throws Exception { - SimpleBindings bindings = new SimpleBindings(); - bindings.add("score", DoubleValuesSource.SCORES); - bindings.add("testquery", DoubleValuesSource.fromQuery(new TermQuery(new Term(TEXT_FIELD, "rechecking")))); - Expression expr = JavascriptCompiler.compile("score + (testquery * 100)"); - TermQuery q = new TermQuery(new Term(TEXT_FIELD, "text")); TopDocs plain = searcher.search(q, 1); - FunctionScoreQuery fq = new FunctionScoreQuery(q, expr.getDoubleValuesSource(bindings)); + FunctionScoreQuery fq + = FunctionScoreQuery.boostByQuery(q, new TermQuery(new Term(TEXT_FIELD, "rechecking")), 100f); QueryUtils.check(random(), fq, searcher, rarely()); - int[] expectedDocs = new int[]{ 6, 1, 0, 2, 8 }; - TopDocs docs = searcher.search(fq, 5); + int[] 
expectedDocs = new int[]{ 6, 1, 0, 2, 8 }; + TopDocs docs = searcher.search(fq, 20); assertEquals(plain.totalHits, docs.totalHits); for (int i = 0; i < expectedDocs.length; i++) { assertEquals(expectedDocs[i], docs.scoreDocs[i].doc); diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java index 842c117e8ad..ef0f4766800 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java @@ -82,7 +82,7 @@ public class TestLongNormValueSource extends LuceneTestCase { } public void testNorm() throws Exception { - Similarity saved = searcher.getSimilarity(true); + Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(sim); diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java index 876fec8dedd..b12f0269707 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java @@ -225,7 +225,7 @@ public class TestValueSources extends LuceneTestCase { } public void testIDF() throws Exception { - Similarity saved = searcher.getSimilarity(true); + Similarity saved = searcher.getSimilarity(); try { searcher.setSimilarity(new ClassicSimilarity()); ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test")); @@ -362,7 +362,7 @@ public class TestValueSources extends LuceneTestCase { } public void testNorm() throws Exception { - Similarity saved = searcher.getSimilarity(true); + Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new ClassicSimilarity()); @@ -414,7 +414,7 @@ public class TestValueSources extends LuceneTestCase { } public void testQuery() throws Exception { - Similarity saved = searcher.getSimilarity(true); + Similarity saved = searcher.getSimilarity(); try { searcher.setSimilarity(new ClassicSimilarity()); @@ -521,7 +521,7 @@ public class TestValueSources extends LuceneTestCase { } public void testTF() throws Exception { - Similarity saved = searcher.getSimilarity(true); + Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new ClassicSimilarity()); diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/docvalues/TestBoolValOfNumericDVs.java b/lucene/queries/src/test/org/apache/lucene/queries/function/docvalues/TestBoolValOfNumericDVs.java new file mode 100644 index 00000000000..a6046741020 --- /dev/null +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/docvalues/TestBoolValOfNumericDVs.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.queries.function.docvalues; + +import java.io.IOException; + +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.util.LuceneTestCase; + +/** + *

+ * Sanity check that {@link FunctionValues#boolVal} behaves as expected for trivial subclasses of the various
+ * (Numeric) DocValue implementations.
+ * <p>
+ * Any "non-zero" value should result in "true".

    + */ +public class TestBoolValOfNumericDVs extends LuceneTestCase { + + public void test() throws IOException { + check(true); + check(false); + } + + public void check(final boolean expected) throws IOException { + + // create "constant" based instances of each superclass that should returned the expected value based on + // the constant used + final FunctionValues[] values = new FunctionValues[] { + new FloatDocValues(null) { + @Override + public float floatVal(int doc) throws IOException { + return expected ? Float.MIN_VALUE : 0.0F; + } + }, + new DoubleDocValues(null) { + @Override + public double doubleVal(int doc) throws IOException { + return expected ? Double.MIN_VALUE : 0.0D; + } + }, + new IntDocValues(null) { + @Override + public int intVal(int doc) throws IOException { + return expected ? 1 : 0; + } + }, + new LongDocValues(null) { + @Override + public long longVal(int doc) throws IOException { + return expected ? 1L : 0L; + } + }, + }; + + for (FunctionValues fv : values) { + // docId is irrelevant since all of our FunctionValues return a constant value. + assertEquals(fv.getClass().getSuperclass().toString(), expected, fv.boolVal(123)); + } + } +} diff --git a/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java b/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java index 6d27071d220..98349c35ebe 100644 --- a/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java +++ b/lucene/replicator/src/test/org/apache/lucene/replicator/ReplicatorTestCase.java @@ -28,7 +28,7 @@ import org.eclipse.jetty.server.SecureRequestCustomizer; import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.SslConnectionFactory; -import org.eclipse.jetty.server.session.HashSessionIdManager; +import org.eclipse.jetty.server.session.DefaultSessionIdManager; import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.thread.QueuedThreadPool; import org.junit.AfterClass; @@ -109,7 +109,7 @@ public abstract class ReplicatorTestCase extends LuceneTestCase { connector.setHost("127.0.0.1"); server.setConnectors(new Connector[] {connector}); - server.setSessionIdManager(new HashSessionIdManager(new Random(random().nextLong()))); + server.setSessionIdManager(new DefaultSessionIdManager(server, new Random(random().nextLong()))); server.setHandler(handler); server.start(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonBoundingBox.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonBoundingBox.java index c6d8b5551cf..3a2264ccd2e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonBoundingBox.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonBoundingBox.java @@ -203,29 +203,30 @@ public class LatLonBoundingBox extends Field { sb.append(" <"); sb.append(name); sb.append(':'); + sb.append('['); byte[] b = ((BytesRef)fieldsData).bytes; - toString(b, 0); + sb.append(toString(b, 0)); + sb.append(','); + sb.append(toString(b, 1)); + sb.append(']'); sb.append('>'); - return sb.toString(); } private static String toString(byte[] ranges, int dimension) { - double min, max; - int minOfs = 0; - int maxOfs = ranges.length/2; + double lat, lon; switch (dimension) { case 0: - min = decodeLatitude(ranges, minOfs); - max = decodeLatitude(ranges, maxOfs); + lat = decodeLatitude(ranges, 0); + lon = decodeLongitude(ranges, 4); break; case 1: - min = decodeLongitude(ranges, minOfs); - max = 
decodeLongitude(ranges, maxOfs); + lat = decodeLatitude(ranges, 8); + lon = decodeLongitude(ranges, 12); break; default: throw new IllegalArgumentException("invalid dimension [" + dimension + "] in toString"); } - return "[" + min + " : " + max + "]"; + return lat + "," + lon; } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java index 840ade3ad20..68850087fec 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java @@ -30,7 +30,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause; @@ -68,7 +68,7 @@ public class FuzzyLikeThisQuery extends Query { // TODO: generalize this query (at least it should not reuse this static sim! // a better way might be to convert this into multitermquery rewrite methods. - // the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq) + // the rewrite method can 'average' the TermStates's term statistics (docfreq,totalTermFreq) // provided to TermQuery, so that the general idea is agnostic to any scoring system... static TFIDFSimilarity sim=new ClassicSimilarity(); ArrayList fieldVals=new ArrayList<>(); @@ -255,9 +255,9 @@ public class FuzzyLikeThisQuery extends Query if (ignoreTF) { return new ConstantScoreQuery(new TermQuery(term)); } else { - // we build an artificial TermContext that will give an overall df and ttf + // we build an artificial TermStates that will give an overall df and ttf // equal to 1 - TermContext context = new TermContext(reader.getContext()); + TermStates context = new TermStates(reader.getContext()); for (LeafReaderContext leafContext : reader.leaves()) { Terms terms = leafContext.reader().terms(term.field()); if (terms != null) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java index d5607da3ebf..42a5f74216b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java @@ -29,7 +29,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; @@ -194,11 +194,11 @@ public class TermAutomatonQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { IndexReaderContext context = searcher.getTopReaderContext(); - Map termStates = new HashMap<>(); + Map termStates = new HashMap<>(); for (Map.Entry ent : termToID.entrySet()) { if (ent.getKey() != null) { - termStates.put(ent.getValue(), TermContext.build(context, new Term(field, ent.getKey()))); + termStates.put(ent.getValue(), 
TermStates.build(context, new Term(field, ent.getKey()), scoreMode.needsScores())); } } @@ -334,15 +334,15 @@ public class TermAutomatonQuery extends Query { final class TermAutomatonWeight extends Weight { final Automaton automaton; - private final Map termStates; - private final Similarity.SimWeight stats; + private final Map termStates; + private final Similarity.SimScorer stats; private final Similarity similarity; - public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map termStates, float boost) throws IOException { + public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map termStates, float boost) throws IOException { super(TermAutomatonQuery.this); this.automaton = automaton; this.termStates = termStates; - this.similarity = searcher.getSimilarity(true); + this.similarity = searcher.getSimilarity(); List allTermStats = new ArrayList<>(); for(Map.Entry ent : idToTerm.entrySet()) { Integer termID = ent.getKey(); @@ -357,7 +357,7 @@ public class TermAutomatonQuery extends Query { if (allTermStats.isEmpty()) { stats = null; // no terms matched at all, will not use sim } else { - stats = similarity.computeWeight(boost, searcher.collectionStatistics(field), + stats = similarity.scorer(boost, searcher.collectionStatistics(field), allTermStats.toArray(new TermStatistics[allTermStats.size()])); } } @@ -383,11 +383,11 @@ public class TermAutomatonQuery extends Query { EnumAndScorer[] enums = new EnumAndScorer[idToTerm.size()]; boolean any = false; - for(Map.Entry ent : termStates.entrySet()) { - TermContext termContext = ent.getValue(); - assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + for(Map.Entry ent : termStates.entrySet()) { + TermStates termStates = ent.getValue(); + assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); BytesRef term = idToTerm.get(ent.getKey()); - TermState state = termContext.get(context.ord); + TermState state = termStates.get(context); if (state != null) { TermsEnum termsEnum = context.reader().terms(field).iterator(); termsEnum.seekExact(term, state); @@ -397,7 +397,7 @@ public class TermAutomatonQuery extends Query { } if (any) { - return new TermAutomatonScorer(this, enums, anyTermID, idToTerm, similarity.simScorer(stats, context)); + return new TermAutomatonScorer(this, enums, anyTermID, idToTerm, new LeafSimScorer(stats, context.reader(), true, Float.MAX_VALUE)); } else { return null; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java index 27270e7b0eb..6094c010bde 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java @@ -21,7 +21,6 @@ import java.util.Map; import org.apache.lucene.search.TermAutomatonQuery.EnumAndScorer; import org.apache.lucene.search.TermAutomatonQuery.TermAutomatonWeight; -import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; @@ -47,7 +46,7 @@ class TermAutomatonScorer extends Scorer { // This is -1 if wildcard (null) terms were not used, else 
it's the id // of the wildcard term: private final int anyTermID; - private final Similarity.SimScorer docScorer; + private final LeafSimScorer docScorer; private int numSubsOnDoc; @@ -56,7 +55,7 @@ class TermAutomatonScorer extends Scorer { private int docID = -1; private int freq; - public TermAutomatonScorer(TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, Map idToTerm, Similarity.SimScorer docScorer) throws IOException { + public TermAutomatonScorer(TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, Map idToTerm, LeafSimScorer docScorer) throws IOException { super(weight); //System.out.println(" automaton:\n" + weight.automaton.toDot()); this.runAutomaton = new TermRunAutomaton(weight.automaton, subs.length); @@ -362,7 +361,7 @@ class TermAutomatonScorer extends Scorer { @Override public float maxScore() { - return docScorer.maxScore(Float.POSITIVE_INFINITY); + return docScorer.maxScore(); } static class TermRunAutomaton extends RunAutomaton { diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestLatLonBoundingBoxQueries.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestLatLonBoundingBoxQueries.java index 39d32a17b65..7506fb89e8a 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/TestLatLonBoundingBoxQueries.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestLatLonBoundingBoxQueries.java @@ -97,6 +97,12 @@ public class TestLatLonBoundingBoxQueries extends BaseRangeFieldQueryTestCase { dir.close(); } + public void testToString() { + LatLonBoundingBox field = new LatLonBoundingBox(FIELD_NAME, -20d, -180d, 20d, -100d); + String expected = "LatLonBoundingBox "; + assertEquals(expected, field.toString()); + } + @Override protected int dimension() { return 2; diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java index 370d009172a..617a721616a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java @@ -534,12 +534,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - throw new UnsupportedOperationException(); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { throw new UnsupportedOperationException(); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index 177397767cc..fa113113f81 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -161,7 +161,7 @@ public class QueryUtils { }; IndexSearcher out = LuceneTestCase.newSearcher(new MultiReader(readers)); - out.setSimilarity(s.getSimilarity(true)); + out.setSimilarity(s.getSimilarity()); return out; } @@ -373,7 +373,7 @@ public class QueryUtils { if (lastReader[0] != null) { final LeafReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); - indexSearcher.setSimilarity(s.getSimilarity(true)); + indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = (LeafReaderContext)indexSearcher.getTopReaderContext(); Scorer scorer = w.scorer(ctx); @@ -403,7 +403,7 @@ public class QueryUtils { // previous reader, hits NO_MORE_DOCS final LeafReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); - indexSearcher.setSimilarity(s.getSimilarity(true)); + indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); LeafReaderContext ctx = previousReader.getContext(); Scorer scorer = w.scorer(ctx); @@ -475,7 +475,7 @@ public class QueryUtils { if (lastReader[0] != null) { final LeafReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); - indexSearcher.setSimilarity(s.getSimilarity(true)); + indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); if (scorer != null) { @@ -503,7 +503,7 @@ public class QueryUtils { // previous reader, hits NO_MORE_DOCS final LeafReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); - indexSearcher.setSimilarity(s.getSimilarity(true)); + indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q, ScoreMode.COMPLETE); Scorer scorer = w.scorer((LeafReaderContext)indexSearcher.getTopReaderContext()); if (scorer != null) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java index 2fdef99aa35..b92ed75dfb3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java @@ -30,7 +30,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; @@ -186,8 +186,8 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } try { for(Term term : terms) { - final TermContext termContext = 
TermContext.build(s.getIndexReader().getContext(), term); - stats.put(term, s.termStatistics(term, termContext)); + final TermStates termStates = TermStates.build(s.getIndexReader().getContext(), term, true); + stats.put(term, s.termStatistics(term, termStates)); } } finally { node.searchers.release(s); @@ -262,7 +262,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } @Override - public TermStatistics termStatistics(Term term, TermContext context) throws IOException { + public TermStatistics termStatistics(Term term, TermStates context) throws IOException { assert term != null; long docFreq = 0; long totalTermFreq = 0; diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/AssertingSimilarity.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/AssertingSimilarity.java index ac41ea91724..0eaf738d9aa 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/AssertingSimilarity.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/AssertingSimilarity.java @@ -16,10 +16,7 @@ */ package org.apache.lucene.search.similarities; -import java.io.IOException; - import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; @@ -44,11 +41,13 @@ public class AssertingSimilarity extends Similarity { assert state.getNumOverlap() < state.getLength(); assert state.getUniqueTermCount() > 0; assert state.getUniqueTermCount() <= state.getLength(); - return delegate.computeNorm(state); + long norm = delegate.computeNorm(state); + assert norm != 0; + return norm; } @Override - public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { assert boost >= 0; assert collectionStats != null; assert termStats.length > 0; @@ -56,70 +55,47 @@ public class AssertingSimilarity extends Similarity { assert term != null; } // TODO: check that TermStats is in bounds with respect to collection? e.g. 
docFreq <= maxDoc - SimWeight weight = delegate.computeWeight(boost, collectionStats, termStats); - assert weight != null; - return new AssertingWeight(weight, boost); + SimScorer scorer = delegate.scorer(boost, collectionStats, termStats); + assert scorer != null; + return new AssertingSimScorer(scorer, boost); } - static class AssertingWeight extends SimWeight { - final SimWeight delegate; + static class AssertingSimScorer extends SimScorer { + final SimScorer delegate; final float boost; - AssertingWeight(SimWeight delegate, float boost) { + AssertingSimScorer(SimScorer delegate, float boost) { + super(delegate.getField()); this.delegate = delegate; this.boost = boost; } - } - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - assert weight != null; - assert context != null; - AssertingWeight assertingWeight = (AssertingWeight)weight; - SimScorer delegateScorer = delegate.simScorer(assertingWeight.delegate, context); - assert delegateScorer != null; + @Override + public float score(float freq, long norm) { + // freq in bounds + assert Float.isFinite(freq); + assert freq > 0; + // result in bounds + float score = delegate.score(freq, norm); + assert Float.isFinite(score); + assert score <= delegate.score(freq, 1); + assert score >= 0; + return score; + } - return new SimScorer() { - @Override - public float score(int doc, float freq) throws IOException { - // doc in bounds - assert doc >= 0; - assert doc < context.reader().maxDoc(); - // freq in bounds - assert Float.isFinite(freq); - assert freq > 0; - // result in bounds - float score = delegateScorer.score(doc, freq); - assert Float.isFinite(score); - assert score <= maxScore(freq); - assert score >= 0; - return score; - } - - @Override - public float maxScore(float maxFreq) { - float maxScore = delegateScorer.maxScore(maxFreq); - assert Float.isNaN(maxScore) == false; - return maxScore; - } - - @Override - public Explanation explain(int doc, Explanation freq) throws IOException { - // doc in bounds - assert doc >= 0; - assert doc < context.reader().maxDoc(); - // freq in bounds - assert freq != null; - assert Float.isFinite(freq.getValue().floatValue()); - // result in bounds - Explanation explanation = delegateScorer.explain(doc, freq); - assert explanation != null; - assert Float.isFinite(explanation.getValue().floatValue()); - // result matches score exactly - assert explanation.getValue().floatValue() == delegateScorer.score(doc, freq.getValue().floatValue()); - return explanation; - } - }; + @Override + public Explanation explain(Explanation freq, long norm) { + // freq in bounds + assert freq != null; + assert Float.isFinite(freq.getValue().floatValue()); + // result in bounds + Explanation explanation = delegate.explain(freq, norm); + assert explanation != null; + assert Float.isFinite(explanation.getValue().floatValue()); + // result matches score exactly + assert explanation.getValue().floatValue() == delegate.score(freq.getValue().floatValue(), norm); + return explanation; + } } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java index 91e64c09445..dcd65340285 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java @@ -17,24 +17,18 @@ package 
org.apache.lucene.search.similarities; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Random; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.similarities.Similarity.SimScorer; -import org.apache.lucene.search.similarities.Similarity.SimWeight; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; @@ -54,119 +48,28 @@ import org.junit.BeforeClass; * test fails to catch then this test needs to be improved! */ public abstract class BaseSimilarityTestCase extends LuceneTestCase { - static LeafReader WITHOUT_NORM; - static Directory WITHOUT_NORM_DIR; - - static LeafReader WITH_NORM_BASE; - static Directory WITH_NORM_DIR; - static List NORM_VALUES; + static LeafReader READER; + static Directory DIR; @BeforeClass public static void beforeClass() throws Exception { - // without norms - WITHOUT_NORM_DIR = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random(), WITHOUT_NORM_DIR); - Document doc = new Document(); - doc.add(newTextField("field", "value", Field.Store.NO)); - writer.addDocument(doc); - WITHOUT_NORM = getOnlyLeafReader(writer.getReader()); - writer.close(); - // with norms - WITH_NORM_DIR = newDirectory(); - writer = new RandomIndexWriter(random(), WITH_NORM_DIR); - doc = new Document(); + DIR = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), DIR); + Document doc = new Document(); FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED); fieldType.setOmitNorms(true); doc.add(newField("field", "value", fieldType)); writer.addDocument(doc); - WITH_NORM_BASE = getOnlyLeafReader(writer.getReader()); + READER = getOnlyLeafReader(writer.getReader()); writer.close(); - - // all possible norm values for the doc - NORM_VALUES = new ArrayList<>(); - NORM_VALUES.add(WITHOUT_NORM); - for (int i = 1; i < 256; i++) { - final long value = i; - NORM_VALUES.add(new FilterLeafReader(WITH_NORM_BASE) { - @Override - public CacheHelper getCoreCacheHelper() { - return null; - } - - @Override - public CacheHelper getReaderCacheHelper() { - return null; - } - - @Override - public NumericDocValues getNormValues(String field) throws IOException { - if (field.equals("field")) { - return new CannedNorm(value); - } else { - return super.getNormValues(field); - } - } - }); - } } @AfterClass public static void afterClass() throws Exception { - IOUtils.close(WITH_NORM_BASE, WITH_NORM_DIR, WITHOUT_NORM, WITHOUT_NORM_DIR); - WITH_NORM_BASE = WITHOUT_NORM = null; - WITH_NORM_DIR = WITHOUT_NORM_DIR = null; - NORM_VALUES = null; - } - - /** 1-document norms impl of the given value */ - static class CannedNorm extends NumericDocValues { - int docID = -1; - final long value; - - CannedNorm(long value) { - this.value = value; - } - - @Override - public long longValue() throws IOException { - return value; - } - - @Override - public boolean advanceExact(int target) throws IOException { - assert target == 0; - docID = 
target; - return true; - } - - @Override - public int docID() { - return docID; - } - - @Override - public int nextDoc() throws IOException { - if (docID == -1) { - return docID = 0; - } else { - return docID = NO_MORE_DOCS; - } - } - - @Override - public int advance(int target) throws IOException { - if (target == 0) { - return docID = 0; - } else { - return docID = NO_MORE_DOCS; - } - } - - @Override - public long cost() { - return 0; - } + IOUtils.close(READER, DIR); + READER = null; + DIR = null; } /** @@ -354,7 +257,7 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { Similarity similarity = getSimilarity(random); for (int j = 0; j < 10; j++) { // for each norm value... - for (int k = 0; k < NORM_VALUES.size(); k++) { + for (int k = 1; k < 256; k++) { CollectionStatistics corpus = newCorpus(random, k); for (int l = 0; l < 10; l++) { TermStatistics term = newTerm(random, corpus); @@ -441,17 +344,18 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { /** runs for a single test case, so that if you hit a test failure you can write a reproducer just for that scenario */ private static void doTestScoring(Similarity similarity, CollectionStatistics corpus, TermStatistics term, float boost, float freq, int norm) throws IOException { boolean success = false; - SimWeight weight = similarity.computeWeight(boost, corpus, term); - SimScorer scorer = similarity.simScorer(weight, NORM_VALUES.get(norm).getContext()); + SimScorer scorer = similarity.scorer(boost, corpus, term); try { - float score = scorer.score(0, freq); + float maxScore = scorer.score(Float.MAX_VALUE, 1); + assertFalse("maxScore is NaN", Float.isNaN(maxScore)); + + float score = scorer.score(freq, norm); // check that score isn't infinite or negative assertTrue("infinite/NaN score: " + score, Float.isFinite(score)); assertTrue("negative score: " + score, score >= 0); - float maxScore = scorer.maxScore(freq); - assertTrue("score > maxScore: " + score + " > " + maxScore, score <= maxScore); + assertTrue("greater than maxScore: " + score + ">" + maxScore, score <= maxScore); // check explanation matches - Explanation explanation = scorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document")); + Explanation explanation = scorer.explain(Explanation.match(freq, "freq, occurrences of term within document"), norm); if (score != explanation.getValue().doubleValue()) { fail("expected: " + score + ", got: " + explanation); } @@ -467,12 +371,12 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { prevFreq = Math.nextDown(freq); } - float prevScore = scorer.score(0, prevFreq); + float prevScore = scorer.score(prevFreq, norm); // check that score isn't infinite or negative assertTrue(Float.isFinite(prevScore)); assertTrue(prevScore >= 0); // check explanation matches - Explanation prevExplanation = scorer.explain(0, Explanation.match(prevFreq, "freq, occurrences of term within document")); + Explanation prevExplanation = scorer.explain(Explanation.match(prevFreq, "freq, occurrences of term within document"), norm); if (prevScore != prevExplanation.getValue().doubleValue()) { fail("expected: " + prevScore + ", got: " + prevExplanation); } @@ -486,13 +390,12 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { // check score(norm-1), given the same freq it should be >= score(norm) [scores non-decreasing as docs get shorter] if (norm > 1) { - SimScorer prevNormScorer = similarity.simScorer(weight, NORM_VALUES.get(norm - 1).getContext()); - 
float prevNormScore = prevNormScorer.score(0, freq); + float prevNormScore = scorer.score(freq, norm - 1); // check that score isn't infinite or negative assertTrue(Float.isFinite(prevNormScore)); assertTrue(prevNormScore >= 0); // check explanation matches - Explanation prevNormExplanation = prevNormScorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document")); + Explanation prevNormExplanation = scorer.explain(Explanation.match(freq, "freq, occurrences of term within document"), norm - 1); if (prevNormScore != prevNormExplanation.getValue().doubleValue()) { fail("expected: " + prevNormScore + ", got: " + prevNormExplanation); } @@ -508,14 +411,13 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { // check score(term-1), given the same freq/norm it should be >= score(term) [scores non-decreasing as terms get rarer] if (term.docFreq() > 1 && freq < term.totalTermFreq()) { TermStatistics prevTerm = new TermStatistics(term.term(), term.docFreq() - 1, term.totalTermFreq() - 1); - SimWeight prevWeight = similarity.computeWeight(boost, corpus, term); - SimScorer prevTermScorer = similarity.simScorer(prevWeight, NORM_VALUES.get(norm).getContext()); - float prevTermScore = prevTermScorer.score(0, freq); + SimScorer prevTermScorer = similarity.scorer(boost, corpus, term); + float prevTermScore = prevTermScorer.score(freq, norm); // check that score isn't infinite or negative assertTrue(Float.isFinite(prevTermScore)); assertTrue(prevTermScore >= 0); // check explanation matches - Explanation prevTermExplanation = prevTermScorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document")); + Explanation prevTermExplanation = prevTermScorer.explain(Explanation.match(freq, "freq, occurrences of term within document"), norm); if (prevTermScore != prevTermExplanation.getValue().doubleValue()) { fail("expected: " + prevTermScore + ", got: " + prevTermExplanation); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index a9956b38eec..be04e009209 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -22,10 +22,10 @@ import java.util.Set; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.LeafSimScorer; /** * Wraps a SpanWeight with additional asserts @@ -45,8 +45,8 @@ public class AssertingSpanWeight extends SpanWeight { } @Override - public void extractTermContexts(Map contexts) { - in.extractTermContexts(contexts); + public void extractTermStates(Map contexts) { + in.extractTermStates(contexts); } @Override @@ -58,7 +58,7 @@ public class AssertingSpanWeight extends SpanWeight { } @Override - public Similarity.SimScorer getSimScorer(LeafReaderContext context) throws IOException { + public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException { return in.getSimScorer(context); } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c8e36f2dcf9..d0700b6e4e7 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -47,7 +47,29 @@ Carrot2 3.15.0 
Velocity 1.7 and Velocity Tools 2.0 Apache UIMA 2.3.1 Apache ZooKeeper 3.4.10 -Jetty 9.3.20.v20170531 +Jetty 9.4.8.v20171121 + +Upgrade Notes +---------------------- + +* SOLR-11748: The throttling mechanism used to limit the rate of autoscaling events processed + has been removed. This deprecates the 'actionThrottlePeriodSeconds' setting in the set-properties + Autoscaling API, which is now a no-op. Use the 'triggerCooldownPeriodSeconds' setting instead to pause event + processing. + +* SOLR-11798: The top-level <highlighting> syntax in solrconfig.xml is now formally + deprecated in favour of equivalent <searchComponent> syntax. See also SOLR-1696. + +* SOLR-11809: QueryComponent's rq parameter parsing no longer considers the defType parameter. + +* SOLR-11747: The behaviour of the autoscaling system has been modified to pause all triggers from execution between + the start of actions and the end of the cool down period. The triggers will be resumed after the cool down period expires. + Previously, the cool down period was a fixed period that started after the actions for a trigger event completed; during + this time, all triggers continued to run, but any events were rejected, to be retried later. + +* SOLR-11624: Collections created without specifying a configset name use a copy of the _default configset since 7.0. + Before 7.3, the copied-over configset was named the same as the collection, but from 7.3 onwards it will be named + with an additional ".AUTOCREATED" suffix. New Features ---------------------- * SOLR-11201: Implement autoscaling trigger for arbitrary metrics that creates events when a given metric breaches a threshold (shalin) +* SOLR-11653: TimeRoutedAlias URP now auto-creates new collections on the fly according to alias metadata + rules that set the time interval for each collection. An internal Overseer command "ROUTEDALIAS_CREATECOLL" + was created to facilitate this. (David Smiley) + +* SOLR-11062: new tag "diskType" in autoscaling policy (noble) + +* SOLR-11063: Suggesters should accept required freedisk as a hint (noble) + +* SOLR-3218: Added range faceting support for CurrencyFieldType. This includes both "facet.range" as well + as json.facet's "type:range" (Andrew Morrison, Jan Høydahl, Vitaliy Zhovtyuk, hossman) + +* SOLR-11064: Collection APIs should use the disk space hint when using the policy framework (noble) + +* SOLR-11854: multivalued primitive fields can now be sorted by implicitly choosing the min/max + value for asc/desc sort orders. (hossman) + +* SOLR-11592: Add OpenNLP language detection to the langid contrib. (Koji, Steve Rowe) + +* SOLR-11648: A new admin UI to display and execute suggestions (Apoorv Bhawsar, noble) + Bug Fixes ---------------------- * SOLR-11555: If the query terms reduce to nothing, filter(clause) produces an NPE whereas fq=clause does not (Erick Erickson) +* SOLR-11824: Fixed bucket ordering in distributed json.facet type:range when mincount>0 (hossman) + +* SOLR-11821: ConcurrentModificationException in SimSolrCloudTestCase.tearDown (shalin) + +* SOLR-11631: The Schema API should return non-zero status when there are failures. + (Noble Paul, Steve Rowe) + +* SOLR-11839: Fix test failures resulting from SOLR-11218 (Erick Erickson) + +* SOLR-11794: PULL replicas stop replicating after collection RELOAD (Samuel Tatipamula, Tomás Fernández Löbbe) + Optimizations ---------------------- * SOLR-7733: remove "optimize" from the UI. (Erick Erickson) +* SOLR-11748: Remove Autoscaling action throttle.
(shalin) + +* SOLR-11805: SolrJ's SolrResponse.getElapsedTime was sometimes a millisecond off. (David Smiley) + +* SOLR-11798: Formally deprecate top-level <highlighting> syntax in solrconfig.xml + in favour of equivalent <searchComponent> syntax. (Christine Poerschke) + +* SOLR-11801: Support customisation of the "highlighting" query response element. + (Ramsey Haddad, Pranav Murugappan, David Smiley, Christine Poerschke) + +* SOLR-11692: SolrDispatchFilter's use of a "close shield" in tests should not be applied to + further servlet chain processing. (Jeff Miller, David Smiley) + +* SOLR-11218: Fail and return an error when attempting to delete a collection that's part of an alias (Erick Erickson) + +* SOLR-11817: Move Collections API classes to their own package (Varun Thacker) + +* SOLR-11810: Upgrade Jetty to 9.4.8.v20171121 (Varun Thacker, Erick Erickson) + +* SOLR-11747: Pause triggers until actions finish executing and the cool down period expires. (shalin) + +* SOLR-11871: MoveReplicaSuggester should not suggest leader if other replicas are available (noble) + +* SOLR-11624: Collections created from the _default configset will now be associated with a configset with the suffix + .AUTOCREATED. For example, a new collection "mycollection", created without specifying a configset name, will + use the _default configset and the associated configset name will be "mycollection.AUTOCREATED". If this + collection is deleted and re-created, the autocreated configset will be left behind and will be re-used for + the re-created collection (Ishan Chattopadhyaya, Abhishek Kumar Singh) + +* SOLR-11051: Use disk free metric in default cluster preferences (noble) + +================== 7.2.1 ================== + +Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. + +Versions of Major Components +--------------------- +Apache Tika 1.16 +Carrot2 3.15.0 +Velocity 1.7 and Velocity Tools 2.0 +Apache UIMA 2.3.1 +Apache ZooKeeper 3.4.10 +Jetty 9.3.20.v20170531 + +Bug Fixes +---------------------- + +* SOLR-11771: Overseer can never process some last messages (Cao Manh Dat) + +* SOLR-11783: Rename core in Solr standalone mode is not persisted (Erick Erickson) + +* SOLR-11809: QueryComponent.prepare rq parsing could fail under Solr 7.2.0 - fix: + QueryComponent's rq parameter parsing no longer considers the defType parameter. + (Christine Poerschke and David Smiley in response to bug report/analysis + from Dariusz Wojtas and Diego Ceccarelli) + +* SOLR-11555: If the query terms reduce to nothing, filter(clause) produces an NPE whereas + fq=clause does not (Erick Erickson) + ================== 7.2.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/bin/solr b/solr/bin/solr index ef83ceed949..0ce19cbd4e5 100755 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -19,7 +19,7 @@ # # Use solr -help to see available command-line options. In addition # to passing command-line options, this script looks for an include -# file named solr.in.sh to set environment variables. Specifically, +# file named solr.in.sh to set environment variables. Specifically, # the following locations are searched in this order: # # ./ @@ -34,15 +34,15 @@ # # $ SOLR_INCLUDE=/path/to/solr.in.sh solr start # -# Note: This is particularly handy for running multiple instances on a +# Note: This is particularly handy for running multiple instances on a # single installation, or for quick tests.
# -# Finally, developers and enthusiasts who frequently run from an SVN +# Finally, developers and enthusiasts who frequently run from an SVN # checkout, and do not want to locally modify bin/solr.in.sh, can put # a customized include file at ~/.solr.in.sh. # # If you would rather configure startup entirely from the environment, you -# can disable the include by exporting an empty SOLR_INCLUDE, or by +# can disable the include by exporting an empty SOLR_INCLUDE, or by # ensuring that no include files exist in the aforementioned search list. SOLR_SCRIPT="$0" @@ -287,11 +287,11 @@ fi function print_usage() { CMD="$1" ERROR_MSG="$2" - + if [ "$ERROR_MSG" != "" ]; then echo -e "\nERROR: $ERROR_MSG\n" fi - + if [ -z "$CMD" ]; then echo "" echo "Usage: solr COMMAND OPTIONS" @@ -648,7 +648,7 @@ function solr_pid_by_port() { echo "$solrPID" } -# extract the value of the -Djetty.port parameter from a running Solr process +# extract the value of the -Djetty.port parameter from a running Solr process function jetty_port() { SOLR_PID="$1" SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty\.port` @@ -660,11 +660,11 @@ function jetty_port() { local jetty_port="${pair[1]}" break fi - done + done echo "$jetty_port" } # end jetty_port func -# run a Solr command-line tool using the SolrCLI class; +# run a Solr command-line tool using the SolrCLI class; # useful for doing cross-platform work from the command-line using Java function run_tool() { @@ -723,7 +723,7 @@ function get_info() { return $CODE } # end get_info -# tries to gracefully stop Solr using the Jetty +# tries to gracefully stop Solr using the Jetty # stop command and if that fails, then uses kill -9 function stop_solr() { @@ -793,7 +793,7 @@ if [ $# -eq 1 ]; then fi if [ $# -gt 0 ]; then - # if first arg starts with a dash (and it's not -help or -info), + # if first arg starts with a dash (and it's not -help or -info), # then assume they are starting Solr, such as: solr -f if [[ $1 == -* ]]; then SCRIPT_CMD="start" @@ -804,7 +804,7 @@ if [ $# -gt 0 ]; then else # no args - just show usage and exit print_usage "" - exit + exit fi if [ "$SCRIPT_CMD" == "status" ]; then @@ -826,7 +826,7 @@ if [ "$SCRIPT_CMD" == "healthcheck" ]; then VERBOSE="" if [ $# -gt 0 ]; then - while true; do + while true; do case "$1" in -c|-collection) if [[ -z "$2" || "${2:0:1}" == "-" ]]; then @@ -836,7 +836,7 @@ if [ "$SCRIPT_CMD" == "healthcheck" ]; then HEALTHCHECK_COLLECTION="$2" shift 2 ;; - -z|-zkhost) + -z|-zkhost) if [[ -z "$2" || "${2:0:1}" == "-" ]]; then print_usage "$SCRIPT_CMD" "ZooKeeper connection string is required when using the $1 option!" exit 1 @@ -845,7 +845,7 @@ if [ "$SCRIPT_CMD" == "healthcheck" ]; then shift 2 ;; -help|-usage) - print_usage "$SCRIPT_CMD" + print_usage "$SCRIPT_CMD" exit 0 ;; -V|--verbose) @@ -857,29 +857,29 @@ if [ "$SCRIPT_CMD" == "healthcheck" ]; then break ;; *) - if [ "$1" != "" ]; then + if [ "$1" != "" ]; then print_usage "$SCRIPT_CMD" "Unrecognized or misplaced argument: $1!" exit 1 else break # out-of-args, stop looping - fi + fi ;; esac done fi - + if [ -z "$ZK_HOST" ]; then ZK_HOST=localhost:9983 fi - + if [ -z "$HEALTHCHECK_COLLECTION" ]; then echo "collection parameter is required!" print_usage "healthcheck" - exit 1 + exit 1 fi - + run_tool healthcheck -zkHost "$ZK_HOST" -collection "$HEALTHCHECK_COLLECTION" $VERBOSE - + exit $? 
fi @@ -973,7 +973,7 @@ if [[ "$SCRIPT_CMD" == "create" || "$SCRIPT_CMD" == "create_core" || "$SCRIPT_CM if [ -z "$CREATE_CONFDIR" ]; then CREATE_CONFDIR='_default' fi - + # validate the confdir arg (if provided) if [[ ! -d "$SOLR_TIP/server/solr/configsets/$CREATE_CONFDIR" && ! -d "$CREATE_CONFDIR" ]]; then echo -e "\nSpecified configuration directory $CREATE_CONFDIR not found!\n" @@ -1431,13 +1431,13 @@ if [ -z "$SOLR_ULIMIT_CHECKS" ] || [ "$SOLR_ULIMIT_CHECKS" != "false" ]; then maxProcs=$(ulimit -u) if [ $openFiles -lt "$SOLR_RECOMMENDED_OPEN_FILES" ]; then echo "*** [WARN] *** Your open file limit is currently $openFiles. " - echo " It should be set to $SOLR_RECOMMENDED_OPEN_FILES to avoid operational impariment. " + echo " It should be set to $SOLR_RECOMMENDED_OPEN_FILES to avoid operational disruption. " echo " If you no longer wish to see this warning, set SOLR_ULIMIT_CHECKS to false in your profile or solr.in.sh" fi if [ $maxProcs -lt "$SOLR_RECOMMENDED_MAX_PROCESSES" ]; then echo "*** [WARN] *** Your Max Processes Limit is currently $maxProcs. " - echo " It should be set to $SOLR_RECOMMENDED_MAX_PROCESSES to avoid operational impariment. " + echo " It should be set to $SOLR_RECOMMENDED_MAX_PROCESSES to avoid operational disruption. " echo " If you no longer wish to see this warning, set SOLR_ULIMIT_CHECKS to false in your profile or solr.in.sh" fi else @@ -1456,7 +1456,7 @@ SOLR_OPTS=($SOLR_OPTS) PASS_TO_RUN_EXAMPLE= if [ $# -gt 0 ]; then - while true; do + while true; do case "$1" in -c|-cloud) SOLR_MODE="solrcloud" @@ -1854,9 +1854,9 @@ if [ "$SOLR_MODE" == 'solrcloud' ]; then if [ -z "$ZK_CLIENT_TIMEOUT" ]; then ZK_CLIENT_TIMEOUT="15000" fi - + CLOUD_MODE_OPTS=("-DzkClientTimeout=$ZK_CLIENT_TIMEOUT") - + if [ "$ZK_HOST" != "" ]; then CLOUD_MODE_OPTS+=("-DzkHost=$ZK_HOST") else @@ -1871,7 +1871,7 @@ if [ "$SOLR_MODE" == 'solrcloud' ]; then if [ -e "$SOLR_HOME/collection1/core.properties" ]; then CLOUD_MODE_OPTS+=('-Dbootstrap_confdir=./solr/collection1/conf' '-Dcollection.configName=myconf' '-DnumShards=1') fi - + else if [ ! -e "$SOLR_HOME/solr.xml" ]; then echo -e "\nSolr home directory $SOLR_HOME must contain a solr.xml file!\n" @@ -1928,7 +1928,7 @@ function launch_solr() { run_in_foreground="$1" stop_port="$STOP_PORT" - + SOLR_ADDL_ARGS="$2" SOLR_JETTY_ADDL_CONFIG="$3" @@ -2009,10 +2009,10 @@ function launch_solr() { fi echo -e "\n" fi - + # need to launch solr from the server dir cd "$SOLR_SERVER_DIR" - + if [ ! 
-e "$SOLR_SERVER_DIR/start.jar" ]; then echo -e "\nERROR: start.jar file not found in $SOLR_SERVER_DIR!\nPlease check your -d parameter to set the correct Solr server directory.\n" exit 1 diff --git a/solr/contrib/langid/README.txt b/solr/contrib/langid/README.txt index 2e6cd54d4c6..68a2ea58c39 100644 --- a/solr/contrib/langid/README.txt +++ b/solr/contrib/langid/README.txt @@ -18,4 +18,5 @@ Please refer to the module documentation at http://wiki.apache.org/solr/Language Dependencies ------------ The Tika detector depends on Tika Core (which is part of extraction contrib) -The Langdetect detector depends on LangDetect library \ No newline at end of file +The Langdetect detector depends on LangDetect library +The OpenNLP detector depends on OpenNLP tools and requires a previously trained user-supplied model diff --git a/solr/contrib/langid/build.xml b/solr/contrib/langid/build.xml index 8341a763354..aca7aebb1cb 100644 --- a/solr/contrib/langid/build.xml +++ b/solr/contrib/langid/build.xml @@ -25,6 +25,17 @@ + + + + + + + + + + + @@ -39,4 +50,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/langid/ivy.xml b/solr/contrib/langid/ivy.xml index 88dc62830ff..04c6b253e5b 100644 --- a/solr/contrib/langid/ivy.xml +++ b/solr/contrib/langid/ivy.xml @@ -25,6 +25,7 @@ + diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java index a8d6523bbe8..3679905dd79 100644 --- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java +++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -399,4 +400,67 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro this.enabled = enabled; } + + + /** + * Concatenates content from multiple fields + */ + protected String concatFields(SolrInputDocument doc) { + StringBuilder sb = new StringBuilder(getExpectedSize(doc, inputFields)); + for (String fieldName : inputFields) { + log.debug("Appending field " + fieldName); + if (doc.containsKey(fieldName)) { + Collection fieldValues = doc.getFieldValues(fieldName); + if (fieldValues != null) { + for (Object content : fieldValues) { + if (content instanceof String) { + String stringContent = (String) content; + if (stringContent.length() > maxFieldValueChars) { + sb.append(stringContent.substring(0, maxFieldValueChars)); + } else { + sb.append(stringContent); + } + sb.append(" "); + if (sb.length() > maxTotalChars) { + sb.setLength(maxTotalChars); + break; + } + } else { + log.warn("Field " + fieldName + " not a String value, not including in detection"); + } + } + } + } + } + return sb.toString(); + } + + /** + * Calculate expected string size. 
+ * + * @param doc solr input document + * @param fields fields to select + * @return expected size of string value + */ + private int getExpectedSize(SolrInputDocument doc, String[] fields) { + int docSize = 0; + for (String field : fields) { + if (doc.containsKey(field)) { + Collection<Object> contents = doc.getFieldValues(field); + if (contents != null) { + for (Object content : contents) { + if (content instanceof String) { + docSize += Math.min(((String) content).length(), maxFieldValueChars); + } + } + + if (docSize > maxTotalChars) { + docSize = maxTotalChars; + break; + } + } + } + } + return docSize; + } } diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java new file mode 100644 index 00000000000..83f4fe4cdaf --- /dev/null +++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import opennlp.tools.langdetect.Language; +import opennlp.tools.langdetect.LanguageDetectorME; +import opennlp.tools.langdetect.LanguageDetectorModel; + +/** + * Identifies the language of a set of input fields using Apache OpenNLP. + *
+ * See "Language Detector" section of + * https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html */ +public class OpenNLPLangDetectUpdateProcessor extends LanguageIdentifierUpdateProcessor { + + private final LanguageDetectorModel model; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** Maps ISO 639-3 (3-letter language code) to ISO 639-1 (2-letter language code) */ + private static final Map<String,String> ISO639_MAP = make_ISO639_map(); + + public OpenNLPLangDetectUpdateProcessor(SolrQueryRequest req, SolrQueryResponse rsp, + UpdateRequestProcessor next, LanguageDetectorModel model) { + super(req, rsp, next); + this.model = model; + } + + @Override + protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) { + List<DetectedLanguage> languages = new ArrayList<>(); + String content = concatFields(doc); + if (content.length() != 0) { + LanguageDetectorME ldme = new LanguageDetectorME(model); + Language[] langs = ldme.predictLanguages(content); + for(Language language: langs){ + languages.add(new DetectedLanguage(ISO639_MAP.get(language.getLang()), language.getConfidence())); + } + } else { + log.debug("No input text to detect language from, returning empty list"); + } + return languages; + } + + private static Map<String,String> make_ISO639_map() { + Map<String,String> map = new HashMap<>(); + for (String lang : Locale.getISOLanguages()) { + Locale locale = new Locale(lang); + map.put(locale.getISO3Language(), locale.getLanguage()); + } + return map; + } +} diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java new file mode 100644 index 00000000000..dfbdcbdc51e --- /dev/null +++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.io.IOUtils; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.util.SolrPluginUtils; +import org.apache.solr.util.plugin.SolrCoreAware; + +import opennlp.tools.langdetect.LanguageDetectorModel; + +/** + * Identifies the language of a set of input fields using Apache OpenNLP. + *
    + * The UpdateProcessorChain config entry can take a number of parameters + * which may also be passed as HTTP parameters on the update request + * and override the defaults. Here is the simplest processor config possible: + * + *
    + * <pre>
    + * <processor class="org.apache.solr.update.processor.OpenNLPLangDetectUpdateProcessorFactory">
    + *   <str name="langid.fl">title,text</str>
    + *   <str name="langid.langField">language_s</str>
    + *   <str name="langid.model">langdetect-183.bin</str>
    + * </processor>
    + * </pre>
    + * See http://wiki.apache.org/solr/LanguageDetection + */ +public class OpenNLPLangDetectUpdateProcessorFactory extends UpdateRequestProcessorFactory + implements SolrCoreAware { + + private static final String MODEL_PARAM = "langid.model"; + private String modelFile; + private LanguageDetectorModel model; + protected SolrParams defaults; + protected SolrParams appends; + protected SolrParams invariants; + private SolrResourceLoader solrResourceLoader; + + @Override + public void init( NamedList args ) + { + if (args != null) { + Object o; + o = args.get("defaults"); + if (o != null && o instanceof NamedList) { + defaults = SolrParams.toSolrParams((NamedList) o); + } else { + defaults = SolrParams.toSolrParams(args); + } + o = args.get("appends"); + if (o != null && o instanceof NamedList) { + appends = SolrParams.toSolrParams((NamedList) o); + } + o = args.get("invariants"); + if (o != null && o instanceof NamedList) { + invariants = SolrParams.toSolrParams((NamedList) o); + } + + // Look for model filename in invariants, then in args, then defaults + if (invariants != null) { + modelFile = invariants.get(MODEL_PARAM); + } + if (modelFile == null) { + o = args.get(MODEL_PARAM); + if (o != null && o instanceof String) { + modelFile = (String)o; + } else { + modelFile = defaults.get(MODEL_PARAM); + if (modelFile == null) { + throw new RuntimeException("Couldn't load language model, will return empty languages always!"); + } + } + } + } + } + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + // Process defaults, appends and invariants if we got a request + if (req != null) { + SolrPluginUtils.setDefaults(req, defaults, appends, invariants); + } + return new OpenNLPLangDetectUpdateProcessor(req, rsp, next, model); + } + + private void loadModel() throws IOException { + InputStream is = null; + try{ + if (modelFile != null) { + is = solrResourceLoader.openResource(modelFile); + model = new LanguageDetectorModel(is); + } + } + finally{ + IOUtils.closeQuietly(is); + } + } + + @Override + public void inform(SolrCore core){ + solrResourceLoader = core.getResourceLoader(); + try { + loadModel(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java index df0e5f7fa25..5c8146d1db5 100644 --- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java +++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java @@ -28,8 +28,6 @@ import org.apache.solr.common.SolrInputDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Collection; - /** * Identifies the language of a set of input fields using Tika's * LanguageIdentifier. 
@@ -67,67 +65,4 @@ public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpd } return languages; } - - - /** - * Concatenates content from multiple fields - */ - protected String concatFields(SolrInputDocument doc) { - StringBuilder sb = new StringBuilder(getExpectedSize(doc, inputFields)); - for (String fieldName : inputFields) { - log.debug("Appending field " + fieldName); - if (doc.containsKey(fieldName)) { - Collection fieldValues = doc.getFieldValues(fieldName); - if (fieldValues != null) { - for (Object content : fieldValues) { - if (content instanceof String) { - String stringContent = (String) content; - if (stringContent.length() > maxFieldValueChars) { - sb.append(stringContent.substring(0, maxFieldValueChars)); - } else { - sb.append(stringContent); -} - sb.append(" "); - if (sb.length() > maxTotalChars) { - sb.setLength(maxTotalChars); - break; - } - } else { - log.warn("Field " + fieldName + " not a String value, not including in detection"); - } - } - } - } - } - return sb.toString(); - } - - /** - * Calculate expected string size. - * - * @param doc solr input document - * @param fields fields to select - * @return expected size of string value - */ - private int getExpectedSize(SolrInputDocument doc, String[] fields) { - int docSize = 0; - for (String field : fields) { - if (doc.containsKey(field)) { - Collection contents = doc.getFieldValues(field); - if (contents != null) { - for (Object content : contents) { - if (content instanceof String) { - docSize += Math.min(((String) content).length(), maxFieldValueChars); - } - } - - if (docSize > maxTotalChars) { - docSize = maxTotalChars; - break; - } - } - } - } - return docSize; - } } diff --git a/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/opennlp-langdetect.eng-swe-spa-rus-deu.bin b/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/opennlp-langdetect.eng-swe-spa-rus-deu.bin new file mode 100644 index 00000000000..ad584e65be0 Binary files /dev/null and b/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/opennlp-langdetect.eng-swe-spa-rus-deu.bin differ diff --git a/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml b/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml index 9ae54adc148..01dbee9aaea 100644 --- a/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml +++ b/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml @@ -57,11 +57,11 @@ - lang_id + lang_id_tika - + @@ -78,7 +78,7 @@ - + @@ -94,5 +94,22 @@ - + + + + + + true + name,subject + true + language_s + language_sm + th:thai + 0.3 + opennlp-langdetect.eng-swe-spa-rus-deu.bin + + + + + diff --git a/solr/contrib/langid/src/test-files/opennlp.langdetect.trainer.params.txt b/solr/contrib/langid/src/test-files/opennlp.langdetect.trainer.params.txt new file mode 100644 index 00000000000..1ecec823ff3 --- /dev/null +++ b/solr/contrib/langid/src/test-files/opennlp.langdetect.trainer.params.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Algorithm=PERCEPTRON +Cutoff=0 diff --git a/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java b/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java index b90f54a4d3f..21ecd7d6a08 100644 --- a/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java +++ b/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java @@ -38,7 +38,11 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S public static void beforeClass() throws Exception { initCore("solrconfig-languageidentifier.xml", "schema.xml", getFile("langid/solr").getAbsolutePath()); SolrCore core = h.getCore(); - UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("lang_id"); + UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("lang_id_tika"); + assertNotNull(chained); + chained = core.getUpdateProcessingChain("lang_id_lang_detect"); + assertNotNull(chained); + chained = core.getUpdateProcessingChain("lang_id_opennlp"); assertNotNull(chained); } diff --git a/solr/contrib/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java b/solr/contrib/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java new file mode 100644 index 00000000000..7b95e6f4ce8 --- /dev/null +++ b/solr/contrib/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.Test; + +public class OpenNLPLangDetectUpdateProcessorFactoryTest extends LanguageIdentifierUpdateProcessorFactoryTestCase { + private static final String TEST_MODEL = "opennlp-langdetect.eng-swe-spa-rus-deu.bin"; + + @Override + protected OpenNLPLangDetectUpdateProcessor createLangIdProcessor(ModifiableSolrParams parameters) throws Exception { + if (parameters.get("langid.model") == null) { // handle superclass tests that don't provide the model filename + parameters.set("langid.model", TEST_MODEL); + } + if (parameters.get("langid.threshold") == null) { // handle superclass tests that don't provide confidence threshold + parameters.set("langid.threshold", "0.3"); + } + SolrQueryRequest req = _parser.buildRequestFrom(h.getCore(), new ModifiableSolrParams(), null); + OpenNLPLangDetectUpdateProcessorFactory factory = new OpenNLPLangDetectUpdateProcessorFactory(); + factory.init(parameters.toNamedList()); + factory.inform(h.getCore()); + return (OpenNLPLangDetectUpdateProcessor)factory.getInstance(req, resp, null); + } + + // this one actually works better it seems with short docs + @Override + protected SolrInputDocument tooShortDoc() { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("text", ""); + return doc; + } + + @Test @Override + public void testLangIdGlobal() throws Exception { + ModifiableSolrParams parameters = new ModifiableSolrParams(); + parameters.add("langid.fl", "name,subject"); + parameters.add("langid.langField", "language_s"); + parameters.add("langid.model", TEST_MODEL); + parameters.add("langid.threshold", "0.3"); + liProcessor = createLangIdProcessor(parameters); + + assertLang("en", "id", "1en", "name", "Lucene", "subject", "Apache Lucene is a free/open source information retrieval software library, originally created in Java by Doug Cutting. It is supported by the Apache Software Foundation and is released under the Apache Software License."); + assertLang("sv", "id", "2sv", "name", "Maven", "subject", "Apache Maven är ett verktyg utvecklat av Apache Software Foundation och används inom systemutveckling av datorprogram i programspråket Java. Maven används för att automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven används inom samma område som Apache Ant men dess byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade."); + assertLang("es", "id", "3es", "name", "Lucene", "subject", "Lucene es un API de código abierto para recuperación de información, originalmente implementada en Java por Doug Cutting. Está apoyado por el Apache Software Foundation y se distribuye bajo la Apache Software License. Lucene tiene versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, Ruby y PHP."); + assertLang("ru", "id", "4ru", "name", "Lucene", "subject", "The Apache Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, написанная на Java. Может быть использована для поиска в интернете и других областях компьютерной лингвистики (аналитическая философия)."); + assertLang("de", "id", "5de", "name", "Lucene", "subject", "Lucene ist ein Freie-Software-Projekt der Apache Software Foundation, das eine Suchsoftware erstellt. Durch die hohe Leistungsfähigkeit und Skalierbarkeit können die Lucene-Werkzeuge für beliebige Projektgrößen und Anforderungen eingesetzt werden. 
So setzt beispielsweise Wikipedia Lucene für die Volltextsuche ein. Zudem verwenden die beiden Desktop-Suchprogramme Beagle und Strigi eine C#- bzw. C++- Portierung von Lucene als Indexer."); + } +} diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LinearModel.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LinearModel.java index 08780ca9bb8..5080edfd171 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LinearModel.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LinearModel.java @@ -129,7 +129,7 @@ public class LinearModel extends LTRScoringModel { "weight on feature")); featureDetails.add(featureExplain); - details.add(Explanation.match(featureExplain.getValue().doubleValue() + details.add(Explanation.match(featureExplain.getValue().floatValue() * featureToWeight[index], "prod of:", featureDetails)); index++; } diff --git a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java index 608b2340506..e5b81f8bc21 100644 --- a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java +++ b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java @@ -53,11 +53,11 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.SslConnectionFactory; import org.eclipse.jetty.server.handler.gzip.GzipHandler; -import org.eclipse.jetty.server.session.HashSessionIdManager; -import org.eclipse.jetty.servlet.BaseHolder; +import org.eclipse.jetty.server.session.DefaultSessionIdManager; import org.eclipse.jetty.servlet.FilterHolder; import org.eclipse.jetty.servlet.ServletContextHandler; import org.eclipse.jetty.servlet.ServletHolder; +import org.eclipse.jetty.servlet.Source; import org.eclipse.jetty.util.component.LifeCycle; import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.thread.QueuedThreadPool; @@ -248,7 +248,7 @@ public class JettySolrRunner { connector.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS); server.setConnectors(new Connector[] {connector}); - server.setSessionIdManager(new HashSessionIdManager(new Random())); + server.setSessionIdManager(new DefaultSessionIdManager(server, new Random())); } else { ServerConnector connector = new ServerConnector(server, new HttpConnectionFactory()); connector.setPort(port); @@ -300,7 +300,7 @@ public class JettySolrRunner { String pathSpec = config.extraServlets.get(servletHolder); root.addServlet(servletHolder, pathSpec); } - dispatchFilter = root.getServletHandler().newFilterHolder(BaseHolder.Source.EMBEDDED); + dispatchFilter = root.getServletHandler().newFilterHolder(Source.EMBEDDED); dispatchFilter.setHeldClass(SolrDispatchFilter.class); dispatchFilter.setInitParameter("excludePatterns", excludePatterns); root.addFilter(dispatchFilter, "*", EnumSet.of(DispatcherType.REQUEST)); diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java index 3cdc903e57b..9b16d231e02 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java +++ b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java @@ -18,6 +18,7 @@ package org.apache.solr.cloud; import java.lang.invoke.MethodHandles; +import org.apache.solr.cloud.api.collections.CreateCollectionCmd; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ZkStateReader; import 
org.apache.solr.core.ConfigSetService; diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java index 30de3d4e6d7..0d4512997ee 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java +++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java @@ -132,7 +132,7 @@ public class CloudUtil { } - static boolean usePolicyFramework(DocCollection collection, SolrCloudManager cloudManager) + public static boolean usePolicyFramework(DocCollection collection, SolrCloudManager cloudManager) throws IOException, InterruptedException { AutoScalingConfig autoScalingConfig = cloudManager.getDistribStateManager().getAutoScalingConfig(); return !autoScalingConfig.getPolicy().getClusterPolicy().isEmpty() || collection.getPolicyName() != null; diff --git a/solr/core/src/java/org/apache/solr/cloud/ExclusiveSliceProperty.java b/solr/core/src/java/org/apache/solr/cloud/ExclusiveSliceProperty.java index 2faf6e9be3d..953023f9153 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ExclusiveSliceProperty.java +++ b/solr/core/src/java/org/apache/solr/cloud/ExclusiveSliceProperty.java @@ -28,6 +28,7 @@ import java.util.Random; import java.util.Set; import org.apache.commons.lang.StringUtils; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.cloud.overseer.ClusterStateMutator; import org.apache.solr.cloud.overseer.CollectionMutator; import org.apache.solr.cloud.overseer.SliceMutator; @@ -39,8 +40,8 @@ import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.ONLY_ACTIVE_NODES; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SHARD_UNIQUE; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ONLY_ACTIVE_NODES; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.SHARD_UNIQUE; import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE; // Class to encapsulate processing replica properties that have at most one replica hosting a property per slice. 
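The CloudUtil hunk above widens usePolicyFramework from package-private to public because its callers move to the new org.apache.solr.cloud.api.collections package in the hunks that follow. A minimal sketch of such a caller, assuming only the signature shown above; the class and method names here are illustrative, not part of the patch:

package org.apache.solr.cloud.api.collections;

import java.io.IOException;

import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.common.cloud.DocCollection;

// Illustrative caller: the widened visibility makes the helper reachable from
// the new package. Per the method body above, it returns true when a non-empty
// cluster policy exists or the collection names its own policy.
class UsePolicyFrameworkSketch {
  static boolean usePolicy(DocCollection collection, SolrCloudManager cloudManager)
      throws IOException, InterruptedException {
    return CloudUtil.usePolicyFramework(collection, cloudManager);
  }
}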
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index d1bb13a12b3..edf383884ea 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -29,6 +29,7 @@ import java.util.Set; import com.codahale.metrics.Timer; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; import org.apache.solr.client.solrj.impl.ClusterStateProvider; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.cloud.autoscaling.OverseerTriggerThread; import org.apache.solr.cloud.overseer.ClusterStateMutator; import org.apache.solr.cloud.overseer.CollectionMutator; @@ -39,6 +40,7 @@ import org.apache.solr.cloud.overseer.SliceMutator; import org.apache.solr.cloud.overseer.ZkStateWriter; import org.apache.solr.cloud.overseer.ZkWriteCommand; import org.apache.solr.common.SolrCloseable; +import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkNodeProps; @@ -200,7 +202,7 @@ public class Overseer implements SolrCloseable { LinkedList> queue = null; try { // We do not need to filter any nodes here cause all processed nodes are removed once we flush clusterstate - queue = new LinkedList<>(stateUpdateQueue.peekElements(1000, Long.MAX_VALUE, (x) -> true)); + queue = new LinkedList<>(stateUpdateQueue.peekElements(1000, 3000L, (x) -> true)); } catch (KeeperException.SessionExpiredException e) { log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e); return; @@ -267,6 +269,9 @@ public class Overseer implements SolrCloseable { private ClusterState processQueueItem(ZkNodeProps message, ClusterState clusterState, ZkStateWriter zkStateWriter, boolean enableBatching, ZkStateWriter.ZkWriteCallback callback) throws Exception { final String operation = message.getStr(QUEUE_OPERATION); + if (operation == null) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Message missing " + QUEUE_OPERATION + ":" + message); + } List zkWriteCommands = null; final Timer.Context timerContext = stats.time(operation); try { diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionConfigSetProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionConfigSetProcessor.java index 570843a61a9..e8d85ce18c8 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionConfigSetProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionConfigSetProcessor.java @@ -19,6 +19,7 @@ package org.apache.solr.cloud; import java.io.IOException; import org.apache.commons.io.IOUtils; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.handler.component.ShardHandler; diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java index d014fc47715..86e356497a1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java @@ -257,7 +257,6 @@ public class OverseerTaskProcessor implements Runnable, Closeable { } if (runningZKTasks.contains(head.getId())) continue; final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); - 
OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message); final String asyncId = message.getStr(ASYNC); if (hasLeftOverItems) { if (head.getId().equals(oldestItemInWorkQueue)) @@ -269,6 +268,12 @@ public class OverseerTaskProcessor implements Runnable, Closeable { } } String operation = message.getStr(Overseer.QUEUE_OPERATION); + if (operation == null) { + log.error("Msg does not have required " + Overseer.QUEUE_OPERATION + ": {}", message); + workQueue.remove(head); + continue; + } + OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message); OverseerMessageHandler.Lock lock = messageHandler.lockTask(message, taskBatch); if (lock == null) { log.debug("Exclusivity check failed for [{}]", message.toString()); diff --git a/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java similarity index 96% rename from solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java index 71a54c149c3..6b4e4275a12 100644 --- a/solr/core/src/java/org/apache/solr/cloud/AddReplicaCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.io.IOException; @@ -32,6 +32,9 @@ import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.cloud.autoscaling.Policy; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; +import org.apache.solr.cloud.ActiveReplicaWatcher; +import org.apache.solr.cloud.CloudUtil; +import org.apache.solr.cloud.Overseer; import org.apache.solr.common.SolrCloseableLatch; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; @@ -49,8 +52,8 @@ import org.apache.solr.handler.component.ShardHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SKIP_CREATE_REPLICA_IN_CLUSTER_STATE; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.COLL_CONF; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.SKIP_CREATE_REPLICA_IN_CLUSTER_STATE; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; diff --git a/solr/core/src/java/org/apache/solr/cloud/Assign.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java similarity index 97% rename from solr/core/src/java/org/apache/solr/cloud/Assign.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java index c746c94896c..e7ce583003f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Assign.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
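The OverseerTaskProcessor hunk above is a behavior fix as much as a move: the message handler is now selected only after the message is validated, and a message lacking QUEUE_OPERATION is logged, removed from the work queue, and skipped. Without the removal, the same malformed entry would be re-read and would fail on every pass. A sketch of the skip-and-remove pattern, with assumed minimal types:

import java.util.function.Function;

class WorkQueueSketch<T> {
  // Assumed minimal queue shape, not the Solr OverseerTaskQueue API.
  interface WorkQueue<E> { E peek(); void remove(E item) throws Exception; }

  void processHead(WorkQueue<T> queue, Function<T, String> operationOf) throws Exception {
    T head = queue.peek();
    if (head == null) return;
    if (operationOf.apply(head) == null) {
      // Poison entry: drop it so it cannot wedge the queue forever.
      queue.remove(head);
      return;
    }
    // only now select a message handler, take its lock, and run the task ...
  }
}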
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -56,10 +56,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE_DEFAULT; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.CREATE_NODE_SET; import static org.apache.solr.common.cloud.DocCollection.SNITCH; import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; @@ -221,12 +218,15 @@ public class Assign { List<String> nodeList; final String createNodeSetStr = message.getStr(CREATE_NODE_SET); - final List<String> createNodeList = (createNodeSetStr == null) ? null : StrUtils.splitSmart((CREATE_NODE_SET_EMPTY.equals(createNodeSetStr) ? "" : createNodeSetStr), ",", true); + final List<String> createNodeList = (createNodeSetStr == null) ? null : + StrUtils.splitSmart((OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY.equals(createNodeSetStr) ? + "" : createNodeSetStr), ",", true); if (createNodeList != null) { nodeList = new ArrayList<>(createNodeList); nodeList.retainAll(liveNodes); - if (message.getBool(CREATE_NODE_SET_SHUFFLE, CREATE_NODE_SET_SHUFFLE_DEFAULT)) { + if (message.getBool(OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE, + OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE_DEFAULT)) { Collections.shuffle(nodeList, random); } } else { diff --git a/solr/core/src/java/org/apache/solr/cloud/BackupCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java similarity index 97% rename from solr/core/src/java/org/apache/solr/cloud/BackupCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java index a4012f05fc7..c411fbc0ae1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/BackupCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License.
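The createNodeSet logic in Assign is worth spelling out: an absent parameter means no restriction, the literal EMPTY means "create no cores now" (used later in this patch by RestoreCmd), and anything else is a comma-separated node list that is intersected with the live nodes and, by default, shuffled. A self-contained sketch of those semantics (names are illustrative, not the Solr signatures):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.Set;

class CreateNodeSetSketch {
  static List<String> resolveNodeList(String createNodeSet, Set<String> liveNodes,
                                      boolean shuffle, Random random) {
    if (createNodeSet == null) {
      return new ArrayList<>(liveNodes);   // no restriction: any live node may be used
    }
    if ("EMPTY".equals(createNodeSet)) {
      return new ArrayList<>();            // explicitly no nodes: create no cores yet
    }
    List<String> nodes = new ArrayList<>(Arrays.asList(createNodeSet.split(",")));
    nodes.retainAll(liveNodes);            // silently drop nodes that are not live
    if (shuffle) {
      Collections.shuffle(nodes, random);  // spread placement across the given set
    }
    return nodes;
  }
}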
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.net.URI; @@ -51,7 +51,6 @@ import org.apache.solr.handler.component.ShardHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; import static org.apache.solr.common.params.CommonAdminParams.ASYNC; @@ -74,7 +73,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd { Instant startTime = Instant.now(); - CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer(); + CoreContainer cc = ocmh.overseer.getCoreContainer(); BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo)); BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader); @@ -116,7 +115,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd { properties.put(BackupManager.BACKUP_NAME_PROP, backupName); properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName); - properties.put(COLL_CONF, configName); + properties.put(OverseerCollectionMessageHandler.COLL_CONF, configName); properties.put(BackupManager.START_TIME_PROP, startTime.toString()); properties.put(BackupManager.INDEX_VERSION_PROP, Version.LATEST.toString()); //TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same. @@ -165,7 +164,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd { String commitName = request.getStr(CoreAdminParams.COMMIT_NAME); Optional snapshotMeta = Optional.empty(); if (commitName != null) { - SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient(); + SolrZkClient zkClient = ocmh.zkStateReader.getZkClient(); snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName); if (!snapshotMeta.isPresent()) { throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateAliasCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateAliasCmd.java similarity index 96% rename from solr/core/src/java/org/apache/solr/cloud/CreateAliasCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/CreateAliasCmd.java index e10d53e7c1a..c54d792f20a 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateAliasCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateAliasCmd.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
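Both snapshot-related hunks in BackupCmd follow the same shift: ZooKeeper is now reached through ocmh.zkStateReader.getZkClient() instead of via overseer.getZkController(), keeping the command code off the ZkController. The commit-name path also shows the lookup contract: a named snapshot that cannot be found is the caller's error, not the server's. Roughly, with a placeholder metadata type:

import java.util.Optional;

class SnapshotLookupSketch {
  // Placeholder check; the real lookup is SolrSnapshotManager.getCollectionLevelSnapshot(...).
  static void requireSnapshot(Optional<Object> meta, String commitName, String collection) {
    if (!meta.isPresent()) {
      // BAD_REQUEST in Solr terms: the client named a snapshot that does not exist.
      throw new IllegalArgumentException("Snapshot with name " + commitName
          + " does not exist for collection " + collection);
    }
  }
}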
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.util.HashSet; import java.util.List; @@ -23,7 +23,6 @@ import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ZkNodeProps; @@ -34,7 +33,7 @@ import org.apache.solr.common.util.StrUtils; import static org.apache.solr.common.params.CommonParams.NAME; -public class CreateAliasCmd implements Cmd { +public class CreateAliasCmd implements OverseerCollectionMessageHandler.Cmd { private final OverseerCollectionMessageHandler ocmh; public CreateAliasCmd(OverseerCollectionMessageHandler ocmh) { diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java similarity index 94% rename from solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java index 2171c605bf5..d5ceb6a400e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.io.IOException; @@ -39,7 +39,8 @@ import org.apache.solr.client.solrj.cloud.autoscaling.Policy; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.Overseer; +import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.overseer.ClusterStateMutator; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -70,10 +71,7 @@ import org.apache.zookeeper.KeeperException.NoNodeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.RANDOM; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.COLL_CONF; import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE; import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS; import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS; @@ -85,7 +83,7 @@ import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STA import static org.apache.solr.common.params.CommonParams.NAME; import static org.apache.solr.common.util.StrUtils.formatString; -public class CreateCollectionCmd implements Cmd { +public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; private final TimeSource timeSource; @@ -274,17 +272,17 @@ public class CreateCollectionCmd implements Cmd { String policy = 
message.getStr(Policy.POLICY); boolean usePolicyFramework = !autoScalingConfig.getPolicy().getClusterPolicy().isEmpty() || policy != null; - Integer numSlices = message.getInt(NUM_SLICES, null); + Integer numSlices = message.getInt(OverseerCollectionMessageHandler.NUM_SLICES, null); String router = message.getStr("router.name", DocRouter.DEFAULT_NAME); if(ImplicitDocRouter.NAME.equals(router)){ ClusterStateMutator.getShardNames(shardNames, message.getStr("shards", null)); numSlices = shardNames.size(); } else { if (numSlices == null ) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NUM_SLICES + " is a required param (when using CompositeId router)."); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, OverseerCollectionMessageHandler.NUM_SLICES + " is a required param (when using CompositeId router)."); } if (numSlices <= 0) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NUM_SLICES + " must be > 0"); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, OverseerCollectionMessageHandler.NUM_SLICES + " must be > 0"); } ClusterStateMutator.getShardNames(numSlices, shardNames); } @@ -303,7 +301,7 @@ public class CreateCollectionCmd implements Cmd { // but (for now) require that each core goes on a distinct node. List replicaPositions; - nodeList.addAll(Assign.getLiveOrLiveAndCreateNodeSetList(clusterState.getLiveNodes(), message, RANDOM)); + nodeList.addAll(Assign.getLiveOrLiveAndCreateNodeSetList(clusterState.getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM)); if (nodeList.isEmpty()) { log.warn("It is unusual to create a collection ("+collectionName+") without cores."); @@ -315,7 +313,7 @@ public class CreateCollectionCmd implements Cmd { + totalNumReplicas + " on collection " + collectionName - + " is higher than the number of Solr instances currently live or live and part of your " + CREATE_NODE_SET + "(" + + " is higher than the number of Solr instances currently live or live and part of your " + OverseerCollectionMessageHandler.CREATE_NODE_SET + "(" + nodeList.size() + "). It's unusual to run two replica of the same slice on the same Solr-instance."); } @@ -327,9 +325,9 @@ public class CreateCollectionCmd implements Cmd { if (maxShardsAllowedToCreate < requestedShardsToCreate) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cannot create collection " + collectionName + ". Value of " + MAX_SHARDS_PER_NODE + " is " + maxShardsPerNode - + ", and the number of nodes currently live or live and part of your "+CREATE_NODE_SET+" is " + nodeList.size() + + ", and the number of nodes currently live or live and part of your "+OverseerCollectionMessageHandler.CREATE_NODE_SET+" is " + nodeList.size() + ". This allows a maximum of " + maxShardsAllowedToCreate - + " to be created. Value of " + NUM_SLICES + " is " + numSlices + + " to be created. 
Value of " + OverseerCollectionMessageHandler.NUM_SLICES + " is " + numSlices + ", value of " + NRT_REPLICAS + " is " + numNrtReplicas + ", value of " + TLOG_REPLICAS + " is " + numTlogReplicas + " and value of " + PULL_REPLICAS + " is " + numPullReplicas @@ -352,10 +350,13 @@ public class CreateCollectionCmd implements Cmd { try { configNames = ocmh.zkStateReader.getZkClient().getChildren(ZkConfigManager.CONFIGS_ZKNODE, null, true); if (configNames.contains(ConfigSetsHandlerApi.DEFAULT_CONFIGSET_NAME)) { - if (!CollectionAdminParams.SYSTEM_COLL.equals(coll)) { - copyDefaultConfigSetTo(configNames, coll); + if (CollectionAdminParams.SYSTEM_COLL.equals(coll)) { + return coll; + } else { + String intendedConfigSetName = ConfigSetsHandlerApi.getSuffixedNameForAutoGeneratedConfigSet(coll); + copyDefaultConfigSetTo(configNames, intendedConfigSetName); + return intendedConfigSetName; } - return coll; } else if (configNames != null && configNames.size() == 1) { configName = configNames.get(0); // no config set named, but there is only 1 - use it @@ -374,17 +375,11 @@ public class CreateCollectionCmd implements Cmd { private void copyDefaultConfigSetTo(List configNames, String targetConfig) { ZkConfigManager configManager = new ZkConfigManager(ocmh.zkStateReader.getZkClient()); - // if a configset named coll exists, delete the configset so that _default can be copied over + // if a configset named collection exists, re-use it if (configNames.contains(targetConfig)) { log.info("There exists a configset by the same name as the collection we're trying to create: " + targetConfig + - ", deleting it so that we can copy the _default configs over and create the collection."); - try { - configManager.deleteConfigDir(targetConfig); - } catch (Exception e) { - throw new SolrException(ErrorCode.INVALID_STATE, "Error while deleting configset: " + targetConfig, e); - } - } else { - log.info("Only _default config set found, using it."); + ", re-using it."); + return; } // Copy _default into targetConfig try { diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateShardCmd.java similarity index 95% rename from solr/core/src/java/org/apache/solr/cloud/CreateShardCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/CreateShardCmd.java index c6afdcc44fc..311d9ef1e89 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateShardCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateShardCmd.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.io.IOException; @@ -31,7 +31,8 @@ import com.google.common.collect.ImmutableMap; import org.apache.solr.client.solrj.cloud.autoscaling.Policy; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.CloudUtil; +import org.apache.solr.cloud.Overseer; import org.apache.solr.common.SolrCloseableLatch; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; @@ -48,8 +49,6 @@ import org.apache.solr.common.util.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.Assign.getNodesForNewReplicas; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.RANDOM; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS; import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS; @@ -58,7 +57,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS; import static org.apache.solr.common.params.CommonAdminParams.ASYNC; -public class CreateShardCmd implements Cmd { +public class CreateShardCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; @@ -162,7 +161,7 @@ public class CreateShardCmd implements Cmd { if (collection.getPolicyName() != null) message.getProperties().put(Policy.POLICY, collection.getPolicyName()); positions = Assign.identifyNodes(cloudManager, clusterState, - Assign.getLiveOrLiveAndCreateNodeSetList(clusterState.getLiveNodes(), message, RANDOM), + Assign.getLiveOrLiveAndCreateNodeSetList(clusterState.getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM), collection.getName(), message, Collections.singletonList(sliceName), @@ -171,7 +170,7 @@ public class CreateShardCmd implements Cmd { numPullReplicas); sessionWrapper.set(PolicyHelper.getLastSessionWrapper(true)); } else { - List sortedNodeList = getNodesForNewReplicas(clusterState, collection.getName(), sliceName, totalReplicas, + List sortedNodeList = Assign.getNodesForNewReplicas(clusterState, collection.getName(), sliceName, totalReplicas, createNodeSetStr, cloudManager); int i = 0; positions = new ArrayList<>(); diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateSnapshotCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateSnapshotCmd.java similarity index 98% rename from solr/core/src/java/org/apache/solr/cloud/CreateSnapshotCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/CreateSnapshotCmd.java index 5de65a4a4bb..32715d66cc2 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateSnapshotCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateSnapshotCmd.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
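CreateShardCmd above retains two placement paths: with an autoscaling policy in play, positions come from Assign.identifyNodes over the policy session; otherwise Assign.getNodesForNewReplicas returns candidate nodes (sorted by how many cores they already host) and the loop lays replicas out round-robin. The fallback, reduced to its essence:

import java.util.ArrayList;
import java.util.List;

class RoundRobinSketch {
  // sortedNodes is assumed pre-sorted by ascending core count, as the Assign helper produces.
  static List<String> assignNodes(List<String> sortedNodes, int totalReplicas) {
    List<String> assigned = new ArrayList<>(totalReplicas);
    for (int i = 0; i < totalReplicas; i++) {
      assigned.add(sortedNodes.get(i % sortedNodes.size())); // wrap around least-loaded nodes
    }
    return assigned;
  }
}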
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; @@ -67,7 +67,7 @@ public class CreateSnapshotCmd implements OverseerCollectionMessageHandler.Cmd { String collectionName = message.getStr(COLLECTION_PROP); String commitName = message.getStr(CoreAdminParams.COMMIT_NAME); String asyncId = message.getStr(ASYNC); - SolrZkClient zkClient = this.ocmh.overseer.getZkController().getZkClient(); + SolrZkClient zkClient = ocmh.zkStateReader.getZkClient(); Date creationDate = new Date(); if(SolrSnapshotManager.snapshotExists(zkClient, collectionName, commitName)) { diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteAliasCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java similarity index 97% rename from solr/core/src/java/org/apache/solr/cloud/DeleteAliasCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java index 9c9f1c60e4b..e199d7dbb24 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DeleteAliasCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ZkNodeProps; diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java similarity index 90% rename from solr/core/src/java/org/apache/solr/cloud/DeleteCollectionCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java index dc91905ed37..bdae8b9e73d 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DeleteCollectionCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java @@ -16,19 +16,21 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; +import org.apache.solr.cloud.Overseer; import org.apache.solr.common.NonExistentCoreException; import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.Aliases; import org.apache.solr.common.cloud.ClusterState; -import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; @@ -60,9 +62,15 @@ public class DeleteCollectionCmd implements OverseerCollectionMessageHandler.Cmd @Override public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception { ZkStateReader zkStateReader = ocmh.zkStateReader; + Aliases aliases = zkStateReader.getAliases(); final String collection = message.getStr(NAME); - DocCollection coll = state.getCollectionOrNull(collection); - String policy = coll == null ? 
null : coll.getPolicyName(); + for (Map.Entry<String, List<String>> ent : aliases.getCollectionAliasListMap().entrySet()) { + if (ent.getValue().contains(collection)) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Collection : " + collection + " is part of alias " + ent.getKey() + " remove or modify the alias before removing this collection."); + } + } + try { // Remove the snapshots meta-data for this collection in ZK. Deleting actual index files // should be taken care of as part of collection delete operation. diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteNodeCmd.java similarity index 99% rename from solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteNodeCmd.java index 51b095669e1..ab4dc0c2b21 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteNodeCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java similarity index 97% rename from solr/core/src/java/org/apache/solr/cloud/DeleteReplicaCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java index e71d7e89c3d..eefe903fa6f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DeleteReplicaCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.ArrayList; @@ -26,7 +26,7 @@ import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicReference; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.Cmd; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; @@ -44,7 +44,6 @@ import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.ONLY_IF_DOWN; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; @@ -213,7 +212,7 @@ public class DeleteReplicaCmd implements Cmd { // If users are being safe and only want to remove a shard if it is down, they can specify onlyIfDown=true // on the command.
- if (Boolean.parseBoolean(message.getStr(ONLY_IF_DOWN)) && replica.getState() != Replica.State.DOWN) { + if (Boolean.parseBoolean(message.getStr(OverseerCollectionMessageHandler.ONLY_IF_DOWN)) && replica.getState() != Replica.State.DOWN) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Attempted to remove replica : " + collectionName + "/" + shard + "/" + replicaName + " with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'"); diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java similarity index 98% rename from solr/core/src/java/org/apache/solr/cloud/DeleteShardCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java index 58c4e63b18d..2ef29554632 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DeleteShardCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.ArrayList; @@ -27,7 +27,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import org.apache.solr.client.solrj.cloud.DistributedQueue; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; @@ -53,7 +53,7 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.DE import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD; import static org.apache.solr.common.params.CommonAdminParams.ASYNC; -public class DeleteShardCmd implements Cmd { +public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; private final TimeSource timeSource; diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteSnapshotCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteSnapshotCmd.java similarity index 98% rename from solr/core/src/java/org/apache/solr/cloud/DeleteSnapshotCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteSnapshotCmd.java index 765f4b9bfb6..cf0a234c8c5 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DeleteSnapshotCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteSnapshotCmd.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
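Two pre-flight guards show up in this stretch: DeleteCollectionCmd now refuses to delete a collection while any alias still references it, and DeleteReplicaCmd's onlyIfDown=true refuses to remove a replica that is not DOWN. Both validate before mutating anything; sketched with plain types:

import java.util.List;
import java.util.Map;

class DeleteGuardsSketch {
  static void checkNotAliased(String collection, Map<String, List<String>> collectionAliasListMap) {
    for (Map.Entry<String, List<String>> e : collectionAliasListMap.entrySet()) {
      if (e.getValue().contains(collection)) {
        throw new IllegalStateException("Collection " + collection + " is part of alias "
            + e.getKey() + "; remove or modify the alias before removing the collection.");
      }
    }
  }

  static void checkOnlyIfDown(boolean onlyIfDown, String replicaState) {
    if (onlyIfDown && !"down".equals(replicaState)) {
      throw new IllegalArgumentException(
          "onlyIfDown='true' was specified, but the replica state is '" + replicaState + "'");
    }
  }
}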
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; @@ -70,7 +70,7 @@ public class DeleteSnapshotCmd implements OverseerCollectionMessageHandler.Cmd { Map requestMap = new HashMap<>(); NamedList shardRequestResults = new NamedList(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); - SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient(); + SolrZkClient zkClient = ocmh.zkStateReader.getZkClient(); Optional meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName); if (!meta.isPresent()) { // Snapshot not found. Nothing to do. diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderRecoveryWatcher.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/LeaderRecoveryWatcher.java similarity index 98% rename from solr/core/src/java/org/apache/solr/cloud/LeaderRecoveryWatcher.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/LeaderRecoveryWatcher.java index 1eb487328ea..a80fdc0a3d0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/LeaderRecoveryWatcher.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/LeaderRecoveryWatcher.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.util.Set; diff --git a/solr/core/src/java/org/apache/solr/cloud/MigrateCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java similarity index 97% rename from solr/core/src/java/org/apache/solr/cloud/MigrateCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java index 02fdb5ccd0b..4edc363a69e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/MigrateCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.Collection; @@ -24,6 +24,7 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.solr.client.solrj.request.CoreAdminRequest; +import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; @@ -47,10 +48,6 @@ import org.apache.solr.util.TimeOut; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROP_PREFIX; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; @@ -212,9 +209,9 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd { Overseer.QUEUE_OPERATION, CREATE.toLower(), NAME, tempSourceCollectionName, NRT_REPLICAS, 1, - NUM_SLICES, 1, - COLL_CONF, configName, - CREATE_NODE_SET, sourceLeader.getNodeName()); + OverseerCollectionMessageHandler.NUM_SLICES, 1, + OverseerCollectionMessageHandler.COLL_CONF, configName, + OverseerCollectionMessageHandler.CREATE_NODE_SET, sourceLeader.getNodeName()); if (asyncId != null) { String internalAsyncId = asyncId + Math.abs(System.nanoTime()); props.put(ASYNC, internalAsyncId); @@ -270,7 +267,7 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd { props.put(CoreAdminParams.NAME, tempCollectionReplica2); // copy over property params: for (String key : message.keySet()) { - if (key.startsWith(COLL_PROP_PREFIX)) { + if (key.startsWith(OverseerCollectionMessageHandler.COLL_PROP_PREFIX)) { props.put(key, message.getStr(key)); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java similarity index 97% rename from solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java index 44493ec57f2..f9392b5b259 100644 --- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java @@ -15,7 +15,7 @@ * limitations under the License. 
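MigrateCmd's temporary source collection illustrates how these commands drive each other: an internal create message is assembled with one shard and one NRT replica pinned to the source leader's node, and any async request gets a derived id per sub-request. A sketch of assembling such a message map; the string keys shown are the values of the OverseerCollectionMessageHandler constants as far as visible here (NUM_SLICES is "numShards" per a later hunk), but treat them as assumptions:

import java.util.HashMap;
import java.util.Map;

class TempCollectionMsgSketch {
  static Map<String, Object> createMsg(String tempName, String configName,
                                       String sourceLeaderNode, String asyncId) {
    Map<String, Object> props = new HashMap<>();
    props.put("operation", "create");
    props.put("name", tempName);
    props.put("nrtReplicas", 1);
    props.put("numShards", 1);                       // NUM_SLICES
    props.put("collection.configName", configName);  // COLL_CONF
    props.put("createNodeSet", sourceLeaderNode);    // pin the single core to the source leader
    if (asyncId != null) {
      // each internal sub-request gets its own async id
      props.put("async", asyncId + Math.abs(System.nanoTime()));
    }
    return props;
  }
}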
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.ArrayList; @@ -24,6 +24,7 @@ import java.util.List; import java.util.Locale; import java.util.concurrent.TimeUnit; +import org.apache.solr.cloud.ActiveReplicaWatcher; import org.apache.solr.common.SolrCloseableLatch; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; @@ -42,7 +43,7 @@ import org.apache.solr.util.TimeOut; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.*; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.SKIP_CREATE_REPLICA_IN_CLUSTER_STATE; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; @@ -51,7 +52,7 @@ import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE; import static org.apache.solr.common.params.CommonAdminParams.TIMEOUT; import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE; -public class MoveReplicaCmd implements Cmd{ +public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; @@ -105,7 +106,7 @@ public class MoveReplicaCmd implements Cmd{ } Slice slice = clusterState.getCollection(collection).getSlice(shardId); List sliceReplicas = new ArrayList<>(slice.getReplicas()); - Collections.shuffle(sliceReplicas, RANDOM); + Collections.shuffle(sliceReplicas, OverseerCollectionMessageHandler.RANDOM); // this picks up a single random replica from the sourceNode for (Replica r : slice.getReplicas()) { if (r.getNodeName().equals(sourceNode)) { diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java similarity index 98% rename from solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java index abfecab88fa..9529ee1ce1b 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
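In MoveReplicaCmd above, note that the hunk shuffles a copy (sliceReplicas) with the shared RANDOM, yet the loop that follows still iterates slice.getReplicas(); the presumable intent, picking a random replica that lives on the source node, would look like this:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

class PickReplicaSketch {
  // Assumed minimal replica shape: {replicaName, nodeName}.
  static String pickReplicaOnNode(List<String[]> replicas, String sourceNode, Random random) {
    List<String[]> shuffled = new ArrayList<>(replicas);
    Collections.shuffle(shuffled, random);  // randomize which replica gets moved
    for (String[] r : shuffled) {
      if (r[1].equals(sourceNode)) {
        return r[0];
      }
    }
    return null;                            // no replica of this slice on sourceNode
  }
}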
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -45,6 +45,14 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.cloud.LockTree; +import org.apache.solr.cloud.Overseer; +import org.apache.solr.cloud.OverseerMessageHandler; +import org.apache.solr.cloud.OverseerNodePrioritizer; +import org.apache.solr.cloud.OverseerSolrResponse; +import org.apache.solr.cloud.OverseerTaskProcessor; +import org.apache.solr.cloud.Stats; +import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.common.SolrCloseable; import org.apache.solr.common.SolrException; @@ -108,7 +116,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler, public static final String NUM_SLICES = "numShards"; - static final boolean CREATE_NODE_SET_SHUFFLE_DEFAULT = true; + public static final boolean CREATE_NODE_SET_SHUFFLE_DEFAULT = true; public static final String CREATE_NODE_SET_SHUFFLE = CollectionAdminParams.CREATE_NODE_SET_SHUFFLE_PARAM; public static final String CREATE_NODE_SET_EMPTY = "EMPTY"; public static final String CREATE_NODE_SET = CollectionAdminParams.CREATE_NODE_SET_PARAM; @@ -162,7 +170,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler, new SynchronousQueue<>(), new DefaultSolrThreadFactory("OverseerCollectionMessageHandlerThreadFactory")); - static final Random RANDOM; + protected static final Random RANDOM; static { // We try to make things reproducible in the context of our tests by initializing the random instance // based on the current seed @@ -219,6 +227,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler, .put(DELETE, new DeleteCollectionCmd(this)) .put(CREATEALIAS, new CreateAliasCmd(this)) .put(DELETEALIAS, new DeleteAliasCmd(this)) + .put(ROUTEDALIAS_CREATECOLL, new RoutedAliasCreateCollectionCmd(this)) .put(OVERSEERSTATUS, new OverseerStatusCmd(this)) .put(DELETESHARD, new DeleteShardCmd(this)) .put(DELETEREPLICA, new DeleteReplicaCmd(this)) @@ -232,7 +241,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler, @Override @SuppressWarnings("unchecked") public SolrResponse processMessage(ZkNodeProps message, String operation) { - log.debug("OverseerCollectionMessageHandler.processMessage : "+ operation + " , "+ message.toString()); + log.debug("OverseerCollectionMessageHandler.processMessage : {} , {}", operation, message); NamedList results = new NamedList(); try { @@ -996,7 +1005,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler, return isClosed; } - interface Cmd { + protected interface Cmd { void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception; } } diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerRoleCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerRoleCmd.java similarity index 95% rename from solr/core/src/java/org/apache/solr/cloud/OverseerRoleCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerRoleCmd.java index 0f450bdc51a..16f93277eef 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerRoleCmd.java +++ 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerRoleCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; @@ -24,7 +24,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.OverseerNodePrioritizer; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkNodeProps; @@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory; import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDROLE; import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE; -public class OverseerRoleCmd implements Cmd { +public class OverseerRoleCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerStatusCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerStatusCmd.java similarity index 95% rename from solr/core/src/java/org/apache/solr/cloud/OverseerStatusCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerStatusCmd.java index aba4872122d..6f0bbfd068f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerStatusCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerStatusCmd.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.ArrayList; @@ -24,7 +24,8 @@ import java.util.List; import java.util.Map; import com.codahale.metrics.Timer; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.OverseerTaskProcessor; +import org.apache.solr.cloud.Stats; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; @@ -35,7 +36,7 @@ import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class OverseerStatusCmd implements Cmd { +public class OverseerStatusCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; diff --git a/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java similarity index 99% rename from solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java index e9030918200..35d2379e08c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java @@ -15,7 +15,7 @@ * limitations under the License. 
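All of these Cmd classes plug into one dispatch table: OverseerCollectionMessageHandler keeps an immutable action-to-Cmd map (extended above with ROUTEDALIAS_CREATECOLL), and processMessage() looks the operation up and invokes call(state, message, results). The shape, reduced to essentials with an invented enum and erased types:

import java.util.EnumMap;
import java.util.Map;

class DispatchSketch {
  enum Action { CREATE, DELETE, CREATEALIAS, ROUTEDALIAS_CREATECOLL }

  interface Cmd { void call(Object state, Object message, Map<String, Object> results) throws Exception; }

  private final Map<Action, Cmd> commandMap = new EnumMap<>(Action.class);

  DispatchSketch() {
    commandMap.put(Action.CREATE, (s, m, r) -> { /* CreateCollectionCmd */ });
    commandMap.put(Action.ROUTEDALIAS_CREATECOLL, (s, m, r) -> { /* RoutedAliasCreateCollectionCmd */ });
  }

  void processMessage(Action op, Object state, Object message, Map<String, Object> results) throws Exception {
    Cmd cmd = commandMap.get(op);
    if (cmd == null) throw new IllegalArgumentException("Unknown operation: " + op);
    cmd.call(state, message, results);  // one handler object per collections-API action
  }
}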
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; @@ -28,6 +28,7 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.solr.cloud.ActiveReplicaWatcher; import org.apache.solr.common.SolrCloseableLatch; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; diff --git a/solr/core/src/java/org/apache/solr/cloud/RestoreCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java similarity index 93% rename from solr/core/src/java/org/apache/solr/cloud/RestoreCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java index 9c9a5c90bbc..09ceb559697 100644 --- a/solr/core/src/java/org/apache/solr/cloud/RestoreCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; @@ -35,6 +35,7 @@ import java.util.Set; import org.apache.solr.client.solrj.cloud.DistributedQueue; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; +import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -58,13 +59,6 @@ import org.apache.solr.handler.component.ShardHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROPS; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.RANDOM; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SHARDS_PROP; import static org.apache.solr.common.cloud.DocCollection.STATE_FORMAT; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE; @@ -99,7 +93,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY); Map requestMap = new HashMap<>(); - CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer(); + CoreContainer cc = ocmh.overseer.getCoreContainer(); BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo)); URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION)); @@ -113,7 +107,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { // Get the Solr nodes to restore a collection. 
final List nodeList = Assign.getLiveOrLiveAndCreateNodeSetList( - zkStateReader.getClusterState().getLiveNodes(), message, RANDOM); + zkStateReader.getClusterState().getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM); int numShards = backupCollectionState.getActiveSlices().size(); @@ -136,8 +130,8 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { } //Upload the configs - String configName = (String) properties.get(COLL_CONF); - String restoreConfigName = message.getStr(COLL_CONF, configName); + String configName = (String) properties.get(OverseerCollectionMessageHandler.COLL_CONF); + String restoreConfigName = message.getStr(OverseerCollectionMessageHandler.COLL_CONF, configName); if (zkStateReader.getConfigManager().configExists(restoreConfigName)) { log.info("Using existing config {}", restoreConfigName); //TODO add overwrite option? @@ -159,7 +153,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { } // inherit settings from input API, defaulting to the backup's setting. Ex: replicationFactor - for (String collProp : COLL_PROPS.keySet()) { + for (String collProp : OverseerCollectionMessageHandler.COLL_PROPS.keySet()) { Object val = message.getProperties().getOrDefault(collProp, backupCollectionState.get(collProp)); if (val != null) { propMap.put(collProp, val); @@ -167,8 +161,8 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { } propMap.put(NAME, restoreCollectionName); - propMap.put(CREATE_NODE_SET, CREATE_NODE_SET_EMPTY); //no cores - propMap.put(COLL_CONF, restoreConfigName); + propMap.put(OverseerCollectionMessageHandler.CREATE_NODE_SET, OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY); //no cores + propMap.put(OverseerCollectionMessageHandler.COLL_CONF, restoreConfigName); // router.* @SuppressWarnings("unchecked") @@ -179,9 +173,9 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { Set sliceNames = backupCollectionState.getActiveSlicesMap().keySet(); if (backupCollectionState.getRouter() instanceof ImplicitDocRouter) { - propMap.put(SHARDS_PROP, StrUtils.join(sliceNames, ',')); + propMap.put(OverseerCollectionMessageHandler.SHARDS_PROP, StrUtils.join(sliceNames, ',')); } else { - propMap.put(NUM_SLICES, sliceNames.size()); + propMap.put(OverseerCollectionMessageHandler.NUM_SLICES, sliceNames.size()); // ClusterStateMutator.createCollection detects that "slices" is in fact a slice structure instead of a // list of names, and if so uses this instead of building it. We clear the replica list. 
Collection backupSlices = backupCollectionState.getActiveSlices(); @@ -190,7 +184,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { newSlices.put(backupSlice.getName(), new Slice(backupSlice.getName(), Collections.emptyMap(), backupSlice.getProperties())); } - propMap.put(SHARDS_PROP, newSlices); + propMap.put(OverseerCollectionMessageHandler.SHARDS_PROP, newSlices); } ocmh.commandMap.get(CREATE).call(zkStateReader.getClusterState(), new ZkNodeProps(propMap), new NamedList()); diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAliasCreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAliasCreateCollectionCmd.java new file mode 100644 index 00000000000..8cfd0bd5ad1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAliasCreateCollectionCmd.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud.api.collections; + +import java.lang.invoke.MethodHandles; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; + +import org.apache.solr.cloud.Overseer; +import org.apache.solr.cloud.OverseerSolrResponse; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.Aliases; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.params.CollectionParams; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.handler.admin.CollectionsHandler; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.update.processor.TimeRoutedAliasUpdateProcessor; +import org.apache.solr.util.TimeZoneUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.COLL_CONF; +import static org.apache.solr.common.params.CommonParams.NAME; +import static org.apache.solr.update.processor.TimeRoutedAliasUpdateProcessor.ROUTER_FIELD_METADATA; +import static org.apache.solr.update.processor.TimeRoutedAliasUpdateProcessor.ROUTER_INTERVAL_METADATA; + +/** + * For "routed aliases", creates another collection and adds it to the alias. In some cases it will not + * add a new collection. + * If a collection is created, then collection creation info is returned. 
+ * + * Note: this logic is within an Overseer because we want to leverage the mutual exclusion + * property afforded by the lock it obtains on the alias name. + * @since 7.3 + */ +public class RoutedAliasCreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public static final String IF_MOST_RECENT_COLL_NAME = "ifMostRecentCollName"; + + public static final String COLL_METAPREFIX = "collection-create."; + + private final OverseerCollectionMessageHandler ocmh; + + public RoutedAliasCreateCollectionCmd(OverseerCollectionMessageHandler ocmh) { + this.ocmh = ocmh; + } + + /* TODO: + There are a few classes related to time routed alias processing. We need to share some logic better. + */ + + + @Override + public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception { + //---- PARSE PRIMARY MESSAGE PARAMS + // important that we use NAME for the alias as that is what the Overseer will get a lock on before calling us + final String aliasName = message.getStr(NAME); + // the client believes this is the mostRecent collection name. We assert this if provided. + final String ifMostRecentCollName = message.getStr(IF_MOST_RECENT_COLL_NAME); // optional + + // TODO collection param (or intervalDateMath override?), useful for data capped collections + + //---- PARSE ALIAS INFO FROM ZK + final ZkStateReader.AliasesManager aliasesHolder = ocmh.zkStateReader.aliasesHolder; + final Aliases aliases = aliasesHolder.getAliases(); + final Map<String, String> aliasMetadata = aliases.getCollectionAliasMetadata(aliasName); + if (aliasMetadata == null) { + throw newAliasMustExistException(aliasName); // if it did exist, we'd have a non-null map + } + + String routeField = aliasMetadata.get(ROUTER_FIELD_METADATA); + if (routeField == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "This command only works on time routed aliases. Expected alias metadata not found."); + } + String intervalDateMath = aliasMetadata.getOrDefault(ROUTER_INTERVAL_METADATA, "+1DAY"); + TimeZone intervalTimeZone = TimeZoneUtils.parseTimezone(aliasMetadata.get(CommonParams.TZ)); + + //TODO this is ugly; how can we organize the code related to this feature better? + final List<Map.Entry<Instant,String>> parsedCollections = + TimeRoutedAliasUpdateProcessor.parseCollections(aliasName, aliases, () -> newAliasMustExistException(aliasName)); + + //---- GET MOST RECENT COLL + final Map.Entry<Instant,String> mostRecentEntry = parsedCollections.get(0); + final Instant mostRecentCollTimestamp = mostRecentEntry.getKey(); + final String mostRecentCollName = mostRecentEntry.getValue(); + if (ifMostRecentCollName != null) { + if (!mostRecentCollName.equals(ifMostRecentCollName)) { + // Possibly due to race conditions in URPs on multiple leaders calling us at the same time + String msg = IF_MOST_RECENT_COLL_NAME + " expected " + ifMostRecentCollName + " but it's " + mostRecentCollName; + if (parsedCollections.stream().map(Map.Entry::getValue).noneMatch(ifMostRecentCollName::equals)) { + msg += ". 
Furthermore this collection isn't in the list of collections referenced by the alias."; + } + log.info(msg); + results.add("message", msg); + return; + } + } else if (mostRecentCollTimestamp.isAfter(Instant.now())) { + final String msg = "Most recent collection is in the future, so we won't create another."; + log.info(msg); + results.add("message", msg); + return; + } + + //---- COMPUTE NEXT COLLECTION NAME + final Instant nextCollTimestamp = TimeRoutedAliasUpdateProcessor.computeNextCollTimestamp(mostRecentCollTimestamp, intervalDateMath, intervalTimeZone); + assert nextCollTimestamp.isAfter(mostRecentCollTimestamp); + final String createCollName = TimeRoutedAliasUpdateProcessor.formatCollectionNameFromInstant(aliasName, nextCollTimestamp); + + //---- CREATE THE COLLECTION + // Map alias metadata starting with a prefix to a create-collection API request + final ModifiableSolrParams createReqParams = new ModifiableSolrParams(); + for (Map.Entry<String, String> e : aliasMetadata.entrySet()) { + if (e.getKey().startsWith(COLL_METAPREFIX)) { + createReqParams.set(e.getKey().substring(COLL_METAPREFIX.length()), e.getValue()); + } + } + if (createReqParams.get(COLL_CONF) == null) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "We require an explicit " + COLL_CONF ); + } + createReqParams.set(NAME, createCollName); + createReqParams.set("property." + TimeRoutedAliasUpdateProcessor.TIME_PARTITION_ALIAS_NAME_CORE_PROP, aliasName); + // a CollectionOperation reads params and produces a message (Map) that is supposed to be sent to the Overseer. + // Although we could create the Map without it, there are a fair amount of rules we don't want to reproduce. + final Map<String, Object> createMsgMap = CollectionsHandler.CollectionOperation.CREATE_OP.execute( + new LocalSolrQueryRequest(null, createReqParams), + null, + ocmh.overseer.getCoreContainer().getCollectionsHandler()); + createMsgMap.put(Overseer.QUEUE_OPERATION, "create"); + // Since we are running in the Overseer here, send the message directly to the Overseer CreateCollectionCmd + ocmh.commandMap.get(CollectionParams.CollectionAction.CREATE).call(clusterState, new ZkNodeProps(createMsgMap), results); + + CollectionsHandler.waitForActiveCollection(createCollName, null, ocmh.overseer.getCoreContainer(), new OverseerSolrResponse(results)); + + //TODO delete some of the oldest collection(s) ? + + //---- UPDATE THE ALIAS + aliasesHolder.applyModificationAndExportToZk(curAliases -> { + final List<String> curTargetCollections = curAliases.getCollectionAliasListMap().get(aliasName); + if (curTargetCollections.contains(createCollName)) { + return curAliases; + } else { + List<String> newTargetCollections = new ArrayList<>(curTargetCollections.size() + 1); + // prepend it on purpose (thus reverse sorted).
Solr alias resolution defaults to the first collection in a list + newTargetCollections.add(createCollName); + newTargetCollections.addAll(curTargetCollections); + return curAliases.cloneWithCollectionAlias(aliasName, StrUtils.join(newTargetCollections, ',')); + } + }); + + } + + private SolrException newAliasMustExistException(String aliasName) { + return new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Alias " + aliasName + " does not exist."); + } + +} diff --git a/solr/core/src/java/org/apache/solr/cloud/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java similarity index 98% rename from solr/core/src/java/org/apache/solr/cloud/SplitShardCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java index 973261636f8..03e7430ba2b 100644 --- a/solr/core/src/java/org/apache/solr/cloud/SplitShardCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; @@ -32,7 +32,7 @@ import org.apache.solr.client.solrj.cloud.DistributedQueue; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; import org.apache.solr.client.solrj.request.CoreAdminRequest; -import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; +import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; @@ -56,8 +56,6 @@ import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROP_PREFIX; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SKIP_CREATE_REPLICA_IN_CLUSTER_STATE; import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA; @@ -66,7 +64,7 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.DE import static org.apache.solr.common.params.CommonAdminParams.ASYNC; -public class SplitShardCmd implements Cmd { +public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; @@ -195,7 +193,7 @@ public class SplitShardCmd implements Cmd { propMap.put(CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState)); // copy over property params: for (String key : message.keySet()) { - if (key.startsWith(COLL_PROP_PREFIX)) { + if (key.startsWith(OverseerCollectionMessageHandler.COLL_PROP_PREFIX)) { propMap.put(key, message.getStr(key)); } } @@ -332,7 +330,7 @@ public class SplitShardCmd implements Cmd { propMap.put(CoreAdminParams.NAME, solrCoreName); // copy over property params: for (String key : message.keySet()) { - if (key.startsWith(COLL_PROP_PREFIX)) { + if (key.startsWith(OverseerCollectionMessageHandler.COLL_PROP_PREFIX)) { propMap.put(key, message.getStr(key)); } } @@ -341,7 +339,7 @@ public class SplitShardCmd implements Cmd { propMap.put(ASYNC, asyncId); } // special flag param to instruct addReplica not to 
create the replica in cluster state again - propMap.put(SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, "true"); + propMap.put(OverseerCollectionMessageHandler.SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, "true"); propMap.put(CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState)); diff --git a/solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/UtilizeNodeCmd.java similarity index 99% rename from solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/UtilizeNodeCmd.java index 6a55cfd8346..60da61a3de6 100644 --- a/solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/UtilizeNodeCmd.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.api.collections; import java.lang.invoke.MethodHandles; import java.util.ArrayList; diff --git a/solr/core/src/java/org/apache/solr/util/configuration/providers/package-info.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/package-info.java similarity index 86% rename from solr/core/src/java/org/apache/solr/util/configuration/providers/package-info.java rename to solr/core/src/java/org/apache/solr/cloud/api/collections/package-info.java index 7b5e8f85236..651d4fed035 100644 --- a/solr/core/src/java/org/apache/solr/util/configuration/providers/package-info.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/package-info.java @@ -16,8 +16,8 @@ */ /** - * TODO + * Package related to internal implementations of the SolrCloud collections api */ -package org.apache.solr.util.configuration.providers; +package org.apache.solr.cloud.api.collections; diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java index a9a21a6b24d..965299c6bf3 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java @@ -39,11 +39,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; -import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.lucene.store.AlreadyClosedException; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; @@ -53,7 +51,6 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData; import org.apache.solr.client.solrj.request.CollectionAdminRequest.RequestStatusResponse; import org.apache.solr.client.solrj.response.RequestStatusState; -import org.apache.solr.cloud.ActionThrottle; import org.apache.solr.cloud.Stats; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ZkStateReader; @@ -116,8 +113,6 @@ public class ScheduledTriggers implements Closeable { private final AtomicLong triggerDelay = new AtomicLong(DEFAULT_SCHEDULED_TRIGGER_DELAY_SECONDS); - private final AtomicReference actionThrottle; - private final SolrCloudManager cloudManager; private final 
DistribStateManager stateManager; @@ -136,7 +131,6 @@ public class ScheduledTriggers implements Closeable { scheduledThreadPoolExecutor.setRemoveOnCancelPolicy(true); scheduledThreadPoolExecutor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); actionExecutor = ExecutorUtil.newMDCAwareSingleThreadExecutor(new DefaultSolrThreadFactory("AutoscalingActionExecutor")); - actionThrottle = new AtomicReference<>(new ActionThrottle("action", TimeUnit.SECONDS.toMillis(DEFAULT_ACTION_THROTTLE_PERIOD_SECONDS), cloudManager.getTimeSource())); this.cloudManager = cloudManager; this.stateManager = cloudManager.getDistribStateManager(); this.loader = loader; @@ -183,37 +177,17 @@ public class ScheduledTriggers implements Closeable { case TRIGGER_CORE_POOL_SIZE: this.scheduledThreadPoolExecutor.setCorePoolSize(((Number) newProps.get(key)).intValue()); break; - case ACTION_THROTTLE_PERIOD_SECONDS: - long minMsBetweenActions = TimeUnit.SECONDS.toMillis(((Number) newProps.get(key)).longValue()); - ActionThrottle oldThrottle = this.actionThrottle.get(); - ActionThrottle newThrottle = null; - if (oldThrottle.getLastActionStartedAt() != null) { - newThrottle = new ActionThrottle("action", - minMsBetweenActions, - oldThrottle.getLastActionStartedAt(), - cloudManager.getTimeSource()); - } else { - newThrottle = new ActionThrottle("action", minMsBetweenActions, cloudManager.getTimeSource()); - } - this.actionThrottle.set(newThrottle); - break; } } } this.autoScalingConfig = autoScalingConfig; - // reset cooldown and actionThrottle + // reset cooldown cooldownStart.set(cloudManager.getTimeSource().getTime() - cooldownPeriod.get()); - actionThrottle.get().reset(); listeners.setAutoScalingConfig(autoScalingConfig); } - @VisibleForTesting - void resetActionThrottle() { - actionThrottle.get().reset(); - } - /** * Adds a new trigger or replaces an existing one. The replaced trigger, if any, is closed * before the new trigger is run. 
If a trigger is replaced with itself then this @@ -276,7 +250,8 @@ public class ScheduledTriggers implements Closeable { // we do not want to lose this event just because the trigger was closed, perhaps a replacement will need it return false; } - // reject events during cooldown period + // even though we pause all triggers during action execution there is a possibility that a trigger was already + // running at the time and would have already created an event so we reject such events during cooldown period if (cooldownStart.get() + cooldownPeriod.get() > cloudManager.getTimeSource().getTime()) { log.debug("-------- Cooldown period - rejecting event: " + event); event.getProperties().put(TriggerEvent.COOLDOWN, true); @@ -286,6 +261,9 @@ public class ScheduledTriggers implements Closeable { log.debug("++++++++ Cooldown inactive - processing event: " + event); } if (hasPendingActions.compareAndSet(false, true)) { + // pause all triggers while we execute actions so triggers do not operate on a cluster in transition + pauseTriggers(); + final boolean enqueued; if (replaying) { enqueued = false; @@ -297,7 +275,7 @@ public class ScheduledTriggers implements Closeable { List actions = source.getActions(); if (actions != null) { if (actionExecutor.isShutdown()) { - String msg = String.format(Locale.ROOT, "Ignoring autoscaling event %s because the executor has already been closed", event.toString(), source); + String msg = String.format(Locale.ROOT, "Ignoring autoscaling event %s from trigger %s because the executor has already been closed", event.toString(), source); listeners.fireListeners(event.getSource(), event, TriggerEventProcessorStage.ABORTED, msg); log.warn(msg); // we do not want to lose this event just because the trigger was closed, perhaps a replacement will need it @@ -308,11 +286,6 @@ public class ScheduledTriggers implements Closeable { long eventProcessingStart = cloudManager.getTimeSource().getTime(); log.debug("-- processing actions for " + event); try { - // let the action executor thread wait instead of the trigger thread so we use the throttle here - ActionThrottle actionThrottle = this.actionThrottle.get(); - actionThrottle.minimumWaitBetweenActions(); - actionThrottle.markAttemptingAction(); - // in future, we could wait for pending tasks in a different thread and re-enqueue // this event so that we continue processing other events and not block this action executor waitForPendingTasks(newTrigger, actions); @@ -342,6 +315,8 @@ public class ScheduledTriggers implements Closeable { } finally { cooldownStart.set(cloudManager.getTimeSource().getTime()); hasPendingActions.set(false); + // resume triggers after cool down period + resumeTriggers(cloudManager.getTimeSource().convertDelay(TimeUnit.NANOSECONDS, cooldownPeriod.get(), TimeUnit.MILLISECONDS)); } log.debug("-- processing took {} ms for event id={}", TimeUnit.NANOSECONDS.toMillis(cloudManager.getTimeSource().getTime() - eventProcessingStart), event.id); @@ -356,6 +331,8 @@ public class ScheduledTriggers implements Closeable { } listeners.fireListeners(event.getSource(), event, TriggerEventProcessorStage.SUCCEEDED); hasPendingActions.set(false); + // resume triggers now + resumeTriggers(0); } return true; } else { @@ -370,6 +347,30 @@ public class ScheduledTriggers implements Closeable { TimeUnit.MILLISECONDS); } + /** + * Pauses all scheduled trigger invocations without interrupting any that are in progress + */ + private synchronized void pauseTriggers() { + if (log.isDebugEnabled()) { + log.debug("Pausing all triggers: 
{}", scheduledTriggers.keySet()); + } + scheduledTriggers.forEach((s, scheduledTrigger) -> scheduledTrigger.scheduledFuture.cancel(false)); + } + + /** + * Resumes all previously cancelled triggers to be scheduled after the given initial delay + * @param afterDelayMillis the initial delay in milliseconds after which triggers should be resumed + */ + private synchronized void resumeTriggers(long afterDelayMillis) { + scheduledTriggers.forEach((s, scheduledTrigger) -> { + if (scheduledTrigger.scheduledFuture.isCancelled()) { + log.debug("Resuming trigger: {} after {}ms", s, afterDelayMillis); + scheduledTrigger.scheduledFuture = scheduledThreadPoolExecutor.scheduleWithFixedDelay(scheduledTrigger, afterDelayMillis, + cloudManager.getTimeSource().convertDelay(TimeUnit.SECONDS, triggerDelay.get(), TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + } + }); + } + private void waitForPendingTasks(AutoScaling.Trigger newTrigger, List actions) throws AlreadyClosedException { DistribStateManager stateManager = cloudManager.getDistribStateManager(); try { diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java index 55d6a7e8fcd..e5303de8a98 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java @@ -26,7 +26,7 @@ import java.util.Map; import org.apache.solr.client.solrj.cloud.autoscaling.DistribStateManager; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; -import org.apache.solr.cloud.OverseerCollectionMessageHandler; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java index dbcdd3dbd79..f2c9a2fca5e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java @@ -17,7 +17,6 @@ package org.apache.solr.cloud.overseer; import java.lang.invoke.MethodHandles; - import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; @@ -31,9 +30,9 @@ import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.cloud.autoscaling.DistribStateManager; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData; -import org.apache.solr.cloud.Assign; import org.apache.solr.cloud.Overseer; -import org.apache.solr.cloud.OverseerCollectionMessageHandler; +import org.apache.solr.cloud.api.collections.Assign; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; @@ -45,7 +44,6 @@ import org.apache.solr.common.util.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROP_PREFIX; import static org.apache.solr.cloud.overseer.CollectionMutator.checkCollectionKeyExistence; import static org.apache.solr.cloud.overseer.CollectionMutator.checkKeyExistence; import static 
org.apache.solr.common.params.CommonParams.NAME; @@ -113,7 +111,7 @@ public class ReplicaMutator { String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String replicaName = message.getStr(ZkStateReader.REPLICA_PROP); String property = message.getStr(ZkStateReader.PROPERTY_PROP).toLowerCase(Locale.ROOT); - if (StringUtils.startsWith(property, COLL_PROP_PREFIX) == false) { + if (StringUtils.startsWith(property, OverseerCollectionMessageHandler.COLL_PROP_PREFIX) == false) { property = OverseerCollectionMessageHandler.COLL_PROP_PREFIX + property; } property = property.toLowerCase(Locale.ROOT); @@ -177,7 +175,7 @@ public class ReplicaMutator { String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String replicaName = message.getStr(ZkStateReader.REPLICA_PROP); String property = message.getStr(ZkStateReader.PROPERTY_PROP).toLowerCase(Locale.ROOT); - if (StringUtils.startsWith(property, COLL_PROP_PREFIX) == false) { + if (StringUtils.startsWith(property, OverseerCollectionMessageHandler.COLL_PROP_PREFIX) == false) { property = OverseerCollectionMessageHandler.COLL_PROP_PREFIX + property; } @@ -284,7 +282,7 @@ public class ReplicaMutator { replicaProps.put(ZkStateReader.REPLICA_TYPE, oldReplica.getType().toString()); // Move custom props over. for (Map.Entry ent : oldReplica.getProperties().entrySet()) { - if (ent.getKey().startsWith(COLL_PROP_PREFIX)) { + if (ent.getKey().startsWith(OverseerCollectionMessageHandler.COLL_PROP_PREFIX)) { replicaProps.put(ent.getKey(), ent.getValue()); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java index 6718a808d0d..87bf48160a1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java @@ -16,20 +16,18 @@ */ package org.apache.solr.cloud.overseer; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROP_PREFIX; -import static org.apache.solr.cloud.overseer.CollectionMutator.checkCollectionKeyExistence; -import static org.apache.solr.common.util.Utils.makeMap; - import java.lang.invoke.MethodHandles; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; +import com.google.common.collect.ImmutableSet; import org.apache.solr.client.solrj.cloud.autoscaling.DistribStateManager; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; -import org.apache.solr.cloud.Assign; import org.apache.solr.cloud.Overseer; +import org.apache.solr.cloud.api.collections.Assign; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; @@ -41,12 +39,13 @@ import org.apache.solr.common.cloud.ZkStateReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableSet; +import static org.apache.solr.cloud.overseer.CollectionMutator.checkCollectionKeyExistence; +import static org.apache.solr.common.util.Utils.makeMap; public class SliceMutator { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - public static final String PREFERRED_LEADER_PROP = COLL_PROP_PREFIX + "preferredleader"; + public static final String PREFERRED_LEADER_PROP = OverseerCollectionMessageHandler.COLL_PROP_PREFIX + "preferredleader"; public static final Set 
SLICE_UNIQUE_BOOLEAN_PROPERTIES = ImmutableSet.of(PREFERRED_LEADER_PROP); diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index eb137757b42..4e795b69931 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -1300,6 +1300,9 @@ public class CoreContainer { getZkController().startReplicationFromLeader(newCore.getName(), true); } + } else if(replica.getType() == Replica.Type.PULL) { + getZkController().stopReplicationFromLeader(core.getName()); + getZkController().startReplicationFromLeader(newCore.getName(), false); } } } catch (SolrCoreState.CoreIsClosedException e) { diff --git a/solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java index fd568dfbd36..7f67981fc93 100644 --- a/solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java @@ -108,9 +108,6 @@ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase { } } - /** - * {@inheritDoc} - */ @Override protected NamedList doAnalysis(SolrQueryRequest req) throws Exception { DocumentAnalysisRequest analysisRequest = resolveAnalysisRequest(req); diff --git a/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java index 7c16606b460..a7e1ab95d48 100644 --- a/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java @@ -91,9 +91,6 @@ import java.util.Set; */ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase { - /** - * {@inheritDoc} - */ @Override protected NamedList doAnalysis(SolrQueryRequest req) throws Exception { FieldAnalysisRequest analysisRequest = resolveAnalysisRequest(req); diff --git a/solr/core/src/java/org/apache/solr/handler/SchemaHandler.java b/solr/core/src/java/org/apache/solr/handler/SchemaHandler.java index e3e292b6554..fb84e84ef5a 100644 --- a/solr/core/src/java/org/apache/solr/handler/SchemaHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/SchemaHandler.java @@ -80,20 +80,18 @@ public class SchemaHandler extends RequestHandlerBase implements SolrCoreAware, String httpMethod = (String) req.getContext().get("httpMethod"); if ("POST".equals(httpMethod)) { if (isImmutableConfigSet) { - rsp.add("errors", "ConfigSet is immutable"); - return; + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ConfigSet is immutable"); } if (req.getContentStreams() == null) { - rsp.add("errors", "no stream"); - return; + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no stream"); } try { List errs = new SchemaManager(req).performOperations(); - if (!errs.isEmpty()) rsp.add("errors", errs); + if (!errs.isEmpty()) + throw new ApiBag.ExceptionWithErrObject(SolrException.ErrorCode.BAD_REQUEST,"error processing commands", errs); } catch (IOException e) { - rsp.add("errors", Collections.singletonList("Error reading input String " + e.getMessage())); - rsp.setException(e); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error reading input String " + e.getMessage(), e); } } else { handleGET(req, rsp); diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java 
b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index 73dea3c2471..b9a271a6835 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -127,7 +127,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("topic", TopicStream.class) .withFunctionName("commit", CommitStream.class) .withFunctionName("random", RandomStream.class) - .withFunctionName("knn", KnnStream.class) + .withFunctionName("knnSearch", KnnStream.class) // decorator streams .withFunctionName("merge", MergeStream.class) @@ -288,10 +288,32 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("density", DensityEvaluator.class) .withFunctionName("mannWhitney", MannWhitneyUEvaluator.class) .withFunctionName("sumSq", SumSqEvaluator.class) + .withFunctionName("akima", AkimaEvaluator.class) + .withFunctionName("lerp", LerpEvaluator.class) + .withFunctionName("chiSquareDataSet", ChiSquareDataSetEvaluator.class) + .withFunctionName("gtestDataSet", GTestDataSetEvaluator.class) + .withFunctionName("termVectors", TermVectorsEvaluator.class) + .withFunctionName("getColumnLabels", GetColumnLabelsEvaluator.class) + .withFunctionName("getRowLabels", GetRowLabelsEvaluator.class) + .withFunctionName("getAttribute", GetAttributeEvaluator.class) + .withFunctionName("kmeans", KmeansEvaluator.class) + .withFunctionName("getCentroids", GetCentroidsEvaluator.class) + .withFunctionName("getCluster", GetClusterEvaluator.class) + .withFunctionName("topFeatures", TopFeaturesEvaluator.class) + .withFunctionName("featureSelect", FeatureSelectEvaluator.class) + .withFunctionName("rowAt", RowAtEvaluator.class) + .withFunctionName("colAt", ColumnAtEvaluator.class) + .withFunctionName("setColumnLabels", SetColumnLabelsEvaluator.class) + .withFunctionName("setRowLabels", SetRowLabelsEvaluator.class) + .withFunctionName("knn", KnnEvaluator.class) + .withFunctionName("getAttributes", GetAttributesEvaluator.class) + .withFunctionName("indexOf", IndexOfEvaluator.class) + .withFunctionName("columnCount", ColumnCountEvaluator.class) + .withFunctionName("rowCount", RowCountEvaluator.class) // Boolean Stream Evaluators - .withFunctionName("and", AndEvaluator.class) + .withFunctionName("and", AndEvaluator.class) .withFunctionName("eor", ExclusiveOrEvaluator.class) .withFunctionName("eq", EqualToEvaluator.class) .withFunctionName("gt", GreaterThanEvaluator.class) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index d339f27de3b..56f979d3a45 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -42,7 +42,7 @@ import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard; import org.apache.solr.client.solrj.response.RequestStatusState; import org.apache.solr.client.solrj.util.SolrIdentifierValidator; import org.apache.solr.cloud.Overseer; -import org.apache.solr.cloud.OverseerCollectionMessageHandler; +import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.cloud.OverseerSolrResponse; import org.apache.solr.cloud.OverseerTaskQueue; import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent; @@ -100,17 +100,17 @@ import static org.apache.solr.client.solrj.response.RequestStatusState.NOT_FOUND 
import static org.apache.solr.client.solrj.response.RequestStatusState.RUNNING; import static org.apache.solr.client.solrj.response.RequestStatusState.SUBMITTED; import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROP_PREFIX; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.ONLY_ACTIVE_NODES; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.ONLY_IF_DOWN; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.REQUESTID; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SHARDS_PROP; -import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SHARD_UNIQUE; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.COLL_CONF; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.COLL_PROP_PREFIX; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.CREATE_NODE_SET; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.NUM_SLICES; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ONLY_ACTIVE_NODES; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ONLY_IF_DOWN; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.REQUESTID; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.SHARDS_PROP; +import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.SHARD_UNIQUE; import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST; import static org.apache.solr.common.cloud.DocCollection.DOC_ROUTER; import static org.apache.solr.common.cloud.DocCollection.RULE; @@ -260,16 +260,19 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission public static long DEFAULT_COLLECTION_OP_TIMEOUT = 180*1000; - void handleResponse(String operation, ZkNodeProps m, + //TODO rename to submitToOverseerRPC + public void handleResponse(String operation, ZkNodeProps m, SolrQueryResponse rsp) throws KeeperException, InterruptedException { handleResponse(operation, m, rsp, DEFAULT_COLLECTION_OP_TIMEOUT); } - private SolrResponse handleResponse(String operation, ZkNodeProps m, + //TODO rename to submitToOverseerRPC + public SolrResponse handleResponse(String operation, ZkNodeProps m, SolrQueryResponse rsp, long timeout) throws KeeperException, InterruptedException { - long time = System.nanoTime(); - - if (m.containsKey(ASYNC) && m.get(ASYNC) != null) { + if (!m.containsKey(QUEUE_OPERATION)) { + throw new SolrException(ErrorCode.BAD_REQUEST, "missing key " + QUEUE_OPERATION); + } + if (m.get(ASYNC) != null) { String asyncId = m.getStr(ASYNC); @@ -297,6 +300,7 @@ public class 
CollectionsHandler extends RequestHandlerBase implements Permission return response; } + long time = System.nanoTime(); QueueEvent event = coreContainer.getZkController() .getOverseerCollectionQueue() .offer(Utils.toJSON(m), timeout); @@ -1031,7 +1035,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission } } - private static void waitForActiveCollection(String collectionName, ZkNodeProps message, CoreContainer cc, SolrResponse response) + public static void waitForActiveCollection(String collectionName, ZkNodeProps message, CoreContainer cc, SolrResponse response) throws KeeperException, InterruptedException { if (response.getResponse().get("exception") != null) { diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ConfigSetsHandlerApi.java b/solr/core/src/java/org/apache/solr/handler/admin/ConfigSetsHandlerApi.java index 2028f677c41..1a5f6f33621 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/ConfigSetsHandlerApi.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/ConfigSetsHandlerApi.java @@ -32,10 +32,16 @@ import org.apache.solr.response.SolrQueryResponse; public class ConfigSetsHandlerApi extends BaseHandlerApiSupport { final public static String DEFAULT_CONFIGSET_NAME = "_default"; + final public static String AUTOCREATED_CONFIGSET_SUFFIX = ".AUTOCREATED"; + final ConfigSetsHandler configSetHandler; static Collection apiCommands = createMapping(); + public static String getSuffixedNameForAutoGeneratedConfigSet(String configName) { + return configName + AUTOCREATED_CONFIGSET_SUFFIX; + } + private static Collection createMapping() { Map result = new EnumMap<>(ConfigSetMeta.class); diff --git a/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java b/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java index f1c9680e4f0..0ee6855b71e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java @@ -130,14 +130,9 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni public void inform(SolrCore core) { List children = info.getChildren("highlighting"); if(children.isEmpty()) { - PluginInfo pluginInfo = core.getSolrConfig().getPluginInfo(SolrHighlighter.class.getName()); //TODO deprecated configuration remove later - if (pluginInfo != null) { - solrConfigHighlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName()); - } else { - DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core); - defHighlighter.init(PluginInfo.EMPTY_INFO); - solrConfigHighlighter = defHighlighter; - } + DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core); + defHighlighter.init(PluginInfo.EMPTY_INFO); + solrConfigHighlighter = defHighlighter; } else { solrConfigHighlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName()); } @@ -180,7 +175,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni if(sumData != null) { // TODO ???? add this directly to the response? 
- rb.rsp.add("highlighting", sumData); + rb.rsp.add(highlightingResponseField(), convertHighlights(sumData)); } } } @@ -238,7 +233,8 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni public void finishStage(ResponseBuilder rb) { if (rb.doHighlights && rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { - NamedList.NamedListEntry[] arr = new NamedList.NamedListEntry[rb.resultIds.size()]; + final Object[] objArr = newHighlightsArray(rb.resultIds.size()); + final String highlightingResponseField = highlightingResponseField(); // TODO: make a generic routine to do automatic merging of id keyed data for (ShardRequest sreq : rb.finished) { @@ -249,13 +245,12 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni // this should only happen when using shards.tolerant=true continue; } - NamedList hl = (NamedList)srsp.getSolrResponse().getResponse().get("highlighting"); - SolrPluginUtils.copyNamedListIntoArrayByDocPosInResponse(hl, rb.resultIds, arr); + Object hl = srsp.getSolrResponse().getResponse().get(highlightingResponseField); + addHighlights(objArr, hl, rb.resultIds); } } - // remove nulls in case not all docs were able to be retrieved - rb.rsp.add("highlighting", SolrPluginUtils.removeNulls(arr, new SimpleOrderedMap<>())); + rb.rsp.add(highlightingResponseField, getAllHighlights(objArr)); } } @@ -272,4 +267,33 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni public Category getCategory() { return Category.HIGHLIGHTER; } + + //////////////////////////////////////////// + /// highlighting response collation + //////////////////////////////////////////// + + protected String highlightingResponseField() { + return "highlighting"; + } + + protected Object convertHighlights(NamedList hl) { + return hl; + } + + protected Object[] newHighlightsArray(int size) { + return new NamedList.NamedListEntry[size]; + } + + protected void addHighlights(Object[] objArr, Object obj, Map resultIds) { + Map.Entry[] arr = (Map.Entry[])objArr; + NamedList hl = (NamedList)obj; + SolrPluginUtils.copyNamedListIntoArrayByDocPosInResponse(hl, resultIds, arr); + } + + protected Object getAllHighlights(Object[] objArr) { + final Map.Entry[] arr = (Map.Entry[])objArr; + // remove nulls in case not all docs were able to be retrieved + return SolrPluginUtils.removeNulls(arr, new SimpleOrderedMap<>()); + } + } diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 7dbd3113ba6..71ac9c0eaaa 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -167,7 +167,7 @@ public class QueryComponent extends SearchComponent String rankQueryString = rb.req.getParams().get(CommonParams.RQ); if(rankQueryString != null) { - QParser rqparser = QParser.getParser(rankQueryString, defType, req); + QParser rqparser = QParser.getParser(rankQueryString, req); Query rq = rqparser.getQuery(); if(rq instanceof RankQuery) { RankQuery rankQuery = (RankQuery)rq; diff --git a/solr/core/src/java/org/apache/solr/handler/component/RangeFacetRequest.java b/solr/core/src/java/org/apache/solr/handler/component/RangeFacetRequest.java index c2348669b90..8d47a930d6b 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/RangeFacetRequest.java +++ b/solr/core/src/java/org/apache/solr/handler/component/RangeFacetRequest.java @@ -31,8 +31,11 @@ 
import org.apache.solr.common.params.RequiredSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.CurrencyFieldType; +import org.apache.solr.schema.CurrencyValue; import org.apache.solr.schema.DatePointField; import org.apache.solr.schema.DateRangeField; +import org.apache.solr.schema.ExchangeRateProvider; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -189,6 +192,8 @@ public class RangeFacetRequest extends FacetComponent.FacetBase { (SolrException.ErrorCode.BAD_REQUEST, "Unable to range facet on Point field of unexpected type:" + this.facetOn); } + } else if (ft instanceof CurrencyFieldType) { + calc = new CurrencyRangeEndpointCalculator(this); } else { throw new SolrException (SolrException.ErrorCode.BAD_REQUEST, @@ -451,12 +456,14 @@ public class RangeFacetRequest extends FacetComponent.FacetBase { this.field = rfr.getSchemaField(); } - public T getComputedEnd() { + /** The Computed End point of all ranges, as an Object of type suitable for direct inclusion in the response data */ + public Object getComputedEnd() { assert computed; return computedEnd; } - public T getStart() { + /** The Start point of all ranges, as an Object of type suitable for direct inclusion in the response data */ + public Object getStart() { assert computed; return start; } @@ -756,6 +763,68 @@ public class RangeFacetRequest extends FacetComponent.FacetBase { } } + private static class CurrencyRangeEndpointCalculator + extends RangeEndpointCalculator { + private String defaultCurrencyCode; + private ExchangeRateProvider exchangeRateProvider; + public CurrencyRangeEndpointCalculator(final RangeFacetRequest rangeFacetRequest) { + super(rangeFacetRequest); + if(!(this.field.getType() instanceof CurrencyFieldType)) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Cannot perform range faceting over non CurrencyField fields"); + } + defaultCurrencyCode = + ((CurrencyFieldType)this.field.getType()).getDefaultCurrency(); + exchangeRateProvider = + ((CurrencyFieldType)this.field.getType()).getProvider(); + } + + @Override + protected Object parseGap(String rawval) throws java.text.ParseException { + return parseVal(rawval).strValue(); + } + + @Override + public String formatValue(CurrencyValue val) { + return val.strValue(); + } + + /** formats the value as a String since {@link CurrencyValue} is not suitable for response writers */ + @Override + public Object getComputedEnd() { + assert computed; + return formatValue(computedEnd); + } + + /** formats the value as a String since {@link CurrencyValue} is not suitable for response writers */ + @Override + public Object getStart() { + assert computed; + return formatValue(start); + } + + @Override + protected CurrencyValue parseVal(String rawval) { + return CurrencyValue.parse(rawval, defaultCurrencyCode); + } + + @Override + public CurrencyValue parseAndAddGap(CurrencyValue value, String gap) { + if(value == null) { + throw new NullPointerException("Cannot perform range faceting on null CurrencyValue"); + } + CurrencyValue gapCurrencyValue = + CurrencyValue.parse(gap, defaultCurrencyCode); + long gapAmount = + CurrencyValue.convertAmount(this.exchangeRateProvider, + gapCurrencyValue.getCurrencyCode(), + gapCurrencyValue.getAmount(), + value.getCurrencyCode()); + return new CurrencyValue(value.getAmount() + gapAmount, + 
value.getCurrencyCode()); + } + } + /** * Represents a single facet range (or gap) for which the count is to be calculated */ diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java index 752846cf1b8..04bd5f4e887 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java @@ -198,9 +198,6 @@ abstract class AbstractStatsValues implements StatsValues { } } - /** - * {@inheritDoc} - */ @Override public void accumulate(NamedList stv) { if (computeCount) { @@ -260,9 +257,6 @@ abstract class AbstractStatsValues implements StatsValues { } } - /** - * {@inheritDoc} - */ @Override public void accumulate(BytesRef value, int count) { if (null == ft) { @@ -298,9 +292,6 @@ abstract class AbstractStatsValues implements StatsValues { updateTypeSpecificStats(value, count); } - /** - * {@inheritDoc} - */ @Override public void missing() { if (computeMissing) { @@ -308,25 +299,16 @@ abstract class AbstractStatsValues implements StatsValues { } } - /** - * {@inheritDoc} - */ @Override public void addMissing(int count) { missing += count; } - /** - * {@inheritDoc} - */ @Override public void addFacet(String facetName, Map facetValues) { facets.put(facetName, facetValues); } - /** - * {@inheritDoc} - */ @Override public NamedList getStatsValues() { NamedList res = new SimpleOrderedMap<>(); @@ -377,9 +359,6 @@ abstract class AbstractStatsValues implements StatsValues { return res; } - /** - * {@inheritDoc} - */ public void setNextReader(LeafReaderContext ctx) throws IOException { if (valueSource == null) { // first time we've collected local values, get the right ValueSource @@ -503,9 +482,6 @@ class NumericStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override public void updateTypeSpecificStats(NamedList stv) { if (computeSum) { @@ -522,9 +498,6 @@ class NumericStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override public void updateTypeSpecificStats(Number v, int count) { double value = v.doubleValue(); @@ -539,9 +512,6 @@ class NumericStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override protected void updateMinMax(Number min, Number max) { // we always use the double values, because that way the response Object class is @@ -645,9 +615,6 @@ class EnumStatsValues extends AbstractStatsValues { return hasher.hashInt(v.toInt().intValue()).asLong(); } - /** - * {@inheritDoc} - */ @Override public void accumulate(int docID) throws IOException { if (values.exists(docID)) { @@ -660,9 +627,6 @@ class EnumStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ protected void updateMinMax(EnumFieldValue min, EnumFieldValue max) { if (computeMin) { // nested if to encourage JIT to optimize aware final var? 
if (null != min) { @@ -680,17 +644,11 @@ class EnumStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override protected void updateTypeSpecificStats(NamedList stv) { // No type specific stats } - /** - * {@inheritDoc} - */ @Override protected void updateTypeSpecificStats(EnumFieldValue value, int count) { // No type specific stats @@ -737,9 +695,6 @@ class DateStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override protected void updateTypeSpecificStats(NamedList stv) { if (computeSum) { @@ -750,9 +705,6 @@ class DateStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override public void updateTypeSpecificStats(Date v, int count) { long value = v.getTime(); @@ -764,9 +716,6 @@ class DateStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override protected void updateMinMax(Date min, Date max) { if (computeMin) { // nested if to encourage JIT to optimize aware final var? @@ -846,25 +795,16 @@ class StringStatsValues extends AbstractStatsValues { } } - /** - * {@inheritDoc} - */ @Override protected void updateTypeSpecificStats(NamedList stv) { // No type specific stats } - /** - * {@inheritDoc} - */ @Override protected void updateTypeSpecificStats(String value, int count) { // No type specific stats } - /** - * {@inheritDoc} - */ @Override protected void updateMinMax(String min, String max) { if (computeMin) { // nested if to encourage JIT to optimize aware final var? diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java index b7a1f56a96a..e0949861515 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.Query; @@ -621,18 +621,18 @@ public class TermsComponent extends SearchComponent { terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i])); } - TermContext[] termContexts = new TermContext[terms.length]; - collectTermContext(topReaderContext, termContexts, terms); + TermStates[] termStates = new TermStates[terms.length]; + collectTermStates(topReaderContext, termStates, terms); NamedList termsMap = new SimpleOrderedMap<>(); for (int i = 0; i < terms.length; i++) { - if (termContexts[i] != null) { + if (termStates[i] != null) { String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString()); - int docFreq = termContexts[i].docFreq(); + int docFreq = termStates[i].docFreq(); if (!includeTotalTermFreq) { termsMap.add(outTerm, docFreq); } else { - long totalTermFreq = termContexts[i].totalTermFreq(); + long totalTermFreq = termStates[i].totalTermFreq(); NamedList termStats = new SimpleOrderedMap<>(); termStats.add("df", (long) docFreq); termStats.add("ttf", totalTermFreq); @@ -645,8 +645,8 @@ public class TermsComponent extends SearchComponent { } } - private static void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray, - Term[] queryTerms) throws IOException { + private static void collectTermStates(IndexReaderContext 
topReaderContext, TermStates[] contextArray, + Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : topReaderContext.leaves()) { for (int i = 0; i < queryTerms.length; i++) { @@ -661,13 +661,13 @@ public class TermsComponent extends SearchComponent { if (termsEnum == TermsEnum.EMPTY) continue; - TermContext termContext = contextArray[i]; + TermStates termStates = contextArray[i]; if (termsEnum.seekExact(term.bytes())) { - if (termContext == null) { - termContext = new TermContext(topReaderContext); - contextArray[i] = termContext; + if (termStates == null) { + termStates = new TermStates(topReaderContext); + contextArray[i] = termStates; } - termContext.accumulateStatistics(termsEnum.docFreq(), termsEnum.totalTermFreq()); + termStates.accumulateStatistics(termsEnum.docFreq(), termsEnum.totalTermFreq()); } } } diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java index 2b0d08a97af..7e54f8d93ed 100644 --- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java @@ -24,7 +24,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -396,9 +396,9 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro if (count < 0) { BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (TermAndState t : collectedTerms) { - final TermContext termContext = new TermContext(searcher.getTopReaderContext()); - termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq); - bq.add(new TermQuery(new Term( SolrRangeQuery.this.getField(), t.term), termContext), BooleanClause.Occur.SHOULD); + final TermStates termStates = new TermStates(searcher.getTopReaderContext()); + termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq); + bq.add(new TermQuery(new Term( SolrRangeQuery.this.getField(), t.term), termStates), BooleanClause.Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); final Weight weight = searcher.rewrite(q).createWeight(searcher, needScores ? 
ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES, score()); diff --git a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java index f759c9174a8..f1a718dd5f6 100644 --- a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java +++ b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java @@ -101,17 +101,9 @@ public class SolrRequestInfo { } /** The TimeZone specified by the request, or null if none was specified */ - public TimeZone getClientTimeZone() { - + public TimeZone getClientTimeZone() { if (tz == null) { - String tzStr = req.getParams().get(CommonParams.TZ); - if (tzStr != null) { - tz = TimeZoneUtils.getTimeZone(tzStr); - if (null == tz) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "Solr JVM does not support TZ: " + tzStr); - } - } + tz = TimeZoneUtils.parseTimezone(req.getParams().get(CommonParams.TZ)); } return tz; } diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java b/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java index d4ce2680454..06f3c321040 100644 --- a/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java +++ b/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java @@ -250,14 +250,32 @@ public abstract class AbstractEnumField extends PrimitiveFieldType { @Override public SortField getSortField(SchemaField field, boolean top) { - SortField result = getSortField(field, SortField.Type.INT, top, Integer.MIN_VALUE, Integer.MAX_VALUE); + if (field.multiValued()) { + MultiValueSelector selector = field.type.getDefaultMultiValueSelectorForSort(field, top); + if (null != selector) { + final SortField result = getSortedSetSortField(field, selector.getSortedSetSelectorType(), + // yes: Strings, it's how SortedSetSortField works + top, SortField.STRING_FIRST, SortField.STRING_LAST); + if (null == result.getMissingValue()) { + // special case 'enum' default behavior: assume missing values are "below" all enum values + result.setMissingValue(SortField.STRING_FIRST); + } + return result; + } + } + + // else... 
+ // either single valued, or don't support implicit multi selector + // (in which case let getSortField() give the error) + final SortField result = getSortField(field, SortField.Type.INT, top, Integer.MIN_VALUE, Integer.MAX_VALUE); + if (null == result.getMissingValue()) { - // special case default behavior: assume missing values are "below" all enum values + // special case 'enum' default behavior: assume missing values are "below" all enum values result.setMissingValue(Integer.MIN_VALUE); } return result; } - + @Override public ValueSource getValueSource(SchemaField field, QParser qparser) { field.checkFieldCacheSource(); diff --git a/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java b/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java index a6ba164d1db..97195da243a 100644 --- a/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java @@ -89,6 +89,12 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc return null; } + /** The identifier code for the default currency of this field type */ + public String getDefaultCurrency() { + return defaultCurrency; + } + + @Override protected void init(IndexSchema schema, Map args) { super.init(schema, args); @@ -666,164 +672,5 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc } } - /** - * Represents a Currency field value, which includes a long amount and ISO currency code. - */ - static class CurrencyValue { - private long amount; - private String currencyCode; - - /** - * Constructs a new currency value. - * - * @param amount The amount. - * @param currencyCode The currency code. - */ - public CurrencyValue(long amount, String currencyCode) { - this.amount = amount; - this.currencyCode = currencyCode; - } - - /** - * Constructs a new currency value by parsing the specific input. - *
<p/>
    - * Currency values are expected to be in the format <amount>,<currency code>, - * for example, "500,USD" would represent 5 U.S. Dollars. - *
<p/>
    - * If no currency code is specified, the default is assumed. - * - * @param externalVal The value to parse. - * @param defaultCurrency The default currency. - * @return The parsed CurrencyValue. - */ - public static CurrencyValue parse(String externalVal, String defaultCurrency) { - if (externalVal == null) { - return null; - } - String amount = externalVal; - String code = defaultCurrency; - - if (externalVal.contains(",")) { - String[] amountAndCode = externalVal.split(","); - amount = amountAndCode[0]; - code = amountAndCode[1]; - } - - if (amount.equals("*")) { - return null; - } - - Currency currency = getCurrency(code); - - if (currency == null) { - throw new SolrException(ErrorCode.BAD_REQUEST, "Currency code not supported by this JVM: " + code); - } - - try { - double value = Double.parseDouble(amount); - long currencyValue = Math.round(value * Math.pow(10.0, currency.getDefaultFractionDigits())); - - return new CurrencyValue(currencyValue, code); - } catch (NumberFormatException e) { - throw new SolrException(ErrorCode.BAD_REQUEST, e); - } - } - - /** - * The amount of the CurrencyValue. - * - * @return The amount. - */ - public long getAmount() { - return amount; - } - - /** - * The ISO currency code of the CurrencyValue. - * - * @return The currency code. - */ - public String getCurrencyCode() { - return currencyCode; - } - - /** - * Performs a currency conversion & unit conversion. - * - * @param exchangeRates Exchange rates to apply. - * @param sourceCurrencyCode The source currency code. - * @param sourceAmount The source amount. - * @param targetCurrencyCode The target currency code. - * @return The converted indexable units after the exchange rate and currency fraction digits are applied. - */ - public static long convertAmount(ExchangeRateProvider exchangeRates, String sourceCurrencyCode, long sourceAmount, String targetCurrencyCode) { - double exchangeRate = exchangeRates.getExchangeRate(sourceCurrencyCode, targetCurrencyCode); - return convertAmount(exchangeRate, sourceCurrencyCode, sourceAmount, targetCurrencyCode); - } - - /** - * Performs a currency conversion & unit conversion. - * - * @param exchangeRate Exchange rate to apply. - * @param sourceFractionDigits The fraction digits of the source. - * @param sourceAmount The source amount. - * @param targetFractionDigits The fraction digits of the target. - * @return The converted indexable units after the exchange rate and currency fraction digits are applied. - */ - public static long convertAmount(final double exchangeRate, final int sourceFractionDigits, final long sourceAmount, final int targetFractionDigits) { - int digitDelta = targetFractionDigits - sourceFractionDigits; - double value = ((double) sourceAmount * exchangeRate); - - if (digitDelta != 0) { - if (digitDelta < 0) { - for (int i = 0; i < -digitDelta; i++) { - value *= 0.1; - } - } else { - for (int i = 0; i < digitDelta; i++) { - value *= 10.0; - } - } - } - - return (long) value; - } - - /** - * Performs a currency conversion & unit conversion. - * - * @param exchangeRate Exchange rate to apply. - * @param sourceCurrencyCode The source currency code. - * @param sourceAmount The source amount. - * @param targetCurrencyCode The target currency code. - * @return The converted indexable units after the exchange rate and currency fraction digits are applied. 
- */ - public static long convertAmount(double exchangeRate, String sourceCurrencyCode, long sourceAmount, String targetCurrencyCode) { - if (targetCurrencyCode.equals(sourceCurrencyCode)) { - return sourceAmount; - } - - int sourceFractionDigits = Currency.getInstance(sourceCurrencyCode).getDefaultFractionDigits(); - Currency targetCurrency = Currency.getInstance(targetCurrencyCode); - int targetFractionDigits = targetCurrency.getDefaultFractionDigits(); - return convertAmount(exchangeRate, sourceFractionDigits, sourceAmount, targetFractionDigits); - } - - /** - * Returns a new CurrencyValue that is the conversion of this CurrencyValue to the specified currency. - * - * @param exchangeRates The exchange rate provider. - * @param targetCurrencyCode The target currency code to convert this CurrencyValue to. - * @return The converted CurrencyValue. - */ - public CurrencyValue convertTo(ExchangeRateProvider exchangeRates, String targetCurrencyCode) { - return new CurrencyValue(convertAmount(exchangeRates, this.getCurrencyCode(), this.getAmount(), targetCurrencyCode), targetCurrencyCode); - } - - @Override - public String toString() { - return String.valueOf(amount) + "," + currencyCode; - } - } } diff --git a/solr/core/src/java/org/apache/solr/schema/CurrencyValue.java b/solr/core/src/java/org/apache/solr/schema/CurrencyValue.java new file mode 100644 index 00000000000..4c43422bed9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/CurrencyValue.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +import org.apache.solr.common.SolrException; + +import java.util.Currency; + +/** + * Represents a Currency field value, which includes a long amount and ISO currency code. + */ +public class CurrencyValue implements Comparable { + private long amount; + private String currencyCode; + + /** + * Constructs a new currency value. + * + * @param amount The amount. + * @param currencyCode The currency code. + */ + public CurrencyValue(long amount, String currencyCode) { + this.amount = amount; + this.currencyCode = currencyCode; + } + + /** + * Constructs a new currency value by parsing the specific input. + *

+ * Currency values are expected to be in the format <amount>,<currency code>,
+ * for example, "500,USD" would represent 500 U.S. Dollars.
+ *

    + *

    + * If no currency code is specified, the default is assumed. + *

    + * @param externalVal The value to parse. + * @param defaultCurrency The default currency. + * @return The parsed CurrencyValue. + */ + public static CurrencyValue parse(String externalVal, String defaultCurrency) { + if (externalVal == null) { + return null; + } + String amount = externalVal; + String code = defaultCurrency; + + if (externalVal.contains(",")) { + String[] amountAndCode = externalVal.split(","); + amount = amountAndCode[0]; + code = amountAndCode[1]; + } + + if (amount.equals("*")) { + return null; + } + + Currency currency = CurrencyField.getCurrency(code); + + if (currency == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Currency code not supported by this JVM: " + code); + } + + try { + double value = Double.parseDouble(amount); + long currencyValue = Math.round(value * Math.pow(10.0, currency.getDefaultFractionDigits())); + + return new CurrencyValue(currencyValue, code); + } catch (NumberFormatException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + /** + * The amount of the CurrencyValue. + * + * @return The amount. + */ + public long getAmount() { + return amount; + } + + /** + * The ISO currency code of the CurrencyValue. + * + * @return The currency code. + */ + public String getCurrencyCode() { + return currencyCode; + } + + /** + * Performs a currency conversion & unit conversion. + * + * @param exchangeRates Exchange rates to apply. + * @param sourceCurrencyCode The source currency code. + * @param sourceAmount The source amount. + * @param targetCurrencyCode The target currency code. + * @return The converted indexable units after the exchange rate and currency fraction digits are applied. + */ + public static long convertAmount(ExchangeRateProvider exchangeRates, String sourceCurrencyCode, long sourceAmount, String targetCurrencyCode) { + double exchangeRate = exchangeRates.getExchangeRate(sourceCurrencyCode, targetCurrencyCode); + return convertAmount(exchangeRate, sourceCurrencyCode, sourceAmount, targetCurrencyCode); + } + + /** + * Performs a currency conversion & unit conversion. + * + * @param exchangeRate Exchange rate to apply. + * @param sourceFractionDigits The fraction digits of the source. + * @param sourceAmount The source amount. + * @param targetFractionDigits The fraction digits of the target. + * @return The converted indexable units after the exchange rate and currency fraction digits are applied. + */ + public static long convertAmount(final double exchangeRate, final int sourceFractionDigits, final long sourceAmount, final int targetFractionDigits) { + int digitDelta = targetFractionDigits - sourceFractionDigits; + double value = ((double) sourceAmount * exchangeRate); + + if (digitDelta != 0) { + if (digitDelta < 0) { + for (int i = 0; i < -digitDelta; i++) { + value *= 0.1; + } + } else { + for (int i = 0; i < digitDelta; i++) { + value *= 10.0; + } + } + } + + return (long) value; + } + + /** + * Performs a currency conversion & unit conversion. + * + * @param exchangeRate Exchange rate to apply. + * @param sourceCurrencyCode The source currency code. + * @param sourceAmount The source amount. + * @param targetCurrencyCode The target currency code. + * @return The converted indexable units after the exchange rate and currency fraction digits are applied. 
+ */ + public static long convertAmount(double exchangeRate, String sourceCurrencyCode, long sourceAmount, String targetCurrencyCode) { + if (targetCurrencyCode.equals(sourceCurrencyCode)) { + return sourceAmount; + } + + int sourceFractionDigits = Currency.getInstance(sourceCurrencyCode).getDefaultFractionDigits(); + Currency targetCurrency = Currency.getInstance(targetCurrencyCode); + int targetFractionDigits = targetCurrency.getDefaultFractionDigits(); + return convertAmount(exchangeRate, sourceFractionDigits, sourceAmount, targetFractionDigits); + } + + /** + * Returns a new CurrencyValue that is the conversion of this CurrencyValue to the specified currency. + * + * @param exchangeRates The exchange rate provider. + * @param targetCurrencyCode The target currency code to convert this CurrencyValue to. + * @return The converted CurrencyValue. + */ + public CurrencyValue convertTo(ExchangeRateProvider exchangeRates, String targetCurrencyCode) { + return new CurrencyValue(convertAmount(exchangeRates, this.getCurrencyCode(), this.getAmount(), targetCurrencyCode), targetCurrencyCode); + } + + /** + * Returns a string representing the currency value such as "3.14,USD" for + * a CurrencyValue of $3.14 USD. + */ + public String strValue() { + int digits = 0; + try { + Currency currency = + Currency.getInstance(this.getCurrencyCode()); + if (currency == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Invalid currency code " + this.getCurrencyCode()); + } + digits = currency.getDefaultFractionDigits(); +} + catch(IllegalArgumentException exception) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Invalid currency code " + this.getCurrencyCode()); + } + + String amount = Long.toString(this.getAmount()); + if (this.getAmount() == 0) { + amount += "000000".substring(0,digits); + } + return + amount.substring(0, amount.length() - digits) + + "." 
+ amount.substring(amount.length() - digits) + + "," + this.getCurrencyCode(); + } + + @Override + public int compareTo(CurrencyValue o) { + if(o == null) { + throw new NullPointerException("Cannot compare CurrencyValue to a null values"); + } + if(!getCurrencyCode().equals(o.getCurrencyCode())) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Cannot compare CurrencyValues when their currencies are not equal"); + } + if(o.getAmount() < getAmount()) { + return 1; + } + if(o.getAmount() == getAmount()) { + return 0; + } + return -1; + } + + @Override + public String toString() { + return strValue(); + } +} diff --git a/solr/core/src/java/org/apache/solr/schema/DatePointField.java b/solr/core/src/java/org/apache/solr/schema/DatePointField.java index 48619171548..2bbe4ad3b17 100644 --- a/solr/core/src/java/org/apache/solr/schema/DatePointField.java +++ b/solr/core/src/java/org/apache/solr/schema/DatePointField.java @@ -29,7 +29,6 @@ import org.apache.lucene.queries.function.valuesource.LongFieldSource; import org.apache.lucene.queries.function.valuesource.MultiValuedLongFieldSource; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -188,11 +187,6 @@ public class DatePointField extends PointField implements DateValueFieldType { LongPoint.encodeDimension(date.getTime(), result.bytes(), 0); } - @Override - public SortField getSortField(SchemaField field, boolean top) { - return getSortField(field, SortField.Type.LONG, top, Long.MIN_VALUE, Long.MAX_VALUE); - } - @Override public UninvertingReader.Type getUninversionType(SchemaField sf) { if (sf.multiValued()) { diff --git a/solr/core/src/java/org/apache/solr/schema/DoublePointField.java b/solr/core/src/java/org/apache/solr/schema/DoublePointField.java index ba71a8aeada..3b68ece1127 100644 --- a/solr/core/src/java/org/apache/solr/schema/DoublePointField.java +++ b/solr/core/src/java/org/apache/solr/schema/DoublePointField.java @@ -27,7 +27,6 @@ import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; import org.apache.lucene.queries.function.valuesource.MultiValuedDoubleFieldSource; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -132,11 +131,6 @@ public class DoublePointField extends PointField implements DoubleValueFieldType DoublePoint.encodeDimension(parseDoubleFromUser(null, val.toString()), result.bytes(), 0); } - @Override - public SortField getSortField(SchemaField field, boolean top) { - return getSortField(field, SortField.Type.DOUBLE, top, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); - } - @Override public Type getUninversionType(SchemaField sf) { if (sf.multiValued()) { diff --git a/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java b/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java index 4bda8237b21..5b76d4814df 100644 --- a/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java @@ -32,6 +32,7 @@ import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortedNumericSelector; import 
org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRef; @@ -210,4 +211,14 @@ public class EnumFieldType extends AbstractEnumField { } return new MultiValuedIntFieldSource(field.getName(), selectorType); } + + @Override + public SortField getSortField(SchemaField field, boolean top) { + final SortField result = getNumericSort(field, NumberType.INTEGER, top); + if (null == result.getMissingValue()) { + // special case 'enum' default behavior: assume missing values are "below" all enum values + result.setMissingValue(Integer.MIN_VALUE); + } + return result; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 31ef6ec7ac7..9dcca2452d5 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -47,10 +47,11 @@ import org.apache.lucene.search.DocValuesRewriteMethod; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.search.SortedNumericSelector; +import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.Similarity; @@ -662,7 +663,8 @@ public abstract class FieldType extends FieldProperties { * Returns the SortField instance that should be used to sort fields * of this type. * @see SchemaField#checkSortability - * @see #getSortField(SchemaField,SortField.Type,boolean,Object,Object) + * @see #getStringSort + * @see #getNumericSort */ public abstract SortField getSortField(SchemaField field, boolean top); @@ -703,13 +705,26 @@ public abstract class FieldType extends FieldProperties { boolean reverse, Object missingLow, Object missingHigh) { field.checkSortability(); - SortField sf = new SortedSetSortField(field.getName(), reverse, selector); applySetMissingValue(field, sf, missingLow, missingHigh); return sf; } + /** + * Same as {@link #getSortField} but using {@link SortedNumericSortField}. + */ + protected static SortField getSortedNumericSortField(SchemaField field, SortField.Type sortType, + SortedNumericSelector.Type selector, + boolean reverse, Object missingLow, Object missingHigh) { + + field.checkSortability(); + SortField sf = new SortedNumericSortField(field.getName(), sortType, reverse, selector); + applySetMissingValue(field, sf, missingLow, missingHigh); + + return sf; + } + /** * @see #getSortField * @see #getSortedSetSortField @@ -729,11 +744,49 @@ public abstract class FieldType extends FieldProperties { * Utility usable by subclasses when they want to get basic String sorting * using common checks. 
* @see SchemaField#checkSortability + * @see #getSortedSetSortField + * @see #getSortField */ protected SortField getStringSort(SchemaField field, boolean reverse) { + if (field.multiValued()) { + MultiValueSelector selector = field.type.getDefaultMultiValueSelectorForSort(field, reverse); + if (null != selector) { + return getSortedSetSortField(field, selector.getSortedSetSelectorType(), + reverse, SortField.STRING_FIRST, SortField.STRING_LAST); + } + } + + // else... + // either single valued, or don't support implicit multi selector + // (in which case let getSortField() give the error) return getSortField(field, SortField.Type.STRING, reverse, SortField.STRING_FIRST, SortField.STRING_LAST); } + /** + * Utility usable by subclasses when they want to get basic Numeric sorting + * using common checks. + * + * @see SchemaField#checkSortability + * @see #getSortedNumericSortField + * @see #getSortField + */ + protected SortField getNumericSort(SchemaField field, NumberType type, boolean reverse) { + if (field.multiValued()) { + MultiValueSelector selector = field.type.getDefaultMultiValueSelectorForSort(field, reverse); + if (null != selector) { + return getSortedNumericSortField(field, type.sortType, selector.getSortedNumericSelectorType(), + reverse, type.sortMissingLow, type.sortMissingHigh); + } + } + + // else... + // either single valued, or don't support implicit multi selector + // (in which case let getSortField() give the error) + return getSortField(field, type.sortType, reverse, type.sortMissingLow, type.sortMissingHigh); + } + + + /** called to get the default value source (normally, from the * Lucene FieldCache.) */ @@ -760,8 +813,23 @@ public abstract class FieldType extends FieldProperties { throw new SolrException(ErrorCode.BAD_REQUEST, "Selecting a single value from a multivalued field is not supported for this field: " + field.getName() + " (type: " + this.getTypeName() + ")"); } - - + + /** + * Method for indicating which {@link MultiValueSelector} (if any) should be used when + * sorting on a multivalued field of this type for the specified direction (asc/desc). + * The default implementation returns null (for all inputs). + * + * @param field The SchemaField (of this type) in question + * @param reverse false if this is an ascending sort, true if this is a descending sort. + * @return the implicit selector to use for this direction, or null if implicit sorting on the specified direction is not supported and should return an error. + * @see MultiValueSelector + */ + public MultiValueSelector getDefaultMultiValueSelectorForSort(SchemaField field, boolean reverse) { + // trivial base case + return null; + } + + /** * Returns a Query instance for doing range searches on this field type. 
{@link org.apache.solr.search.SolrQueryParser} diff --git a/solr/core/src/java/org/apache/solr/schema/FloatPointField.java b/solr/core/src/java/org/apache/solr/schema/FloatPointField.java index f69a1dbdac1..68155f46998 100644 --- a/solr/core/src/java/org/apache/solr/schema/FloatPointField.java +++ b/solr/core/src/java/org/apache/solr/schema/FloatPointField.java @@ -27,7 +27,6 @@ import org.apache.lucene.queries.function.valuesource.FloatFieldSource; import org.apache.lucene.queries.function.valuesource.MultiValuedFloatFieldSource; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -132,11 +131,6 @@ public class FloatPointField extends PointField implements FloatValueFieldType { FloatPoint.encodeDimension(parseFloatFromUser(null, val.toString()), result.bytes(), 0); } - @Override - public SortField getSortField(SchemaField field, boolean top) { - return getSortField(field, SortField.Type.FLOAT, top, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY); - } - @Override public Type getUninversionType(SchemaField sf) { if (sf.multiValued()) { diff --git a/solr/core/src/java/org/apache/solr/schema/IntPointField.java b/solr/core/src/java/org/apache/solr/schema/IntPointField.java index b179c573e8d..a43639c3181 100644 --- a/solr/core/src/java/org/apache/solr/schema/IntPointField.java +++ b/solr/core/src/java/org/apache/solr/schema/IntPointField.java @@ -27,7 +27,6 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource; import org.apache.lucene.queries.function.valuesource.MultiValuedIntFieldSource; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -130,11 +129,6 @@ public class IntPointField extends PointField implements IntValueFieldType { IntPoint.encodeDimension(parseIntFromUser(null, val.toString()), result.bytes(), 0); } - @Override - public SortField getSortField(SchemaField field, boolean top) { - return getSortField(field, SortField.Type.INT, top, Integer.MIN_VALUE, Integer.MAX_VALUE); - } - @Override public Type getUninversionType(SchemaField sf) { if (sf.multiValued()) { diff --git a/solr/core/src/java/org/apache/solr/schema/LongPointField.java b/solr/core/src/java/org/apache/solr/schema/LongPointField.java index 547725bfc3e..d5a50726b18 100644 --- a/solr/core/src/java/org/apache/solr/schema/LongPointField.java +++ b/solr/core/src/java/org/apache/solr/schema/LongPointField.java @@ -27,7 +27,6 @@ import org.apache.lucene.queries.function.valuesource.LongFieldSource; import org.apache.lucene.queries.function.valuesource.MultiValuedLongFieldSource; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.solr.search.QParser; @@ -129,11 +128,6 @@ public class LongPointField extends PointField implements LongValueFieldType { LongPoint.encodeDimension(parseLongFromUser(null, val.toString()), result.bytes(), 0); } - @Override - public SortField getSortField(SchemaField field, boolean top) { - return getSortField(field, SortField.Type.LONG, top, 
Long.MIN_VALUE, Long.MAX_VALUE);
-  }
-
   @Override
   public Type getUninversionType(SchemaField sf) {
     if (sf.multiValued()) {
diff --git a/solr/core/src/java/org/apache/solr/schema/NumberType.java b/solr/core/src/java/org/apache/solr/schema/NumberType.java
index 2253d671bbc..8f41b6c3b55 100644
--- a/solr/core/src/java/org/apache/solr/schema/NumberType.java
+++ b/solr/core/src/java/org/apache/solr/schema/NumberType.java
@@ -16,10 +16,34 @@
  */
 package org.apache.solr.schema;
 
+import org.apache.lucene.search.SortField;
+
 public enum NumberType {
-  INTEGER,
-  LONG,
-  FLOAT,
-  DOUBLE,
-  DATE
+  INTEGER(SortField.Type.INT, Integer.MIN_VALUE, Integer.MAX_VALUE),
+  LONG(SortField.Type.LONG, Long.MIN_VALUE, Long.MAX_VALUE),
+  FLOAT(SortField.Type.FLOAT, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY),
+  DOUBLE(SortField.Type.DOUBLE, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY),
+  DATE(SortField.Type.LONG, Long.MIN_VALUE, Long.MAX_VALUE);
+
+  /** The SortField type that corresponds with this NumberType */
+  public final SortField.Type sortType;
+  /**
+   * The effective value to use when sorting on this field should result in docs w/o a value
+   * sorting "low" (which may be "first" or "last" depending on sort direction)
+   * @see SortField#setMissingValue
+   */
+  public final Object sortMissingLow;
+  /**
+   * The effective value to use when sorting on this field should result in docs w/o a value
+   * sorting "high" (which may be "first" or "last" depending on sort direction)
+   * @see SortField#setMissingValue
+   */
+  public final Object sortMissingHigh;
+
+  private NumberType(SortField.Type sortType, Object sortMissingLow, Object sortMissingHigh) {
+    this.sortType = sortType;
+    this.sortMissingLow = sortMissingLow;
+    this.sortMissingHigh = sortMissingHigh;
+  }
 }
diff --git a/solr/core/src/java/org/apache/solr/schema/PointField.java b/solr/core/src/java/org/apache/solr/schema/PointField.java
index 09d0175ff77..91a342cfa80 100644
--- a/solr/core/src/java/org/apache/solr/schema/PointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/PointField.java
@@ -33,6 +33,7 @@ import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRef;
@@ -294,4 +295,9 @@ public abstract class PointField extends NumericFieldType {
 
   protected abstract StoredField getStoredField(SchemaField sf, Object value);
 
+  @Override
+  public SortField getSortField(SchemaField field, boolean top) {
+    return getNumericSort(field, getNumberType(), top);
+  }
+
 }
diff --git a/solr/core/src/java/org/apache/solr/schema/PrimitiveFieldType.java b/solr/core/src/java/org/apache/solr/schema/PrimitiveFieldType.java
index 9d9da4709aa..0c0042e78cd 100644
--- a/solr/core/src/java/org/apache/solr/schema/PrimitiveFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/PrimitiveFieldType.java
@@ -37,4 +37,9 @@ public abstract class PrimitiveFieldType extends FieldType {
   @Override
   protected void checkSupportsDocValues() { // primitive types support DocValues
   }
+
+  @Override
+  public MultiValueSelector getDefaultMultiValueSelectorForSort(SchemaField field, boolean reverse) {
+    return reverse ?
MultiValueSelector.MAX : MultiValueSelector.MIN; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaField.java b/solr/core/src/java/org/apache/solr/schema/SchemaField.java index c2e8cca8bab..256cbae230f 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaField.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaField.java @@ -161,10 +161,14 @@ public final class SchemaField extends FieldProperties implements IndexableField * @see FieldType#getSortField */ public void checkSortability() throws SolrException { - if ( multiValued() ) { + if ( multiValued() + // if either of these are non-null, then we should not error + && null == this.type.getDefaultMultiValueSelectorForSort(this,true) + && null == this.type.getDefaultMultiValueSelectorForSort(this,false) ) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "can not sort on multivalued field: " - + getName()); + + getName() + " of type: " + this.type.getTypeName()); } if (! hasDocValues() ) { if ( ! ( indexed() && null != this.type.getUninversionType(this) ) ) { diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 3294b0484ea..d9b51d17c29 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -26,8 +26,11 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.uninverting.UninvertingReader.Type; @@ -104,6 +107,31 @@ public class StrField extends PrimitiveFieldType { public Object unmarshalSortValue(Object value) { return unmarshalStringSortValue(value); } + + @Override + public ValueSource getSingleValueSource(MultiValueSelector choice, SchemaField field, QParser parser) { + // trivial base case + if (!field.multiValued()) { + // single value matches any selector + return getValueSource(field, parser); + } + + // See LUCENE-6709 + if (! 
field.hasDocValues()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "docValues='true' is required to select '" + choice.toString() + + "' value from multivalued field ("+ field.getName() +") at query time"); + } + SortedSetSelector.Type selectorType = choice.getSortedSetSelectorType(); + if (null == selectorType) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + choice.toString() + " is not a supported option for picking a single value" + + " from the multivalued field: " + field.getName() + + " (type: " + this.getTypeName() + ")"); + } + + return new SortedSetFieldSource(field.getName(), selectorType); + } } diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index ebe21033056..90b27e459d4 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -160,30 +160,25 @@ public class TrieField extends NumericFieldType { } @Override - public SortField getSortField(SchemaField field, boolean top) { - field.checkSortability(); + public SortField getSortField(SchemaField field, boolean reverse) { + // NOTE: can't use getNumericSort because our multivalued case is special: we use SortedSet - Object missingValue = null; - boolean sortMissingLast = field.sortMissingLast(); - boolean sortMissingFirst = field.sortMissingFirst(); - - SortField sf; - - switch (type) { - case INTEGER: - return getSortField(field, SortField.Type.INT, top, Integer.MIN_VALUE, Integer.MAX_VALUE); - case FLOAT: - return getSortField(field, SortField.Type.FLOAT, top, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY); - case DATE: // fallthrough - case LONG: - return getSortField(field, SortField.Type.LONG, top, Long.MIN_VALUE, Long.MAX_VALUE); - case DOUBLE: - return getSortField(field, SortField.Type.DOUBLE, top, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); - default: - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name); + if (field.multiValued()) { + MultiValueSelector selector = field.type.getDefaultMultiValueSelectorForSort(field, reverse); + if (null != selector) { + return getSortedSetSortField(field, selector.getSortedSetSelectorType(), + // yes: we really want Strings here, regardless of NumberType + reverse, SortField.STRING_FIRST, SortField.STRING_LAST); + } } + + // else... 
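// ------------------------------------------------------------------------------------------------
// (Illustrative aside, not part of the patch.) What the implicit MultiValueSelector above means at
// the Lucene level: an ascending sort on a multivalued SortedSet field orders each document by the
// MIN of its values, a descending sort by the MAX. A minimal sketch, assuming a hypothetical
// multivalued string field named "tags"; the missing-value choices shown are just one possibility,
// since Solr picks them from the field's sortMissingFirst/sortMissingLast properties:

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;

class ImplicitSelectorSketch {
  // ascending: each doc is represented by the smallest of its values
  static Sort ascByTags() {
    SortField sf = new SortedSetSortField("tags", false, SortedSetSelector.Type.MIN);
    sf.setMissingValue(SortField.STRING_FIRST); // docs without a value sort "low"
    return new Sort(sf);
  }
  // descending: each doc is represented by the largest of its values
  static Sort descByTags() {
    SortField sf = new SortedSetSortField("tags", true, SortedSetSelector.Type.MAX);
    sf.setMissingValue(SortField.STRING_LAST); // docs without a value sort "high"
    return new Sort(sf);
  }
}
// ------------------------------------------------------------------------------------------------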
+ // either single valued, or don't support implicit multi selector + // (in which case let getSortField() give the error) + NumberType type = getNumberType(); + return getSortField(field, type.sortType, reverse, type.sortMissingLow, type.sortMissingHigh); } - + @Override public Type getUninversionType(SchemaField sf) { if (sf.multiValued()) { diff --git a/solr/core/src/java/org/apache/solr/search/BoostQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/BoostQParserPlugin.java index 7391f660ede..70e08a6728c 100644 --- a/solr/core/src/java/org/apache/solr/search/BoostQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/BoostQParserPlugin.java @@ -16,18 +16,11 @@ */ package org.apache.solr.search; -import java.text.ParseException; - -import org.apache.lucene.expressions.Expression; -import org.apache.lucene.expressions.SimpleBindings; -import org.apache.lucene.expressions.js.JavascriptCompiler; import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.FunctionScoreQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.QueryValueSource; -import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.Query; -import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; @@ -67,7 +60,7 @@ public class BoostQParserPlugin extends QParserPlugin { } else { vs = new QueryValueSource(bq, 0.0f); } - return boostQuery(q, vs); + return FunctionScoreQuery.boostByValue(q, vs.asDoubleValuesSource()); } @@ -91,16 +84,4 @@ public class BoostQParserPlugin extends QParserPlugin { }; } - public static Query boostQuery(Query input, ValueSource vs) { - try { - SimpleBindings bindings = new SimpleBindings(); - bindings.add("score", DoubleValuesSource.SCORES); - bindings.add("vs", vs.asDoubleValuesSource()); - Expression expr = JavascriptCompiler.compile("score * vs"); - return new FunctionScoreQuery(input, expr.getDoubleValuesSource(bindings)); - } catch (ParseException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); // should never happen! 
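// ------------------------------------------------------------------------------------------------
// (Illustrative aside, not part of the patch.) The removed BoostQParserPlugin.boostQuery helper
// above compiled a "score * vs" javascript expression; its replacement throughout this patch is
// the stock FunctionScoreQuery.boostByValue, which multiplies each hit's score by a per-document
// value. A minimal sketch; the query and the constant boost below are hypothetical stand-ins:

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class BoostByValueSketch {
  static Query boosted() {
    Query q = new TermQuery(new Term("title", "solr"));
    ValueSource vs = new DoubleConstValueSource(2.0); // stand-in for a field or function boost
    // equivalent to the old boostQuery(q, vs), without the expression-compiler round trip
    return FunctionScoreQuery.boostByValue(q, vs.asDoubleValuesSource());
  }
}
// ------------------------------------------------------------------------------------------------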
- } - } - } diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 5e74f4a5dc4..004d1c042af 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.core.StopFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.index.Term; import org.apache.lucene.queries.function.FunctionQuery; +import org.apache.lucene.queries.function.FunctionScoreQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.ProductFloatFunction; import org.apache.lucene.queries.function.valuesource.QueryValueSource; @@ -196,9 +197,9 @@ public class ExtendedDismaxQParser extends QParser { List boosts = getMultiplicativeBoosts(); if (boosts.size()>1) { ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()])); - topQuery = BoostQParserPlugin.boostQuery(topQuery, prod); + topQuery = FunctionScoreQuery.boostByValue(topQuery, prod.asDoubleValuesSource()); } else if (boosts.size() == 1) { - topQuery = BoostQParserPlugin.boostQuery(topQuery, boosts.get(0)); + topQuery = FunctionScoreQuery.boostByValue(topQuery, boosts.get(0).asDoubleValuesSource()); } return topQuery; diff --git a/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java index 01b0ef88fa3..d1f7ff2e470 100644 --- a/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java @@ -37,7 +37,7 @@ import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -252,15 +252,15 @@ public class GraphTermsQParserPlugin extends QParserPlugin { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - List finalContexts = new ArrayList(); + List finalContexts = new ArrayList(); List finalTerms = new ArrayList(); List contexts = searcher.getTopReaderContext().leaves(); - TermContext[] termContexts = new TermContext[this.queryTerms.length]; - collectTermContext(searcher.getIndexReader(), contexts, termContexts, this.queryTerms); - for(int i=0; i leaves, - TermContext[] contextArray, - Term[] queryTerms) throws IOException { + private void collectTermStates(IndexReader reader, + List leaves, + TermStates[] contextArray, + Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : leaves) { @@ -359,15 +359,15 @@ public class GraphTermsQParserPlugin extends QParserPlugin { for (int i = 0; i < queryTerms.length; i++) { Term term = queryTerms[i]; - TermContext termContext = contextArray[i]; + TermStates termStates = contextArray[i]; if (termsEnum.seekExact(term.bytes())) { - if (termContext == null) { - contextArray[i] = new TermContext(reader.getContext(), + if (termStates == null) { + contextArray[i] = new TermStates(reader.getContext(), termsEnum.termState(), context.ord, termsEnum.docFreq(), 
termsEnum.totalTermFreq()); } else { - termContext.register(termsEnum.termState(), context.ord, + termStates.register(termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } } diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java index 2214a049161..4500464767f 100644 --- a/solr/core/src/java/org/apache/solr/search/Grouping.java +++ b/solr/core/src/java/org/apache/solr/search/Grouping.java @@ -706,17 +706,11 @@ public class Grouping { TotalHitCountCollector fallBackCollector; Collection> topGroups; - /** - * {@inheritDoc} - */ @Override protected void prepare() throws IOException { actualGroupsToFind = getMax(offset, numGroups, maxDoc); } - /** - * {@inheritDoc} - */ @Override protected Collector createFirstPassCollector() throws IOException { // Ok we don't want groups, but do want a total count @@ -730,9 +724,6 @@ public class Grouping { return firstPass; } - /** - * {@inheritDoc} - */ @Override protected Collector createSecondPassCollector() throws IOException { if (actualGroupsToFind <= 0) { @@ -767,18 +758,12 @@ public class Grouping { } } - /** - * {@inheritDoc} - */ @Override public AllGroupHeadsCollector createAllGroupCollector() throws IOException { Sort sortWithinGroup = withinGroupSort != null ? withinGroupSort : Sort.RELEVANCE; return AllGroupHeadsCollector.newCollector(new TermGroupSelector(groupBy), sortWithinGroup); } - /** - * {@inheritDoc} - */ @Override protected void finish() throws IOException { result = secondPass != null ? secondPass.getTopGroups(0) : null; @@ -826,9 +811,6 @@ public class Grouping { } } - /** - * {@inheritDoc} - */ @Override public int getMatches() { if (result == null && fallBackCollector == null) { @@ -838,9 +820,6 @@ public class Grouping { return result != null ? result.totalHitCount : fallBackCollector.getTotalHits(); } - /** - * {@inheritDoc} - */ @Override protected Integer getNumberOfGroups() { return allGroupsCollector == null ? 
null : allGroupsCollector.getGroupCount(); @@ -857,17 +836,11 @@ public class Grouping { TopDocsCollector topCollector; FilterCollector collector; - /** - * {@inheritDoc} - */ @Override protected void prepare() throws IOException { actualGroupsToFind = getMax(offset, numGroups, maxDoc); } - /** - * {@inheritDoc} - */ @Override protected Collector createFirstPassCollector() throws IOException { DocSet groupFilt = searcher.getDocSet(query); @@ -885,9 +858,6 @@ public class Grouping { } } - /** - * {@inheritDoc} - */ @Override protected void finish() throws IOException { TopDocsCollector topDocsCollector = (TopDocsCollector) collector.getDelegate(); @@ -901,9 +871,6 @@ public class Grouping { } } - /** - * {@inheritDoc} - */ @Override public int getMatches() { return collector.getMatches(); @@ -929,9 +896,6 @@ public class Grouping { AllGroupsCollector allGroupsCollector; Collection> topGroups; - /** - * {@inheritDoc} - */ @Override protected void prepare() throws IOException { context = ValueSource.newContext(searcher); @@ -939,9 +903,6 @@ public class Grouping { actualGroupsToFind = getMax(offset, numGroups, maxDoc); } - /** - * {@inheritDoc} - */ @Override protected Collector createFirstPassCollector() throws IOException { // Ok we don't want groups, but do want a total count @@ -955,9 +916,6 @@ public class Grouping { return firstPass; } - /** - * {@inheritDoc} - */ @Override protected Collector createSecondPassCollector() throws IOException { if (actualGroupsToFind <= 0) { @@ -998,9 +956,6 @@ public class Grouping { return AllGroupHeadsCollector.newCollector(newSelector(), sortWithinGroup); } - /** - * {@inheritDoc} - */ @Override protected void finish() throws IOException { result = secondPass != null ? secondPass.getTopGroups(0) : null; @@ -1034,9 +989,6 @@ public class Grouping { } } - /** - * {@inheritDoc} - */ @Override public int getMatches() { if (result == null && fallBackCollector == null) { @@ -1046,9 +998,6 @@ public class Grouping { return result != null ? result.totalHitCount : fallBackCollector.getTotalHits(); } - /** - * {@inheritDoc} - */ @Override protected Integer getNumberOfGroups() { return allGroupsCollector == null ? null : allGroupsCollector.getGroupCount(); diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 9ee5199bdf7..34374764807 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -47,7 +47,7 @@ import org.apache.lucene.index.MultiPostingsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.*; @@ -339,7 +339,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI * Override these two methods to provide a way to use global collection stats. 
*/ @Override - public TermStatistics termStatistics(Term term, TermContext context) throws IOException { + public TermStatistics termStatistics(Term term, TermStates context) throws IOException { final SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo(); if (reqInfo != null) { final StatsSource statsSrc = (StatsSource) reqInfo.getReq().getContext().get(STATS_SOURCE); @@ -362,7 +362,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI return localCollectionStatistics(field); } - public TermStatistics localTermStatistics(Term term, TermContext context) throws IOException { + public TermStatistics localTermStatistics(Term term, TermStates context) throws IOException { return super.termStatistics(term, context); } diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index 367eb641ba0..450d95a7aff 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -27,6 +27,7 @@ import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.function.FunctionScoreQuery; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.BoolDocValues; @@ -325,7 +326,7 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin { public ValueSource parse(FunctionQParser fp) throws SyntaxError { Query q = fp.parseNestedQuery(); ValueSource vs = fp.parseValueSource(); - return new QueryValueSource(BoostQParserPlugin.boostQuery(q, vs), 0.0f); + return new QueryValueSource(FunctionScoreQuery.boostByValue(q, vs.asDoubleValuesSource()), 0.0f); } }); addParser("joindf", new ValueSourceParser() { diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java index b99b4b874fb..09b8ec057a5 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -29,6 +29,9 @@ import org.apache.lucene.util.NumericUtils; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.CurrencyFieldType; +import org.apache.solr.schema.CurrencyValue; +import org.apache.solr.schema.ExchangeRateProvider; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.PointField; import org.apache.solr.schema.SchemaField; @@ -93,9 +96,13 @@ class FacetRangeProcessor extends FacetProcessor { super.process(); // Under the normal mincount=0, each shard will need to return 0 counts since we don't calculate buckets at the top level. - // But if mincount>0 then our sub mincount can be set to 1. - - effectiveMincount = fcontext.isShard() ? (freq.mincount > 0 ? 1 : 0) : freq.mincount; + // If mincount>0 then we could *potentially* set our sub mincount to 1... 
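// ------------------------------------------------------------------------------------------------
// (Illustrative aside, not part of the patch; the mincount comment continues below.) The
// TermContext -> TermStates rename in the surrounding hunks keeps the same usage pattern. A
// minimal sketch of fetching term statistics with the renamed API; the searcher and term here
// are hypothetical:

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;

class TermStatesSketch {
  static TermStatistics stats(IndexSearcher searcher, Term term) throws IOException {
    // the trailing boolean asks TermStates.build to also record docFreq/totalTermFreq
    TermStates states = TermStates.build(searcher.getTopReaderContext(), term, true);
    return searcher.termStatistics(term, states);
  }
}
// ------------------------------------------------------------------------------------------------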
+      // ...but that would require sorting the buckets (by their val) at the top level
+      //
+      // Rather than do that, which could be complicated by non-trivial field types, we'll force the sub-shard effectiveMincount
+      // to be 0, ensuring that we can trivially merge all the buckets from every shard
+      // (we have to filter the merged buckets by the original mincount either way)
+      effectiveMincount = fcontext.isShard() ? 0 : freq.mincount;
       sf = fcontext.searcher.getSchema().getField(freq.field);
       response = getRangeCounts();
     }
@@ -116,6 +123,14 @@ class FacetRangeProcessor extends FacetProcessor {
     }
   }
 
+  /**
+   * Returns a {@link Calc} instance to use for term faceting over a numeric field.
+   * This method is unused for range faceting, and exists solely as a helper method for other classes
+   *
+   * @param sf A field to facet on, must be of a type such that {@link FieldType#getNumberType} is non null
+   * @return a Calc instance with {@link Calc#bitsToValue} and {@link Calc#bitsToSortableBits} methods suitable for the specified field.
+   * @see FacetFieldProcessorByHashDV
+   */
   public static Calc getNumericCalc(SchemaField sf) {
     Calc calc;
     final FieldType ft = sf.getType();
@@ -199,6 +214,8 @@ class FacetRangeProcessor extends FacetProcessor {
           (SolrException.ErrorCode.BAD_REQUEST,
           "Unable to range facet on tried field of unexpected type:" + freq.field);
       }
+    } else if (ft instanceof CurrencyFieldType) {
+      calc = new CurrencyCalc(sf);
     } else {
       throw new SolrException
         (SolrException.ErrorCode.BAD_REQUEST,
@@ -256,7 +273,7 @@
         (include.contains(FacetParams.FacetRangeInclude.EDGE) && 0 == high.compareTo(end)));
 
-      Range range = new Range(low, low, high, incLower, incUpper);
+      Range range = new Range(calc.buildRangeLabel(low), low, high, incLower, incUpper);
       rangeList.add( range );
       low = high;
@@ -396,14 +413,28 @@
       this.field = field;
     }
 
+    /**
+     * Used by {@link FacetFieldProcessorByHashDV} for field faceting on numeric types -- not used for range faceting
+     */
     public Comparable bitsToValue(long bits) {
       return bits;
     }
 
+    /**
+     * Used by {@link FacetFieldProcessorByHashDV} for field faceting on numeric types -- not used for range faceting
+     */
    public long bitsToSortableBits(long bits) {
      return bits;
    }
 
+    /**
+     * Given the low value for a bucket, generates the appropriate "label" object to use.
+     * By default returns the low object unmodified.
+ */ + public Object buildRangeLabel(Comparable low) { + return low; + } + /** * Formats a value into a label used in a response * Default Impl just uses toString() @@ -601,6 +632,84 @@ class FacetRangeProcessor extends FacetProcessor { } } + private static class CurrencyCalc extends Calc { + private String defaultCurrencyCode; + private ExchangeRateProvider exchangeRateProvider; + public CurrencyCalc(final SchemaField field) { + super(field); + if(!(this.field.getType() instanceof CurrencyFieldType)) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Cannot perform range faceting over non CurrencyField fields"); + } + defaultCurrencyCode = + ((CurrencyFieldType)this.field.getType()).getDefaultCurrency(); + exchangeRateProvider = + ((CurrencyFieldType)this.field.getType()).getProvider(); + } + + /** + * Throws a Server Error that this type of operation is not supported for this field + * {@inheritDoc} + */ + @Override + public Comparable bitsToValue(long bits) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Currency Field " + field.getName() + " can not be used in this way"); + } + + /** + * Throws a Server Error that this type of operation is not supported for this field + * {@inheritDoc} + */ + @Override + public long bitsToSortableBits(long bits) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Currency Field " + field.getName() + " can not be used in this way"); + } + + /** + * Returns the short string representation of the CurrencyValue + * @see CurrencyValue#strValue + */ + @Override + public Object buildRangeLabel(Comparable low) { + return ((CurrencyValue)low).strValue(); + } + + @Override + public String formatValue(Comparable val) { + return ((CurrencyValue)val).strValue(); + } + + @Override + protected Comparable parseStr(final String rawval) throws java.text.ParseException { + return CurrencyValue.parse(rawval, defaultCurrencyCode); + } + + @Override + protected Object parseGap(final String rawval) throws java.text.ParseException { + return parseStr(rawval); + } + + @Override + protected Comparable parseAndAddGap(Comparable value, String gap) throws java.text.ParseException{ + if (value == null) { + throw new NullPointerException("Cannot perform range faceting on null CurrencyValue"); + } + CurrencyValue val = (CurrencyValue) value; + CurrencyValue gapCurrencyValue = + CurrencyValue.parse(gap, defaultCurrencyCode); + long gapAmount = + CurrencyValue.convertAmount(this.exchangeRateProvider, + gapCurrencyValue.getCurrencyCode(), + gapCurrencyValue.getAmount(), + val.getCurrencyCode()); + return new CurrencyValue(val.getAmount() + gapAmount, + val.getCurrencyCode()); + + } + + } // this refineFacets method is patterned after FacetFieldProcessor.refineFacets and should // probably be merged when range facet becomes more like field facet in it's ability to sort and limit @@ -705,16 +814,14 @@ class FacetRangeProcessor extends FacetProcessor { (include.contains(FacetParams.FacetRangeInclude.EDGE) && 0 == high.compareTo(end))); - Range range = new Range(low, low, high, incLower, incUpper); + Range range = new Range(calc.buildRangeLabel(low), low, high, incLower, incUpper); // now refine this range SimpleOrderedMap bucket = new SimpleOrderedMap<>(); - FieldType ft = sf.getType(); - bucket.add("val", range.low); // use "low" instead of bucketVal because it will be the right type (we may have been passed back long instead of int for example) - // String internal = ft.toInternal( tobj.toString() ); // TODO - we need a better way to 
get from object to query... - + bucket.add("val", range.label); + Query domainQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); fillBucket(bucket, domainQ, null, skip, facetInfo); diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java index 5fae6c67490..6ddc05e06b9 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java @@ -45,7 +45,7 @@ public class FacetRangeMerger extends FacetRequestSortedMerger { @Override public void sortBuckets() { - // TODO: mincount>0 will mess up order? + // regardless of mincount, every shard returns a consistent set of buckets which are already in the correct order sortedBuckets = new ArrayList<>( buckets.values() ); } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java index 0cc5ea716e0..30dc5c58136 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java @@ -32,9 +32,6 @@ import org.apache.solr.search.grouping.distributed.ShardRequestFactory; */ public class SearchGroupsRequestFactory implements ShardRequestFactory { - /** - * {@inheritDoc} - */ @Override public ShardRequest[] constructRequest(ResponseBuilder rb) { ShardRequest sreq = new ShardRequest(); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java index 5067c518ed5..57776d9764f 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java @@ -48,9 +48,6 @@ public class TopGroupsShardRequestFactory implements ShardRequestFactory { */ public static final String GROUP_NULL_VALUE = "" + ReverseStringFilter.START_OF_HEADING_MARKER; - /** - * {@inheritDoc} - */ @Override public ShardRequest[] constructRequest(ResponseBuilder rb) { // If we have a group.query we need to query all shards... 
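// ------------------------------------------------------------------------------------------------
// (Illustrative aside, not part of the patch; the surrounding comment continues below.) Why
// FacetRangeMerger.sortBuckets above can simply copy the bucket map: with the sub-shard
// effectiveMincount forced to 0, every shard reports the same range buckets in the same order,
// so merging is a per-label sum and the requested mincount is applied once at the top level.
// A simplified sketch with hypothetical stand-in types:

import java.util.LinkedHashMap;
import java.util.Map;

class RangeBucketMergeSketch {
  static Map<Object, Long> merge(Iterable<Map<Object, Long>> shardBuckets, long mincount) {
    Map<Object, Long> merged = new LinkedHashMap<>(); // keeps first-seen (i.e. range) order
    for (Map<Object, Long> shard : shardBuckets) {
      shard.forEach((label, count) -> merged.merge(label, count, Long::sum));
    }
    merged.values().removeIf(count -> count < mincount); // original mincount filter, applied once
    return merged;
  }
}
// ------------------------------------------------------------------------------------------------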
Or we move this to the group first phase queries diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java index 18896e08cff..cc1c87d4860 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java @@ -47,9 +47,6 @@ import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchG */ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor { - /** - * {@inheritDoc} - */ @Override public void process(ResponseBuilder rb, ShardRequest shardRequest) { SortSpec groupSortSpec = rb.getGroupingSpec().getGroupSortSpec(); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/StoredFieldsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/StoredFieldsShardResponseProcessor.java index dcb3c617ee9..734a0e09033 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/StoredFieldsShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/StoredFieldsShardResponseProcessor.java @@ -31,9 +31,6 @@ import org.apache.solr.search.grouping.distributed.ShardResponseProcessor; */ public class StoredFieldsShardResponseProcessor implements ShardResponseProcessor { - /** - * {@inheritDoc} - */ @Override public void process(ResponseBuilder rb, ShardRequest shardRequest) { boolean returnScores = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0; diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java index 231e9bd548f..cf02580b805 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java @@ -49,9 +49,6 @@ import org.apache.solr.search.grouping.distributed.shardresultserializer.TopGrou */ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor { - /** - * {@inheritDoc} - */ @Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb, ShardRequest shardRequest) { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java index 77dfcefd47d..b424670f23e 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java @@ -45,9 +45,6 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer data) throws IOException { final NamedList result = new NamedList<>(data.size()); @@ -73,9 +70,6 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer 
transformToNative(NamedList shardResponse, Sort groupSort, Sort withinGroupSort, String shard) { final Map result = new HashMap<>(shardResponse.size()); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java index 457814c1df7..8d2b3dca0c9 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java @@ -63,9 +63,6 @@ public class TopGroupsResultTransformer implements ShardResultTransformer data) throws IOException { NamedList result = new NamedList<>(); @@ -88,9 +85,6 @@ public class TopGroupsResultTransformer implements ShardResultTransformer transformToNative(NamedList shardResponse, Sort groupSort, Sort withinGroupSort, String shard) { Map result = new HashMap<>(); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java index de2dee49d26..b060590245f 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java @@ -44,9 +44,6 @@ public class GroupedEndResultTransformer implements EndResultTransformer { this.searcher = searcher; } - /** - * {@inheritDoc} - */ @Override public void transform(Map result, ResponseBuilder rb, SolrDocumentSource solrDocumentSource) { NamedList commands = new SimpleOrderedMap<>(); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/MainEndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/MainEndResultTransformer.java index 630aa63fcd7..3e11abc5ce0 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/MainEndResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/MainEndResultTransformer.java @@ -31,9 +31,6 @@ import java.util.Map; */ public class MainEndResultTransformer implements EndResultTransformer { - /** - * {@inheritDoc} - */ @Override public void transform(Map result, ResponseBuilder rb, SolrDocumentSource solrDocumentSource) { Object value = result.get(rb.getGroupingSpec().getFields()[0]); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/SimpleEndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/SimpleEndResultTransformer.java index 8d11674838b..593f84fab5f 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/SimpleEndResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/SimpleEndResultTransformer.java @@ -32,9 +32,6 @@ import java.util.Map; */ public class SimpleEndResultTransformer implements EndResultTransformer { - /** - * {@inheritDoc} - */ @Override public void transform(Map result, ResponseBuilder rb, SolrDocumentSource solrDocumentSource) { NamedList commands = new SimpleOrderedMap<>(); diff --git a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java 
b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java index 35b1b382e3f..f09ddf760a0 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java @@ -19,7 +19,7 @@ package org.apache.solr.search.stats; import com.google.common.collect.Lists; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -162,7 +162,7 @@ public class ExactStatsCache extends StatsCache { HashMap statsMap = new HashMap<>(); HashMap colMap = new HashMap<>(); for (Term t : terms) { - TermContext termContext = TermContext.build(context, t); + TermStates termStates = TermStates.build(context, t, true); if (!colMap.containsKey(t.field())) { // collection stats for this field CollectionStatistics collectionStatistics = searcher.localCollectionStatistics(t.field()); @@ -171,7 +171,7 @@ public class ExactStatsCache extends StatsCache { } } - TermStatistics tst = searcher.localTermStatistics(t, termContext); + TermStatistics tst = searcher.localTermStatistics(t, termStates); if (tst == null) { // skip terms that are not present here continue; } @@ -322,7 +322,7 @@ public class ExactStatsCache extends StatsCache { this.colStatsCache = colStatsCache; } - public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context) + public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) throws IOException { TermStats termStats = termStatsCache.get(term.toString()); // TermStats == null is also true if term has no docFreq anyway, diff --git a/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java index 99efb8d7530..94e3a5f43e9 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java @@ -24,7 +24,7 @@ import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; import org.apache.solr.core.PluginInfo; @@ -132,7 +132,7 @@ public class LRUStatsCache extends ExactStatsCache { this.colStatsCache = colStatsCache; } @Override - public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context) + public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) throws IOException { TermStats termStats = termStatsCache.get(term.toString()); if (termStats == null) { diff --git a/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java b/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java index 989f3ad0353..3a08a610151 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java +++ b/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java @@ -19,7 +19,7 @@ package org.apache.solr.search.stats; import java.io.IOException; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import 
org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; import org.apache.solr.search.SolrIndexSearcher; @@ -34,7 +34,7 @@ public final class LocalStatsSource extends StatsSource { } @Override - public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context) + public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) throws IOException { return localSearcher.localTermStatistics(term, context); } diff --git a/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java b/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java index 4daaa48c2e4..c187fef16fc 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java +++ b/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java @@ -19,7 +19,7 @@ package org.apache.solr.search.stats; import java.io.IOException; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermStatistics; @@ -34,7 +34,7 @@ import org.apache.solr.search.SolrIndexSearcher; */ public abstract class StatsSource { - public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context) + public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) throws IOException; public abstract CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field) diff --git a/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java b/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java index d8f2ef21279..a9d112ac40b 100644 --- a/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java +++ b/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java @@ -48,6 +48,7 @@ public abstract class AuthenticationPlugin implements Closeable { * the response and status code have already been sent. * @throws Exception any exception thrown during the authentication, e.g. 
PrivilegedActionException */ + //TODO redeclare params as HttpServletRequest & HttpServletResponse public abstract boolean doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain) throws Exception; diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java index 8f8bda8419e..714d1270be3 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java @@ -326,8 +326,10 @@ public class SolrDispatchFilter extends BaseSolrFilter { doFilter(request, response, chain, false); } - public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain, boolean retry) throws IOException, ServletException { - if (!(request instanceof HttpServletRequest)) return; + public void doFilter(ServletRequest _request, ServletResponse _response, FilterChain chain, boolean retry) throws IOException, ServletException { + if (!(_request instanceof HttpServletRequest)) return; + HttpServletRequest request = (HttpServletRequest)_request; + HttpServletResponse response = (HttpServletResponse)_response; try { @@ -343,28 +345,24 @@ public class SolrDispatchFilter extends BaseSolrFilter { } } - AtomicReference wrappedRequest = new AtomicReference<>(); - if (!authenticateRequest(request, response, wrappedRequest)) { // the response and status code have already been - // sent + AtomicReference wrappedRequest = new AtomicReference<>(); + if (!authenticateRequest(request, response, wrappedRequest)) { // the response and status code have already been sent return; } if (wrappedRequest.get() != null) { request = wrappedRequest.get(); } - request = closeShield(request, retry); - response = closeShield(response, retry); - if (cores.getAuthenticationPlugin() != null) { - log.debug("User principal: {}", ((HttpServletRequest) request).getUserPrincipal()); + log.debug("User principal: {}", request.getUserPrincipal()); } // No need to even create the HttpSolrCall object if this path is excluded. 
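[Aside: the doFilter rewrite above is a cast-once refactor. Instead of re-casting to (HttpServletRequest)/(HttpServletResponse) at every use site, the non-HTTP case is rejected once at the top and the HTTP subtypes are used throughout. A minimal standalone sketch of the pattern; the filter class and header names here are illustrative, not from the patch:

import java.io.IOException;
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

public class CastOnceFilter implements Filter {
  @Override
  public void doFilter(ServletRequest _req, ServletResponse _rsp, FilterChain chain)
      throws IOException, ServletException {
    if (!(_req instanceof HttpServletRequest)) return;    // not HTTP: nothing for us to do
    HttpServletRequest req = (HttpServletRequest) _req;   // cast once here...
    HttpServletResponse rsp = (HttpServletResponse) _rsp; // ...then use the HTTP API freely below
    rsp.setHeader("X-Example", req.getServletPath());     // no further casts needed
    chain.doFilter(req, rsp);
  }

  @Override public void init(FilterConfig cfg) {} // no-op
  @Override public void destroy() {}              // no-op
}

The same change narrows closeShield (further below) to the HTTP types, so the test-mode wrappers no longer need casts either.]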
if (excludePatterns != null) { - String requestPath = ((HttpServletRequest) request).getServletPath(); - String extraPath = ((HttpServletRequest) request).getPathInfo(); - if (extraPath != null) { // In embedded mode, servlet path is empty - include all post-context path here for - // testing + String requestPath = request.getServletPath(); + String extraPath = request.getPathInfo(); + if (extraPath != null) { + // In embedded mode, servlet path is empty - include all post-context path here for testing requestPath += extraPath; } for (Pattern p : excludePatterns) { @@ -376,7 +374,7 @@ public class SolrDispatchFilter extends BaseSolrFilter { } } - HttpSolrCall call = getHttpSolrCall((HttpServletRequest) request, (HttpServletResponse) response, retry); + HttpSolrCall call = getHttpSolrCall(closeShield(request, retry), closeShield(response, retry), retry); ExecutorUtil.setServerThreadFlag(Boolean.TRUE); try { Action result = call.call(); @@ -385,7 +383,7 @@ public class SolrDispatchFilter extends BaseSolrFilter { chain.doFilter(request, response); break; case RETRY: - doFilter(request, response, chain, true); + doFilter(request, response, chain, true); // RECURSION break; case FORWARD: request.getRequestDispatcher(call.getPath()).forward(request, response); @@ -396,7 +394,7 @@ public class SolrDispatchFilter extends BaseSolrFilter { ExecutorUtil.setServerThreadFlag(null); } } finally { - consumeInputFully((HttpServletRequest) request); + consumeInputFully(request); } } @@ -430,7 +428,7 @@ public class SolrDispatchFilter extends BaseSolrFilter { } } - private boolean authenticateRequest(ServletRequest request, ServletResponse response, final AtomicReference wrappedRequest) throws IOException { + private boolean authenticateRequest(HttpServletRequest request, HttpServletResponse response, final AtomicReference wrappedRequest) throws IOException { boolean requestContinues = false; final AtomicBoolean isAuthenticated = new AtomicBoolean(false); AuthenticationPlugin authenticationPlugin = cores.getAuthenticationPlugin(); @@ -440,9 +438,9 @@ public class SolrDispatchFilter extends BaseSolrFilter { // /admin/info/key must be always open. see SOLR-9188 // tests work only w/ getPathInfo //otherwise it's just enough to have getServletPath() - if (PKIAuthenticationPlugin.PATH.equals(((HttpServletRequest) request).getServletPath()) || - PKIAuthenticationPlugin.PATH.equals(((HttpServletRequest) request).getPathInfo())) return true; - String header = ((HttpServletRequest) request).getHeader(PKIAuthenticationPlugin.HEADER); + if (PKIAuthenticationPlugin.PATH.equals(request.getServletPath()) || + PKIAuthenticationPlugin.PATH.equals(request.getPathInfo())) return true; + String header = request.getHeader(PKIAuthenticationPlugin.HEADER); if (header != null && cores.getPkiAuthenticationPlugin() != null) authenticationPlugin = cores.getPkiAuthenticationPlugin(); try { @@ -450,7 +448,7 @@ public class SolrDispatchFilter extends BaseSolrFilter { // upon successful authentication, this should call the chain's next filter. requestContinues = authenticationPlugin.doAuthenticate(request, response, (req, rsp) -> { isAuthenticated.set(true); - wrappedRequest.set(req); + wrappedRequest.set((HttpServletRequest) req); }); } catch (Exception e) { log.info("Error authenticating", e); @@ -478,9 +476,9 @@ public class SolrDispatchFilter extends BaseSolrFilter { * @param retry If this is an original request or a retry. * @return A request object with an {@link InputStream} that will ignore calls to close. 
*/ - private ServletRequest closeShield(ServletRequest request, boolean retry) { + private HttpServletRequest closeShield(HttpServletRequest request, boolean retry) { if (testMode && !retry) { - return new HttpServletRequestWrapper((HttpServletRequest) request) { + return new HttpServletRequestWrapper(request) { ServletInputStream stream; @Override @@ -510,9 +508,9 @@ public class SolrDispatchFilter extends BaseSolrFilter { * @param retry If this response corresponds to an original request or a retry. * @return A response object with an {@link OutputStream} that will ignore calls to close. */ - private ServletResponse closeShield(ServletResponse response, boolean retry) { + private HttpServletResponse closeShield(HttpServletResponse response, boolean retry) { if (testMode && !retry) { - return new HttpServletResponseWrapper((HttpServletResponse) response) { + return new HttpServletResponseWrapper(response) { ServletOutputStream stream; @Override diff --git a/solr/core/src/java/org/apache/solr/update/processor/TimeRoutedAliasUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/TimeRoutedAliasUpdateProcessor.java index 91489125670..6f71accd042 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/TimeRoutedAliasUpdateProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/TimeRoutedAliasUpdateProcessor.java @@ -19,6 +19,7 @@ package org.apache.solr.update.processor; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.text.ParseException; import java.time.Instant; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; @@ -29,22 +30,34 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.TimeZone; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import java.util.stream.Collectors; +import org.apache.solr.cloud.Overseer; +import org.apache.solr.cloud.api.collections.RoutedAliasCreateCollectionCmd; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.Aliases; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkCoreNodeProps; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.params.CollectionParams; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.UpdateParams; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.admin.CollectionsHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; @@ -52,14 +65,18 @@ import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.update.DeleteUpdateCommand; import org.apache.solr.update.SolrCmdDistributor; import org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase; +import org.apache.solr.util.DateMathParser; +import org.apache.solr.util.TimeZoneUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static 
org.apache.solr.handler.admin.CollectionsHandler.DEFAULT_COLLECTION_OP_TIMEOUT; import static org.apache.solr.update.processor.DistributedUpdateProcessor.DISTRIB_FROM; import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM; /** - * Distributes update requests to rolling series of collections partitioned by a timestamp field. + * Distributes update requests to a rolling series of collections partitioned by a timestamp field. Issues + * requests to create new collections on-demand. * * Depends on this core having a special core property that points to the alias name that this collection is a part of. * And further requires certain metadata on the Alias. @@ -69,16 +86,15 @@ import static org.apache.solr.update.processor.DistributingUpdateProcessorFactor public class TimeRoutedAliasUpdateProcessor extends UpdateRequestProcessor { //TODO do we make this more generic to others who want to partition collections using something else? - // TODO auto add new collection partitions when cross a timestamp boundary. That needs to be coordinated to avoid - // race conditions, remembering that even the lead collection might have multiple instances of this URP - // (multiple shards or perhaps just multiple streams thus instances of this URP) - public static final String ALIAS_DISTRIB_UPDATE_PARAM = "alias." + DISTRIB_UPDATE_PARAM; // param public static final String TIME_PARTITION_ALIAS_NAME_CORE_PROP = "timePartitionAliasName"; // core prop - public static final String ROUTER_FIELD_METADATA = "router.field"; // alias metadata + // alias metadata: + public static final String ROUTER_FIELD_METADATA = "router.field"; + public static final String ROUTER_MAX_FUTURE_TIME_METADATA = "router.maxFutureMs"; + public static final String ROUTER_INTERVAL_METADATA = "router.interval"; // This format must be compatible with collection name limitations - private static final DateTimeFormatter DATE_TIME_FORMATTER = new DateTimeFormatterBuilder() + public static final DateTimeFormatter DATE_TIME_FORMATTER = new DateTimeFormatterBuilder() .append(DateTimeFormatter.ISO_LOCAL_DATE).appendPattern("[_HH[_mm[_ss]]]") //brackets mean optional .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) @@ -87,18 +103,26 @@ public class TimeRoutedAliasUpdateProcessor extends UpdateRequestProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + // used to limit unnecessary concurrent collection creation requests + private static ConcurrentHashMap aliasToSemaphoreMap = new ConcurrentHashMap<>(4); + private final String thisCollection; private final String aliasName; private final String routeField; + private final long maxFutureMs; + private final String intervalDateMath; + private final TimeZone intervalTimeZone; - private final SolrCmdDistributor cmdDistrib; private final ZkController zkController; + private final SolrCmdDistributor cmdDistrib; + private final CollectionsHandler collHandler; private final SolrParams outParamsToLeader; private List> parsedCollectionsDesc; // k=timestamp (start), v=collection. 
Sorted descending
   private Aliases parsedCollectionsAliases; // a cached reference to the source of what we parse into parsedCollectionsDesc
 
   public static UpdateRequestProcessor wrap(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    //TODO get from "Collection property"
     final String timePartitionAliasName = req.getCore().getCoreDescriptor()
         .getCoreProperty(TIME_PARTITION_ALIAS_NAME_CORE_PROP, null);
     final DistribPhase shardDistribPhase =
@@ -126,12 +150,21 @@ public class TimeRoutedAliasUpdateProcessor extends UpdateRequestProcessor {
     CoreContainer cc = core.getCoreContainer();
     zkController = cc.getZkController();
     cmdDistrib = new SolrCmdDistributor(cc.getUpdateShardHandler());
+    collHandler = cc.getCollectionsHandler();
 
     final Map<String, String> aliasMetadata = zkController.getZkStateReader().getAliases().getCollectionAliasMetadata(aliasName);
     if (aliasMetadata == null) {
       throw newAliasMustExistException(); // if it did exist, we'd have a non-null map
     }
     routeField = aliasMetadata.get(ROUTER_FIELD_METADATA);
+    intervalDateMath = aliasMetadata.getOrDefault(ROUTER_INTERVAL_METADATA, "+1DAY");
+    String futureTimeStr = aliasMetadata.get(ROUTER_MAX_FUTURE_TIME_METADATA);
+    if (futureTimeStr != null) {
+      maxFutureMs = Long.parseLong(futureTimeStr);
+    } else {
+      maxFutureMs = TimeUnit.MINUTES.toMillis(10);
+    }
+    intervalTimeZone = TimeZoneUtils.parseTimezone(aliasMetadata.get(CommonParams.TZ));
 
     ModifiableSolrParams outParams = new ModifiableSolrParams(req.getParams());
     // Don't distribute these params; they will be distributed from the local processCommit separately.
@@ -153,11 +186,59 @@
   @Override
   public void processAdd(AddUpdateCommand cmd) throws IOException {
     final Object routeValue = cmd.getSolrInputDocument().getFieldValue(routeField);
-    final String targetCollection = findTargetCollectionGivenRouteKey(routeValue);
-    if (targetCollection == null) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-          "Doc " + cmd.getPrintableId() + " couldn't be routed with " + routeField + "=" + routeValue);
-    }
+    final Instant routeTimestamp = parseRouteKey(routeValue);
+
+    updateParsedCollectionAliases();
+    String targetCollection;
+    do {
+      targetCollection = findTargetCollectionGivenTimestamp(routeTimestamp);
+
+      if (targetCollection == null) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "Doc " + cmd.getPrintableId() + " couldn't be routed with " + routeField + "=" + routeTimestamp);
+      }
+
+      // Note: the following rule is tempting but not necessary and is not compatible with
+      // only using this URP when the alias distrib phase is NONE; otherwise a doc may be routed from a non-recent
+      // collection to the most recent, only to then go there directly instead of realizing a new collection is needed.
+      //   // If it's going to some other collection (not "this") then break to just send it there
+      //   if (!thisCollection.equals(targetCollection)) {
+      //     break;
+      //   }
+      // Also tempting but not compatible: check that we're the leader, if not then break
+
+      // If the doc goes to the most recent collection then do some checks below, otherwise break the loop.
+      final Instant mostRecentCollTimestamp = parsedCollectionsDesc.get(0).getKey();
+      final String mostRecentCollName = parsedCollectionsDesc.get(0).getValue();
+      if (!mostRecentCollName.equals(targetCollection)) {
+        break;
+      }
+
+      // Check the doc isn't too far in the future
+      final Instant maxFutureTime = Instant.now().plusMillis(maxFutureMs);
+      if (routeTimestamp.isAfter(maxFutureTime)) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "The document's time routed key of " + routeValue + " is too far in the future given " +
+                ROUTER_MAX_FUTURE_TIME_METADATA + "=" + maxFutureMs);
+      }
+
+      // Create a new collection?
+      final Instant nextCollTimestamp = computeNextCollTimestamp(mostRecentCollTimestamp, intervalDateMath, intervalTimeZone);
+      if (routeTimestamp.isBefore(nextCollTimestamp)) {
+        break; // thus we don't need another collection
+      }
+
+      createCollectionAfter(mostRecentCollName); // *should* throw if it fails for some reason but...
+      final boolean updated = updateParsedCollectionAliases();
+      if (!updated) { // thus we didn't make progress...
+        // this is not expected, even in known failure cases, but we check just in case
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+            "We need to create a new time routed collection but for unknown reasons were unable to do so.");
+      }
+      // then retry the loop ...
+    } while(true);
+
+    assert targetCollection != null;
+
     if (thisCollection.equals(targetCollection)) {
       // pass on through; we've reached the right collection
       super.processAdd(cmd);
@@ -168,7 +249,23 @@
     }
   }
 
-  protected String findTargetCollectionGivenRouteKey(Object routeKey) {
+  /** Computes the timestamp of the next collection given the timestamp of the one before. */
+  public static Instant computeNextCollTimestamp(Instant fromTimestamp, String intervalDateMath, TimeZone intervalTimeZone) {
+    //TODO overload DateMathParser.parseMath to take tz and "now"
+    final DateMathParser dateMathParser = new DateMathParser(intervalTimeZone);
+    dateMathParser.setNow(Date.from(fromTimestamp));
+    final Instant nextCollTimestamp;
+    try {
+      nextCollTimestamp = dateMathParser.parseMath(intervalDateMath).toInstant();
+    } catch (ParseException e) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          "Invalid Date Math String:'" + intervalDateMath + '\'', e);
+    }
+    assert nextCollTimestamp.isAfter(fromTimestamp);
+    return nextCollTimestamp;
+  }
+
+  private Instant parseRouteKey(Object routeKey) {
     final Instant docTimestamp;
     if (routeKey instanceof Instant) {
       docTimestamp = (Instant) routeKey;
@@ -179,15 +276,30 @@
     } else {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unexpected type of routeKey: " + routeKey);
     }
+    return docTimestamp;
+  }
+
+  /**
+   * Ensure {@link #parsedCollectionsAliases} is up to date. If it was modified, return true.
+   * Note that this will return true if some other alias was modified or if metadata was modified. These
+   * are spurious and the caller should be written to be tolerant of no material changes.
+   */
+  private boolean updateParsedCollectionAliases() {
     final Aliases aliases = zkController.getZkStateReader().getAliases(); // note: might be different from last request
     if (this.parsedCollectionsAliases != aliases) {
       if (this.parsedCollectionsAliases != null) {
-        log.info("Observing possibly updated alias {}", aliasName);
+        log.debug("Observing possibly updated alias: {}", aliasName);
       }
-      this.parsedCollectionsDesc = doParseCollections(aliases);
+      this.parsedCollectionsDesc = parseCollections(aliasName, aliases, this::newAliasMustExistException);
       this.parsedCollectionsAliases = aliases;
+      return true;
     }
-    // iterates in reverse chronological order
+    return false;
+  }
+
+  /** Given the route key, finds the collection. Returns null if too old to go in the last one. */
+  private String findTargetCollectionGivenTimestamp(Instant docTimestamp) {
+    // Lookup targetCollection given route key. Iterates in reverse chronological order.
     // We're O(N) here but N should be small, the loop is fast, and usually looking for 1st.
     for (Map.Entry<Instant, String> entry : parsedCollectionsDesc) {
       Instant colStartTime = entry.getKey();
@@ -195,16 +307,77 @@
         return entry.getValue(); //found it
       }
     }
-    return null;
+    return null; //not found
   }
 
-  /** Parses the timestamp from the collection list and returns them in reverse sorted order (newest 1st) */
-  private List<Map.Entry<Instant,String>> doParseCollections(Aliases aliases) {
+  private void createCollectionAfter(String mostRecentCollName) {
+    // Invoke ROUTEDALIAS_CREATECOLL (in the Overseer, locked by alias name). It will create the collection
+    // and update the alias contingent on the most recent collection name being the same as
+    // what we think it is here, otherwise it will return (without error).
+    // To avoid needless concurrent communication with the Overseer from this JVM, we
+    // maintain a Semaphore from an alias name keyed ConcurrentHashMap.
+    // Alternatively a Lock or CountDownLatch could have been used but they didn't seem
+    // to make it any easier.
+
+    final Semaphore semaphore = aliasToSemaphoreMap.computeIfAbsent(aliasName, n -> new Semaphore(1));
+    if (semaphore.tryAcquire()) {
+      try {
+        final String operation = CollectionParams.CollectionAction.ROUTEDALIAS_CREATECOLL.toLower();
+        Map<String, Object> msg = new HashMap<>();
+        msg.put(Overseer.QUEUE_OPERATION, operation);
+        msg.put(CollectionParams.NAME, aliasName);
+        msg.put(RoutedAliasCreateCollectionCmd.IF_MOST_RECENT_COLL_NAME, mostRecentCollName);
+        SolrQueryResponse rsp = new SolrQueryResponse();
+        try {
+          this.collHandler.handleResponse(
+              operation,
+              new ZkNodeProps(msg),
+              rsp);
+          if (rsp.getException() != null) {
+            throw rsp.getException();
+          } // otherwise don't care about the response. It's possible no collection was created because
+            // of a race and that's okay... we'll ultimately retry anyway.
+
+          // Ensure our view of the aliases has updated. If we didn't do this, our zkStateReader might
+          // not yet know about the new alias (thus won't see the newly added collection to it), and we might think
+          // we failed.
+          zkController.getZkStateReader().aliasesHolder.update();
+        } catch (RuntimeException e) {
+          throw e;
+        } catch (Exception e) {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+        }
+      } finally {
+        semaphore.release(); // to signal we're done to anyone waiting on it
+      }
+
+    } else {
+      // Failed to acquire permit because another URP instance on this JVM is creating a collection.
+      // So wait until it's available.
+      log.debug("Collection creation is already in progress so we'll wait then try again.");
+      try {
+        if (semaphore.tryAcquire(DEFAULT_COLLECTION_OP_TIMEOUT, TimeUnit.MILLISECONDS)) {
+          semaphore.release(); // we don't actually want a permit so give it back
+          // return to continue...
+        } else {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+              "Waited too long for another update thread to be done with collection creation.");
+        }
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+            "Interrupted waiting on collection creation.", e); // if we were interrupted, give up.
+      }
+    }
+  }
+
+  /** Parses the timestamp from the collection list and returns them in reverse sorted order (most recent 1st) */
+  public static List<Map.Entry<Instant,String>> parseCollections(String aliasName, Aliases aliases, Supplier<SolrException> aliasNotExist) {
     final List<String> collections = aliases.getCollectionAliasListMap().get(aliasName);
     if (collections == null) {
-      throw newAliasMustExistException();
+      throw aliasNotExist.get();
     }
-    // note: I considered TreeMap but didn't like the log(N) just to grab the head when we use it later
+    // note: I considered TreeMap but didn't like the log(N) just to grab the most recent when we use it later
     List<Map.Entry<Instant,String>> result = new ArrayList<>(collections.size());
     for (String collection : collections) {
       Instant colStartTime = parseInstantFromCollectionName(aliasName, collection);
@@ -225,6 +398,17 @@
     return DATE_TIME_FORMATTER.parse(dateTimePart, Instant::from);
   }
 
+  public static String formatCollectionNameFromInstant(String aliasName, Instant timestamp) {
+    String nextCollName = TimeRoutedAliasUpdateProcessor.DATE_TIME_FORMATTER.format(timestamp);
+    for (int i = 0; i < 3; i++) { // chop off seconds, minutes, hours
+      if (nextCollName.endsWith("_00")) {
+        nextCollName = nextCollName.substring(0, nextCollName.length()-3);
+      }
+    }
+    assert TimeRoutedAliasUpdateProcessor.DATE_TIME_FORMATTER.parse(nextCollName, Instant::from).equals(timestamp);
+    return aliasName + "_" + nextCollName;
+  }
+
   @Override
   public void processDelete(DeleteUpdateCommand cmd) throws IOException {
     final List<SolrCmdDistributor.Node> nodes = lookupShardLeadersOfCollections();
diff --git a/solr/core/src/java/org/apache/solr/util/TimeZoneUtils.java b/solr/core/src/java/org/apache/solr/util/TimeZoneUtils.java
index 9d11f81f03e..0600a83170f 100644
--- a/solr/core/src/java/org/apache/solr/util/TimeZoneUtils.java
+++ b/solr/core/src/java/org/apache/solr/util/TimeZoneUtils.java
@@ -25,6 +25,8 @@ import java.util.Arrays;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 
+import org.apache.solr.common.SolrException;
+
 /**
  * Simple utilities for working with TimeZones
  * @see java.util.TimeZone
@@ -82,4 +84,20 @@ public final class TimeZoneUtils {
 
   private static Pattern CUSTOM_ID_REGEX = Pattern.compile("GMT(?:\\+|\\-)(\\d{1,2})(?::?(\\d{2}))?");
 
+  /**
+   * Parse the specified timezone ID. If the input is null then return UTC. If we can't resolve it then
+   * throw an exception.
+   */
+  public static TimeZone parseTimezone(String tzStr) {
+    if (tzStr != null) {
+      TimeZone tz = getTimeZone(tzStr);
+      if (null == tz) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "Solr JVM does not support TZ: " + tzStr);
+      }
+      return tz;
+    } else {
+      return DateMathParser.UTC; //TODO move to TimeZoneUtils
+    }
+  }
 }
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema11.xml b/solr/core/src/test-files/solr/collection1/conf/schema11.xml
index 25b7e22bf7f..d09e2097247 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema11.xml
@@ -339,10 +339,35 @@ valued. -->
[25 added schema lines elided: the XML element markup was stripped during extraction, leaving only bare "+" markers, so the field and fieldType definitions added here are not recoverable.]
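[To make computeNextCollTimestamp above concrete: the next collection's start time is plain date math applied relative to the previous start. A small usage sketch; the timestamps and the UTC zone are made-up example values, while the DateMathParser calls are the same ones the patch itself uses:

import java.text.ParseException;
import java.time.Instant;
import java.util.Date;
import java.util.TimeZone;
import org.apache.solr.util.DateMathParser;

public class NextCollTimestampDemo {
  public static void main(String[] args) throws ParseException {
    Instant mostRecentStart = Instant.parse("2018-01-15T00:00:00Z"); // example: newest collection's start
    DateMathParser dmp = new DateMathParser(TimeZone.getTimeZone("UTC"));
    dmp.setNow(Date.from(mostRecentStart));            // date math is evaluated relative to "now"
    Instant next = dmp.parseMath("+1DAY").toInstant(); // "+1DAY" is the router.interval default
    System.out.println(next);                          // prints 2018-01-16T00:00:00Z
  }
}

With router.maxFutureMs left at its 10-minute default, a document stamped more than 10 minutes past Instant.now() is rejected rather than triggering collection creation.]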
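[Similarly, formatCollectionNameFromInstant keeps collection names short by chopping up to three trailing "_00" segments (seconds, then minutes, then hours) off the formatted timestamp; DATE_TIME_FORMATTER's optional sections let parseInstantFromCollectionName round-trip the shortened name. A rough illustration of the resulting names, where the alias name "myalias" is a made-up example:

import java.time.Instant;
import org.apache.solr.update.processor.TimeRoutedAliasUpdateProcessor;

public class CollNameDemo {
  public static void main(String[] args) {
    // at a midnight boundary the hour, minute and second segments are all "_00" and get chopped
    System.out.println(TimeRoutedAliasUpdateProcessor.formatCollectionNameFromInstant(
        "myalias", Instant.parse("2018-01-15T00:00:00Z"))); // myalias_2018-01-15
    // a 6am boundary keeps the hour segment; only the zero minutes and seconds are dropped
    System.out.println(TimeRoutedAliasUpdateProcessor.formatCollectionNameFromInstant(
        "myalias", Instant.parse("2018-01-15T06:00:00Z"))); // myalias_2018-01-15_06
  }
}]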
[The remainder of the patch is residue of a new Angular admin UI partial whose HTML markup was stripped during extraction. The recoverable fragments are: a "Cluster Suggestions" page heading; a table with "Type", "Reason" and "Action" column headers; a placeholder row of "NA" cells; and per-suggestion rows bound to {{ x.type }} and {{ x.violation.clause }} alongside an action button. The surrounding tags, and the diff header naming the file, did not survive.]
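[Stepping back, the concurrency control in createCollectionAfter is a per-key "single flight" pattern: a ConcurrentHashMap hands out one Semaphore per alias, the winning thread talks to the Overseer, and latecomers merely wait for it to finish before re-checking the alias state. A distilled, self-contained sketch of that pattern; the names and timeout handling are illustrative, not the patch's code:

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

public class SingleFlight {
  // one permit per key; at most one thread per key performs the expensive action
  private static final ConcurrentHashMap<String, Semaphore> KEYED = new ConcurrentHashMap<>();

  public static void runOnce(String key, Runnable action, long waitMs) throws InterruptedException {
    Semaphore s = KEYED.computeIfAbsent(key, k -> new Semaphore(1));
    if (s.tryAcquire()) {
      try {
        action.run(); // we won the race: do the work
      } finally {
        s.release();  // signal waiters that we're done
      }
    } else if (s.tryAcquire(waitMs, TimeUnit.MILLISECONDS)) {
      s.release();    // someone else did the work; we only waited for it to finish
    } else {
      throw new IllegalStateException("Timed out waiting for work keyed by " + key);
    }
  }
}

The patch's variant differs only in the details: the waiting thread re-reads the alias list afterwards and retries the routing loop, and a timeout or interruption surfaces as a SolrException instead.]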