diff --git a/.github/workflows/run-checks-gradle-upgrade.yml b/.github/workflows/run-checks-gradle-upgrade.yml index 751c3471b51..4e0ef65f03a 100644 --- a/.github/workflows/run-checks-gradle-upgrade.yml +++ b/.github/workflows/run-checks-gradle-upgrade.yml @@ -30,7 +30,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - java-version: [ '22' ] + java-version: [ '23-ea' ] uses-alt-java: [ true, false ] runs-on: ${{ matrix.os }} @@ -61,7 +61,16 @@ jobs: # https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-environment-variable echo "RUNTIME_JAVA_HOME=${{ env.ALT_JAVA_DIR }}" >> "$GITHUB_ENV" - - run: ./gradlew -p lucene/core check -x test + - name: ./gradlew tidy + run: | + ./gradlew tidy + if [ ! -z "$(git status --porcelain)" ]; then + echo ":warning: **tidy left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + git status --porcelain >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + git reset --hard && git clean -xfd . + fi - name: ./gradlew regenerate run: | @@ -69,7 +78,7 @@ jobs: sudo apt-get install libwww-perl ./gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks if [ ! -z "$(git status --porcelain)" ]; then - echo ":warning: **regenerateleft local checkout in modified state**" >> $GITHUB_STEP_SUMMARY + echo ":warning: **regenerate left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY git status --porcelain >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -79,8 +88,7 @@ jobs: - run: ./gradlew testOpts - run: ./gradlew helpWorkflow - run: ./gradlew licenses updateLicenses - - run: ./gradlew tidy - - run: ./gradlew check -x test + - run: ./gradlew check -x test -Pvalidation.git.failOnModified=false - run: ./gradlew assembleRelease mavenToLocal # Conserve resources: only run these in non-alt-java mode. 
diff --git a/.github/workflows/run-nightly-smoketester.yml b/.github/workflows/run-nightly-smoketester.yml index c0987afc8aa..18a826e4a36 100644 --- a/.github/workflows/run-nightly-smoketester.yml +++ b/.github/workflows/run-nightly-smoketester.yml @@ -18,7 +18,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - java-version: [ '21', '22' ] + java-version: [ '21', '22', '23-ea' ] runs-on: ${{ matrix.os }} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java index aaf7059bb5f..d084243b2a5 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java @@ -60,8 +60,8 @@ public class WrapperDownloader { public static void checkVersion() { int major = Runtime.version().feature(); - if (major != 21 && major != 22) { - throw new IllegalStateException("java version must be 21 or 22, your version: " + major); + if (major != 21 && major != 22 && major != 23) { + throw new IllegalStateException("java version must be 21, 22 or 23, your version: " + major); } } diff --git a/gradle/documentation/render-javadoc.gradle b/gradle/documentation/render-javadoc.gradle index ddfb68a1b1c..a161fbc24dd 100644 --- a/gradle/documentation/render-javadoc.gradle +++ b/gradle/documentation/render-javadoc.gradle @@ -32,7 +32,7 @@ allprojects { missingdoclet "org.apache.lucene.tools:missing-doclet" } - ext { + project.ext { relativeDocPath = project.path.replaceFirst(/:\w+:/, "").replace(':', '/') } diff --git a/gradle/generation/kuromoji.gradle b/gradle/generation/kuromoji.gradle index cfe2cd559ce..a5aa5fbd604 100644 --- a/gradle/generation/kuromoji.gradle +++ b/gradle/generation/kuromoji.gradle @@ -33,7 +33,7 @@ configure(project(":lucene:analysis:kuromoji")) { apply plugin: deps.plugins.undercouch.download.get().pluginId 
plugins.withType(JavaPlugin) { - ext { + project.ext { targetDir = file("src/resources") } diff --git a/gradle/generation/nori.gradle b/gradle/generation/nori.gradle index db05babdf03..3a558325964 100644 --- a/gradle/generation/nori.gradle +++ b/gradle/generation/nori.gradle @@ -33,7 +33,7 @@ configure(project(":lucene:analysis:nori")) { apply plugin: deps.plugins.undercouch.download.get().pluginId plugins.withType(JavaPlugin) { - ext { + project.ext { targetDir = file("src/resources") } diff --git a/gradle/testing/beasting.gradle b/gradle/testing/beasting.gradle index 8934100ec10..67c20140ba8 100644 --- a/gradle/testing/beasting.gradle +++ b/gradle/testing/beasting.gradle @@ -27,7 +27,7 @@ def beastingMode = gradle.startParameter.taskNames.any{ name -> name == 'beast' allprojects { plugins.withType(JavaPlugin) { - ext { + project.ext { testOptions += [ [propName: 'tests.dups', value: 0, description: "Reiterate runs of entire test suites ('beast' task)."] ] diff --git a/gradle/testing/profiling.gradle b/gradle/testing/profiling.gradle index 6c71b3f827a..88284fb5454 100644 --- a/gradle/testing/profiling.gradle +++ b/gradle/testing/profiling.gradle @@ -19,7 +19,7 @@ def recordings = files() allprojects { plugins.withType(JavaPlugin) { - ext { + project.ext { testOptions += [ [propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."] ] diff --git a/gradle/testing/randomization.gradle b/gradle/testing/randomization.gradle index 9ca8625b0ee..670f8ef2689 100644 --- a/gradle/testing/randomization.gradle +++ b/gradle/testing/randomization.gradle @@ -62,7 +62,7 @@ allprojects { // Configure test property defaults and their descriptions. allprojects { plugins.withType(JavaPlugin) { - ext { + project.ext { String randomVectorSize = RandomPicks.randomFrom(new Random(projectSeedLong), ["default", "128", "256", "512"]) testOptions += [ // seed, repetition and amplification. 
@@ -135,14 +135,14 @@ allprojects { } afterEvaluate { - ext.testOptionsResolved = testOptions.findAll { opt -> + project.ext.testOptionsResolved = testOptions.findAll { opt -> propertyOrDefault(opt.propName, opt.value) != null }.collectEntries { opt -> [(opt.propName): Objects.toString(resolvedTestOption(opt.propName))] } // Compute the "reproduce with" string. - ext.testOptionsForReproduceLine = testOptions.findAll { opt -> + project.ext.testOptionsForReproduceLine = testOptions.findAll { opt -> if (opt["includeInReproLine"] == false) { return false } diff --git a/gradle/testing/slowest-tests-at-end.gradle b/gradle/testing/slowest-tests-at-end.gradle index eaf9cd1a2f1..d24e523394d 100644 --- a/gradle/testing/slowest-tests-at-end.gradle +++ b/gradle/testing/slowest-tests-at-end.gradle @@ -22,7 +22,7 @@ def allSuites = [] allprojects { plugins.withType(JavaPlugin) { - ext { + project.ext { testOptions += [ [propName: 'tests.slowestTests', value: true, description: "Print the summary of the slowest tests."], [propName: 'tests.slowestSuites', value: true, description: "Print the summary of the slowest suites."] diff --git a/gradle/validation/git-status.gradle b/gradle/validation/git-status.gradle index 31b80641683..fa976589836 100644 --- a/gradle/validation/git-status.gradle +++ b/gradle/validation/git-status.gradle @@ -74,21 +74,6 @@ configure(rootProject) { logger.warn("WARNING: Directory is not a valid git checkout (won't check dirty files): ${rootProject.projectDir}") } } else { - // git ignores any folders which are empty (this includes folders with recursively empty sub-folders). - def untrackedNonEmptyFolders = status.untrackedFolders.findAll { path -> - File location = file("${rootProject.projectDir}/${path}") - boolean hasFiles = false - Files.walkFileTree(location.toPath(), new SimpleFileVisitor() { - @Override - FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - hasFiles = true - // Terminate early. 
- return FileVisitResult.TERMINATE - } - }) - return hasFiles - } - def offenders = [ // Exclude staged changes. These are fine in precommit. // "(added)": status.added, @@ -97,8 +82,7 @@ configure(rootProject) { "(conflicting)": status.conflicting, "(missing)": status.missing, "(modified)": status.modified, - "(untracked)": status.untracked, - "(untracked non-empty dir)": untrackedNonEmptyFolders + "(untracked)": status.untracked ].collectMany { fileStatus, files -> files.collect {file -> " - ${file} ${fileStatus}" } }.sort() diff --git a/gradle/wrapper/gradle-wrapper.jar.sha256 b/gradle/wrapper/gradle-wrapper.jar.sha256 index 2a6b8668ac7..6d5fdcd3f30 100644 --- a/gradle/wrapper/gradle-wrapper.jar.sha256 +++ b/gradle/wrapper/gradle-wrapper.jar.sha256 @@ -1 +1 @@ -cb0da6751c2b753a16ac168bb354870ebb1e162e9083f116729cec9c781156b8 +2db75c40782f5e8ba1fc278a5574bab070adccb2d21ca5a6e5ed840888448046 \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar.version b/gradle/wrapper/gradle-wrapper.jar.version index 3b6825376ad..7f6758ef97b 100644 --- a/gradle/wrapper/gradle-wrapper.jar.version +++ b/gradle/wrapper/gradle-wrapper.jar.version @@ -1 +1 @@ -8.8.0 +8.10.0 diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index a4413138c96..9355b415575 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e956c99289f..42ab9c6fc63 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -112,6 +112,8 @@ API Changes * GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz) +* 
GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir) + New Features --------------------- @@ -167,6 +169,8 @@ Improvements * GITHUB#12172: Update Romanian stopwords list to include the modern unicode forms. (Trey Jones) +* GITHUB#13707: Improve Operations.isTotal() to work with non-minimal automata. (Dawid Weiss, Robert Muir) + Optimizations --------------------- @@ -266,6 +270,8 @@ Build * GITHUB#13649: Fix eclipse ide settings generation #13649 (Uwe Schindler, Dawid Weiss) +* GITHUB#13698: Upgrade to gradle 8.10 (Dawid Weiss) + ======================== Lucene 9.12.0 ======================= API Changes @@ -284,10 +290,7 @@ API Changes * GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar) -* GITHUB#13568: Add CollectorOwner class that wraps CollectorManager, and handles list of Collectors and results. - Add IndexSearcher#search method that takes CollectorOwner. (Egor Potemkin) - -* GITHUB#13568: Add DrillSideways#search method that supports any collector types for any drill-sideways dimensions +* GITHUB#13568: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions or drill-down. (Egor Potemkin) New Features @@ -408,6 +411,9 @@ Bug Fixes * GITHUB#13691: Fix incorrect exponent value in explain of SigmoidFunction. (Owais Kazi) +* GITHUB#13703: Fix bug in LatLonPoint queries where narrow polygons close to latitude 90 don't + match any points due to an Integer overflow. (Ignacio Vera) + Build --------------------- diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index 64de353b153..cebc479aa24 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -793,3 +793,7 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on Scorer. 
+### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter + +`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class` +replaces it by allowing users to load a custom collector manager instead. (Luca Cavanna) \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java index 1eb80ea5081..974edc8e9f6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java @@ -1490,7 +1490,7 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase { } assertTrue(approxEquals(actual, expected)); - assertTrue(Operations.sameLanguage(actual, expected)); + assertTrue(AutomatonTestUtil.sameLanguage(actual, expected)); } a.close(); diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java index d7ff62ba3be..818639bee8f 100644 --- a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java +++ b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java @@ -363,7 +363,7 @@ public final class GeoEncodingUtils { */ public boolean test(int lat, int lon) { final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift); - if (lat2 < latBase || lat2 >= latBase + maxLatDelta) { + if (lat2 < latBase || lat2 - latBase >= maxLatDelta) { return false; } int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift); @@ -411,7 +411,7 @@ public final class GeoEncodingUtils { */ public boolean test(int lat, int lon) { final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift); - if (lat2 < latBase || lat2 >= latBase + maxLatDelta) { + if (lat2 < latBase || lat2 - latBase >= maxLatDelta) { return false; } int 
lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift); diff --git a/lucene/core/src/java/org/apache/lucene/search/CollectorOwner.java b/lucene/core/src/java/org/apache/lucene/search/CollectorOwner.java deleted file mode 100644 index 4c5c2f1eea1..00000000000 --- a/lucene/core/src/java/org/apache/lucene/search/CollectorOwner.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.search; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - * This class wraps {@link CollectorManager} and owns the collectors the manager creates. It is - * convenient that clients of the class don't have to worry about keeping the list of collectors, as - * well as about making the collector's type (C) compatible when reduce is called. Instances of this - * class cache results of {@link CollectorManager#reduce(Collection)}. - * - *

Note that instance of this class ignores any {@link Collector} created by {@link - * CollectorManager#newCollector()} directly, not through {@link #newCollector()} - * - * @lucene.experimental - */ -public final class CollectorOwner { - - private final CollectorManager manager; - - private T result; - private boolean reduced; - - // TODO: For IndexSearcher, the list doesn't have to be synchronized - // because we create new collectors sequentially. Drill sideways creates new collectors in - // DrillSidewaysQuery#Weight#bulkScorer which is already called concurrently. - // I think making the list synchronized here is not a huge concern, at the same time, do we want - // to do something about it? - // e.g. have boolean property in constructor that makes it threads friendly when set? - private final List collectors = Collections.synchronizedList(new ArrayList<>()); - - public CollectorOwner(CollectorManager manager) { - this.manager = manager; - } - - /** Return a new {@link Collector}. This must return a different instance on each call. */ - public C newCollector() throws IOException { - C collector = manager.newCollector(); - collectors.add(collector); - return collector; - } - - public C getCollector(int i) { - return collectors.get(i); - } - - /** - * Returns result of {@link CollectorManager#reduce(Collection)}. The result is cached. - * - *

This method is NOT threadsafe. - */ - public T getResult() throws IOException { - if (reduced == false) { - result = manager.reduce(collectors); - reduced = true; - } - return result; - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index a8f8d14c24f..77d6edf34a0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -671,53 +671,6 @@ public class IndexSearcher { } } - /** - * Lower-level search API. Search all leaves using the given {@link CollectorOwner}, without - * calling {@link CollectorOwner#getResult()} so that clients can reduce and read results - * themselves. - * - *

Note that this method doesn't return anything - users can access results by calling {@link - * CollectorOwner#getResult()} - * - * @lucene.experimental - */ - public void search(Query query, CollectorOwner collectorOwner) - throws IOException { - final C firstCollector = collectorOwner.newCollector(); - query = rewrite(query, firstCollector.scoreMode().needsScores()); - final Weight weight = createWeight(query, firstCollector.scoreMode(), 1); - search(weight, collectorOwner, firstCollector); - } - - private void search( - Weight weight, CollectorOwner collectorOwner, C firstCollector) throws IOException { - final LeafSlice[] leafSlices = getSlices(); - if (leafSlices.length == 0) { - // there are no segments, nothing to offload to the executor - assert leafContexts.isEmpty(); - } else { - final ScoreMode scoreMode = firstCollector.scoreMode(); - for (int i = 1; i < leafSlices.length; ++i) { - final C collector = collectorOwner.newCollector(); - if (scoreMode != collector.scoreMode()) { - throw new IllegalStateException( - "CollectorManager does not always produce collectors with the same score mode"); - } - } - final List> listTasks = new ArrayList<>(leafSlices.length); - for (int i = 0; i < leafSlices.length; ++i) { - final LeafReaderContext[] leaves = leafSlices[i].leaves; - final C collector = collectorOwner.getCollector(i); - listTasks.add( - () -> { - search(Arrays.asList(leaves), weight, collector); - return collector; - }); - } - taskExecutor.invokeAll(listTasks); - } - } - /** * Lower-level search API. 
* diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java index 5a56286ebed..8192f612165 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -313,7 +313,7 @@ public abstract class MultiTermQuery extends Query { * Return the number of unique terms contained in this query, if known up-front. If not known, -1 * will be returned. */ - public long getTermsCount() throws IOException { + public long getTermsCount() { return -1; } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index 4e1abab09cc..da01b24f0bd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -137,7 +137,7 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable { } @Override - public long getTermsCount() throws IOException { + public long getTermsCount() { return termData.size(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java index 7ad9eedda6f..34a18446191 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java @@ -339,6 +339,7 @@ public class Automaton implements Accountable, TransitionAccessor { @Override public int getNumTransitions(int state) { assert state >= 0; + assert state < getNumStates(); int count = states[2 * state + 1]; if (count == -1) { return 0; diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java b/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java index b9030c19cf0..e141b4b95cf 100644 --- 
a/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java @@ -86,7 +86,7 @@ public class FiniteStringsIterator { this.emitEmptyString = a.isAccept(0); // Start iteration with node startState. - if (a.getNumTransitions(startState) > 0) { + if (a.getNumStates() > startState && a.getNumTransitions(startState) > 0) { pathStates.set(startState); nodes[0].resetState(a, startState); string.append(startState); diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java index 2052b1c50bf..3cb02c77a82 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java @@ -35,7 +35,6 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -182,30 +181,65 @@ public final class Operations { // Repeating the empty automata will still only accept the empty automata. return a; } + + if (a.isAccept(0) && a.getAcceptStates().cardinality() == 1) { + // If state 0 is the only accept state, then this automaton already repeats itself. + return a; + } + Automaton.Builder builder = new Automaton.Builder(); + // Create the initial state, which is accepted builder.createState(); builder.setAccept(0, true); - builder.copy(a); - Transition t = new Transition(); + + int[] stateMap = new int[a.getNumStates()]; + for (int state = 0; state < a.getNumStates(); ++state) { + if (a.isAccept(state) == false) { + stateMap[state] = builder.createState(); + } else if (a.getNumTransitions(state) == 0) { + // Accept states that have no transitions get merged into state 0. 
+ stateMap[state] = 0; + } else { + int newState = builder.createState(); + stateMap[state] = newState; + builder.setAccept(newState, true); + } + } + + // Now copy the automaton while renumbering states. + for (int state = 0; state < a.getNumStates(); ++state) { + int src = stateMap[state]; + int count = a.initTransition(state, t); + for (int i = 0; i < count; i++) { + a.getNextTransition(t); + int dest = stateMap[t.dest]; + builder.addTransition(src, dest, t.min, t.max); + } + } + + // Now copy transitions of the initial state to our new initial state. int count = a.initTransition(0, t); for (int i = 0; i < count; i++) { a.getNextTransition(t); - builder.addTransition(0, t.dest + 1, t.min, t.max); + builder.addTransition(0, stateMap[t.dest], t.min, t.max); } - int numStates = a.getNumStates(); - for (int s = 0; s < numStates; s++) { - if (a.isAccept(s)) { + // Now copy transitions of the initial state to final states to make the automaton repeat + // itself. + for (int s = a.getAcceptStates().nextSetBit(0); + s != -1; + s = a.getAcceptStates().nextSetBit(s + 1)) { + if (stateMap[s] != 0) { count = a.initTransition(0, t); for (int i = 0; i < count; i++) { a.getNextTransition(t); - builder.addTransition(s + 1, t.dest + 1, t.min, t.max); + builder.addTransition(stateMap[s], stateMap[t.dest], t.min, t.max); } } } - return builder.finish(); + return removeDeadStates(builder.finish()); } /** @@ -374,17 +408,6 @@ public final class Operations { return removeDeadStates(c); } - /** - * Returns true if these two automata accept exactly the same language. This is a costly - * computation! Both automata must be determinized and have no dead states! - */ - public static boolean sameLanguage(Automaton a1, Automaton a2) { - if (a1 == a2) { - return true; - } - return subsetOf(a2, a1) && subsetOf(a1, a2); - } - // TODO: move to test-framework? 
/** * Returns true if this automaton has any states that cannot be reached from the initial state or @@ -417,73 +440,6 @@ public final class Operations { return reachableFromAccept.isEmpty() == false; } - /** - * Returns true if the language of a1 is a subset of the language of a2. - * Both automata must be determinized and must have no dead states. - * - *

Complexity: quadratic in number of states. - */ - public static boolean subsetOf(Automaton a1, Automaton a2) { - if (a1.isDeterministic() == false) { - throw new IllegalArgumentException("a1 must be deterministic"); - } - if (a2.isDeterministic() == false) { - throw new IllegalArgumentException("a2 must be deterministic"); - } - assert hasDeadStatesFromInitial(a1) == false; - assert hasDeadStatesFromInitial(a2) == false; - if (a1.getNumStates() == 0) { - // Empty language is alwyas a subset of any other language - return true; - } else if (a2.getNumStates() == 0) { - return isEmpty(a1); - } - - // TODO: cutover to iterators instead - Transition[][] transitions1 = a1.getSortedTransitions(); - Transition[][] transitions2 = a2.getSortedTransitions(); - ArrayDeque worklist = new ArrayDeque<>(); - HashSet visited = new HashSet<>(); - StatePair p = new StatePair(0, 0); - worklist.add(p); - visited.add(p); - while (worklist.size() > 0) { - p = worklist.removeFirst(); - if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) { - return false; - } - Transition[] t1 = transitions1[p.s1]; - Transition[] t2 = transitions2[p.s2]; - for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) { - while (b2 < t2.length && t2[b2].max < t1[n1].min) { - b2++; - } - int min1 = t1[n1].min, max1 = t1[n1].max; - - for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) { - if (t2[n2].min > min1) { - return false; - } - if (t2[n2].max < Character.MAX_CODE_POINT) { - min1 = t2[n2].max + 1; - } else { - min1 = Character.MAX_CODE_POINT; - max1 = Character.MIN_CODE_POINT; - } - StatePair q = new StatePair(t1[n1].dest, t2[n2].dest); - if (!visited.contains(q)) { - worklist.add(q); - visited.add(q); - } - } - if (min1 <= max1) { - return false; - } - } - } - return true; - } - /** * Returns an automaton that accepts the union of the languages of the given automata. * @@ -857,22 +813,48 @@ public final class Operations { return true; } - /** Returns true if the given automaton accepts all strings. 
The automaton must be minimized. */ + /** + * Returns true if the given automaton accepts all strings. + * + *

The automaton must be deterministic, or this method may return false. + * + *

Complexity: linear in number of states and transitions. + */ public static boolean isTotal(Automaton a) { return isTotal(a, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT); } /** * Returns true if the given automaton accepts all strings for the specified min/max range of the - * alphabet. The automaton must be minimized. + * alphabet. + * + *

The automaton must be deterministic, or this method may return false. + * + *

Complexity: linear in number of states and transitions. */ public static boolean isTotal(Automaton a, int minAlphabet, int maxAlphabet) { - if (a.isAccept(0) && a.getNumTransitions(0) == 1) { - Transition t = new Transition(); - a.getTransition(0, 0, t); - return t.dest == 0 && t.min == minAlphabet && t.max == maxAlphabet; + BitSet states = getLiveStates(a); + Transition spare = new Transition(); + int seenStates = 0; + for (int state = states.nextSetBit(0); state >= 0; state = states.nextSetBit(state + 1)) { + // all reachable states must be accept states + if (a.isAccept(state) == false) return false; + // all reachable states must contain transitions covering minAlphabet-maxAlphabet + int previousLabel = minAlphabet - 1; + for (int transition = 0; transition < a.getNumTransitions(state); transition++) { + a.getTransition(state, transition, spare); + // no gaps are allowed + if (spare.min > previousLabel + 1) return false; + previousLabel = spare.max; + } + if (previousLabel < maxAlphabet) return false; + if (state == Integer.MAX_VALUE) { + break; // or (state+1) would overflow + } + seenStates++; } - return false; + // we've checked all the states, automaton is either total or empty + return seenStates > 0; } /** @@ -1004,6 +986,9 @@ public final class Operations { public static Automaton removeDeadStates(Automaton a) { int numStates = a.getNumStates(); BitSet liveSet = getLiveStates(a); + if (liveSet.cardinality() == numStates) { + return a; + } int[] map = new int[numStates]; diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java index ad1be724f9c..bd003507f2d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java @@ -35,9 +35,14 @@ package org.apache.lucene.util.automaton; * @lucene.experimental */ public class StatePair { + // only mike knows what it does (do 
not expose) int s; - int s1; - int s2; + + /** first state */ + public final int s1; + + /** second state */ + public final int s2; StatePair(int s, int s1, int s2) { this.s = s; @@ -81,7 +86,7 @@ public class StatePair { @Override public int hashCode() { // Don't use s1 ^ s2 since it's vulnerable to the case where s1 == s2 always --> hashCode = 0, - // e.g. if you call Operations.sameLanguage, + // e.g. if you call AutomatonTestUtil.sameLanguage, // passing the same automaton against itself: return s1 * 31 + s2; } diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java b/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java index 6e51a6bbb0c..4737694702f 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java @@ -625,7 +625,7 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase { Operations.removeDeadStates(expected), DEFAULT_DETERMINIZE_WORK_LIMIT); Automaton actualDet = Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_DETERMINIZE_WORK_LIMIT); - if (Operations.sameLanguage(expectedDet, actualDet) == false) { + if (AutomatonTestUtil.sameLanguage(expectedDet, actualDet) == false) { Set expectedPaths = toPathStrings(expectedDet); Set actualPaths = toPathStrings(actualDet); StringBuilder b = new StringBuilder(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java index ddca01a2387..105579b25d6 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java @@ -183,7 +183,7 @@ public class TestTermsEnum2 extends LuceneTestCase { Automaton actual = Operations.determinize(Automata.makeStringUnion(found), DEFAULT_DETERMINIZE_WORK_LIMIT); - assertTrue(Operations.sameLanguage(expected, actual)); + 
assertTrue(AutomatonTestUtil.sameLanguage(expected, actual)); } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java index b0634e56da4..1b27683a251 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java @@ -403,12 +403,8 @@ public class TestBoolean2 extends LuceneTestCase { bigSearcher.count(q3.build())); // test diff (randomized) scorers produce the same results on bigSearcher as well - hits1 = - bigSearcher.search(q1, new TopFieldCollectorManager(sort, 1000 * mulFactor, 1)) - .scoreDocs; - hits2 = - bigSearcher.search(q1, new TopFieldCollectorManager(sort, 1000 * mulFactor, 1)) - .scoreDocs; + hits1 = bigSearcher.search(q1, new TopFieldCollectorManager(sort, mulFactor, 1)).scoreDocs; + hits2 = bigSearcher.search(q1, new TopFieldCollectorManager(sort, mulFactor, 1)).scoreDocs; CheckHits.checkEqual(q1, hits1, hits2); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java index 25404eacb30..fa20d124b04 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -17,15 +17,11 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.BitSet; import java.util.Collection; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.util.LuceneTestCase; @@ -35,34 +31,7 @@ import 
org.apache.lucene.util.FixedBitSet; public class TestScorerPerf extends LuceneTestCase { private final boolean validate = true; // set to false when doing performance testing - public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) - throws Exception { - int[] freq = new int[nTerms]; - Term[] terms = new Term[nTerms]; - for (int i = 0; i < nTerms; i++) { - int f = (nTerms + 1) - i; // make first terms less frequent - freq[i] = (int) Math.ceil(Math.pow(f, power)); - terms[i] = new Term("f", Character.toString((char) ('A' + i))); - } - - IndexWriter iw = - new IndexWriter( - dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE)); - for (int i = 0; i < nDocs; i++) { - Document d = new Document(); - for (int j = 0; j < nTerms; j++) { - if (random().nextInt(freq[j]) == 0) { - d.add(newStringField("f", terms[j].text(), Field.Store.NO)); - // System.out.println(d); - } - } - iw.addDocument(d); - } - iw.forceMerge(1); - iw.close(); - } - - public FixedBitSet randBitSet(int sz, int numBitsToSet) { + private static FixedBitSet randBitSet(int sz, int numBitsToSet) { FixedBitSet set = new FixedBitSet(sz); for (int i = 0; i < numBitsToSet; i++) { set.set(random().nextInt(sz)); @@ -70,7 +39,7 @@ public class TestScorerPerf extends LuceneTestCase { return set; } - public FixedBitSet[] randBitSets(int numSets, int setSize) { + private static FixedBitSet[] randBitSets(int numSets, int setSize) { FixedBitSet[] sets = new FixedBitSet[numSets]; for (int i = 0; i < sets.length; i++) { sets[i] = randBitSet(setSize, random().nextInt(setSize)); @@ -81,22 +50,13 @@ public class TestScorerPerf extends LuceneTestCase { private static final class CountingHitCollectorManager implements CollectorManager { - private final boolean validate; - private final FixedBitSet result; - - CountingHitCollectorManager(boolean validate, FixedBitSet result) { - this.validate = validate; - this.result = result; - } - @Override public CountingHitCollector 
newCollector() { - return validate ? new MatchingHitCollector(result) : new CountingHitCollector(); + return new CountingHitCollector(); } @Override - public CountingHitCollector reduce(Collection collectors) - throws IOException { + public CountingHitCollector reduce(Collection collectors) { CountingHitCollector result = new CountingHitCollector(); for (CountingHitCollector collector : collectors) { result.count += collector.count; @@ -106,7 +66,7 @@ public class TestScorerPerf extends LuceneTestCase { } } - public static class CountingHitCollector extends SimpleCollector { + private static class CountingHitCollector extends SimpleCollector { int count = 0; int sum = 0; protected int docBase = 0; @@ -121,12 +81,8 @@ public class TestScorerPerf extends LuceneTestCase { return count; } - public int getSum() { - return sum; - } - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { + protected void doSetNextReader(LeafReaderContext context) { docBase = context.docBase; } @@ -136,24 +92,6 @@ public class TestScorerPerf extends LuceneTestCase { } } - public static class MatchingHitCollector extends CountingHitCollector { - FixedBitSet answer; - int pos = -1; - - public MatchingHitCollector(FixedBitSet answer) { - this.answer = answer; - } - - public void collect(int doc, float score) { - - pos = answer.nextSetBit(pos + 1); - if (pos != doc + docBase) { - throw new RuntimeException("Expected doc " + pos + " but got " + (doc + docBase)); - } - super.collect(doc); - } - } - private static class BitSetQuery extends Query { private final FixedBitSet docs; @@ -163,11 +101,10 @@ public class TestScorerPerf extends LuceneTestCase { } @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { return new ConstantScoreWeight(this, boost) { @Override - public ScorerSupplier 
scorerSupplier(LeafReaderContext context) throws IOException { + public ScorerSupplier scorerSupplier(LeafReaderContext context) { final var scorer = new ConstantScoreScorer( score(), scoreMode, new BitSetIterator(docs, docs.approximateCardinality())); @@ -200,20 +137,22 @@ public class TestScorerPerf extends LuceneTestCase { } } - FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) { + private FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) { final FixedBitSet rnd = sets[random().nextInt(sets.length)]; Query q = new BitSetQuery(rnd); bq.add(q, BooleanClause.Occur.MUST); if (validate) { - if (result == null) result = rnd.clone(); - else result.and(rnd); + if (result == null) { + result = rnd.clone(); + } else { + result.and(rnd); + } } return result; } - public int doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses) + private void doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses) throws IOException { - int ret = 0; for (int i = 0; i < iter; i++) { int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses @@ -222,21 +161,17 @@ public class TestScorerPerf extends LuceneTestCase { for (int j = 0; j < nClauses; j++) { result = addClause(sets, bq, result); } - CountingHitCollector hc = - s.search(bq.build(), new CountingHitCollectorManager(validate, result)); - ret += hc.getSum(); + CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager()); - if (validate) assertEquals(result.cardinality(), hc.getCount()); - // System.out.println(hc.getCount()); + if (validate) { + assertEquals(result.cardinality(), hc.getCount()); + } } - - return ret; } - public int doNestedConjunctions( + private void doNestedConjunctions( IndexSearcher s, FixedBitSet[] sets, int iter, int maxOuterClauses, int maxClauses) throws IOException { - int ret = 0; long nMatches = 0; for (int i = 0; i < iter; i++) { @@ -255,107 +190,15 @@ public 
class TestScorerPerf extends LuceneTestCase { oq.add(bq.build(), BooleanClause.Occur.MUST); } // outer - CountingHitCollector hc = - s.search(oq.build(), new CountingHitCollectorManager(validate, result)); + CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager()); nMatches += hc.getCount(); - ret += hc.getSum(); - if (validate) assertEquals(result.cardinality(), hc.getCount()); - // System.out.println(hc.getCount()); - } - if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter)); - return ret; - } - - public int doTermConjunctions( - Term[] terms, IndexSearcher s, int termsInIndex, int maxClauses, int iter) - throws IOException { - int ret = 0; - - long nMatches = 0; - for (int i = 0; i < iter; i++) { - int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - BitSet termflag = new BitSet(termsInIndex); - for (int j = 0; j < nClauses; j++) { - int tnum; - // don't pick same clause twice - tnum = random().nextInt(termsInIndex); - if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum); - if (tnum < 0 || tnum >= termsInIndex) tnum = termflag.nextClearBit(0); - termflag.set(tnum); - Query tq = new TermQuery(terms[tnum]); - bq.add(tq, BooleanClause.Occur.MUST); + if (validate) { + assertEquals(result.cardinality(), hc.getCount()); } - - CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager(false, null)); - nMatches += hc.getCount(); - ret += hc.getSum(); } - if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter)); - - return ret; - } - - public int doNestedTermConjunctions( - IndexSearcher s, - Term[] terms, - int termsInIndex, - int maxOuterClauses, - int maxClauses, - int iter) - throws IOException { - int ret = 0; - long nMatches = 0; - for (int i = 0; i < iter; i++) { - int oClauses = random().nextInt(maxOuterClauses - 1) + 2; - BooleanQuery.Builder oq = new BooleanQuery.Builder(); - for (int 
o = 0; o < oClauses; o++) { - - int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - BitSet termflag = new BitSet(termsInIndex); - for (int j = 0; j < nClauses; j++) { - int tnum; - // don't pick same clause twice - tnum = random().nextInt(termsInIndex); - if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum); - if (tnum < 0 || tnum >= 25) tnum = termflag.nextClearBit(0); - termflag.set(tnum); - Query tq = new TermQuery(terms[tnum]); - bq.add(tq, BooleanClause.Occur.MUST); - } // inner - - oq.add(bq.build(), BooleanClause.Occur.MUST); - } // outer - - CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager(false, null)); - nMatches += hc.getCount(); - ret += hc.getSum(); + if (VERBOSE) { + System.out.println("Average number of matches=" + (nMatches / iter)); } - if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter)); - return ret; - } - - public int doSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter) - throws IOException { - int ret = 0; - - for (int i = 0; i < iter; i++) { - int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - for (int j = 0; j < nClauses; j++) { - int tnum = random().nextInt(termsInIndex); - builder.add(new Term("f", Character.toString((char) (tnum + 'A')))); - } - // slop could be random too - builder.setSlop(termsInIndex); - PhraseQuery q = builder.build(); - - CountingHitCollector hc = s.search(q, new CountingHitCollectorManager(false, null)); - ret += hc.getSum(); - } - - return ret; } public void testConjunctions() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java index e4dd739ef78..3c7d6eea198 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java +++ 
b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java @@ -87,7 +87,7 @@ public class TestAutomaton extends LuceneTestCase { Automaton a2 = Operations.removeDeadStates( Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar"))); - assertTrue(Operations.sameLanguage(a1, a2)); + assertTrue(AutomatonTestUtil.sameLanguage(a1, a2)); } public void testCommonPrefixString() throws Exception { @@ -257,7 +257,7 @@ public class TestAutomaton extends LuceneTestCase { Automaton a = Automata.makeString("foobar"); Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT); - assertTrue(Operations.sameLanguage(a, aMin)); + assertTrue(AutomatonTestUtil.sameLanguage(a, aMin)); } public void testMinimize2() throws Exception { @@ -266,7 +266,7 @@ public class TestAutomaton extends LuceneTestCase { Arrays.asList(Automata.makeString("foobar"), Automata.makeString("boobar"))); Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Operations.determinize(Operations.removeDeadStates(a), DEFAULT_DETERMINIZE_WORK_LIMIT), aMin)); } @@ -276,7 +276,7 @@ public class TestAutomaton extends LuceneTestCase { Automaton ra = Operations.reverse(a); Automaton a2 = Operations.determinize(Operations.reverse(ra), DEFAULT_DETERMINIZE_WORK_LIMIT); - assertTrue(Operations.sameLanguage(a, a2)); + assertTrue(AutomatonTestUtil.sameLanguage(a, a2)); } public void testOptional() throws Exception { @@ -401,7 +401,7 @@ public class TestAutomaton extends LuceneTestCase { Automaton ra = Operations.reverse(a); Automaton rra = Operations.reverse(ra); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE), Operations.determinize(Operations.removeDeadStates(rra), Integer.MAX_VALUE))); } @@ -502,7 +502,7 @@ public class TestAutomaton extends LuceneTestCase { } 
assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE), Operations.determinize( Operations.removeDeadStates(builder.finish()), Integer.MAX_VALUE))); @@ -735,7 +735,8 @@ public class TestAutomaton extends LuceneTestCase { a2.addTransition(0, state, 'a'); a2.finishState(); assertTrue( - Operations.sameLanguage(Operations.removeDeadStates(a), Operations.removeDeadStates(a2))); + AutomatonTestUtil.sameLanguage( + Operations.removeDeadStates(a), Operations.removeDeadStates(a2))); } private Automaton randomNoOp(Automaton a) { @@ -1288,7 +1289,7 @@ public class TestAutomaton extends LuceneTestCase { Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE)); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE)))); // Do same check, in UTF8 space @@ -1613,7 +1614,7 @@ public class TestAutomaton extends LuceneTestCase { public void testAcceptAllEmptyStringMin() throws Exception { Automaton a = Automata.makeBinaryInterval(newBytesRef(), true, null, true); - assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a)); + assertTrue(AutomatonTestUtil.sameLanguage(Automata.makeAnyBinary(), a)); } private static IntsRef toIntsRef(String s) { diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java index 65616fa55b9..e69568d3873 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java @@ -41,7 +41,7 @@ public class TestDeterminism extends LuceneTestCase { a = AutomatonTestUtil.determinizeSimple(a); Automaton b = Operations.determinize(a, Integer.MAX_VALUE); // TODO: more verifications possible? 
- assertTrue(Operations.sameLanguage(a, b)); + assertTrue(AutomatonTestUtil.sameLanguage(a, b)); } } @@ -53,20 +53,20 @@ public class TestDeterminism extends LuceneTestCase { Operations.complement( Operations.complement(a, DEFAULT_DETERMINIZE_WORK_LIMIT), DEFAULT_DETERMINIZE_WORK_LIMIT); - assertTrue(Operations.sameLanguage(a, equivalent)); + assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent)); // a union a = a equivalent = Operations.determinize( Operations.removeDeadStates(Operations.union(a, a)), DEFAULT_DETERMINIZE_WORK_LIMIT); - assertTrue(Operations.sameLanguage(a, equivalent)); + assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent)); // a intersect a = a equivalent = Operations.determinize( Operations.removeDeadStates(Operations.intersection(a, a)), DEFAULT_DETERMINIZE_WORK_LIMIT); - assertTrue(Operations.sameLanguage(a, equivalent)); + assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent)); // a minus a = empty Automaton empty = Operations.minus(a, a, DEFAULT_DETERMINIZE_WORK_LIMIT); @@ -81,7 +81,7 @@ public class TestDeterminism extends LuceneTestCase { equivalent = Operations.minus(optional, Automata.makeEmptyString(), DEFAULT_DETERMINIZE_WORK_LIMIT); // System.out.println("equiv " + equivalent); - assertTrue(Operations.sameLanguage(a, equivalent)); + assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent)); } } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java index c8adb8751b9..bc6d268c15e 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java @@ -81,44 +81,46 @@ public class TestLevenshteinAutomata extends LuceneTestCase { // check that the dfa for n-1 accepts a subset of the dfa for n if (n > 0) { assertTrue( - Operations.subsetOf( + AutomatonTestUtil.subsetOf( 
Operations.removeDeadStates(automata[n - 1]), Operations.removeDeadStates(automata[n]))); assertTrue( - Operations.subsetOf( + AutomatonTestUtil.subsetOf( Operations.removeDeadStates(automata[n - 1]), Operations.removeDeadStates(tautomata[n]))); assertTrue( - Operations.subsetOf( + AutomatonTestUtil.subsetOf( Operations.removeDeadStates(tautomata[n - 1]), Operations.removeDeadStates(automata[n]))); assertTrue( - Operations.subsetOf( + AutomatonTestUtil.subsetOf( Operations.removeDeadStates(tautomata[n - 1]), Operations.removeDeadStates(tautomata[n]))); assertNotSame(automata[n - 1], automata[n]); } // check that Lev(N) is a subset of LevT(N) assertTrue( - Operations.subsetOf( + AutomatonTestUtil.subsetOf( Operations.removeDeadStates(automata[n]), Operations.removeDeadStates(tautomata[n]))); // special checks for specific n switch (n) { case 0: // easy, matches the string itself assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Automata.makeString(s), Operations.removeDeadStates(automata[0]))); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Automata.makeString(s), Operations.removeDeadStates(tautomata[0]))); break; case 1: // generate a lev1 naively, and check the accepted lang is the same. 
assertTrue( - Operations.sameLanguage(naiveLev1(s), Operations.removeDeadStates(automata[1]))); + AutomatonTestUtil.sameLanguage( + naiveLev1(s), Operations.removeDeadStates(automata[1]))); assertTrue( - Operations.sameLanguage(naiveLev1T(s), Operations.removeDeadStates(tautomata[1]))); + AutomatonTestUtil.sameLanguage( + naiveLev1T(s), Operations.removeDeadStates(tautomata[1]))); break; default: assertBruteForce(s, automata[n], n); diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java index a43cc8ae8b1..92be1e4d569 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java @@ -28,7 +28,7 @@ public class TestMinimize extends LuceneTestCase { Automaton a = AutomatonTestUtil.randomAutomaton(random()); Automaton la = Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE); Automaton lb = MinimizationOperations.minimize(a, Integer.MAX_VALUE); - assertTrue(Operations.sameLanguage(la, lb)); + assertTrue(AutomatonTestUtil.sameLanguage(la, lb)); } } @@ -42,7 +42,7 @@ public class TestMinimize extends LuceneTestCase { Automaton a = AutomatonTestUtil.randomAutomaton(random()); a = AutomatonTestUtil.minimizeSimple(a); Automaton b = MinimizationOperations.minimize(a, Integer.MAX_VALUE); - assertTrue(Operations.sameLanguage(a, b)); + assertTrue(AutomatonTestUtil.sameLanguage(a, b)); assertEquals(a.getNumStates(), b.getNumStates()); int numStates = a.getNumStates(); diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java index ec38eafe0ce..c158dd9f3bb 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java @@ -50,7 +50,7 @@ public class 
TestOperations extends LuceneTestCase { assertTrue(naiveUnion.isDeterministic()); assertFalse(Operations.hasDeadStatesFromInitial(naiveUnion)); - assertTrue(Operations.sameLanguage(union, naiveUnion)); + assertTrue(AutomatonTestUtil.sameLanguage(union, naiveUnion)); } private static Automaton naiveUnion(List strings) { @@ -116,13 +116,13 @@ public class TestOperations extends LuceneTestCase { Automaton concat2 = Operations.concatenate(singleton, nfa); assertFalse(concat2.isDeterministic()); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Operations.determinize(concat1, 100), Operations.determinize(concat2, 100))); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Operations.determinize(nfa, 100), Operations.determinize(concat1, 100))); assertTrue( - Operations.sameLanguage( + AutomatonTestUtil.sameLanguage( Operations.determinize(nfa, 100), Operations.determinize(concat2, 100))); } @@ -173,6 +173,42 @@ public class TestOperations extends LuceneTestCase { assertTrue(exc.getMessage().contains("input automaton is too large")); } + public void testIsTotal() { + // minimal + assertFalse(Operations.isTotal(Automata.makeEmpty())); + assertFalse(Operations.isTotal(Automata.makeEmptyString())); + assertTrue(Operations.isTotal(Automata.makeAnyString())); + assertTrue(Operations.isTotal(Automata.makeAnyBinary(), 0, 255)); + assertFalse(Operations.isTotal(Automata.makeNonEmptyBinary(), 0, 255)); + // deterministic, but not minimal + assertTrue(Operations.isTotal(Operations.repeat(Automata.makeAnyChar()))); + Automaton tricky = + Operations.repeat( + Operations.union( + Automata.makeCharRange(Character.MIN_CODE_POINT, 100), + Automata.makeCharRange(101, Character.MAX_CODE_POINT))); + assertTrue(Operations.isTotal(tricky)); + // not total, but close + Automaton tricky2 = + Operations.repeat( + Operations.union( + Automata.makeCharRange(Character.MIN_CODE_POINT + 1, 100), + Automata.makeCharRange(101, Character.MAX_CODE_POINT))); + 
assertFalse(Operations.isTotal(tricky2)); + Automaton tricky3 = + Operations.repeat( + Operations.union( + Automata.makeCharRange(Character.MIN_CODE_POINT, 99), + Automata.makeCharRange(101, Character.MAX_CODE_POINT))); + assertFalse(Operations.isTotal(tricky3)); + Automaton tricky4 = + Operations.repeat( + Operations.union( + Automata.makeCharRange(Character.MIN_CODE_POINT, 100), + Automata.makeCharRange(101, Character.MAX_CODE_POINT - 1))); + assertFalse(Operations.isTotal(tricky4)); + } + /** * Returns the set of all accepted strings. * @@ -254,4 +290,126 @@ public class TestOperations extends LuceneTestCase { a.finishState(); return a; } + + public void testRepeat() { + Automaton emptyLanguage = Automata.makeEmpty(); + assertSame(emptyLanguage, Operations.repeat(emptyLanguage)); + + Automaton emptyString = Automata.makeEmptyString(); + assertSame(emptyString, Operations.repeat(emptyString)); + + Automaton a = Automata.makeChar('a'); + Automaton as = new Automaton(); + as.createState(); + as.setAccept(0, true); + as.addTransition(0, 0, 'a'); + as.finishState(); + assertTrue(AutomatonTestUtil.sameLanguage(as, Operations.repeat(a))); + assertSame(as, Operations.repeat(as)); + + Automaton aOrEmpty = new Automaton(); + aOrEmpty.createState(); + aOrEmpty.setAccept(0, true); + aOrEmpty.createState(); + aOrEmpty.setAccept(1, true); + aOrEmpty.addTransition(0, 1, 'a'); + assertTrue(AutomatonTestUtil.sameLanguage(as, Operations.repeat(aOrEmpty))); + + Automaton ab = Automata.makeString("ab"); + Automaton abs = new Automaton(); + abs.createState(); + abs.createState(); + abs.setAccept(0, true); + abs.addTransition(0, 1, 'a'); + abs.finishState(); + abs.addTransition(1, 0, 'b'); + abs.finishState(); + assertTrue(AutomatonTestUtil.sameLanguage(abs, Operations.repeat(ab))); + assertSame(abs, Operations.repeat(abs)); + + Automaton absThenC = Operations.concatenate(abs, Automata.makeChar('c')); + Automaton absThenCs = new Automaton(); + absThenCs.createState(); + 
absThenCs.createState(); + absThenCs.createState(); + absThenCs.setAccept(0, true); + absThenCs.addTransition(0, 1, 'a'); + absThenCs.addTransition(0, 0, 'c'); + absThenCs.finishState(); + absThenCs.addTransition(1, 2, 'b'); + absThenCs.finishState(); + absThenCs.addTransition(2, 1, 'a'); + absThenCs.addTransition(2, 0, 'c'); + absThenCs.finishState(); + assertTrue(AutomatonTestUtil.sameLanguage(absThenCs, Operations.repeat(absThenC))); + assertSame(absThenCs, Operations.repeat(absThenCs)); + + Automaton aOrAb = new Automaton(); + aOrAb.createState(); + aOrAb.createState(); + aOrAb.createState(); + aOrAb.setAccept(1, true); + aOrAb.setAccept(2, true); + aOrAb.addTransition(0, 1, 'a'); + aOrAb.finishState(); + aOrAb.addTransition(1, 2, 'b'); + aOrAb.finishState(); + Automaton aOrAbs = new Automaton(); + aOrAbs.createState(); + aOrAbs.createState(); + aOrAbs.setAccept(0, true); + aOrAbs.addTransition(0, 0, 'a'); + aOrAbs.addTransition(0, 1, 'a'); + aOrAbs.finishState(); + aOrAbs.addTransition(1, 0, 'b'); + aOrAbs.finishState(); + assertTrue( + AutomatonTestUtil.sameLanguage( + Operations.determinize(aOrAbs, Integer.MAX_VALUE), + Operations.determinize(Operations.repeat(aOrAb), Integer.MAX_VALUE))); + } + + public void testDuelRepeat() { + final int iters = atLeast(1_000); + for (int iter = 0; iter < iters; ++iter) { + Automaton a = AutomatonTestUtil.randomAutomaton(random()); + Automaton repeat1 = Operations.determinize(Operations.repeat(a), Integer.MAX_VALUE); + Automaton repeat2 = Operations.determinize(naiveRepeat(a), Integer.MAX_VALUE); + assertTrue(AutomatonTestUtil.sameLanguage(repeat1, repeat2)); + } + } + + // This is the original implementation of Operations#repeat, before we improved it to generate + // simpler automata in some common cases. 
+ private static Automaton naiveRepeat(Automaton a) { + if (a.getNumStates() == 0) { + return a; + } + + Automaton.Builder builder = new Automaton.Builder(); + // Create the initial state, which is accepted + builder.createState(); + builder.setAccept(0, true); + builder.copy(a); + + Transition t = new Transition(); + int count = a.initTransition(0, t); + for (int i = 0; i < count; i++) { + a.getNextTransition(t); + builder.addTransition(0, t.dest + 1, t.min, t.max); + } + + int numStates = a.getNumStates(); + for (int s = 0; s < numStates; s++) { + if (a.isAccept(s)) { + count = a.initTransition(0, t); + for (int i = 0; i < count; i++) { + a.getNextTransition(t); + builder.addTransition(s + 1, t.dest + 1, t.min, t.max); + } + } + } + + return builder.finish(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java index 7d0f062f36b..74fb08cb718 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Map; import java.util.Set; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; /** * Simple unit tests for RegExp parsing. 
@@ -698,7 +699,7 @@ public class TestRegExpParsing extends LuceneTestCase { private void assertSameLanguage(Automaton expected, Automaton actual) { expected = Operations.determinize(expected, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); actual = Operations.determinize(actual, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); - boolean result = Operations.sameLanguage(expected, actual); + boolean result = AutomatonTestUtil.sameLanguage(expected, actual); if (result == false) { System.out.println(expected.toDot()); System.out.println(actual.toDot()); diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java index 0e5a3f9fc30..efaa451258b 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestStringsToAutomaton.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Set; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -158,7 +159,7 @@ public class TestStringsToAutomaton extends LuceneTestCase { private static void assertSameAutomaton(Automaton a, Automaton b) { assertEquals(a.getNumStates(), b.getNumStates()); assertEquals(a.getNumTransitions(), b.getNumTransitions()); - assertTrue(Operations.sameLanguage(a, b)); + assertTrue(AutomatonTestUtil.sameLanguage(a, b)); } private List basicTerms() { diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SandboxFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SandboxFacetsExample.java index b6759da115b..5a838954992 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SandboxFacetsExample.java +++ 
b/lucene/demo/src/java/org/apache/lucene/demo/facet/SandboxFacetsExample.java @@ -21,7 +21,6 @@ import static org.apache.lucene.sandbox.facet.ComparableUtils.byAggregatedValue; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Document; @@ -58,7 +57,7 @@ import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder; import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder; import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder; import org.apache.lucene.sandbox.facet.recorders.Reducer; -import org.apache.lucene.search.CollectorOwner; +import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LongValuesSource; @@ -148,9 +147,10 @@ public class SandboxFacetsExample { FacetFieldCollectorManager collectorManager = new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder); - //// (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and - //// ranges, or even two taxonomy facets that use different Category List Field, we can - //// use MultiCollectorManager, e.g.: + // (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and + // ranges, or even two taxonomy facets that use different Category List Field, we can + // use MultiCollectorManager, e.g.: + // // TODO: add a demo for it. // TaxonomyFacetsCutter publishDateCutter = new // TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader); @@ -563,17 +563,13 @@ public class SandboxFacetsExample { // FacetFieldCollectorManager anyway, and leaf cutter are not merged or anything like that. 
FacetFieldCollectorManager publishDayDimensionCollectorManager = new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder); - List> drillSidewaysOwners = - List.of(new CollectorOwner<>(publishDayDimensionCollectorManager)); + List> drillSidewaysManagers = + List.of(publishDayDimensionCollectorManager); //// (3) search // Right now we return the same Recorder we created - so we can ignore results DrillSideways ds = new DrillSideways(searcher, config, taxoReader); - // We must wrap list of drill sideways owner with unmodifiableList to make generics work. - ds.search( - q, - new CollectorOwner<>(drillDownCollectorManager), - Collections.unmodifiableList(drillSidewaysOwners)); + ds.search(q, drillDownCollectorManager, drillSidewaysManagers); //// (4) Get top 10 results by count for Author List facetResults = new ArrayList<>(2); diff --git a/lucene/distribution/binary-release.gradle b/lucene/distribution/binary-release.gradle index 5c03ac3a82a..c3365e961ab 100644 --- a/lucene/distribution/binary-release.gradle +++ b/lucene/distribution/binary-release.gradle @@ -37,9 +37,9 @@ configure(project(":lucene:distribution")) { // Maven-published submodule JARs are part of the binary distribution. // We don't copy their transitive dependencies. - def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p in [ + def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p.path in [ // Placed in a separate folder (module layer conflicts). 
- project(":lucene:test-framework"), + ":lucene:test-framework", ]) } for (Project module : binaryModules) { jars(module, { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java index 2db660e1c5b..951a0997cea 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java @@ -18,7 +18,6 @@ package org.apache.lucene.facet; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,8 +30,8 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.CollectorOwner; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -302,25 +301,13 @@ public class DrillSideways { } } - private static class CallableCollector implements Callable { - private final IndexSearcher searcher; - private final Query query; - private final CollectorOwner collectorOwner; - - private CallableCollector( - IndexSearcher searcher, Query query, CollectorOwner collectorOwner) { - this.searcher = searcher; - this.query = query; - this.collectorOwner = collectorOwner; - } + private record CallableCollector( + IndexSearcher searcher, Query query, CollectorManager collectorManager) + implements Callable { @Override - public Void call() throws Exception { - searcher.search(query, collectorOwner); - // Call getResult to trigger reduce, we don't need to return results because users can access - // them directly from 
collectorOwner - collectorOwner.getResult(); - return null; + public R call() throws Exception { + return searcher.search(query, collectorManager); } } @@ -344,31 +331,30 @@ public class DrillSideways { // Main query FacetsCollectorManager drillDownFacetsCollectorManager = createDrillDownFacetsCollectorManager(); - final CollectorOwner mainCollectorOwner; + final CollectorManager mainCollectorManager; if (drillDownFacetsCollectorManager != null) { // Make sure we populate a facet collector corresponding to the base query if desired: - mainCollectorOwner = - new CollectorOwner<>( - new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager)); + mainCollectorManager = + new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager); } else { - mainCollectorOwner = new CollectorOwner<>(hitCollectorManager); + mainCollectorManager = hitCollectorManager; } // Drill sideways dimensions - final List> drillSidewaysCollectorOwners; + final List> drillSidewaysCollectorManagers; if (query.getDims().isEmpty() == false) { - drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size()); + drillSidewaysCollectorManagers = new ArrayList<>(query.getDims().size()); for (int i = 0; i < query.getDims().size(); i++) { - drillSidewaysCollectorOwners.add( - new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager())); + drillSidewaysCollectorManagers.add(createDrillSidewaysFacetsCollectorManager()); } } else { - drillSidewaysCollectorOwners = null; + drillSidewaysCollectorManagers = null; } // Execute query + final Result result; if (executor != null) { - searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners); + result = searchConcurrently(query, mainCollectorManager, drillSidewaysCollectorManagers); } else { - searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners); + result = searchSequentially(query, mainCollectorManager, drillSidewaysCollectorManagers); } // Collect results @@ -377,12 +363,12 
@@ public class DrillSideways { if (drillDownFacetsCollectorManager != null) { // drill down collected using MultiCollector // Extract the results: - Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult(); + Object[] drillDownResult = (Object[]) result.drillDownResult; facetsCollectorResult = (FacetsCollector) drillDownResult[0]; hitCollectorResult = (R) drillDownResult[1]; } else { facetsCollectorResult = null; - hitCollectorResult = (R) mainCollectorOwner.getResult(); + hitCollectorResult = (R) result.drillDownResult; } // Getting results for drill sideways dimensions (if any) @@ -391,12 +377,11 @@ public class DrillSideways { if (query.getDims().isEmpty() == false) { drillSidewaysDims = query.getDims().keySet().toArray(new String[0]); int numDims = query.getDims().size(); - assert drillSidewaysCollectorOwners != null; - assert drillSidewaysCollectorOwners.size() == numDims; + assert drillSidewaysCollectorManagers != null; + assert drillSidewaysCollectorManagers.size() == numDims; drillSidewaysCollectors = new FacetsCollector[numDims]; for (int dim = 0; dim < numDims; dim++) { - drillSidewaysCollectors[dim] = - (FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult(); + drillSidewaysCollectors[dim] = result.drillSidewaysResults.get(dim); } } else { drillSidewaysDims = null; @@ -414,52 +399,51 @@ public class DrillSideways { /** * Search using DrillDownQuery with custom collectors. This method can be used with any {@link - * CollectorOwner}s. It doesn't return anything because it is expected that you read results from - * provided {@link CollectorOwner}s. + * CollectorManager}s. * - *

To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill - * sideways dimensions. - * - *

Note: use {@link Collections#unmodifiableList(List)} to wrap {@code - * drillSidewaysCollectorOwners} to convince compiler that it is safe to use List here. - * - *

Use {@link MultiCollectorManager} wrapped by {@link CollectorOwner} to collect both hits and - * facets for the entire query and/or for drill-sideways dimensions. - * - *

TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C, - * because we want each dimensions to be able to use their own types. Alternatively, we can use - * typesafe heterogeneous container and provide CollectorManager type for each dimension to this - * method? I do like CollectorOwner approach as it seems more intuitive? + *

Note: Use {@link MultiCollectorManager} to collect both hits and facets for the entire query + * and/or for drill-sideways dimensions. You can also use it to wrap different types of {@link + * CollectorManager} for drill-sideways dimensions. */ - public void search( - final DrillDownQuery query, - CollectorOwner drillDownCollectorOwner, - List> drillSidewaysCollectorOwners) + public Result search( + DrillDownQuery query, + CollectorManager drillDownCollectorManager, + List> drillSidewaysCollectorManagers) throws IOException { - if (drillDownCollectorOwner == null) { + if (drillDownCollectorManager == null) { throw new IllegalArgumentException( "This search method requires client to provide drill down collector manager"); } - if (drillSidewaysCollectorOwners == null) { + if (drillSidewaysCollectorManagers == null) { if (query.getDims().isEmpty() == false) { throw new IllegalArgumentException( - "The query requires not null drillSidewaysCollectorOwners"); + "The query requires not null drillSidewaysCollectorManagers"); } - } else if (drillSidewaysCollectorOwners.size() != query.getDims().size()) { + } else if (drillSidewaysCollectorManagers.size() != query.getDims().size()) { throw new IllegalArgumentException( - "drillSidewaysCollectorOwners size must be equal to number of dimensions in the query."); + "drillSidewaysCollectorManagers size must be equal to number of dimensions in the query."); } if (executor != null) { - searchConcurrently(query, drillDownCollectorOwner, drillSidewaysCollectorOwners); + return searchConcurrently(query, drillDownCollectorManager, drillSidewaysCollectorManagers); } else { - searchSequentially(query, drillDownCollectorOwner, drillSidewaysCollectorOwners); + return searchSequentially(query, drillDownCollectorManager, drillSidewaysCollectorManagers); } } - private void searchSequentially( + /** + * {@link #search(DrillDownQuery, CollectorManager, List)} result. 
It doesn't depend on {@link + * Facets} to allow users to use any type of {@link CollectorManager} for drill-down or + * drill-sideways dimension. + * + * @param drillDownResult result from drill down (main) {@link CollectorManager} + * @param drillSidewaysResults results from drill sideways {@link CollectorManager}s + */ + public record Result(T drillDownResult, List drillSidewaysResults) {} + + private Result searchSequentially( final DrillDownQuery query, - final CollectorOwner drillDownCollectorOwner, - final List> drillSidewaysCollectorOwners) + final CollectorManager drillDownCollectorManager, + final List> drillSidewaysCollectorManagers) throws IOException { Map drillDownDims = query.getDims(); @@ -467,9 +451,7 @@ public class DrillSideways { if (drillDownDims.isEmpty()) { // There are no drill-down dims, so there is no // drill-sideways to compute: - searcher.search(query, drillDownCollectorOwner); - drillDownCollectorOwner.getResult(); - return; + return new Result<>(searcher.search(query, drillDownCollectorManager), null); } Query baseQuery = query.getBaseQuery(); @@ -480,59 +462,60 @@ public class DrillSideways { } Query[] drillDownQueries = query.getDrillDownQueries(); - DrillSidewaysQuery dsq = - new DrillSidewaysQuery( - baseQuery, - // drillDownCollectorOwner, - // Don't pass drill down collector because drill down is collected by IndexSearcher - // itself. - // TODO: deprecate drillDown collection in DrillSidewaysQuery? - null, - drillSidewaysCollectorOwners, - drillDownQueries, - scoreSubDocsAtOnce()); + DrillSidewaysQuery dsq = + new DrillSidewaysQuery<>( + baseQuery, drillSidewaysCollectorManagers, drillDownQueries, scoreSubDocsAtOnce()); - searcher.search(dsq, drillDownCollectorOwner); - // This method doesn't return results as each dimension might have its own result type. - // But we call getResult to trigger results reducing, so that users don't have to worry about - // it. 
- drillDownCollectorOwner.getResult(); - if (drillSidewaysCollectorOwners != null) { - for (CollectorOwner sidewaysOwner : drillSidewaysCollectorOwners) { - sidewaysOwner.getResult(); + T collectorResult = searcher.search(dsq, drillDownCollectorManager); + List drillSidewaysResults = new ArrayList<>(drillDownDims.size()); + assert drillSidewaysCollectorManagers != null + : "Case without drill sideways dimensions is handled above"; + int numSlices = dsq.managedDrillSidewaysCollectors.size(); + for (int dim = 0; dim < drillDownDims.size(); dim++) { + List collectorsForDim = new ArrayList<>(numSlices); + for (int slice = 0; slice < numSlices; slice++) { + collectorsForDim.add(dsq.managedDrillSidewaysCollectors.get(slice).get(dim)); } + drillSidewaysResults.add( + dim, drillSidewaysCollectorManagers.get(dim).reduce(collectorsForDim)); } + return new Result<>(collectorResult, drillSidewaysResults); } - private void searchConcurrently( + private Result searchConcurrently( final DrillDownQuery query, - final CollectorOwner drillDownCollectorOwner, - final List> drillSidewaysCollectorOwners) - throws IOException { + final CollectorManager drillDownCollectorManager, + final List> drillSidewaysCollectorManagers) { final Map drillDownDims = query.getDims(); - final List callableCollectors = new ArrayList<>(drillDownDims.size() + 1); + final CallableCollector drillDownCallableCollector = + new CallableCollector<>(searcher, query, drillDownCollectorManager); + final List> drillSidewaysCallableCollectors = + new ArrayList<>(drillDownDims.size()); - callableCollectors.add(new CallableCollector(searcher, query, drillDownCollectorOwner)); int i = 0; final Query[] filters = query.getDrillDownQueries(); for (String dim : drillDownDims.keySet()) { - callableCollectors.add( - new CallableCollector( + drillSidewaysCallableCollectors.add( + new CallableCollector<>( searcher, getDrillDownQuery(query, filters, dim), - drillSidewaysCollectorOwners.get(i))); + 
drillSidewaysCollectorManagers.get(i))); i++; } try { - // Run the query pool - final List> futures = executor.invokeAll(callableCollectors); + final Future drillDownFuture = executor.submit(drillDownCallableCollector); + final List> drillSidewaysFutures = + executor.invokeAll(drillSidewaysCallableCollectors); - // Wait for results. We don't read the results as they are collected by CollectorOwners - for (i = 0; i < futures.size(); i++) { - futures.get(i).get(); + T collectorResult = drillDownFuture.get(); + List drillSidewaysResults = new ArrayList<>(drillDownDims.size()); + + for (i = 0; i < drillSidewaysFutures.size(); i++) { + drillSidewaysResults.add(i, drillSidewaysFutures.get(i).get()); } + return new Result<>(collectorResult, drillSidewaysResults); } catch (InterruptedException e) { throw new ThreadInterruptedException(e); } catch (ExecutionException e) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java index aa670914d1a..0e61b1a0643 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java @@ -17,19 +17,20 @@ package org.apache.lucene.facet; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Objects; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.CollectorOwner; +import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Query; import 
org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; @@ -41,12 +42,12 @@ import org.apache.lucene.search.Weight; // TODO change the way DrillSidewaysScorer is used, this query does not work // with filter caching -class DrillSidewaysQuery extends Query { +class DrillSidewaysQuery extends Query { final Query baseQuery; - final CollectorOwner drillDownCollectorOwner; - final List> drillSidewaysCollectorOwners; + final List> drillSidewaysCollectorManagers; + final List> managedDrillSidewaysCollectors; final Query[] drillDownQueries; @@ -58,15 +59,36 @@ class DrillSidewaysQuery extends Query { */ DrillSidewaysQuery( Query baseQuery, - CollectorOwner drillDownCollectorOwner, - List> drillSidewaysCollectorOwners, + List> drillSidewaysCollectorManagers, + Query[] drillDownQueries, + boolean scoreSubDocsAtOnce) { + // Note that the "managed" collector lists are synchronized here since bulkScorer() + // can be invoked concurrently and needs to remain thread-safe. We're OK with synchronizing + // on the whole list as contention is expected to remain very low: + this( + baseQuery, + drillSidewaysCollectorManagers, + Collections.synchronizedList(new ArrayList<>()), + drillDownQueries, + scoreSubDocsAtOnce); + } + + /** + * Needed for {@link Query#rewrite(IndexSearcher)}. Ensures the same "managed" lists get used + * since {@link DrillSideways} accesses references to these through the original {@code + * DrillSidewaysQuery}. 
+ */ + private DrillSidewaysQuery( + Query baseQuery, + List> drillSidewaysCollectorManagers, + List> managedDrillSidewaysCollectors, Query[] drillDownQueries, boolean scoreSubDocsAtOnce) { this.baseQuery = Objects.requireNonNull(baseQuery); - this.drillDownCollectorOwner = drillDownCollectorOwner; - this.drillSidewaysCollectorOwners = drillSidewaysCollectorOwners; + this.drillSidewaysCollectorManagers = drillSidewaysCollectorManagers; this.drillDownQueries = drillDownQueries; this.scoreSubDocsAtOnce = scoreSubDocsAtOnce; + this.managedDrillSidewaysCollectors = managedDrillSidewaysCollectors; } @Override @@ -87,10 +109,10 @@ class DrillSidewaysQuery extends Query { if (newQuery == baseQuery) { return super.rewrite(indexSearcher); } else { - return new DrillSidewaysQuery( + return new DrillSidewaysQuery<>( newQuery, - drillDownCollectorOwner, - drillSidewaysCollectorOwners, + drillSidewaysCollectorManagers, + managedDrillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce); } @@ -124,14 +146,8 @@ class DrillSidewaysQuery extends Query { int drillDownCount = drillDowns.length; - Collector drillDownCollector; - final LeafCollector drillDownLeafCollector; - if (drillDownCollectorOwner != null) { - drillDownCollector = drillDownCollectorOwner.newCollector(); - drillDownLeafCollector = drillDownCollector.getLeafCollector(context); - } else { - drillDownLeafCollector = null; - } + List sidewaysCollectors = new ArrayList<>(drillDownCount); + managedDrillSidewaysCollectors.add(sidewaysCollectors); DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDownCount]; @@ -144,7 +160,8 @@ class DrillSidewaysQuery extends Query { scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty()); } - Collector sidewaysCollector = drillSidewaysCollectorOwners.get(dim).newCollector(); + K sidewaysCollector = drillSidewaysCollectorManagers.get(dim).newCollector(); + sidewaysCollectors.add(dim, sidewaysCollector); dims[dim] = new 
DrillSidewaysScorer.DocsAndCost( @@ -155,9 +172,6 @@ class DrillSidewaysQuery extends Query { // a null scorer in this case, but we need to make sure #finish gets called on all facet // collectors since IndexSearcher won't handle this for us: if (baseScorerSupplier == null || nullCount > 1) { - if (drillDownLeafCollector != null) { - drillDownLeafCollector.finish(); - } for (DrillSidewaysScorer.DocsAndCost dim : dims) { dim.sidewaysLeafCollector.finish(); } @@ -177,11 +191,7 @@ class DrillSidewaysQuery extends Query { @Override public BulkScorer bulkScorer() throws IOException { return new DrillSidewaysScorer( - context, - baseScorerSupplier.get(Long.MAX_VALUE), - drillDownLeafCollector, - dims, - scoreSubDocsAtOnce); + context, baseScorerSupplier.get(Long.MAX_VALUE), dims, scoreSubDocsAtOnce); } @Override @@ -212,9 +222,8 @@ class DrillSidewaysQuery extends Query { final int prime = 31; int result = classHash(); result = prime * result + Objects.hashCode(baseQuery); - result = prime * result + Objects.hashCode(drillDownCollectorOwner); result = prime * result + Arrays.hashCode(drillDownQueries); - result = prime * result + Objects.hashCode(drillSidewaysCollectorOwners); + result = prime * result + Objects.hashCode(drillSidewaysCollectorManagers); return result; } @@ -223,10 +232,9 @@ class DrillSidewaysQuery extends Query { return sameClassAs(other) && equalsTo(getClass().cast(other)); } - private boolean equalsTo(DrillSidewaysQuery other) { + private boolean equalsTo(DrillSidewaysQuery other) { return Objects.equals(baseQuery, other.baseQuery) - && Objects.equals(drillDownCollectorOwner, other.drillDownCollectorOwner) && Arrays.equals(drillDownQueries, other.drillDownQueries) - && Objects.equals(drillSidewaysCollectorOwners, other.drillSidewaysCollectorOwners); + && Objects.equals(drillSidewaysCollectorManagers, other.drillSidewaysCollectorManagers); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 
b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java index 5ff9c6420ad..3a5bfc3c464 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java @@ -45,8 +45,6 @@ class DrillSidewaysScorer extends BulkScorer { // private static boolean DEBUG = false; - private final LeafCollector drillDownLeafCollector; - private final DocsAndCost[] dims; // DrillDown DocsEnums: @@ -68,7 +66,6 @@ class DrillSidewaysScorer extends BulkScorer { DrillSidewaysScorer( LeafReaderContext context, Scorer baseScorer, - LeafCollector drillDownLeafCollector, DocsAndCost[] dims, boolean scoreSubDocsAtOnce) { this.dims = dims; @@ -81,7 +78,6 @@ class DrillSidewaysScorer extends BulkScorer { } else { this.baseApproximation = baseIterator; } - this.drillDownLeafCollector = drillDownLeafCollector; this.scoreSubDocsAtOnce = scoreSubDocsAtOnce; } @@ -709,9 +705,6 @@ class DrillSidewaysScorer extends BulkScorer { // } collector.collect(collectDocID); - if (drillDownLeafCollector != null) { - drillDownLeafCollector.collect(collectDocID); - } // TODO: we could "fix" faceting of the sideways counts // to do this "union" (of the drill down hits) in the @@ -725,9 +718,6 @@ class DrillSidewaysScorer extends BulkScorer { private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException { collector.collect(collectDocID); - if (drillDownLeafCollector != null) { - drillDownLeafCollector.collect(collectDocID); - } // Tally sideways count: dim.sidewaysLeafCollector.collect(collectDocID); @@ -735,9 +725,6 @@ class DrillSidewaysScorer extends BulkScorer { private void collectHit(LeafCollector collector, List dims) throws IOException { collector.collect(collectDocID); - if (drillDownLeafCollector != null) { - drillDownLeafCollector.collect(collectDocID); - } // Tally sideways counts: for (DocsAndCost dim : dims) { @@ -756,9 +743,6 @@ class DrillSidewaysScorer extends BulkScorer 
{ // Note: We _only_ call #finish on the facets collectors we're managing here, but not the // "main" collector. This is because IndexSearcher handles calling #finish on the main // collector. - if (drillDownLeafCollector != null) { - drillDownLeafCollector.finish(); - } for (DocsAndCost dim : dims) { dim.sidewaysLeafCollector.finish(); } @@ -766,9 +750,6 @@ class DrillSidewaysScorer extends BulkScorer { private void setScorer(LeafCollector mainCollector, Scorable scorer) throws IOException { mainCollector.setScorer(scorer); - if (drillDownLeafCollector != null) { - drillDownLeafCollector.setScorer(scorer); - } for (DocsAndCost dim : dims) { dim.sidewaysLeafCollector.setScorer(scorer); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/CharBlockArray.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/CharBlockArray.java index f358ed41836..9a514486bad 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/CharBlockArray.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/CharBlockArray.java @@ -16,15 +16,8 @@ */ package org.apache.lucene.facet.taxonomy.writercache; -import java.io.IOException; -import java.io.InputStream; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.OutputStream; -import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.util.SuppressForbidden; /** * Similar to {@link StringBuilder}, but with a more efficient growing strategy. 
This class uses @@ -32,15 +25,11 @@ import org.apache.lucene.util.SuppressForbidden; * * @lucene.experimental */ -class CharBlockArray implements Appendable, Serializable, CharSequence { - - private static final long serialVersionUID = 1L; +class CharBlockArray implements Appendable, CharSequence { private static final int DefaultBlockSize = 32 * 1024; // 32 KB default size - static final class Block implements Serializable, Cloneable { - private static final long serialVersionUID = 1L; - + static final class Block implements Cloneable { final char[] chars; int length; @@ -185,34 +174,4 @@ class CharBlockArray implements Appendable, Serializable, CharSequence { } return sb.toString(); } - - @SuppressForbidden( - reason = "TODO: don't use java serialization here, inefficient and unnecessary") - void flush(OutputStream out) throws IOException { - ObjectOutputStream oos = null; - try { - oos = new ObjectOutputStream(out); - oos.writeObject(this); - oos.flush(); - } finally { - if (oos != null) { - oos.close(); - } - } - } - - @SuppressForbidden( - reason = "TODO: don't use java serialization here, inefficient and unnecessary") - public static CharBlockArray open(InputStream in) throws IOException, ClassNotFoundException { - ObjectInputStream ois = null; - try { - ois = new ObjectInputStream(in); - CharBlockArray a = (CharBlockArray) ois.readObject(); - return a; - } finally { - if (ois != null) { - ois.close(); - } - } - } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index 5645f191549..fa82e921eb8 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -284,7 +284,6 @@ public class TestDrillSideways extends FacetTestCase { Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f); Scorer dimScorer = dimWeight.scorer(ctx); - 
FacetsCollector baseFC = new FacetsCollector(); FacetsCollector dimFC = new FacetsCollector(); DrillSidewaysScorer.DocsAndCost docsAndCost = new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC.getLeafCollector(ctx)); @@ -311,7 +310,6 @@ public class TestDrillSideways extends FacetTestCase { new DrillSidewaysScorer( ctx, baseScorer, - baseFC.getLeafCollector(ctx), new DrillSidewaysScorer.DocsAndCost[] {docsAndCost}, scoreSubDocsAtOnce); expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null)); @@ -321,7 +319,6 @@ public class TestDrillSideways extends FacetTestCase { // both our base and sideways dim facets collectors. What we really want to test here is // that the matching docs are still correctly present and populated after an early // termination occurs (i.e., #finish is properly called in that scenario): - assertEquals(1, baseFC.getMatchingDocs().size()); assertEquals(1, dimFC.getMatchingDocs().size()); } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java index c452a01292e..bd2d091a2fd 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java @@ -242,10 +242,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { public void testIntAssociationRandom() throws Exception { - FacetsCollector fc = new FacetsCollector(); - IndexSearcher searcher = newSearcher(reader); - searcher.search(new TermQuery(new Term("match", "yes")), fc); + FacetsCollector fc = + searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager()); Map expected; Facets facets; @@ -332,10 +331,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { public void testFloatAssociationRandom() throws Exception { - FacetsCollector fc 
= new FacetsCollector(); - IndexSearcher searcher = newSearcher(reader); - searcher.search(new TermQuery(new Term("match", "yes")), fc); + FacetsCollector fc = + searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager()); Map expected; Facets facets; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java index f918ed5e150..3c3ae4938a0 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java @@ -16,14 +16,10 @@ */ package org.apache.lucene.facet.taxonomy.writercache; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; import java.nio.ByteBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; import org.apache.lucene.facet.FacetTestCase; public class TestCharBlockArray extends FacetTestCase { @@ -89,19 +85,6 @@ public class TestCharBlockArray extends FacetTestCase { } assertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array); - - Path tempDir = createTempDir("growingchararray"); - Path f = tempDir.resolve("GrowingCharArrayTest.tmp"); - BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(f)); - array.flush(out); - out.flush(); - out.close(); - - BufferedInputStream in = new BufferedInputStream(Files.newInputStream(f)); - array = CharBlockArray.open(in); - assertEqualsInternal( - "GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array); - in.close(); } private static void assertEqualsInternal( diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java 
b/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java index 729feb19b9c..f781fc6a29e 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java @@ -95,7 +95,7 @@ class TermsQuery extends MultiTermQuery implements Accountable { } @Override - public long getTermsCount() throws IOException { + public long getTermsCount() { return terms.size(); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java index 7b9c933c167..57d5674b254 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/Intervals.java @@ -238,6 +238,7 @@ public final class Intervals { */ public static IntervalsSource regexp(BytesRef regexp, int maxExpansions) { Automaton automaton = new RegExp(new Term("", regexp).text()).toAutomaton(); + automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); CompiledAutomaton ca = new CompiledAutomaton(automaton, false, true, false); return new MultiTermIntervalsSource(ca, maxExpansions, regexp.utf8ToString()); } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java index d9141882c94..71228e4178f 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java @@ -447,4 +447,24 @@ public class TestIntervalQuery extends LuceneTestCase { field, or(term("XXX"), containing(extend(term("message"), 0, 10), term("intend")))); checkHits(q, new int[] {}); } + + public void testEquality() { + assertEquals( + new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo"))), + new 
IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo")))); + assertEquals( + new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1)), + new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1))); + assertEquals( + new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1)), + new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1))); + assertEquals( + new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt"))), + new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt")))); + assertEquals( + new IntervalQuery( + "f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)), + new IntervalQuery( + "f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true))); + } } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java index 944530937ce..43fd8fb7fb8 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java @@ -1187,4 +1187,27 @@ public class TestIntervals extends LuceneTestCase { checkVisits(source, 1); } + + // basic test for equality and inequality of instances created by the factories + public void testEquality() { + assertEquals(Intervals.term("wibble"), Intervals.term("wibble")); + assertEquals(Intervals.prefix(new BytesRef("p"), 1), Intervals.prefix(new BytesRef("p"), 1)); + assertEquals(Intervals.fuzzyTerm("kot", 1), Intervals.fuzzyTerm("kot", 1)); + assertEquals(Intervals.regexp(new BytesRef(".*ot")), Intervals.regexp(new BytesRef(".*ot"))); + assertEquals( + Intervals.wildcard(new BytesRef("*.txt")), Intervals.wildcard(new BytesRef("*.txt"))); + assertEquals( + Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true), + Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)); + + assertNotEquals(Intervals.term("wibble"), Intervals.term("wobble")); 
+ assertNotEquals(Intervals.prefix(new BytesRef("p"), 1), Intervals.prefix(new BytesRef("b"), 1)); + assertNotEquals(Intervals.fuzzyTerm("kot", 1), Intervals.fuzzyTerm("kof", 1)); + assertNotEquals(Intervals.regexp(new BytesRef(".*ot")), Intervals.regexp(new BytesRef(".*at"))); + assertNotEquals( + Intervals.wildcard(new BytesRef("*.txt")), Intervals.wildcard(new BytesRef("*.tat"))); + assertNotEquals( + Intervals.range(new BytesRef("warm"), new BytesRef("hot"), true, true), + Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)); + } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java index 57c323e18a9..c61eaae33d0 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java @@ -50,7 +50,6 @@ import org.apache.lucene.sandbox.facet.cutters.ranges.LongRangeFacetCutter; import org.apache.lucene.sandbox.facet.labels.OrdToLabel; import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel; import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder; -import org.apache.lucene.search.CollectorOwner; import org.apache.lucene.search.DoubleValues; import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.Explanation; @@ -538,7 +537,7 @@ public class TestRangeFacet extends SandboxFacetTestCase { ////// First search, no drill-downs: DrillDownQuery ddq = new DrillDownQuery(config); - ds.search(ddq, new CollectorOwner<>(collectorManager), List.of()); + ds.search(ddq, collectorManager, List.of()); // assertEquals(100, dsr.hits.totalHits.value); assertEquals( @@ -556,10 +555,7 @@ public class TestRangeFacet extends SandboxFacetTestCase { dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder); ddq = new DrillDownQuery(config); ddq.add("dim", "b"); - ds.search( - ddq, - new 
CollectorOwner<>(fieldCollectorManager), - List.of(new CollectorOwner<>(dimCollectorManager))); + ds.search(ddq, fieldCollectorManager, List.of(dimCollectorManager)); // assertEquals(75, dsr.hits.totalHits.value); assertEquals( @@ -577,10 +573,7 @@ public class TestRangeFacet extends SandboxFacetTestCase { dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder); ddq = new DrillDownQuery(config); ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L)); - ds.search( - ddq, - new CollectorOwner<>(dimCollectorManager), - List.of(new CollectorOwner<>(fieldCollectorManager))); + ds.search(ddq, dimCollectorManager, List.of(fieldCollectorManager)); // assertEquals(11, dsr.hits.totalHits.value); assertEquals( @@ -1629,14 +1622,12 @@ public class TestRangeFacet extends SandboxFacetTestCase { countRecorder = new CountFacetRecorder(); - CollectorOwner totalHitsCollectorOwner = - new CollectorOwner<>(DummyTotalHitCountCollector.createManager()); - CollectorOwner drillSidewaysCollectorOwner = - new CollectorOwner<>( - new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder)); - ds.search(ddq, totalHitsCollectorOwner, List.of(drillSidewaysCollectorOwner)); - assertEquals(1, totalHitsCollectorOwner.getResult().intValue()); - drillSidewaysCollectorOwner.getResult(); + DrillSideways.Result result = + ds.search( + ddq, + DummyTotalHitCountCollector.createManager(), + List.of(new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder))); + assertEquals(1, result.drillDownResult().intValue()); assertEquals( "dim=field path=[] value=-2147483648 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", getAllSortByOrd(getRangeOrdinals(ranges), countRecorder, "field", ordToLabel).toString()); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/geo/BaseGeoPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/geo/BaseGeoPointTestCase.java index d371292324d..5c0c134fef3 
100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/geo/BaseGeoPointTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/geo/BaseGeoPointTestCase.java @@ -39,6 +39,7 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.geo.Circle; import org.apache.lucene.geo.Component2D; +import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.LatLonGeometry; import org.apache.lucene.geo.Polygon; @@ -1751,4 +1752,41 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase { newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20); assertEquals(3, td.totalHits.value); } + + public void testNarrowPolygonCloseToNorthPole() throws Exception { + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setMergeScheduler(new SerialMergeScheduler()); + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, iwc); + + // index point closes to Lat 90 + Document doc = new Document(); + final int base = Integer.MAX_VALUE; + addPointToDoc( + FIELD_NAME, + doc, + GeoEncodingUtils.decodeLatitude(base - 2), + GeoEncodingUtils.decodeLongitude(base - 2)); + w.addDocument(doc); + w.flush(); + + // query testing + final IndexReader reader = DirectoryReader.open(w); + final IndexSearcher s = newSearcher(reader); + + double minLat = GeoEncodingUtils.decodeLatitude(base - 3); + double maxLat = GeoEncodingUtils.decodeLatitude(base); + double minLon = GeoEncodingUtils.decodeLongitude(base - 3); + double maxLon = GeoEncodingUtils.decodeLongitude(base); + + Query query = + newPolygonQuery( + FIELD_NAME, + new Polygon( + new double[] {minLat, minLat, maxLat, maxLat, minLat}, + new double[] {minLon, maxLon, maxLon, minLon, minLon})); + + assertEquals(1, s.count(query)); + IOUtils.close(w, reader, dir); + } } diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java index 38819479cfc..df2731f3cb4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/automaton/AutomatonTestUtil.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.tests.util.automaton; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.BitSet; import java.util.HashMap; @@ -33,6 +34,7 @@ import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; +import org.apache.lucene.util.automaton.StatePair; import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.apache.lucene.util.automaton.Transition; @@ -533,4 +535,82 @@ public class AutomatonTestUtil { assert a.isDeterministic() == true; return true; } + + /** + * Returns true if these two automata accept exactly the same language. This is a costly + * computation! Both automata must be determinized and have no dead states! + */ + public static boolean sameLanguage(Automaton a1, Automaton a2) { + if (a1 == a2) { + return true; + } + return subsetOf(a2, a1) && subsetOf(a1, a2); + } + + /** + * Returns true if the language of a1 is a subset of the language of a2. + * Both automata must be determinized and must have no dead states. + * + *
<p>
Complexity: quadratic in number of states. + */ + public static boolean subsetOf(Automaton a1, Automaton a2) { + if (a1.isDeterministic() == false) { + throw new IllegalArgumentException("a1 must be deterministic"); + } + if (a2.isDeterministic() == false) { + throw new IllegalArgumentException("a2 must be deterministic"); + } + assert Operations.hasDeadStatesFromInitial(a1) == false; + assert Operations.hasDeadStatesFromInitial(a2) == false; + if (a1.getNumStates() == 0) { + // Empty language is always a subset of any other language + return true; + } else if (a2.getNumStates() == 0) { + return Operations.isEmpty(a1); + } + + // TODO: cutover to iterators instead + Transition[][] transitions1 = a1.getSortedTransitions(); + Transition[][] transitions2 = a2.getSortedTransitions(); + ArrayDeque<StatePair> worklist = new ArrayDeque<>(); + HashSet<StatePair> visited = new HashSet<>(); + StatePair p = new StatePair(0, 0); + worklist.add(p); + visited.add(p); + while (worklist.size() > 0) { + p = worklist.removeFirst(); + if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) { + return false; + } + Transition[] t1 = transitions1[p.s1]; + Transition[] t2 = transitions2[p.s2]; + for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) { + while (b2 < t2.length && t2[b2].max < t1[n1].min) { + b2++; + } + int min1 = t1[n1].min, max1 = t1[n1].max; + + for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) { + if (t2[n2].min > min1) { + return false; + } + if (t2[n2].max < Character.MAX_CODE_POINT) { + min1 = t2[n2].max + 1; + } else { + min1 = Character.MAX_CODE_POINT; + max1 = Character.MIN_CODE_POINT; + } + StatePair q = new StatePair(t1[n1].dest, t2[n2].dest); + if (!visited.contains(q)) { + worklist.add(q); + visited.add(q); + } + } + if (min1 <= max1) { + return false; + } + } + } + return true; + } } diff --git a/versions.toml b/versions.toml index 1a2c52c1f38..96dfc797080 100644 --- a/versions.toml +++ b/versions.toml @@ -9,17 +9,17 @@ errorprone = "2.18.0" flexmark = "0.61.24" # @keep
This is GJF version for spotless/ tidy. googleJavaFormat = "1.23.0" -groovy = "3.0.21" +groovy = "4.0.22" hamcrest = "2.2" icu4j = "74.2" javacc = "7.0.12" jflex = "1.8.2" -jgit = "5.13.1.202206130422-r" +jgit = "6.10.0.202406032230-r" jmh = "1.37" jts = "1.17.0" junit = "4.13.1" # @keep Minimum gradle version to run the build -minGradle = "8.8" +minGradle = "8.10" # @keep This is the minimum required Java version. minJava = "21" morfologik = "2.1.9" @@ -49,7 +49,7 @@ flexmark-ext-abbreviation = { module = "com.vladsch.flexmark:flexmark-ext-abbrev flexmark-ext-attributes = { module = "com.vladsch.flexmark:flexmark-ext-attributes", version.ref = "flexmark" } flexmark-ext-autolink = { module = "com.vladsch.flexmark:flexmark-ext-autolink", version.ref = "flexmark" } flexmark-ext-tables = { module = "com.vladsch.flexmark:flexmark-ext-tables", version.ref = "flexmark" } -groovy = { module = "org.codehaus.groovy:groovy-all", version.ref = "groovy" } +groovy = { module = "org.apache.groovy:groovy-all", version.ref = "groovy" } hamcrest = { module = "org.hamcrest:hamcrest", version.ref = "hamcrest" } icu4j = { module = "com.ibm.icu:icu4j", version.ref = "icu4j" } javacc = { module = "net.java.dev.javacc:javacc", version.ref = "javacc" }