commit 1cb896a985 — Merge branch 'apache:main' into bpv21_main
Author: expani1729
Date: 2024-09-06 20:03:20 +05:30 (committed by GitHub, GPG key ID B5690EEEBB952194)
54 changed files with 679 additions and 751 deletions

---- next file ----

@@ -30,7 +30,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest ]
-        java-version: [ '22' ]
+        java-version: [ '23-ea' ]
         uses-alt-java: [ true, false ]
     runs-on: ${{ matrix.os }}
@@ -61,7 +61,16 @@ jobs:
           # https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-environment-variable
           echo "RUNTIME_JAVA_HOME=${{ env.ALT_JAVA_DIR }}" >> "$GITHUB_ENV"
-      - run: ./gradlew -p lucene/core check -x test
+      - name: ./gradlew tidy
+        run: |
+          ./gradlew tidy
+          if [ ! -z "$(git status --porcelain)" ]; then
+            echo ":warning: **tidy left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
+            echo '```' >> $GITHUB_STEP_SUMMARY
+            git status --porcelain >> $GITHUB_STEP_SUMMARY
+            echo '```' >> $GITHUB_STEP_SUMMARY
+            git reset --hard && git clean -xfd .
+          fi
       - name: ./gradlew regenerate
         run: |
@@ -69,7 +78,7 @@ jobs:
           sudo apt-get install libwww-perl
           ./gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
           if [ ! -z "$(git status --porcelain)" ]; then
-            echo ":warning: **regenerateleft local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
+            echo ":warning: **regenerate left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
             echo '```' >> $GITHUB_STEP_SUMMARY
             git status --porcelain >> $GITHUB_STEP_SUMMARY
             echo '```' >> $GITHUB_STEP_SUMMARY
@@ -79,8 +88,7 @@ jobs:
       - run: ./gradlew testOpts
       - run: ./gradlew helpWorkflow
       - run: ./gradlew licenses updateLicenses
-      - run: ./gradlew tidy
-      - run: ./gradlew check -x test
+      - run: ./gradlew check -x test -Pvalidation.git.failOnModified=false
       - run: ./gradlew assembleRelease mavenToLocal
       # Conserve resources: only run these in non-alt-java mode.

---- next file ----

@@ -18,7 +18,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest ]
-        java-version: [ '21', '22' ]
+        java-version: [ '21', '22', '23-ea' ]
     runs-on: ${{ matrix.os }}

---- next file ----

@@ -60,8 +60,8 @@ public class WrapperDownloader {
   public static void checkVersion() {
     int major = Runtime.version().feature();
-    if (major != 21 && major != 22) {
-      throw new IllegalStateException("java version must be 21 or 22, your version: " + major);
+    if (major != 21 && major != 22 && major != 23) {
+      throw new IllegalStateException("java version must be 21, 22 or 23, your version: " + major);
     }
   }

---- next file ----

@@ -32,7 +32,7 @@ allprojects {
     missingdoclet "org.apache.lucene.tools:missing-doclet"
   }

-  ext {
+  project.ext {
     relativeDocPath = project.path.replaceFirst(/:\w+:/, "").replace(':', '/')
   }

---- next file ----

@@ -33,7 +33,7 @@ configure(project(":lucene:analysis:kuromoji")) {
   apply plugin: deps.plugins.undercouch.download.get().pluginId

   plugins.withType(JavaPlugin) {
-    ext {
+    project.ext {
       targetDir = file("src/resources")
     }

---- next file ----

@@ -33,7 +33,7 @@ configure(project(":lucene:analysis:nori")) {
   apply plugin: deps.plugins.undercouch.download.get().pluginId

   plugins.withType(JavaPlugin) {
-    ext {
+    project.ext {
       targetDir = file("src/resources")
     }

---- next file ----

@@ -27,7 +27,7 @@ def beastingMode = gradle.startParameter.taskNames.any{ name -> name == 'beast'
 allprojects {
   plugins.withType(JavaPlugin) {
-    ext {
+    project.ext {
       testOptions += [
           [propName: 'tests.dups', value: 0, description: "Reiterate runs of entire test suites ('beast' task)."]
       ]

---- next file ----

@@ -19,7 +19,7 @@ def recordings = files()
 allprojects {
   plugins.withType(JavaPlugin) {
-    ext {
+    project.ext {
       testOptions += [
           [propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."]
       ]

---- next file ----

@@ -62,7 +62,7 @@ allprojects {
 // Configure test property defaults and their descriptions.
 allprojects {
   plugins.withType(JavaPlugin) {
-    ext {
+    project.ext {
       String randomVectorSize = RandomPicks.randomFrom(new Random(projectSeedLong), ["default", "128", "256", "512"])
       testOptions += [
           // seed, repetition and amplification.
@@ -135,14 +135,14 @@ allprojects {
     }

     afterEvaluate {
-      ext.testOptionsResolved = testOptions.findAll { opt ->
+      project.ext.testOptionsResolved = testOptions.findAll { opt ->
        propertyOrDefault(opt.propName, opt.value) != null
       }.collectEntries { opt ->
         [(opt.propName): Objects.toString(resolvedTestOption(opt.propName))]
       }

       // Compute the "reproduce with" string.
-      ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
+      project.ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
         if (opt["includeInReproLine"] == false) {
           return false
         }

---- next file ----

@@ -22,7 +22,7 @@ def allSuites = []
 allprojects {
   plugins.withType(JavaPlugin) {
-    ext {
+    project.ext {
       testOptions += [
           [propName: 'tests.slowestTests', value: true, description: "Print the summary of the slowest tests."],
           [propName: 'tests.slowestSuites', value: true, description: "Print the summary of the slowest suites."]

---- next file ----

@@ -74,21 +74,6 @@ configure(rootProject) {
         logger.warn("WARNING: Directory is not a valid git checkout (won't check dirty files): ${rootProject.projectDir}")
       }
     } else {
-      // git ignores any folders which are empty (this includes folders with recursively empty sub-folders).
-      def untrackedNonEmptyFolders = status.untrackedFolders.findAll { path ->
-        File location = file("${rootProject.projectDir}/${path}")
-        boolean hasFiles = false
-        Files.walkFileTree(location.toPath(), new SimpleFileVisitor<Path>() {
-          @Override
-          FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
-            hasFiles = true
-            // Terminate early.
-            return FileVisitResult.TERMINATE
-          }
-        })
-        return hasFiles
-      }
-
       def offenders = [
           // Exclude staged changes. These are fine in precommit.
           // "(added)": status.added,
@@ -97,8 +82,7 @@ configure(rootProject) {
           "(conflicting)": status.conflicting,
           "(missing)": status.missing,
           "(modified)": status.modified,
-          "(untracked)": status.untracked,
-          "(untracked non-empty dir)": untrackedNonEmptyFolders
+          "(untracked)": status.untracked
       ].collectMany { fileStatus, files ->
         files.collect {file -> " - ${file} ${fileStatus}" }
       }.sort()

---- next file ----

@@ -1 +1 @@
-cb0da6751c2b753a16ac168bb354870ebb1e162e9083f116729cec9c781156b8
+2db75c40782f5e8ba1fc278a5574bab070adccb2d21ca5a6e5ed840888448046

---- next file ----

@@ -1 +1 @@
-8.8.0
+8.10.0

---- next file ----

@@ -1,6 +1,6 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
 networkTimeout=10000
 validateDistributionUrl=true
 zipStoreBase=GRADLE_USER_HOME

---- next file ----

@@ -112,6 +112,8 @@ API Changes
 * GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)

+* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)
+
 New Features
 ---------------------
@@ -167,6 +169,8 @@ Improvements
 * GITHUB#12172: Update Romanian stopwords list to include the modern unicode forms. (Trey Jones)

+* GITHUB#13707: Improve Operations.isTotal() to work with non-minimal automata. (Dawid Weiss, Robert Muir)
+
 Optimizations
 ---------------------
@@ -266,6 +270,8 @@ Build
 * GITHUB#13649: Fix eclipse ide settings generation #13649 (Uwe Schindler, Dawid Weiss)

+* GITHUB#13698: Upgrade to gradle 8.10 (Dawid Weiss)
+
 ======================== Lucene 9.12.0 =======================

 API Changes
@@ -284,10 +290,7 @@ API Changes
 * GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
   MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)

-* GITHUB#13568: Add CollectorOwner class that wraps CollectorManager, and handles list of Collectors and results.
-  Add IndexSearcher#search method that takes CollectorOwner. (Egor Potemkin)
-
-* GITHUB#13568: Add DrillSideways#search method that supports any collector types for any drill-sideways dimensions
+* GITHUB#13568: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
   or drill-down. (Egor Potemkin)

 New Features
@@ -408,6 +411,9 @@ Bug Fixes
 * GITHUB#13691: Fix incorrect exponent value in explain of SigmoidFunction. (Owais Kazi)

+* GITHUB#13703: Fix bug in LatLonPoint queries where narrow polygons close to latitude 90 don't
+  match any points due to an Integer overflow. (Ignacio Vera)
+
 Build
 ---------------------

---- next file ----

@@ -793,3 +793,7 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont
 Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
 Scorer.

+### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter
+
+`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class`
+replaces it by allowing users to load a custom collector manager instead. (Luca Cavanna)
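As a rough sketch of what this migration looks like in user code (the names below are illustrative,
not from the patch): a collector that used to be loaded via `collector.class` can be wrapped in a
`CollectorManager`, whose `newCollector()` creates one collector per index slice and whose
`reduce()` folds the per-slice results together.

    // minimal sketch; MyCollector and its getCount() are hypothetical stand-ins
    // for an existing Collector implementation
    public class MyCollectorManager
        implements CollectorManager<MyCollector, Integer> {
      @Override
      public MyCollector newCollector() {
        return new MyCollector(); // a fresh collector for each index slice
      }

      @Override
      public Integer reduce(Collection<MyCollector> collectors) {
        int total = 0;
        for (MyCollector c : collectors) {
          total += c.getCount(); // combine per-slice counts
        }
        return total;
      }
    }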

---- next file ----

@@ -1490,7 +1490,7 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
       }
       assertTrue(approxEquals(actual, expected));
-      assertTrue(Operations.sameLanguage(actual, expected));
+      assertTrue(AutomatonTestUtil.sameLanguage(actual, expected));
     }

     a.close();

---- next file ----

@@ -363,7 +363,7 @@ public final class GeoEncodingUtils {
      */
     public boolean test(int lat, int lon) {
       final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
-      if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
+      if (lat2 < latBase || lat2 - latBase >= maxLatDelta) {
         return false;
       }
       int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
@@ -411,7 +411,7 @@ public final class GeoEncodingUtils {
      */
     public boolean test(int lat, int lon) {
       final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
-      if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
+      if (lat2 < latBase || lat2 - latBase >= maxLatDelta) {
         return false;
       }
       int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
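The point of this change (per the GITHUB#13703 entry above): when latBase is large, the old
expression latBase + maxLatDelta can exceed Integer.MAX_VALUE and wrap to a negative int, so every
lat2 compares as greater-or-equal and in-range points get rejected. Rewriting the bound as a
subtraction stays within int range. A minimal, self-contained sketch with illustrative values:

    public class LatBoundsOverflowDemo {
      public static void main(String[] args) {
        int latBase = Integer.MAX_VALUE - 10; // large base, as for points encoded near latitude 90
        int maxLatDelta = 20;
        int lat2 = Integer.MAX_VALUE - 5; // genuinely inside [latBase, latBase + maxLatDelta)
        System.out.println(lat2 >= latBase + maxLatDelta); // true: the sum overflows to a negative int
        System.out.println(lat2 - latBase >= maxLatDelta); // false: the rewritten check is overflow-safe
      }
    }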

---- next file ----

@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This class wraps {@link CollectorManager} and owns the collectors the manager creates. It is
- * convenient that clients of the class don't have to worry about keeping the list of collectors, as
- * well as about making the collector's type (C) compatible when reduce is called. Instances of this
- * class cache results of {@link CollectorManager#reduce(Collection)}.
- *
- * <p>Note that instance of this class ignores any {@link Collector} created by {@link
- * CollectorManager#newCollector()} directly, not through {@link #newCollector()}
- *
- * @lucene.experimental
- */
-public final class CollectorOwner<C extends Collector, T> {
-
-  private final CollectorManager<C, T> manager;
-  private T result;
-  private boolean reduced;
-
-  // TODO: For IndexSearcher, the list doesn't have to be synchronized
-  // because we create new collectors sequentially. Drill sideways creates new collectors in
-  // DrillSidewaysQuery#Weight#bulkScorer which is already called concurrently.
-  // I think making the list synchronized here is not a huge concern, at the same time, do we want
-  // to do something about it?
-  // e.g. have boolean property in constructor that makes it threads friendly when set?
-  private final List<C> collectors = Collections.synchronizedList(new ArrayList<>());
-
-  public CollectorOwner(CollectorManager<C, T> manager) {
-    this.manager = manager;
-  }
-
-  /** Return a new {@link Collector}. This must return a different instance on each call. */
-  public C newCollector() throws IOException {
-    C collector = manager.newCollector();
-    collectors.add(collector);
-    return collector;
-  }
-
-  public C getCollector(int i) {
-    return collectors.get(i);
-  }
-
-  /**
-   * Returns result of {@link CollectorManager#reduce(Collection)}. The result is cached.
-   *
-   * <p>This method is NOT threadsafe.
-   */
-  public T getResult() throws IOException {
-    if (reduced == false) {
-      result = manager.reduce(collectors);
-      reduced = true;
-    }
-    return result;
-  }
-}

---- next file ----

@@ -671,53 +671,6 @@ public class IndexSearcher {
     }
   }

-  /**
-   * Lower-level search API. Search all leaves using the given {@link CollectorOwner}, without
-   * calling {@link CollectorOwner#getResult()} so that clients can reduce and read results
-   * themselves.
-   *
-   * <p>Note that this method doesn't return anything - users can access results by calling {@link
-   * CollectorOwner#getResult()}
-   *
-   * @lucene.experimental
-   */
-  public <C extends Collector> void search(Query query, CollectorOwner<C, ?> collectorOwner)
-      throws IOException {
-    final C firstCollector = collectorOwner.newCollector();
-    query = rewrite(query, firstCollector.scoreMode().needsScores());
-    final Weight weight = createWeight(query, firstCollector.scoreMode(), 1);
-    search(weight, collectorOwner, firstCollector);
-  }
-
-  private <C extends Collector> void search(
-      Weight weight, CollectorOwner<C, ?> collectorOwner, C firstCollector) throws IOException {
-    final LeafSlice[] leafSlices = getSlices();
-    if (leafSlices.length == 0) {
-      // there are no segments, nothing to offload to the executor
-      assert leafContexts.isEmpty();
-    } else {
-      final ScoreMode scoreMode = firstCollector.scoreMode();
-      for (int i = 1; i < leafSlices.length; ++i) {
-        final C collector = collectorOwner.newCollector();
-        if (scoreMode != collector.scoreMode()) {
-          throw new IllegalStateException(
-              "CollectorManager does not always produce collectors with the same score mode");
-        }
-      }
-      final List<Callable<C>> listTasks = new ArrayList<>(leafSlices.length);
-      for (int i = 0; i < leafSlices.length; ++i) {
-        final LeafReaderContext[] leaves = leafSlices[i].leaves;
-        final C collector = collectorOwner.getCollector(i);
-        listTasks.add(
-            () -> {
-              search(Arrays.asList(leaves), weight, collector);
-              return collector;
-            });
-      }
-      taskExecutor.invokeAll(listTasks);
-    }
-  }
-
   /**
    * Lower-level search API.
    *

---- next file ----

@@ -313,7 +313,7 @@ public abstract class MultiTermQuery extends Query {
    * Return the number of unique terms contained in this query, if known up-front. If not known, -1
    * will be returned.
    */
-  public long getTermsCount() throws IOException {
+  public long getTermsCount() {
     return -1;
   }

---- next file ----

@@ -137,7 +137,7 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable {
   }

   @Override
-  public long getTermsCount() throws IOException {
+  public long getTermsCount() {
     return termData.size();
   }

---- next file ----

@@ -339,6 +339,7 @@ public class Automaton implements Accountable, TransitionAccessor {
   @Override
   public int getNumTransitions(int state) {
     assert state >= 0;
+    assert state < getNumStates();
     int count = states[2 * state + 1];
     if (count == -1) {
       return 0;

---- next file ----

@@ -86,7 +86,7 @@ public class FiniteStringsIterator {
     this.emitEmptyString = a.isAccept(0);

     // Start iteration with node startState.
-    if (a.getNumTransitions(startState) > 0) {
+    if (a.getNumStates() > startState && a.getNumTransitions(startState) > 0) {
       pathStates.set(startState);
       nodes[0].resetState(a, startState);
       string.append(startState);

---- next file ----

@@ -35,7 +35,6 @@ import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -182,30 +181,65 @@ public final class Operations {
       // Repeating the empty automata will still only accept the empty automata.
       return a;
     }
+
+    if (a.isAccept(0) && a.getAcceptStates().cardinality() == 1) {
+      // If state 0 is the only accept state, then this automaton already repeats itself.
+      return a;
+    }
+
     Automaton.Builder builder = new Automaton.Builder();
+    // Create the initial state, which is accepted
     builder.createState();
     builder.setAccept(0, true);
-    builder.copy(a);
     Transition t = new Transition();
+
+    int[] stateMap = new int[a.getNumStates()];
+    for (int state = 0; state < a.getNumStates(); ++state) {
+      if (a.isAccept(state) == false) {
+        stateMap[state] = builder.createState();
+      } else if (a.getNumTransitions(state) == 0) {
+        // Accept states that have no transitions get merged into state 0.
+        stateMap[state] = 0;
+      } else {
+        int newState = builder.createState();
+        stateMap[state] = newState;
+        builder.setAccept(newState, true);
+      }
+    }
+
+    // Now copy the automaton while renumbering states.
+    for (int state = 0; state < a.getNumStates(); ++state) {
+      int src = stateMap[state];
+      int count = a.initTransition(state, t);
+      for (int i = 0; i < count; i++) {
+        a.getNextTransition(t);
+        int dest = stateMap[t.dest];
+        builder.addTransition(src, dest, t.min, t.max);
+      }
+    }
+
+    // Now copy transitions of the initial state to our new initial state.
     int count = a.initTransition(0, t);
     for (int i = 0; i < count; i++) {
       a.getNextTransition(t);
-      builder.addTransition(0, t.dest + 1, t.min, t.max);
+      builder.addTransition(0, stateMap[t.dest], t.min, t.max);
     }
-    int numStates = a.getNumStates();
-    for (int s = 0; s < numStates; s++) {
-      if (a.isAccept(s)) {
+
+    // Now copy transitions of the initial state to final states to make the automaton repeat
+    // itself.
+    for (int s = a.getAcceptStates().nextSetBit(0);
+        s != -1;
+        s = a.getAcceptStates().nextSetBit(s + 1)) {
+      if (stateMap[s] != 0) {
         count = a.initTransition(0, t);
         for (int i = 0; i < count; i++) {
           a.getNextTransition(t);
-          builder.addTransition(s + 1, t.dest + 1, t.min, t.max);
+          builder.addTransition(stateMap[s], stateMap[t.dest], t.min, t.max);
         }
       }
     }
-    return builder.finish();
+    return removeDeadStates(builder.finish());
   }

   /**
@@ -374,17 +408,6 @@ public final class Operations {
     return removeDeadStates(c);
   }

-  /**
-   * Returns true if these two automata accept exactly the same language. This is a costly
-   * computation! Both automata must be determinized and have no dead states!
-   */
-  public static boolean sameLanguage(Automaton a1, Automaton a2) {
-    if (a1 == a2) {
-      return true;
-    }
-    return subsetOf(a2, a1) && subsetOf(a1, a2);
-  }
-
   // TODO: move to test-framework?
   /**
    * Returns true if this automaton has any states that cannot be reached from the initial state or
@@ -417,73 +440,6 @@ public final class Operations {
     return reachableFromAccept.isEmpty() == false;
   }

-  /**
-   * Returns true if the language of <code>a1</code> is a subset of the language of <code>a2</code>.
-   * Both automata must be determinized and must have no dead states.
-   *
-   * <p>Complexity: quadratic in number of states.
-   */
-  public static boolean subsetOf(Automaton a1, Automaton a2) {
-    if (a1.isDeterministic() == false) {
-      throw new IllegalArgumentException("a1 must be deterministic");
-    }
-    if (a2.isDeterministic() == false) {
-      throw new IllegalArgumentException("a2 must be deterministic");
-    }
-    assert hasDeadStatesFromInitial(a1) == false;
-    assert hasDeadStatesFromInitial(a2) == false;
-    if (a1.getNumStates() == 0) {
-      // Empty language is alwyas a subset of any other language
-      return true;
-    } else if (a2.getNumStates() == 0) {
-      return isEmpty(a1);
-    }
-
-    // TODO: cutover to iterators instead
-    Transition[][] transitions1 = a1.getSortedTransitions();
-    Transition[][] transitions2 = a2.getSortedTransitions();
-    ArrayDeque<StatePair> worklist = new ArrayDeque<>();
-    HashSet<StatePair> visited = new HashSet<>();
-    StatePair p = new StatePair(0, 0);
-    worklist.add(p);
-    visited.add(p);
-    while (worklist.size() > 0) {
-      p = worklist.removeFirst();
-      if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
-        return false;
-      }
-      Transition[] t1 = transitions1[p.s1];
-      Transition[] t2 = transitions2[p.s2];
-      for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
-        while (b2 < t2.length && t2[b2].max < t1[n1].min) {
-          b2++;
-        }
-        int min1 = t1[n1].min, max1 = t1[n1].max;
-
-        for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) {
-          if (t2[n2].min > min1) {
-            return false;
-          }
-          if (t2[n2].max < Character.MAX_CODE_POINT) {
-            min1 = t2[n2].max + 1;
-          } else {
-            min1 = Character.MAX_CODE_POINT;
-            max1 = Character.MIN_CODE_POINT;
-          }
-          StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
-          if (!visited.contains(q)) {
-            worklist.add(q);
-            visited.add(q);
-          }
-        }
-        if (min1 <= max1) {
-          return false;
-        }
-      }
-    }
-    return true;
-  }
-
   /**
    * Returns an automaton that accepts the union of the languages of the given automata.
    *
@@ -857,22 +813,48 @@ public final class Operations {
     return true;
   }

-  /** Returns true if the given automaton accepts all strings. The automaton must be minimized. */
+  /**
+   * Returns true if the given automaton accepts all strings.
+   *
+   * <p>The automaton must be deterministic, or this method may return false.
+   *
+   * <p>Complexity: linear in number of states and transitions.
+   */
   public static boolean isTotal(Automaton a) {
     return isTotal(a, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
   }

   /**
    * Returns true if the given automaton accepts all strings for the specified min/max range of the
-   * alphabet. The automaton must be minimized.
+   * alphabet.
+   *
+   * <p>The automaton must be deterministic, or this method may return false.
+   *
+   * <p>Complexity: linear in number of states and transitions.
    */
   public static boolean isTotal(Automaton a, int minAlphabet, int maxAlphabet) {
-    if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
-      Transition t = new Transition();
-      a.getTransition(0, 0, t);
-      return t.dest == 0 && t.min == minAlphabet && t.max == maxAlphabet;
+    BitSet states = getLiveStates(a);
+    Transition spare = new Transition();
+    int seenStates = 0;
+    for (int state = states.nextSetBit(0); state >= 0; state = states.nextSetBit(state + 1)) {
+      // all reachable states must be accept states
+      if (a.isAccept(state) == false) return false;
+      // all reachable states must contain transitions covering minAlphabet-maxAlphabet
+      int previousLabel = minAlphabet - 1;
+      for (int transition = 0; transition < a.getNumTransitions(state); transition++) {
+        a.getTransition(state, transition, spare);
+        // no gaps are allowed
+        if (spare.min > previousLabel + 1) return false;
+        previousLabel = spare.max;
+      }
+      if (previousLabel < maxAlphabet) return false;
+      if (state == Integer.MAX_VALUE) {
+        break; // or (state+1) would overflow
+      }
+      seenStates++;
     }
-    return false;
+    // we've checked all the states, automaton is either total or empty
+    return seenStates > 0;
   }

   /**
@@ -1004,6 +986,9 @@ public final class Operations {
   public static Automaton removeDeadStates(Automaton a) {
     int numStates = a.getNumStates();
     BitSet liveSet = getLiveStates(a);
+    if (liveSet.cardinality() == numStates) {
+      return a;
+    }
     int[] map = new int[numStates];
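One consequence of the new isTotal contract above (hedged in its javadoc: deterministic input, or
the method may return false): a nondeterministic automaton can accept every string and still fail
the per-state coverage check. A minimal hand-built sketch, not taken from the patch, assuming the
org.apache.lucene.util.automaton classes are imported:

    // an NFA whose language is total: state 0 loops over the whole alphabet,
    // but the extra 'a' edge makes it nondeterministic
    Automaton nfa = new Automaton();
    int s0 = nfa.createState();
    int s1 = nfa.createState();
    nfa.setAccept(s0, true);
    nfa.setAccept(s1, true);
    nfa.addTransition(s0, s0, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
    nfa.addTransition(s0, s1, 'a', 'a');
    nfa.finishState();
    System.out.println(Operations.isTotal(nfa)); // false, although every string is accepted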

---- next file ----

@@ -35,9 +35,14 @@ package org.apache.lucene.util.automaton;
  * @lucene.experimental
  */
 public class StatePair {
+  // only mike knows what it does (do not expose)
   int s;
-  int s1;
-  int s2;
+
+  /** first state */
+  public final int s1;
+
+  /** second state */
+  public final int s2;

   StatePair(int s, int s1, int s2) {
     this.s = s;
@@ -81,7 +86,7 @@ public class StatePair {
   @Override
   public int hashCode() {
     // Don't use s1 ^ s2 since it's vulnerable to the case where s1 == s2 always --> hashCode = 0,
-    // e.g. if you call Operations.sameLanguage,
+    // e.g. if you call AutomatonTestUtil.sameLanguage,
     // passing the same automaton against itself:
     return s1 * 31 + s2;
   }

---- next file ----

@@ -625,7 +625,7 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
             Operations.removeDeadStates(expected), DEFAULT_DETERMINIZE_WORK_LIMIT);
     Automaton actualDet =
         Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_DETERMINIZE_WORK_LIMIT);
-    if (Operations.sameLanguage(expectedDet, actualDet) == false) {
+    if (AutomatonTestUtil.sameLanguage(expectedDet, actualDet) == false) {
       Set<String> expectedPaths = toPathStrings(expectedDet);
       Set<String> actualPaths = toPathStrings(actualDet);
       StringBuilder b = new StringBuilder();

---- next file ----

@@ -183,7 +183,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
       Automaton actual =
           Operations.determinize(Automata.makeStringUnion(found), DEFAULT_DETERMINIZE_WORK_LIMIT);
-      assertTrue(Operations.sameLanguage(expected, actual));
+      assertTrue(AutomatonTestUtil.sameLanguage(expected, actual));
     }
   }
 }

---- next file ----

@@ -403,12 +403,8 @@ public class TestBoolean2 extends LuceneTestCase {
         bigSearcher.count(q3.build()));

     // test diff (randomized) scorers produce the same results on bigSearcher as well
-    hits1 =
-        bigSearcher.search(q1, new TopFieldCollectorManager(sort, 1000 * mulFactor, 1))
-            .scoreDocs;
-    hits2 =
-        bigSearcher.search(q1, new TopFieldCollectorManager(sort, 1000 * mulFactor, 1))
-            .scoreDocs;
+    hits1 = bigSearcher.search(q1, new TopFieldCollectorManager(sort, mulFactor, 1)).scoreDocs;
+    hits2 = bigSearcher.search(q1, new TopFieldCollectorManager(sort, mulFactor, 1)).scoreDocs;
     CheckHits.checkEqual(q1, hits1, hits2);
   }

---- next file ----

@@ -17,15 +17,11 @@
 package org.apache.lucene.search;

 import java.io.IOException;
-import java.util.BitSet;
 import java.util.Collection;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.util.LuceneTestCase;
@@ -35,34 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
 public class TestScorerPerf extends LuceneTestCase {
   private final boolean validate = true; // set to false when doing performance testing

-  public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir)
-      throws Exception {
-    int[] freq = new int[nTerms];
-    Term[] terms = new Term[nTerms];
-    for (int i = 0; i < nTerms; i++) {
-      int f = (nTerms + 1) - i; // make first terms less frequent
-      freq[i] = (int) Math.ceil(Math.pow(f, power));
-      terms[i] = new Term("f", Character.toString((char) ('A' + i)));
-    }
-    IndexWriter iw =
-        new IndexWriter(
-            dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
-    for (int i = 0; i < nDocs; i++) {
-      Document d = new Document();
-      for (int j = 0; j < nTerms; j++) {
-        if (random().nextInt(freq[j]) == 0) {
-          d.add(newStringField("f", terms[j].text(), Field.Store.NO));
-          // System.out.println(d);
-        }
-      }
-      iw.addDocument(d);
-    }
-    iw.forceMerge(1);
-    iw.close();
-  }
-
-  public FixedBitSet randBitSet(int sz, int numBitsToSet) {
+  private static FixedBitSet randBitSet(int sz, int numBitsToSet) {
     FixedBitSet set = new FixedBitSet(sz);
     for (int i = 0; i < numBitsToSet; i++) {
       set.set(random().nextInt(sz));
@@ -70,7 +39,7 @@ public class TestScorerPerf extends LuceneTestCase {
     return set;
   }

-  public FixedBitSet[] randBitSets(int numSets, int setSize) {
+  private static FixedBitSet[] randBitSets(int numSets, int setSize) {
     FixedBitSet[] sets = new FixedBitSet[numSets];
     for (int i = 0; i < sets.length; i++) {
       sets[i] = randBitSet(setSize, random().nextInt(setSize));
@@ -81,22 +50,13 @@ public class TestScorerPerf extends LuceneTestCase {
   private static final class CountingHitCollectorManager
       implements CollectorManager<CountingHitCollector, CountingHitCollector> {

-    private final boolean validate;
-    private final FixedBitSet result;
-
-    CountingHitCollectorManager(boolean validate, FixedBitSet result) {
-      this.validate = validate;
-      this.result = result;
-    }
-
     @Override
     public CountingHitCollector newCollector() {
-      return validate ? new MatchingHitCollector(result) : new CountingHitCollector();
+      return new CountingHitCollector();
     }

     @Override
-    public CountingHitCollector reduce(Collection<CountingHitCollector> collectors)
-        throws IOException {
+    public CountingHitCollector reduce(Collection<CountingHitCollector> collectors) {
       CountingHitCollector result = new CountingHitCollector();
       for (CountingHitCollector collector : collectors) {
         result.count += collector.count;
@@ -106,7 +66,7 @@ public class TestScorerPerf extends LuceneTestCase {
     }
   }

-  public static class CountingHitCollector extends SimpleCollector {
+  private static class CountingHitCollector extends SimpleCollector {
     int count = 0;
     int sum = 0;
     protected int docBase = 0;
@@ -121,12 +81,8 @@ public class TestScorerPerf extends LuceneTestCase {
       return count;
     }

-    public int getSum() {
-      return sum;
-    }
-
     @Override
-    protected void doSetNextReader(LeafReaderContext context) throws IOException {
+    protected void doSetNextReader(LeafReaderContext context) {
       docBase = context.docBase;
     }
@@ -136,24 +92,6 @@ public class TestScorerPerf extends LuceneTestCase {
     }
   }

-  public static class MatchingHitCollector extends CountingHitCollector {
-    FixedBitSet answer;
-    int pos = -1;
-
-    public MatchingHitCollector(FixedBitSet answer) {
-      this.answer = answer;
-    }
-
-    public void collect(int doc, float score) {
-      pos = answer.nextSetBit(pos + 1);
-      if (pos != doc + docBase) {
-        throw new RuntimeException("Expected doc " + pos + " but got " + (doc + docBase));
-      }
-      super.collect(doc);
-    }
-  }
-
   private static class BitSetQuery extends Query {
     private final FixedBitSet docs;
@@ -163,11 +101,10 @@ public class TestScorerPerf extends LuceneTestCase {
     }

     @Override
-    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
-        throws IOException {
+    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
       return new ConstantScoreWeight(this, boost) {
         @Override
-        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+        public ScorerSupplier scorerSupplier(LeafReaderContext context) {
           final var scorer =
               new ConstantScoreScorer(
                   score(), scoreMode, new BitSetIterator(docs, docs.approximateCardinality()));
@@ -200,20 +137,22 @@ public class TestScorerPerf extends LuceneTestCase {
     }
   }

-  FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) {
+  private FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) {
     final FixedBitSet rnd = sets[random().nextInt(sets.length)];
     Query q = new BitSetQuery(rnd);
     bq.add(q, BooleanClause.Occur.MUST);
     if (validate) {
-      if (result == null) result = rnd.clone();
-      else result.and(rnd);
+      if (result == null) {
+        result = rnd.clone();
+      } else {
+        result.and(rnd);
+      }
     }
     return result;
   }

-  public int doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses)
+  private void doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses)
       throws IOException {
-    int ret = 0;

     for (int i = 0; i < iter; i++) {
       int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
@@ -222,21 +161,17 @@ public class TestScorerPerf extends LuceneTestCase {
       for (int j = 0; j < nClauses; j++) {
         result = addClause(sets, bq, result);
       }
-      CountingHitCollector hc =
-          s.search(bq.build(), new CountingHitCollectorManager(validate, result));
-      ret += hc.getSum();

-      if (validate) assertEquals(result.cardinality(), hc.getCount());
-      // System.out.println(hc.getCount());
+      CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager());
+
+      if (validate) {
+        assertEquals(result.cardinality(), hc.getCount());
+      }
     }
-    return ret;
   }

-  public int doNestedConjunctions(
+  private void doNestedConjunctions(
       IndexSearcher s, FixedBitSet[] sets, int iter, int maxOuterClauses, int maxClauses)
       throws IOException {
-    int ret = 0;
     long nMatches = 0;

     for (int i = 0; i < iter; i++) {
@@ -255,107 +190,15 @@ public class TestScorerPerf extends LuceneTestCase {
         oq.add(bq.build(), BooleanClause.Occur.MUST);
       } // outer

-      CountingHitCollector hc =
-          s.search(oq.build(), new CountingHitCollectorManager(validate, result));
-      nMatches += hc.getCount();
-      ret += hc.getSum();
-      if (validate) assertEquals(result.cardinality(), hc.getCount());
-      // System.out.println(hc.getCount());
-    }
-    if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
-    return ret;
-  }
-
-  public int doTermConjunctions(
-      Term[] terms, IndexSearcher s, int termsInIndex, int maxClauses, int iter)
-      throws IOException {
-    int ret = 0;
-
-    long nMatches = 0;
-    for (int i = 0; i < iter; i++) {
-      int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
-      BooleanQuery.Builder bq = new BooleanQuery.Builder();
-      BitSet termflag = new BitSet(termsInIndex);
-      for (int j = 0; j < nClauses; j++) {
-        int tnum;
-        // don't pick same clause twice
-        tnum = random().nextInt(termsInIndex);
-        if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum);
-        if (tnum < 0 || tnum >= termsInIndex) tnum = termflag.nextClearBit(0);
-        termflag.set(tnum);
-        Query tq = new TermQuery(terms[tnum]);
-        bq.add(tq, BooleanClause.Occur.MUST);
-      }
-
-      CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager(false, null));
-      nMatches += hc.getCount();
-      ret += hc.getSum();
-    }
-    if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
-    return ret;
-  }
-
-  public int doNestedTermConjunctions(
-      IndexSearcher s,
-      Term[] terms,
-      int termsInIndex,
-      int maxOuterClauses,
-      int maxClauses,
-      int iter)
-      throws IOException {
-    int ret = 0;
-    long nMatches = 0;
-    for (int i = 0; i < iter; i++) {
-      int oClauses = random().nextInt(maxOuterClauses - 1) + 2;
-      BooleanQuery.Builder oq = new BooleanQuery.Builder();
-      for (int o = 0; o < oClauses; o++) {
-        int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
-        BooleanQuery.Builder bq = new BooleanQuery.Builder();
-        BitSet termflag = new BitSet(termsInIndex);
-        for (int j = 0; j < nClauses; j++) {
-          int tnum;
-          // don't pick same clause twice
-          tnum = random().nextInt(termsInIndex);
-          if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum);
-          if (tnum < 0 || tnum >= 25) tnum = termflag.nextClearBit(0);
-          termflag.set(tnum);
-          Query tq = new TermQuery(terms[tnum]);
-          bq.add(tq, BooleanClause.Occur.MUST);
-        } // inner
-        oq.add(bq.build(), BooleanClause.Occur.MUST);
-      } // outer
-      CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager(false, null));
-      nMatches += hc.getCount();
-      ret += hc.getSum();
-    }
-    if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
-    return ret;
-  }
-
-  public int doSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
-      throws IOException {
-    int ret = 0;
-
-    for (int i = 0; i < iter; i++) {
-      int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
-      PhraseQuery.Builder builder = new PhraseQuery.Builder();
-      for (int j = 0; j < nClauses; j++) {
-        int tnum = random().nextInt(termsInIndex);
-        builder.add(new Term("f", Character.toString((char) (tnum + 'A'))));
-      }
-      // slop could be random too
-      builder.setSlop(termsInIndex);
-      PhraseQuery q = builder.build();
-
-      CountingHitCollector hc = s.search(q, new CountingHitCollectorManager(false, null));
-      ret += hc.getSum();
-    }
-    return ret;
-  }
+      CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager());
+      nMatches += hc.getCount();
+
+      if (validate) {
+        assertEquals(result.cardinality(), hc.getCount());
+      }
+    }
+
+    if (VERBOSE) {
+      System.out.println("Average number of matches=" + (nMatches / iter));
+    }
+  }

   public void testConjunctions() throws Exception {

---- next file ----

@@ -87,7 +87,7 @@ public class TestAutomaton extends LuceneTestCase {
     Automaton a2 =
         Operations.removeDeadStates(
             Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar")));
-    assertTrue(Operations.sameLanguage(a1, a2));
+    assertTrue(AutomatonTestUtil.sameLanguage(a1, a2));
   }

   public void testCommonPrefixString() throws Exception {
@@ -257,7 +257,7 @@ public class TestAutomaton extends LuceneTestCase {
     Automaton a = Automata.makeString("foobar");
     Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
-    assertTrue(Operations.sameLanguage(a, aMin));
+    assertTrue(AutomatonTestUtil.sameLanguage(a, aMin));
   }

   public void testMinimize2() throws Exception {
@@ -266,7 +266,7 @@ public class TestAutomaton extends LuceneTestCase {
             Arrays.asList(Automata.makeString("foobar"), Automata.makeString("boobar")));
     Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             Operations.determinize(Operations.removeDeadStates(a), DEFAULT_DETERMINIZE_WORK_LIMIT),
             aMin));
   }
@@ -276,7 +276,7 @@ public class TestAutomaton extends LuceneTestCase {
     Automaton ra = Operations.reverse(a);
     Automaton a2 = Operations.determinize(Operations.reverse(ra), DEFAULT_DETERMINIZE_WORK_LIMIT);

-    assertTrue(Operations.sameLanguage(a, a2));
+    assertTrue(AutomatonTestUtil.sameLanguage(a, a2));
   }

   public void testOptional() throws Exception {
@@ -401,7 +401,7 @@ public class TestAutomaton extends LuceneTestCase {
     Automaton ra = Operations.reverse(a);
     Automaton rra = Operations.reverse(ra);
     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
             Operations.determinize(Operations.removeDeadStates(rra), Integer.MAX_VALUE)));
   }
@@ -502,7 +502,7 @@ public class TestAutomaton extends LuceneTestCase {
     }

     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
             Operations.determinize(
                 Operations.removeDeadStates(builder.finish()), Integer.MAX_VALUE)));
@@ -735,7 +735,8 @@ public class TestAutomaton extends LuceneTestCase {
     a2.addTransition(0, state, 'a');
     a2.finishState();
     assertTrue(
-        Operations.sameLanguage(Operations.removeDeadStates(a), Operations.removeDeadStates(a2)));
+        AutomatonTestUtil.sameLanguage(
+            Operations.removeDeadStates(a), Operations.removeDeadStates(a2)));
   }

   private Automaton randomNoOp(Automaton a) {
@@ -1288,7 +1289,7 @@ public class TestAutomaton extends LuceneTestCase {
     Automaton a2 =
         Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE));
     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE))));

     // Do same check, in UTF8 space
@@ -1613,7 +1614,7 @@ public class TestAutomaton extends LuceneTestCase {

   public void testAcceptAllEmptyStringMin() throws Exception {
     Automaton a = Automata.makeBinaryInterval(newBytesRef(), true, null, true);
-    assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
+    assertTrue(AutomatonTestUtil.sameLanguage(Automata.makeAnyBinary(), a));
   }

   private static IntsRef toIntsRef(String s) {

---- next file ----

@@ -41,7 +41,7 @@ public class TestDeterminism extends LuceneTestCase {
       a = AutomatonTestUtil.determinizeSimple(a);
       Automaton b = Operations.determinize(a, Integer.MAX_VALUE);
       // TODO: more verifications possible?
-      assertTrue(Operations.sameLanguage(a, b));
+      assertTrue(AutomatonTestUtil.sameLanguage(a, b));
     }
   }
@@ -53,20 +53,20 @@ public class TestDeterminism extends LuceneTestCase {
         Operations.complement(
             Operations.complement(a, DEFAULT_DETERMINIZE_WORK_LIMIT),
             DEFAULT_DETERMINIZE_WORK_LIMIT);
-    assertTrue(Operations.sameLanguage(a, equivalent));
+    assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));

     // a union a = a
     equivalent =
         Operations.determinize(
             Operations.removeDeadStates(Operations.union(a, a)), DEFAULT_DETERMINIZE_WORK_LIMIT);
-    assertTrue(Operations.sameLanguage(a, equivalent));
+    assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));

     // a intersect a = a
     equivalent =
         Operations.determinize(
             Operations.removeDeadStates(Operations.intersection(a, a)),
             DEFAULT_DETERMINIZE_WORK_LIMIT);
-    assertTrue(Operations.sameLanguage(a, equivalent));
+    assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));

     // a minus a = empty
     Automaton empty = Operations.minus(a, a, DEFAULT_DETERMINIZE_WORK_LIMIT);
@@ -81,7 +81,7 @@ public class TestDeterminism extends LuceneTestCase {
       equivalent =
           Operations.minus(optional, Automata.makeEmptyString(), DEFAULT_DETERMINIZE_WORK_LIMIT);
       // System.out.println("equiv " + equivalent);
-      assertTrue(Operations.sameLanguage(a, equivalent));
+      assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));
     }
   }
 }

---- next file ----

@@ -81,44 +81,46 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
       // check that the dfa for n-1 accepts a subset of the dfa for n
       if (n > 0) {
         assertTrue(
-            Operations.subsetOf(
+            AutomatonTestUtil.subsetOf(
                 Operations.removeDeadStates(automata[n - 1]),
                 Operations.removeDeadStates(automata[n])));
         assertTrue(
-            Operations.subsetOf(
+            AutomatonTestUtil.subsetOf(
                 Operations.removeDeadStates(automata[n - 1]),
                 Operations.removeDeadStates(tautomata[n])));
         assertTrue(
-            Operations.subsetOf(
+            AutomatonTestUtil.subsetOf(
                 Operations.removeDeadStates(tautomata[n - 1]),
                 Operations.removeDeadStates(automata[n])));
         assertTrue(
-            Operations.subsetOf(
+            AutomatonTestUtil.subsetOf(
                 Operations.removeDeadStates(tautomata[n - 1]),
                 Operations.removeDeadStates(tautomata[n])));
         assertNotSame(automata[n - 1], automata[n]);
       }

       // check that Lev(N) is a subset of LevT(N)
       assertTrue(
-          Operations.subsetOf(
+          AutomatonTestUtil.subsetOf(
               Operations.removeDeadStates(automata[n]), Operations.removeDeadStates(tautomata[n])));

       // special checks for specific n
       switch (n) {
         case 0:
           // easy, matches the string itself
           assertTrue(
-              Operations.sameLanguage(
+              AutomatonTestUtil.sameLanguage(
                   Automata.makeString(s), Operations.removeDeadStates(automata[0])));
           assertTrue(
-              Operations.sameLanguage(
+              AutomatonTestUtil.sameLanguage(
                   Automata.makeString(s), Operations.removeDeadStates(tautomata[0])));
           break;
         case 1:
           // generate a lev1 naively, and check the accepted lang is the same.
           assertTrue(
-              Operations.sameLanguage(naiveLev1(s), Operations.removeDeadStates(automata[1])));
+              AutomatonTestUtil.sameLanguage(
+                  naiveLev1(s), Operations.removeDeadStates(automata[1])));
           assertTrue(
-              Operations.sameLanguage(naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
+              AutomatonTestUtil.sameLanguage(
+                  naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
           break;
         default:
           assertBruteForce(s, automata[n], n);

---- next file ----

@@ -28,7 +28,7 @@ public class TestMinimize extends LuceneTestCase {
       Automaton a = AutomatonTestUtil.randomAutomaton(random());
       Automaton la = Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE);
       Automaton lb = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
-      assertTrue(Operations.sameLanguage(la, lb));
+      assertTrue(AutomatonTestUtil.sameLanguage(la, lb));
     }
   }
@@ -42,7 +42,7 @@ public class TestMinimize extends LuceneTestCase {
       Automaton a = AutomatonTestUtil.randomAutomaton(random());
       a = AutomatonTestUtil.minimizeSimple(a);
       Automaton b = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
-      assertTrue(Operations.sameLanguage(a, b));
+      assertTrue(AutomatonTestUtil.sameLanguage(a, b));
       assertEquals(a.getNumStates(), b.getNumStates());
       int numStates = a.getNumStates();

---- next file ----

@@ -50,7 +50,7 @@ public class TestOperations extends LuceneTestCase {
     assertTrue(naiveUnion.isDeterministic());
     assertFalse(Operations.hasDeadStatesFromInitial(naiveUnion));

-    assertTrue(Operations.sameLanguage(union, naiveUnion));
+    assertTrue(AutomatonTestUtil.sameLanguage(union, naiveUnion));
   }

   private static Automaton naiveUnion(List<BytesRef> strings) {
@@ -116,13 +116,13 @@ public class TestOperations extends LuceneTestCase {
     Automaton concat2 = Operations.concatenate(singleton, nfa);
     assertFalse(concat2.isDeterministic());
     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             Operations.determinize(concat1, 100), Operations.determinize(concat2, 100)));
     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             Operations.determinize(nfa, 100), Operations.determinize(concat1, 100)));
     assertTrue(
-        Operations.sameLanguage(
+        AutomatonTestUtil.sameLanguage(
             Operations.determinize(nfa, 100), Operations.determinize(concat2, 100)));
   }
@ -173,6 +173,42 @@ public class TestOperations extends LuceneTestCase {
assertTrue(exc.getMessage().contains("input automaton is too large")); assertTrue(exc.getMessage().contains("input automaton is too large"));
} }
public void testIsTotal() {
// minimal
assertFalse(Operations.isTotal(Automata.makeEmpty()));
assertFalse(Operations.isTotal(Automata.makeEmptyString()));
assertTrue(Operations.isTotal(Automata.makeAnyString()));
assertTrue(Operations.isTotal(Automata.makeAnyBinary(), 0, 255));
assertFalse(Operations.isTotal(Automata.makeNonEmptyBinary(), 0, 255));
// deterministic, but not minimal
assertTrue(Operations.isTotal(Operations.repeat(Automata.makeAnyChar())));
Automaton tricky =
Operations.repeat(
Operations.union(
Automata.makeCharRange(Character.MIN_CODE_POINT, 100),
Automata.makeCharRange(101, Character.MAX_CODE_POINT)));
assertTrue(Operations.isTotal(tricky));
// not total, but close
Automaton tricky2 =
Operations.repeat(
Operations.union(
Automata.makeCharRange(Character.MIN_CODE_POINT + 1, 100),
Automata.makeCharRange(101, Character.MAX_CODE_POINT)));
assertFalse(Operations.isTotal(tricky2));
Automaton tricky3 =
Operations.repeat(
Operations.union(
Automata.makeCharRange(Character.MIN_CODE_POINT, 99),
Automata.makeCharRange(101, Character.MAX_CODE_POINT)));
assertFalse(Operations.isTotal(tricky3));
Automaton tricky4 =
Operations.repeat(
Operations.union(
Automata.makeCharRange(Character.MIN_CODE_POINT, 100),
Automata.makeCharRange(101, Character.MAX_CODE_POINT - 1)));
assertFalse(Operations.isTotal(tricky4));
}
/** /**
* Returns the set of all accepted strings. * Returns the set of all accepted strings.
* *
@ -254,4 +290,126 @@ public class TestOperations extends LuceneTestCase {
a.finishState(); a.finishState();
return a; return a;
} }
public void testRepeat() {
Automaton emptyLanguage = Automata.makeEmpty();
assertSame(emptyLanguage, Operations.repeat(emptyLanguage));
Automaton emptyString = Automata.makeEmptyString();
assertSame(emptyString, Operations.repeat(emptyString));
Automaton a = Automata.makeChar('a');
Automaton as = new Automaton();
as.createState();
as.setAccept(0, true);
as.addTransition(0, 0, 'a');
as.finishState();
assertTrue(AutomatonTestUtil.sameLanguage(as, Operations.repeat(a)));
assertSame(as, Operations.repeat(as));
Automaton aOrEmpty = new Automaton();
aOrEmpty.createState();
aOrEmpty.setAccept(0, true);
aOrEmpty.createState();
aOrEmpty.setAccept(1, true);
aOrEmpty.addTransition(0, 1, 'a');
assertTrue(AutomatonTestUtil.sameLanguage(as, Operations.repeat(aOrEmpty)));
Automaton ab = Automata.makeString("ab");
Automaton abs = new Automaton();
abs.createState();
abs.createState();
abs.setAccept(0, true);
abs.addTransition(0, 1, 'a');
abs.finishState();
abs.addTransition(1, 0, 'b');
abs.finishState();
assertTrue(AutomatonTestUtil.sameLanguage(abs, Operations.repeat(ab)));
assertSame(abs, Operations.repeat(abs));
Automaton absThenC = Operations.concatenate(abs, Automata.makeChar('c'));
Automaton absThenCs = new Automaton();
absThenCs.createState();
absThenCs.createState();
absThenCs.createState();
absThenCs.setAccept(0, true);
absThenCs.addTransition(0, 1, 'a');
absThenCs.addTransition(0, 0, 'c');
absThenCs.finishState();
absThenCs.addTransition(1, 2, 'b');
absThenCs.finishState();
absThenCs.addTransition(2, 1, 'a');
absThenCs.addTransition(2, 0, 'c');
absThenCs.finishState();
assertTrue(AutomatonTestUtil.sameLanguage(absThenCs, Operations.repeat(absThenC)));
assertSame(absThenCs, Operations.repeat(absThenCs));
Automaton aOrAb = new Automaton();
aOrAb.createState();
aOrAb.createState();
aOrAb.createState();
aOrAb.setAccept(1, true);
aOrAb.setAccept(2, true);
aOrAb.addTransition(0, 1, 'a');
aOrAb.finishState();
aOrAb.addTransition(1, 2, 'b');
aOrAb.finishState();
Automaton aOrAbs = new Automaton();
aOrAbs.createState();
aOrAbs.createState();
aOrAbs.setAccept(0, true);
aOrAbs.addTransition(0, 0, 'a');
aOrAbs.addTransition(0, 1, 'a');
aOrAbs.finishState();
aOrAbs.addTransition(1, 0, 'b');
aOrAbs.finishState();
assertTrue(
AutomatonTestUtil.sameLanguage(
Operations.determinize(aOrAbs, Integer.MAX_VALUE),
Operations.determinize(Operations.repeat(aOrAb), Integer.MAX_VALUE)));
}
public void testDuelRepeat() {
final int iters = atLeast(1_000);
for (int iter = 0; iter < iters; ++iter) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
Automaton repeat1 = Operations.determinize(Operations.repeat(a), Integer.MAX_VALUE);
Automaton repeat2 = Operations.determinize(naiveRepeat(a), Integer.MAX_VALUE);
assertTrue(AutomatonTestUtil.sameLanguage(repeat1, repeat2));
}
}
// This is the original implementation of Operations#repeat, before we improved it to generate
// simpler automata in some common cases.
private static Automaton naiveRepeat(Automaton a) {
if (a.getNumStates() == 0) {
return a;
}
Automaton.Builder builder = new Automaton.Builder();
// Create the initial state, which is accepted
builder.createState();
builder.setAccept(0, true);
builder.copy(a);
Transition t = new Transition();
int count = a.initTransition(0, t);
for (int i = 0; i < count; i++) {
a.getNextTransition(t);
builder.addTransition(0, t.dest + 1, t.min, t.max);
}
int numStates = a.getNumStates();
for (int s = 0; s < numStates; s++) {
if (a.isAccept(s)) {
count = a.initTransition(0, t);
for (int i = 0; i < count; i++) {
a.getNextTransition(t);
builder.addTransition(s + 1, t.dest + 1, t.min, t.max);
}
}
}
return builder.finish();
}
} }

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
/** /**
* Simple unit tests for RegExp parsing. * Simple unit tests for RegExp parsing.
@ -698,7 +699,7 @@ public class TestRegExpParsing extends LuceneTestCase {
private void assertSameLanguage(Automaton expected, Automaton actual) { private void assertSameLanguage(Automaton expected, Automaton actual) {
expected = Operations.determinize(expected, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); expected = Operations.determinize(expected, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
actual = Operations.determinize(actual, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); actual = Operations.determinize(actual, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
boolean result = Operations.sameLanguage(expected, actual); boolean result = AutomatonTestUtil.sameLanguage(expected, actual);
if (result == false) { if (result == false) {
System.out.println(expected.toDot()); System.out.println(expected.toDot());
System.out.println(actual.toDot()); System.out.println(actual.toDot());

@ -28,6 +28,7 @@ import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
@ -158,7 +159,7 @@ public class TestStringsToAutomaton extends LuceneTestCase {
private static void assertSameAutomaton(Automaton a, Automaton b) { private static void assertSameAutomaton(Automaton a, Automaton b) {
assertEquals(a.getNumStates(), b.getNumStates()); assertEquals(a.getNumStates(), b.getNumStates());
assertEquals(a.getNumTransitions(), b.getNumTransitions()); assertEquals(a.getNumTransitions(), b.getNumTransitions());
assertTrue(Operations.sameLanguage(a, b)); assertTrue(AutomatonTestUtil.sameLanguage(a, b));
} }
private List<BytesRef> basicTerms() { private List<BytesRef> basicTerms() {

@ -21,7 +21,6 @@ import static org.apache.lucene.sandbox.facet.ComparableUtils.byAggregatedValue;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -58,7 +57,7 @@ import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder; import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder; import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder;
import org.apache.lucene.sandbox.facet.recorders.Reducer; import org.apache.lucene.sandbox.facet.recorders.Reducer;
import org.apache.lucene.search.CollectorOwner; import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.LongValuesSource;
@ -148,9 +147,10 @@ public class SandboxFacetsExample {
FacetFieldCollectorManager<CountFacetRecorder> collectorManager = FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder); new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder);
//// (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and // (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and
//// ranges, or even two taxonomy facets that use different Category List Fields, we can // ranges, or even two taxonomy facets that use different Category List Fields, we can
//// use MultiCollectorManager, e.g.: // use MultiCollectorManager, e.g.:
//
// TODO: add a demo for it. // TODO: add a demo for it.
// TaxonomyFacetsCutter publishDateCutter = new // TaxonomyFacetsCutter publishDateCutter = new
// TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader); // TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader);
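As a stop-gap for the TODO above, a minimal hedged sketch of the idea (assumptions: `publishDateCutter` and `publishDateRecorder` are hypothetical names mirroring the commented-out code; `MultiCollectorManager` reduces to an `Object[]` with one slot per wrapped manager):

    // Hypothetical sketch only; it is not the missing demo itself.
    TaxonomyFacetsCutter publishDateCutter =
        new TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader);
    CountFacetRecorder publishDateRecorder = new CountFacetRecorder();
    MultiCollectorManager mcm =
        new MultiCollectorManager(
            new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder),
            new FacetFieldCollectorManager<>(publishDateCutter, publishDateRecorder));
    // Each slot of the reduced Object[] holds the matching manager's result:
    Object[] results = searcher.search(new MatchAllDocsQuery(), mcm);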
@ -563,17 +563,13 @@ public class SandboxFacetsExample {
// FacetFieldCollectorManager anyway, and leaf cutter are not merged or anything like that. // FacetFieldCollectorManager anyway, and leaf cutter are not merged or anything like that.
FacetFieldCollectorManager<CountFacetRecorder> publishDayDimensionCollectorManager = FacetFieldCollectorManager<CountFacetRecorder> publishDayDimensionCollectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder); new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder);
List<CollectorOwner<FacetFieldCollector, CountFacetRecorder>> drillSidewaysOwners = List<CollectorManager<FacetFieldCollector, CountFacetRecorder>> drillSidewaysManagers =
List.of(new CollectorOwner<>(publishDayDimensionCollectorManager)); List.of(publishDayDimensionCollectorManager);
//// (3) search //// (3) search
// Right now we return the same Recorder we created - so we can ignore results // Right now we return the same Recorder we created - so we can ignore results
DrillSideways ds = new DrillSideways(searcher, config, taxoReader); DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
// We must wrap list of drill sideways owner with unmodifiableList to make generics work. ds.search(q, drillDownCollectorManager, drillSidewaysManagers);
ds.search(
q,
new CollectorOwner<>(drillDownCollectorManager),
Collections.unmodifiableList(drillSidewaysOwners));
//// (4) Get top 10 results by count for Author //// (4) Get top 10 results by count for Author
List<FacetResult> facetResults = new ArrayList<>(2); List<FacetResult> facetResults = new ArrayList<>(2);

@ -37,9 +37,9 @@ configure(project(":lucene:distribution")) {
// Maven-published submodule JARs are part of the binary distribution. // Maven-published submodule JARs are part of the binary distribution.
// We don't copy their transitive dependencies. // We don't copy their transitive dependencies.
def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p in [ def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p.path in [
// Placed in a separate folder (module layer conflicts). // Placed in a separate folder (module layer conflicts).
project(":lucene:test-framework"), ":lucene:test-framework",
]) } ]) }
for (Project module : binaryModules) { for (Project module : binaryModules) {
jars(module, { jars(module, {

@ -18,7 +18,6 @@ package org.apache.lucene.facet;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -31,8 +30,8 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts; import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
@ -302,25 +301,13 @@ public class DrillSideways {
} }
} }
private static class CallableCollector implements Callable<Void> { private record CallableCollector<R>(
private final IndexSearcher searcher; IndexSearcher searcher, Query query, CollectorManager<?, R> collectorManager)
private final Query query; implements Callable<R> {
private final CollectorOwner<?, ?> collectorOwner;
private CallableCollector(
IndexSearcher searcher, Query query, CollectorOwner<?, ?> collectorOwner) {
this.searcher = searcher;
this.query = query;
this.collectorOwner = collectorOwner;
}
@Override @Override
public Void call() throws Exception { public R call() throws Exception {
searcher.search(query, collectorOwner); return searcher.search(query, collectorManager);
// Call getResult to trigger reduce, we don't need to return results because users can access
// them directly from collectorOwner
collectorOwner.getResult();
return null;
} }
} }
@ -344,31 +331,30 @@ public class DrillSideways {
// Main query // Main query
FacetsCollectorManager drillDownFacetsCollectorManager = FacetsCollectorManager drillDownFacetsCollectorManager =
createDrillDownFacetsCollectorManager(); createDrillDownFacetsCollectorManager();
final CollectorOwner<?, ?> mainCollectorOwner; final CollectorManager<?, ?> mainCollectorManager;
if (drillDownFacetsCollectorManager != null) { if (drillDownFacetsCollectorManager != null) {
// Make sure we populate a facet collector corresponding to the base query if desired: // Make sure we populate a facet collector corresponding to the base query if desired:
mainCollectorOwner = mainCollectorManager =
new CollectorOwner<>( new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager);
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager));
} else { } else {
mainCollectorOwner = new CollectorOwner<>(hitCollectorManager); mainCollectorManager = hitCollectorManager;
} }
// Drill sideways dimensions // Drill sideways dimensions
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners; final List<CollectorManager<FacetsCollector, FacetsCollector>> drillSidewaysCollectorManagers;
if (query.getDims().isEmpty() == false) { if (query.getDims().isEmpty() == false) {
drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size()); drillSidewaysCollectorManagers = new ArrayList<>(query.getDims().size());
for (int i = 0; i < query.getDims().size(); i++) { for (int i = 0; i < query.getDims().size(); i++) {
drillSidewaysCollectorOwners.add( drillSidewaysCollectorManagers.add(createDrillSidewaysFacetsCollectorManager());
new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager()));
} }
} else { } else {
drillSidewaysCollectorOwners = null; drillSidewaysCollectorManagers = null;
} }
// Execute query // Execute query
final Result<?, FacetsCollector> result;
if (executor != null) { if (executor != null) {
searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners); result = searchConcurrently(query, mainCollectorManager, drillSidewaysCollectorManagers);
} else { } else {
searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners); result = searchSequentially(query, mainCollectorManager, drillSidewaysCollectorManagers);
} }
// Collect results // Collect results
@ -377,12 +363,12 @@ public class DrillSideways {
if (drillDownFacetsCollectorManager != null) { if (drillDownFacetsCollectorManager != null) {
// drill down collected using MultiCollector // drill down collected using MultiCollector
// Extract the results: // Extract the results:
Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult(); Object[] drillDownResult = (Object[]) result.drillDownResult;
facetsCollectorResult = (FacetsCollector) drillDownResult[0]; facetsCollectorResult = (FacetsCollector) drillDownResult[0];
hitCollectorResult = (R) drillDownResult[1]; hitCollectorResult = (R) drillDownResult[1];
} else { } else {
facetsCollectorResult = null; facetsCollectorResult = null;
hitCollectorResult = (R) mainCollectorOwner.getResult(); hitCollectorResult = (R) result.drillDownResult;
} }
// Getting results for drill sideways dimensions (if any) // Getting results for drill sideways dimensions (if any)
@ -391,12 +377,11 @@ public class DrillSideways {
if (query.getDims().isEmpty() == false) { if (query.getDims().isEmpty() == false) {
drillSidewaysDims = query.getDims().keySet().toArray(new String[0]); drillSidewaysDims = query.getDims().keySet().toArray(new String[0]);
int numDims = query.getDims().size(); int numDims = query.getDims().size();
assert drillSidewaysCollectorOwners != null; assert drillSidewaysCollectorManagers != null;
assert drillSidewaysCollectorOwners.size() == numDims; assert drillSidewaysCollectorManagers.size() == numDims;
drillSidewaysCollectors = new FacetsCollector[numDims]; drillSidewaysCollectors = new FacetsCollector[numDims];
for (int dim = 0; dim < numDims; dim++) { for (int dim = 0; dim < numDims; dim++) {
drillSidewaysCollectors[dim] = drillSidewaysCollectors[dim] = result.drillSidewaysResults.get(dim);
(FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult();
} }
} else { } else {
drillSidewaysDims = null; drillSidewaysDims = null;
@ -414,52 +399,51 @@ public class DrillSideways {
/** /**
* Search using DrillDownQuery with custom collectors. This method can be used with any {@link * Search using DrillDownQuery with custom collectors. This method can be used with any {@link
* CollectorOwner}s. It doesn't return anything because it is expected that you read results from * CollectorManager}s.
* provided {@link CollectorOwner}s.
* *
* <p>To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill * <p>Note: Use {@link MultiCollectorManager} to collect both hits and facets for the entire query
* sideways dimensions. * and/or for drill-sideways dimensions. You can also use it to wrap different types of {@link
* * CollectorManager} for drill-sideways dimensions.
* <p>Note: use {@link Collections#unmodifiableList(List)} to wrap {@code
* drillSidewaysCollectorOwners} to convince compiler that it is safe to use List here.
*
* <p>Use {@link MultiCollectorManager} wrapped by {@link CollectorOwner} to collect both hits and
* facets for the entire query and/or for drill-sideways dimensions.
*
* <p>TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C,
* because we want each dimensions to be able to use their own types. Alternatively, we can use
* typesafe heterogeneous container and provide CollectorManager type for each dimension to this
* method? I do like CollectorOwner approach as it seems more intuitive?
*/ */
public void search( public <C extends Collector, T, K extends Collector, R> Result<T, R> search(
final DrillDownQuery query, DrillDownQuery query,
CollectorOwner<?, ?> drillDownCollectorOwner, CollectorManager<C, T> drillDownCollectorManager,
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners) List<CollectorManager<K, R>> drillSidewaysCollectorManagers)
throws IOException { throws IOException {
if (drillDownCollectorOwner == null) { if (drillDownCollectorManager == null) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"This search method requires client to provide drill down collector manager"); "This search method requires client to provide drill down collector manager");
} }
if (drillSidewaysCollectorOwners == null) { if (drillSidewaysCollectorManagers == null) {
if (query.getDims().isEmpty() == false) { if (query.getDims().isEmpty() == false) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"The query requires not null drillSidewaysCollectorOwners"); "The query requires not null drillSidewaysCollectorManagers");
} }
} else if (drillSidewaysCollectorOwners.size() != query.getDims().size()) { } else if (drillSidewaysCollectorManagers.size() != query.getDims().size()) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"drillSidewaysCollectorOwners size must be equal to number of dimensions in the query."); "drillSidewaysCollectorManagers size must be equal to number of dimensions in the query.");
} }
if (executor != null) { if (executor != null) {
searchConcurrently(query, drillDownCollectorOwner, drillSidewaysCollectorOwners); return searchConcurrently(query, drillDownCollectorManager, drillSidewaysCollectorManagers);
} else { } else {
searchSequentially(query, drillDownCollectorOwner, drillSidewaysCollectorOwners); return searchSequentially(query, drillDownCollectorManager, drillSidewaysCollectorManagers);
} }
} }
private void searchSequentially( /**
* {@link #search(DrillDownQuery, CollectorManager, List)} result. It doesn't depend on {@link
* Facets} to allow users to use any type of {@link CollectorManager} for drill-down or
* drill-sideways dimensions.
*
* @param drillDownResult result from drill down (main) {@link CollectorManager}
* @param drillSidewaysResults results from drill sideways {@link CollectorManager}s
*/
public record Result<T, R>(T drillDownResult, List<R> drillSidewaysResults) {}
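For illustration, a minimal hedged sketch of calling the new signature and unpacking this record (assumptions: `searcher`, `config`, `taxoReader`, and a `DrillDownQuery ddq` with exactly one drill-sideways dimension already exist; `FacetsCollectorManager` reduces to a `FacetsCollector`):

    // Illustrative usage, not part of this change:
    DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
    DrillSideways.Result<FacetsCollector, FacetsCollector> result =
        ds.search(ddq, new FacetsCollectorManager(), List.of(new FacetsCollectorManager()));
    FacetsCollector drillDown = result.drillDownResult(); // facets for the main query
    FacetsCollector sideways = result.drillSidewaysResults().get(0); // facets for the one dimension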
private <C extends Collector, T, K extends Collector, R> Result<T, R> searchSequentially(
final DrillDownQuery query, final DrillDownQuery query,
final CollectorOwner<?, ?> drillDownCollectorOwner, final CollectorManager<C, T> drillDownCollectorManager,
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners) final List<CollectorManager<K, R>> drillSidewaysCollectorManagers)
throws IOException { throws IOException {
Map<String, Integer> drillDownDims = query.getDims(); Map<String, Integer> drillDownDims = query.getDims();
@ -467,9 +451,7 @@ public class DrillSideways {
if (drillDownDims.isEmpty()) { if (drillDownDims.isEmpty()) {
// There are no drill-down dims, so there is no // There are no drill-down dims, so there is no
// drill-sideways to compute: // drill-sideways to compute:
searcher.search(query, drillDownCollectorOwner); return new Result<>(searcher.search(query, drillDownCollectorManager), null);
drillDownCollectorOwner.getResult();
return;
} }
Query baseQuery = query.getBaseQuery(); Query baseQuery = query.getBaseQuery();
@ -480,59 +462,60 @@ public class DrillSideways {
} }
Query[] drillDownQueries = query.getDrillDownQueries(); Query[] drillDownQueries = query.getDrillDownQueries();
DrillSidewaysQuery dsq = DrillSidewaysQuery<K, R> dsq =
new DrillSidewaysQuery( new DrillSidewaysQuery<>(
baseQuery, baseQuery, drillSidewaysCollectorManagers, drillDownQueries, scoreSubDocsAtOnce());
// drillDownCollectorOwner,
// Don't pass drill down collector because drill down is collected by IndexSearcher
// itself.
// TODO: deprecate drillDown collection in DrillSidewaysQuery?
null,
drillSidewaysCollectorOwners,
drillDownQueries,
scoreSubDocsAtOnce());
searcher.search(dsq, drillDownCollectorOwner); T collectorResult = searcher.search(dsq, drillDownCollectorManager);
// This method doesn't return results as each dimension might have its own result type. List<R> drillSidewaysResults = new ArrayList<>(drillDownDims.size());
// But we call getResult to trigger results reducing, so that users don't have to worry about assert drillSidewaysCollectorManagers != null
// it. : "Case without drill sideways dimensions is handled above";
drillDownCollectorOwner.getResult(); int numSlices = dsq.managedDrillSidewaysCollectors.size();
if (drillSidewaysCollectorOwners != null) { for (int dim = 0; dim < drillDownDims.size(); dim++) {
for (CollectorOwner<?, ?> sidewaysOwner : drillSidewaysCollectorOwners) { List<K> collectorsForDim = new ArrayList<>(numSlices);
sidewaysOwner.getResult(); for (int slice = 0; slice < numSlices; slice++) {
collectorsForDim.add(dsq.managedDrillSidewaysCollectors.get(slice).get(dim));
} }
drillSidewaysResults.add(
dim, drillSidewaysCollectorManagers.get(dim).reduce(collectorsForDim));
} }
return new Result<>(collectorResult, drillSidewaysResults);
} }
private void searchConcurrently( private <C extends Collector, T, K extends Collector, R> Result<T, R> searchConcurrently(
final DrillDownQuery query, final DrillDownQuery query,
final CollectorOwner<?, ?> drillDownCollectorOwner, final CollectorManager<C, T> drillDownCollectorManager,
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners) final List<CollectorManager<K, R>> drillSidewaysCollectorManagers) {
throws IOException {
final Map<String, Integer> drillDownDims = query.getDims(); final Map<String, Integer> drillDownDims = query.getDims();
final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1); final CallableCollector<T> drillDownCallableCollector =
new CallableCollector<>(searcher, query, drillDownCollectorManager);
final List<CallableCollector<R>> drillSidewaysCallableCollectors =
new ArrayList<>(drillDownDims.size());
callableCollectors.add(new CallableCollector(searcher, query, drillDownCollectorOwner));
int i = 0; int i = 0;
final Query[] filters = query.getDrillDownQueries(); final Query[] filters = query.getDrillDownQueries();
for (String dim : drillDownDims.keySet()) { for (String dim : drillDownDims.keySet()) {
callableCollectors.add( drillSidewaysCallableCollectors.add(
new CallableCollector( new CallableCollector<>(
searcher, searcher,
getDrillDownQuery(query, filters, dim), getDrillDownQuery(query, filters, dim),
drillSidewaysCollectorOwners.get(i))); drillSidewaysCollectorManagers.get(i)));
i++; i++;
} }
try { try {
// Run the query pool final Future<T> drillDownFuture = executor.submit(drillDownCallableCollector);
final List<Future<Void>> futures = executor.invokeAll(callableCollectors); final List<Future<R>> drillSidewaysFutures =
executor.invokeAll(drillSidewaysCallableCollectors);
// Wait for results. We don't read the results as they are collected by CollectorOwners T collectorResult = drillDownFuture.get();
for (i = 0; i < futures.size(); i++) { List<R> drillSidewaysResults = new ArrayList<>(drillDownDims.size());
futures.get(i).get();
for (i = 0; i < drillSidewaysFutures.size(); i++) {
drillSidewaysResults.add(i, drillSidewaysFutures.get(i).get());
} }
return new Result<>(collectorResult, drillSidewaysResults);
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new ThreadInterruptedException(e); throw new ThreadInterruptedException(e);
} catch (ExecutionException e) { } catch (ExecutionException e) {

@ -17,19 +17,20 @@
package org.apache.lucene.facet; package org.apache.lucene.facet;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.Collector; import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorOwner; import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -41,12 +42,12 @@ import org.apache.lucene.search.Weight;
// TODO change the way DrillSidewaysScorer is used, this query does not work // TODO change the way DrillSidewaysScorer is used, this query does not work
// with filter caching // with filter caching
class DrillSidewaysQuery extends Query { class DrillSidewaysQuery<K extends Collector, R> extends Query {
final Query baseQuery; final Query baseQuery;
final CollectorOwner<?, ?> drillDownCollectorOwner; final List<CollectorManager<K, R>> drillSidewaysCollectorManagers;
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners; final List<List<K>> managedDrillSidewaysCollectors;
final Query[] drillDownQueries; final Query[] drillDownQueries;
@ -58,15 +59,36 @@ class DrillSidewaysQuery extends Query {
*/ */
DrillSidewaysQuery( DrillSidewaysQuery(
Query baseQuery, Query baseQuery,
CollectorOwner<?, ?> drillDownCollectorOwner, List<CollectorManager<K, R>> drillSidewaysCollectorManagers,
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners, Query[] drillDownQueries,
boolean scoreSubDocsAtOnce) {
// Note that the "managed" collector lists are synchronized here since bulkScorer()
// can be invoked concurrently and needs to remain thread-safe. We're OK with synchronizing
// on the whole list as contention is expected to remain very low:
this(
baseQuery,
drillSidewaysCollectorManagers,
Collections.synchronizedList(new ArrayList<>()),
drillDownQueries,
scoreSubDocsAtOnce);
}
/**
* Needed for {@link Query#rewrite(IndexSearcher)}. Ensures the same "managed" lists get used
* since {@link DrillSideways} accesses references to these through the original {@code
* DrillSidewaysQuery}.
*/
private DrillSidewaysQuery(
Query baseQuery,
List<CollectorManager<K, R>> drillSidewaysCollectorManagers,
List<List<K>> managedDrillSidewaysCollectors,
Query[] drillDownQueries, Query[] drillDownQueries,
boolean scoreSubDocsAtOnce) { boolean scoreSubDocsAtOnce) {
this.baseQuery = Objects.requireNonNull(baseQuery); this.baseQuery = Objects.requireNonNull(baseQuery);
this.drillDownCollectorOwner = drillDownCollectorOwner; this.drillSidewaysCollectorManagers = drillSidewaysCollectorManagers;
this.drillSidewaysCollectorOwners = drillSidewaysCollectorOwners;
this.drillDownQueries = drillDownQueries; this.drillDownQueries = drillDownQueries;
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce; this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
this.managedDrillSidewaysCollectors = managedDrillSidewaysCollectors;
} }
@Override @Override
@ -87,10 +109,10 @@ class DrillSidewaysQuery extends Query {
if (newQuery == baseQuery) { if (newQuery == baseQuery) {
return super.rewrite(indexSearcher); return super.rewrite(indexSearcher);
} else { } else {
return new DrillSidewaysQuery( return new DrillSidewaysQuery<>(
newQuery, newQuery,
drillDownCollectorOwner, drillSidewaysCollectorManagers,
drillSidewaysCollectorOwners, managedDrillSidewaysCollectors,
drillDownQueries, drillDownQueries,
scoreSubDocsAtOnce); scoreSubDocsAtOnce);
} }
@ -124,14 +146,8 @@ class DrillSidewaysQuery extends Query {
int drillDownCount = drillDowns.length; int drillDownCount = drillDowns.length;
Collector drillDownCollector; List<K> sidewaysCollectors = new ArrayList<>(drillDownCount);
final LeafCollector drillDownLeafCollector; managedDrillSidewaysCollectors.add(sidewaysCollectors);
if (drillDownCollectorOwner != null) {
drillDownCollector = drillDownCollectorOwner.newCollector();
drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
} else {
drillDownLeafCollector = null;
}
DrillSidewaysScorer.DocsAndCost[] dims = DrillSidewaysScorer.DocsAndCost[] dims =
new DrillSidewaysScorer.DocsAndCost[drillDownCount]; new DrillSidewaysScorer.DocsAndCost[drillDownCount];
@ -144,7 +160,8 @@ class DrillSidewaysQuery extends Query {
scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty()); scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
} }
Collector sidewaysCollector = drillSidewaysCollectorOwners.get(dim).newCollector(); K sidewaysCollector = drillSidewaysCollectorManagers.get(dim).newCollector();
sidewaysCollectors.add(dim, sidewaysCollector);
dims[dim] = dims[dim] =
new DrillSidewaysScorer.DocsAndCost( new DrillSidewaysScorer.DocsAndCost(
@ -155,9 +172,6 @@ class DrillSidewaysQuery extends Query {
// a null scorer in this case, but we need to make sure #finish gets called on all facet // a null scorer in this case, but we need to make sure #finish gets called on all facet
// collectors since IndexSearcher won't handle this for us: // collectors since IndexSearcher won't handle this for us:
if (baseScorerSupplier == null || nullCount > 1) { if (baseScorerSupplier == null || nullCount > 1) {
if (drillDownLeafCollector != null) {
drillDownLeafCollector.finish();
}
for (DrillSidewaysScorer.DocsAndCost dim : dims) { for (DrillSidewaysScorer.DocsAndCost dim : dims) {
dim.sidewaysLeafCollector.finish(); dim.sidewaysLeafCollector.finish();
} }
@ -177,11 +191,7 @@ class DrillSidewaysQuery extends Query {
@Override @Override
public BulkScorer bulkScorer() throws IOException { public BulkScorer bulkScorer() throws IOException {
return new DrillSidewaysScorer( return new DrillSidewaysScorer(
context, context, baseScorerSupplier.get(Long.MAX_VALUE), dims, scoreSubDocsAtOnce);
baseScorerSupplier.get(Long.MAX_VALUE),
drillDownLeafCollector,
dims,
scoreSubDocsAtOnce);
} }
@Override @Override
@ -212,9 +222,8 @@ class DrillSidewaysQuery extends Query {
final int prime = 31; final int prime = 31;
int result = classHash(); int result = classHash();
result = prime * result + Objects.hashCode(baseQuery); result = prime * result + Objects.hashCode(baseQuery);
result = prime * result + Objects.hashCode(drillDownCollectorOwner);
result = prime * result + Arrays.hashCode(drillDownQueries); result = prime * result + Arrays.hashCode(drillDownQueries);
result = prime * result + Objects.hashCode(drillSidewaysCollectorOwners); result = prime * result + Objects.hashCode(drillSidewaysCollectorManagers);
return result; return result;
} }
@ -223,10 +232,9 @@ class DrillSidewaysQuery extends Query {
return sameClassAs(other) && equalsTo(getClass().cast(other)); return sameClassAs(other) && equalsTo(getClass().cast(other));
} }
private boolean equalsTo(DrillSidewaysQuery other) { private boolean equalsTo(DrillSidewaysQuery<?, ?> other) {
return Objects.equals(baseQuery, other.baseQuery) return Objects.equals(baseQuery, other.baseQuery)
&& Objects.equals(drillDownCollectorOwner, other.drillDownCollectorOwner)
&& Arrays.equals(drillDownQueries, other.drillDownQueries) && Arrays.equals(drillDownQueries, other.drillDownQueries)
&& Objects.equals(drillSidewaysCollectorOwners, other.drillSidewaysCollectorOwners); && Objects.equals(drillSidewaysCollectorManagers, other.drillSidewaysCollectorManagers);
} }
} }

@ -45,8 +45,6 @@ class DrillSidewaysScorer extends BulkScorer {
// private static boolean DEBUG = false; // private static boolean DEBUG = false;
private final LeafCollector drillDownLeafCollector;
private final DocsAndCost[] dims; private final DocsAndCost[] dims;
// DrillDown DocsEnums: // DrillDown DocsEnums:
@ -68,7 +66,6 @@ class DrillSidewaysScorer extends BulkScorer {
DrillSidewaysScorer( DrillSidewaysScorer(
LeafReaderContext context, LeafReaderContext context,
Scorer baseScorer, Scorer baseScorer,
LeafCollector drillDownLeafCollector,
DocsAndCost[] dims, DocsAndCost[] dims,
boolean scoreSubDocsAtOnce) { boolean scoreSubDocsAtOnce) {
this.dims = dims; this.dims = dims;
@ -81,7 +78,6 @@ class DrillSidewaysScorer extends BulkScorer {
} else { } else {
this.baseApproximation = baseIterator; this.baseApproximation = baseIterator;
} }
this.drillDownLeafCollector = drillDownLeafCollector;
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce; this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
} }
@ -709,9 +705,6 @@ class DrillSidewaysScorer extends BulkScorer {
// } // }
collector.collect(collectDocID); collector.collect(collectDocID);
if (drillDownLeafCollector != null) {
drillDownLeafCollector.collect(collectDocID);
}
// TODO: we could "fix" faceting of the sideways counts // TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the // to do this "union" (of the drill down hits) in the
@ -725,9 +718,6 @@ class DrillSidewaysScorer extends BulkScorer {
private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException { private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException {
collector.collect(collectDocID); collector.collect(collectDocID);
if (drillDownLeafCollector != null) {
drillDownLeafCollector.collect(collectDocID);
}
// Tally sideways count: // Tally sideways count:
dim.sidewaysLeafCollector.collect(collectDocID); dim.sidewaysLeafCollector.collect(collectDocID);
@ -735,9 +725,6 @@ class DrillSidewaysScorer extends BulkScorer {
private void collectHit(LeafCollector collector, List<DocsAndCost> dims) throws IOException { private void collectHit(LeafCollector collector, List<DocsAndCost> dims) throws IOException {
collector.collect(collectDocID); collector.collect(collectDocID);
if (drillDownLeafCollector != null) {
drillDownLeafCollector.collect(collectDocID);
}
// Tally sideways counts: // Tally sideways counts:
for (DocsAndCost dim : dims) { for (DocsAndCost dim : dims) {
@ -756,9 +743,6 @@ class DrillSidewaysScorer extends BulkScorer {
// Note: We _only_ call #finish on the facets collectors we're managing here, but not the // Note: We _only_ call #finish on the facets collectors we're managing here, but not the
// "main" collector. This is because IndexSearcher handles calling #finish on the main // "main" collector. This is because IndexSearcher handles calling #finish on the main
// collector. // collector.
if (drillDownLeafCollector != null) {
drillDownLeafCollector.finish();
}
for (DocsAndCost dim : dims) { for (DocsAndCost dim : dims) {
dim.sidewaysLeafCollector.finish(); dim.sidewaysLeafCollector.finish();
} }
@ -766,9 +750,6 @@ class DrillSidewaysScorer extends BulkScorer {
private void setScorer(LeafCollector mainCollector, Scorable scorer) throws IOException { private void setScorer(LeafCollector mainCollector, Scorable scorer) throws IOException {
mainCollector.setScorer(scorer); mainCollector.setScorer(scorer);
if (drillDownLeafCollector != null) {
drillDownLeafCollector.setScorer(scorer);
}
for (DocsAndCost dim : dims) { for (DocsAndCost dim : dims) {
dim.sidewaysLeafCollector.setScorer(scorer); dim.sidewaysLeafCollector.setScorer(scorer);
} }

@ -16,15 +16,8 @@
*/ */
package org.apache.lucene.facet.taxonomy.writercache; package org.apache.lucene.facet.taxonomy.writercache;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.lucene.util.SuppressForbidden;
/** /**
* Similar to {@link StringBuilder}, but with a more efficient growing strategy. This class uses * Similar to {@link StringBuilder}, but with a more efficient growing strategy. This class uses
@ -32,15 +25,11 @@ import org.apache.lucene.util.SuppressForbidden;
* *
* @lucene.experimental * @lucene.experimental
*/ */
class CharBlockArray implements Appendable, Serializable, CharSequence { class CharBlockArray implements Appendable, CharSequence {
private static final long serialVersionUID = 1L;
private static final int DefaultBlockSize = 32 * 1024; // 32 KB default size private static final int DefaultBlockSize = 32 * 1024; // 32 KB default size
static final class Block implements Serializable, Cloneable { static final class Block implements Cloneable {
private static final long serialVersionUID = 1L;
final char[] chars; final char[] chars;
int length; int length;
@ -185,34 +174,4 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
} }
return sb.toString(); return sb.toString();
} }
@SuppressForbidden(
reason = "TODO: don't use java serialization here, inefficient and unnecessary")
void flush(OutputStream out) throws IOException {
ObjectOutputStream oos = null;
try {
oos = new ObjectOutputStream(out);
oos.writeObject(this);
oos.flush();
} finally {
if (oos != null) {
oos.close();
}
}
}
@SuppressForbidden(
reason = "TODO: don't use java serialization here, inefficient and unnecessary")
public static CharBlockArray open(InputStream in) throws IOException, ClassNotFoundException {
ObjectInputStream ois = null;
try {
ois = new ObjectInputStream(in);
CharBlockArray a = (CharBlockArray) ois.readObject();
return a;
} finally {
if (ois != null) {
ois.close();
}
}
}
} }
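The "more efficient growing strategy" in the javadoc comes down to appending fixed-size blocks instead of reallocating one large array; a standalone hedged sketch of that idea (illustrative code, not CharBlockArray's actual implementation):

    import java.util.ArrayList;
    import java.util.List;

    class BlockAppendSketch {
      static final int BLOCK_SIZE = 32 * 1024; // mirrors DefaultBlockSize above

      final List<char[]> blocks = new ArrayList<>();
      int lengthInLast = 0;

      void append(char c) {
        if (blocks.isEmpty() || lengthInLast == BLOCK_SIZE) {
          // Grow by adding a block; existing characters are never copied,
          // unlike StringBuilder's reallocate-and-copy on overflow.
          blocks.add(new char[BLOCK_SIZE]);
          lengthInLast = 0;
        }
        blocks.get(blocks.size() - 1)[lengthInLast++] = c;
      }
    }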

@ -284,7 +284,6 @@ public class TestDrillSideways extends FacetTestCase {
Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f); Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer dimScorer = dimWeight.scorer(ctx); Scorer dimScorer = dimWeight.scorer(ctx);
FacetsCollector baseFC = new FacetsCollector();
FacetsCollector dimFC = new FacetsCollector(); FacetsCollector dimFC = new FacetsCollector();
DrillSidewaysScorer.DocsAndCost docsAndCost = DrillSidewaysScorer.DocsAndCost docsAndCost =
new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC.getLeafCollector(ctx)); new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC.getLeafCollector(ctx));
@ -311,7 +310,6 @@ public class TestDrillSideways extends FacetTestCase {
new DrillSidewaysScorer( new DrillSidewaysScorer(
ctx, ctx,
baseScorer, baseScorer,
baseFC.getLeafCollector(ctx),
new DrillSidewaysScorer.DocsAndCost[] {docsAndCost}, new DrillSidewaysScorer.DocsAndCost[] {docsAndCost},
scoreSubDocsAtOnce); scoreSubDocsAtOnce);
expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null)); expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null));
@ -321,7 +319,6 @@ public class TestDrillSideways extends FacetTestCase {
// both our base and sideways dim facets collectors. What we really want to test here is // both our base and sideways dim facets collectors. What we really want to test here is
// that the matching docs are still correctly present and populated after an early // that the matching docs are still correctly present and populated after an early
// termination occurs (i.e., #finish is properly called in that scenario): // termination occurs (i.e., #finish is properly called in that scenario):
assertEquals(1, baseFC.getMatchingDocs().size());
assertEquals(1, dimFC.getMatchingDocs().size()); assertEquals(1, dimFC.getMatchingDocs().size());
} }
} }

View File

@ -242,10 +242,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
public void testIntAssociationRandom() throws Exception { public void testIntAssociationRandom() throws Exception {
FacetsCollector fc = new FacetsCollector();
IndexSearcher searcher = newSearcher(reader); IndexSearcher searcher = newSearcher(reader);
searcher.search(new TermQuery(new Term("match", "yes")), fc); FacetsCollector fc =
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
Map<String, Integer> expected; Map<String, Integer> expected;
Facets facets; Facets facets;
@ -332,10 +331,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
public void testFloatAssociationRandom() throws Exception { public void testFloatAssociationRandom() throws Exception {
FacetsCollector fc = new FacetsCollector();
IndexSearcher searcher = newSearcher(reader); IndexSearcher searcher = newSearcher(reader);
searcher.search(new TermQuery(new Term("match", "yes")), fc); FacetsCollector fc =
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
Map<String, Float> expected; Map<String, Float> expected;
Facets facets; Facets facets;

@ -16,14 +16,10 @@
*/ */
package org.apache.lucene.facet.taxonomy.writercache; package org.apache.lucene.facet.taxonomy.writercache;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetTestCase;
public class TestCharBlockArray extends FacetTestCase { public class TestCharBlockArray extends FacetTestCase {
@ -89,19 +85,6 @@ public class TestCharBlockArray extends FacetTestCase {
} }
assertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array); assertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);
Path tempDir = createTempDir("growingchararray");
Path f = tempDir.resolve("GrowingCharArrayTest.tmp");
BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(f));
array.flush(out);
out.flush();
out.close();
BufferedInputStream in = new BufferedInputStream(Files.newInputStream(f));
array = CharBlockArray.open(in);
assertEqualsInternal(
"GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array);
in.close();
} }
private static void assertEqualsInternal( private static void assertEqualsInternal(

@ -95,7 +95,7 @@ class TermsQuery extends MultiTermQuery implements Accountable {
} }
@Override @Override
public long getTermsCount() throws IOException { public long getTermsCount() {
return terms.size(); return terms.size();
} }

@ -238,6 +238,7 @@ public final class Intervals {
*/ */
public static IntervalsSource regexp(BytesRef regexp, int maxExpansions) { public static IntervalsSource regexp(BytesRef regexp, int maxExpansions) {
Automaton automaton = new RegExp(new Term("", regexp).text()).toAutomaton(); Automaton automaton = new RegExp(new Term("", regexp).text()).toAutomaton();
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, true, false); CompiledAutomaton ca = new CompiledAutomaton(automaton, false, true, false);
return new MultiTermIntervalsSource(ca, maxExpansions, regexp.utf8ToString()); return new MultiTermIntervalsSource(ca, maxExpansions, regexp.utf8ToString());
} }

@ -447,4 +447,24 @@ public class TestIntervalQuery extends LuceneTestCase {
field, or(term("XXX"), containing(extend(term("message"), 0, 10), term("intend")))); field, or(term("XXX"), containing(extend(term("message"), 0, 10), term("intend"))));
checkHits(q, new int[] {}); checkHits(q, new int[] {});
} }
public void testEquality() {
assertEquals(
new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo"))),
new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo"))));
assertEquals(
new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1)),
new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1)));
assertEquals(
new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1)),
new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1)));
assertEquals(
new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt"))),
new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt"))));
assertEquals(
new IntervalQuery(
"f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)),
new IntervalQuery(
"f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)));
}
} }

@ -1187,4 +1187,27 @@ public class TestIntervals extends LuceneTestCase {
checkVisits(source, 1); checkVisits(source, 1);
} }
// basic test for equality and inequality of instances created by the factories
public void testEquality() {
assertEquals(Intervals.term("wibble"), Intervals.term("wibble"));
assertEquals(Intervals.prefix(new BytesRef("p"), 1), Intervals.prefix(new BytesRef("p"), 1));
assertEquals(Intervals.fuzzyTerm("kot", 1), Intervals.fuzzyTerm("kot", 1));
assertEquals(Intervals.regexp(new BytesRef(".*ot")), Intervals.regexp(new BytesRef(".*ot")));
assertEquals(
Intervals.wildcard(new BytesRef("*.txt")), Intervals.wildcard(new BytesRef("*.txt")));
assertEquals(
Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true),
Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true));
assertNotEquals(Intervals.term("wibble"), Intervals.term("wobble"));
assertNotEquals(Intervals.prefix(new BytesRef("p"), 1), Intervals.prefix(new BytesRef("b"), 1));
assertNotEquals(Intervals.fuzzyTerm("kot", 1), Intervals.fuzzyTerm("kof", 1));
assertNotEquals(Intervals.regexp(new BytesRef(".*ot")), Intervals.regexp(new BytesRef(".*at")));
assertNotEquals(
Intervals.wildcard(new BytesRef("*.txt")), Intervals.wildcard(new BytesRef("*.tat")));
assertNotEquals(
Intervals.range(new BytesRef("warm"), new BytesRef("hot"), true, true),
Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true));
}
} }

@ -50,7 +50,6 @@ import org.apache.lucene.sandbox.facet.cutters.ranges.LongRangeFacetCutter;
import org.apache.lucene.sandbox.facet.labels.OrdToLabel; import org.apache.lucene.sandbox.facet.labels.OrdToLabel;
import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel; import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder; import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.DoubleValues; import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
@ -538,7 +537,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
////// First search, no drill-downs: ////// First search, no drill-downs:
DrillDownQuery ddq = new DrillDownQuery(config); DrillDownQuery ddq = new DrillDownQuery(config);
ds.search(ddq, new CollectorOwner<>(collectorManager), List.of()); ds.search(ddq, collectorManager, List.of());
// assertEquals(100, dsr.hits.totalHits.value); // assertEquals(100, dsr.hits.totalHits.value);
assertEquals( assertEquals(
@ -556,10 +555,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder); dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
ddq = new DrillDownQuery(config); ddq = new DrillDownQuery(config);
ddq.add("dim", "b"); ddq.add("dim", "b");
ds.search( ds.search(ddq, fieldCollectorManager, List.of(dimCollectorManager));
ddq,
new CollectorOwner<>(fieldCollectorManager),
List.of(new CollectorOwner<>(dimCollectorManager)));
// assertEquals(75, dsr.hits.totalHits.value); // assertEquals(75, dsr.hits.totalHits.value);
assertEquals( assertEquals(
@ -577,10 +573,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder); dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
ddq = new DrillDownQuery(config); ddq = new DrillDownQuery(config);
ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L)); ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L));
ds.search( ds.search(ddq, dimCollectorManager, List.of(fieldCollectorManager));
ddq,
new CollectorOwner<>(dimCollectorManager),
List.of(new CollectorOwner<>(fieldCollectorManager)));
// assertEquals(11, dsr.hits.totalHits.value); // assertEquals(11, dsr.hits.totalHits.value);
assertEquals( assertEquals(
@ -1629,14 +1622,12 @@ public class TestRangeFacet extends SandboxFacetTestCase {
countRecorder = new CountFacetRecorder(); countRecorder = new CountFacetRecorder();
CollectorOwner<DummyTotalHitCountCollector, Integer> totalHitsCollectorOwner = DrillSideways.Result<Integer, CountFacetRecorder> result =
new CollectorOwner<>(DummyTotalHitCountCollector.createManager()); ds.search(
CollectorOwner<FacetFieldCollector, CountFacetRecorder> drillSidewaysCollectorOwner = ddq,
new CollectorOwner<>( DummyTotalHitCountCollector.createManager(),
new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder)); List.of(new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder)));
ds.search(ddq, totalHitsCollectorOwner, List.of(drillSidewaysCollectorOwner)); assertEquals(1, result.drillDownResult().intValue());
assertEquals(1, totalHitsCollectorOwner.getResult().intValue());
drillSidewaysCollectorOwner.getResult();
assertEquals( assertEquals(
"dim=field path=[] value=-2147483648 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", "dim=field path=[] value=-2147483648 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
getAllSortByOrd(getRangeOrdinals(ranges), countRecorder, "field", ordToLabel).toString()); getAllSortByOrd(getRangeOrdinals(ranges), countRecorder, "field", ordToLabel).toString());

@ -39,6 +39,7 @@ import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.geo.Circle; import org.apache.lucene.geo.Circle;
import org.apache.lucene.geo.Component2D; import org.apache.lucene.geo.Component2D;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.LatLonGeometry; import org.apache.lucene.geo.LatLonGeometry;
import org.apache.lucene.geo.Polygon; import org.apache.lucene.geo.Polygon;
@ -1751,4 +1752,41 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20); newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20);
assertEquals(3, td.totalHits.value); assertEquals(3, td.totalHits.value);
} }
public void testNarrowPolygonCloseToNorthPole() throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setMergeScheduler(new SerialMergeScheduler());
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, iwc);
// index a point close to latitude 90
Document doc = new Document();
final int base = Integer.MAX_VALUE;
addPointToDoc(
FIELD_NAME,
doc,
GeoEncodingUtils.decodeLatitude(base - 2),
GeoEncodingUtils.decodeLongitude(base - 2));
w.addDocument(doc);
w.flush();
// query testing
final IndexReader reader = DirectoryReader.open(w);
final IndexSearcher s = newSearcher(reader);
double minLat = GeoEncodingUtils.decodeLatitude(base - 3);
double maxLat = GeoEncodingUtils.decodeLatitude(base);
double minLon = GeoEncodingUtils.decodeLongitude(base - 3);
double maxLon = GeoEncodingUtils.decodeLongitude(base);
Query query =
newPolygonQuery(
FIELD_NAME,
new Polygon(
new double[] {minLat, minLat, maxLat, maxLat, minLat},
new double[] {minLon, maxLon, maxLon, minLon, minLon}));
assertEquals(1, s.count(query));
IOUtils.close(w, reader, dir);
}
} }
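
The new test above targets the very top of the encoded coordinate space: GeoEncodingUtils quantizes latitude and longitude to 32-bit integers, so Integer.MAX_VALUE decodes to the largest representable latitude, just below +90. A standalone sketch (not part of the diff) of the quantization boundary the test exploits; the two decoded values differ by roughly 90/2^31 degrees, a few hundredths of a microdegree:

    // GeoEncodingUtils maps degrees to 32-bit ints and back (assumed behavior of
    // org.apache.lucene.geo.GeoEncodingUtils as used in the test above).
    int top = Integer.MAX_VALUE;
    double nearPole = GeoEncodingUtils.decodeLatitude(top);     // just below 90.0
    double oneStep = GeoEncodingUtils.decodeLatitude(top - 1);  // one quantization step lower
    System.out.println(nearPole + " vs " + oneStep);
    // re-encoding a decoded value stays on the same grid cell
    int reencoded = GeoEncodingUtils.encodeLatitude(nearPole);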

View File

@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.tests.util.automaton;
 
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.HashMap;
@@ -33,6 +34,7 @@ import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
+import org.apache.lucene.util.automaton.StatePair;
 import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.apache.lucene.util.automaton.Transition;
@@ -533,4 +535,82 @@ public class AutomatonTestUtil {
     assert a.isDeterministic() == true;
     return true;
   }
+
+  /**
+   * Returns true if these two automata accept exactly the same language. This is a costly
+   * computation! Both automata must be determinized and have no dead states!
+   */
+  public static boolean sameLanguage(Automaton a1, Automaton a2) {
+    if (a1 == a2) {
+      return true;
+    }
+    return subsetOf(a2, a1) && subsetOf(a1, a2);
+  }
+
+  /**
+   * Returns true if the language of <code>a1</code> is a subset of the language of <code>a2</code>.
+   * Both automata must be determinized and must have no dead states.
+   *
+   * <p>Complexity: quadratic in number of states.
+   */
+  public static boolean subsetOf(Automaton a1, Automaton a2) {
+    if (a1.isDeterministic() == false) {
+      throw new IllegalArgumentException("a1 must be deterministic");
+    }
+    if (a2.isDeterministic() == false) {
+      throw new IllegalArgumentException("a2 must be deterministic");
+    }
+    assert Operations.hasDeadStatesFromInitial(a1) == false;
+    assert Operations.hasDeadStatesFromInitial(a2) == false;
+    if (a1.getNumStates() == 0) {
+      // Empty language is always a subset of any other language
+      return true;
+    } else if (a2.getNumStates() == 0) {
+      return Operations.isEmpty(a1);
+    }
+
+    // TODO: cutover to iterators instead
+    Transition[][] transitions1 = a1.getSortedTransitions();
+    Transition[][] transitions2 = a2.getSortedTransitions();
+    ArrayDeque<StatePair> worklist = new ArrayDeque<>();
+    HashSet<StatePair> visited = new HashSet<>();
+    StatePair p = new StatePair(0, 0);
+    worklist.add(p);
+    visited.add(p);
+    while (worklist.size() > 0) {
+      p = worklist.removeFirst();
+      if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
+        return false;
+      }
+      Transition[] t1 = transitions1[p.s1];
+      Transition[] t2 = transitions2[p.s2];
+      for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
+        while (b2 < t2.length && t2[b2].max < t1[n1].min) {
+          b2++;
+        }
+        int min1 = t1[n1].min, max1 = t1[n1].max;
+
+        for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) {
+          if (t2[n2].min > min1) {
+            return false;
+          }
+          if (t2[n2].max < Character.MAX_CODE_POINT) {
+            min1 = t2[n2].max + 1;
+          } else {
+            min1 = Character.MAX_CODE_POINT;
+            max1 = Character.MIN_CODE_POINT;
+          }
+          StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
+          if (!visited.contains(q)) {
+            worklist.add(q);
+            visited.add(q);
+          }
+        }
+        if (min1 <= max1) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
 }
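
These additions give the test framework its own reference implementations of language equality and inclusion: both require deterministic automata without dead states, and subsetOf does a breadth-first walk over reachable state pairs of the product automaton, failing as soon as a1 accepts where a2 does not, or a1 carries a transition interval that a2's transitions do not fully cover. A hedged usage sketch with arbitrary example regexes; RegExp and Operations are the classes imported in the hunks above:

    // Requires org.apache.lucene.util.automaton.{Automaton, Operations, RegExp}
    // plus the AutomatonTestUtil methods added above.
    Automaton a1 =
        Operations.determinize(
            new RegExp("ab+").toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
    Automaton a2 =
        Operations.determinize(
            new RegExp("ab*").toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
    a1 = Operations.removeDeadStates(a1);
    a2 = Operations.removeDeadStates(a2);

    assert AutomatonTestUtil.subsetOf(a1, a2);          // every "ab+" match is an "ab*" match
    assert AutomatonTestUtil.subsetOf(a2, a1) == false; // "ab*" accepts "a"; "ab+" does not
    assert AutomatonTestUtil.sameLanguage(a1, a1);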

View File

@@ -9,17 +9,17 @@ errorprone = "2.18.0"
 flexmark = "0.61.24"
 # @keep This is GJF version for spotless/ tidy.
 googleJavaFormat = "1.23.0"
-groovy = "3.0.21"
+groovy = "4.0.22"
 hamcrest = "2.2"
 icu4j = "74.2"
 javacc = "7.0.12"
 jflex = "1.8.2"
-jgit = "5.13.1.202206130422-r"
+jgit = "6.10.0.202406032230-r"
 jmh = "1.37"
 jts = "1.17.0"
 junit = "4.13.1"
 # @keep Minimum gradle version to run the build
-minGradle = "8.8"
+minGradle = "8.10"
 # @keep This is the minimum required Java version.
 minJava = "21"
 morfologik = "2.1.9"
@@ -49,7 +49,7 @@ flexmark-ext-abbreviation = { module = "com.vladsch.flexmark:flexmark-ext-abbrev
 flexmark-ext-attributes = { module = "com.vladsch.flexmark:flexmark-ext-attributes", version.ref = "flexmark" }
 flexmark-ext-autolink = { module = "com.vladsch.flexmark:flexmark-ext-autolink", version.ref = "flexmark" }
 flexmark-ext-tables = { module = "com.vladsch.flexmark:flexmark-ext-tables", version.ref = "flexmark" }
-groovy = { module = "org.codehaus.groovy:groovy-all", version.ref = "groovy" }
+groovy = { module = "org.apache.groovy:groovy-all", version.ref = "groovy" }
 hamcrest = { module = "org.hamcrest:hamcrest", version.ref = "hamcrest" }
 icu4j = { module = "com.ibm.icu:icu4j", version.ref = "icu4j" }
 javacc = { module = "net.java.dev.javacc:javacc", version.ref = "javacc" }