mirror of https://github.com/apache/lucene.git
Merge branch 'apache:main' into bpv21_main
This commit is contained in:
commit
1cb896a985
|
@ -30,7 +30,7 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ ubuntu-latest ]
|
os: [ ubuntu-latest ]
|
||||||
java-version: [ '22' ]
|
java-version: [ '23-ea' ]
|
||||||
uses-alt-java: [ true, false ]
|
uses-alt-java: [ true, false ]
|
||||||
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
@ -61,7 +61,16 @@ jobs:
|
||||||
# https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-environment-variable
|
# https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-environment-variable
|
||||||
echo "RUNTIME_JAVA_HOME=${{ env.ALT_JAVA_DIR }}" >> "$GITHUB_ENV"
|
echo "RUNTIME_JAVA_HOME=${{ env.ALT_JAVA_DIR }}" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
- run: ./gradlew -p lucene/core check -x test
|
- name: ./gradlew tidy
|
||||||
|
run: |
|
||||||
|
./gradlew tidy
|
||||||
|
if [ ! -z "$(git status --porcelain)" ]; then
|
||||||
|
echo ":warning: **tidy left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
git status --porcelain >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
git reset --hard && git clean -xfd .
|
||||||
|
fi
|
||||||
|
|
||||||
- name: ./gradlew regenerate
|
- name: ./gradlew regenerate
|
||||||
run: |
|
run: |
|
||||||
|
@ -69,7 +78,7 @@ jobs:
|
||||||
sudo apt-get install libwww-perl
|
sudo apt-get install libwww-perl
|
||||||
./gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
|
./gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
|
||||||
if [ ! -z "$(git status --porcelain)" ]; then
|
if [ ! -z "$(git status --porcelain)" ]; then
|
||||||
echo ":warning: **regenerateleft local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
|
echo ":warning: **regenerate left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
|
||||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
git status --porcelain >> $GITHUB_STEP_SUMMARY
|
git status --porcelain >> $GITHUB_STEP_SUMMARY
|
||||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
@ -79,8 +88,7 @@ jobs:
|
||||||
- run: ./gradlew testOpts
|
- run: ./gradlew testOpts
|
||||||
- run: ./gradlew helpWorkflow
|
- run: ./gradlew helpWorkflow
|
||||||
- run: ./gradlew licenses updateLicenses
|
- run: ./gradlew licenses updateLicenses
|
||||||
- run: ./gradlew tidy
|
- run: ./gradlew check -x test -Pvalidation.git.failOnModified=false
|
||||||
- run: ./gradlew check -x test
|
|
||||||
- run: ./gradlew assembleRelease mavenToLocal
|
- run: ./gradlew assembleRelease mavenToLocal
|
||||||
|
|
||||||
# Conserve resources: only run these in non-alt-java mode.
|
# Conserve resources: only run these in non-alt-java mode.
|
||||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ ubuntu-latest ]
|
os: [ ubuntu-latest ]
|
||||||
java-version: [ '21', '22' ]
|
java-version: [ '21', '22', '23-ea' ]
|
||||||
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
|
|
@ -60,8 +60,8 @@ public class WrapperDownloader {
|
||||||
|
|
||||||
public static void checkVersion() {
|
public static void checkVersion() {
|
||||||
int major = Runtime.version().feature();
|
int major = Runtime.version().feature();
|
||||||
if (major != 21 && major != 22) {
|
if (major != 21 && major != 22 && major != 23) {
|
||||||
throw new IllegalStateException("java version must be 21 or 22, your version: " + major);
|
throw new IllegalStateException("java version must be 21, 22 or 23, your version: " + major);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ allprojects {
|
||||||
missingdoclet "org.apache.lucene.tools:missing-doclet"
|
missingdoclet "org.apache.lucene.tools:missing-doclet"
|
||||||
}
|
}
|
||||||
|
|
||||||
ext {
|
project.ext {
|
||||||
relativeDocPath = project.path.replaceFirst(/:\w+:/, "").replace(':', '/')
|
relativeDocPath = project.path.replaceFirst(/:\w+:/, "").replace(':', '/')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ configure(project(":lucene:analysis:kuromoji")) {
|
||||||
apply plugin: deps.plugins.undercouch.download.get().pluginId
|
apply plugin: deps.plugins.undercouch.download.get().pluginId
|
||||||
|
|
||||||
plugins.withType(JavaPlugin) {
|
plugins.withType(JavaPlugin) {
|
||||||
ext {
|
project.ext {
|
||||||
targetDir = file("src/resources")
|
targetDir = file("src/resources")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ configure(project(":lucene:analysis:nori")) {
|
||||||
apply plugin: deps.plugins.undercouch.download.get().pluginId
|
apply plugin: deps.plugins.undercouch.download.get().pluginId
|
||||||
|
|
||||||
plugins.withType(JavaPlugin) {
|
plugins.withType(JavaPlugin) {
|
||||||
ext {
|
project.ext {
|
||||||
targetDir = file("src/resources")
|
targetDir = file("src/resources")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ def beastingMode = gradle.startParameter.taskNames.any{ name -> name == 'beast'
|
||||||
|
|
||||||
allprojects {
|
allprojects {
|
||||||
plugins.withType(JavaPlugin) {
|
plugins.withType(JavaPlugin) {
|
||||||
ext {
|
project.ext {
|
||||||
testOptions += [
|
testOptions += [
|
||||||
[propName: 'tests.dups', value: 0, description: "Reiterate runs of entire test suites ('beast' task)."]
|
[propName: 'tests.dups', value: 0, description: "Reiterate runs of entire test suites ('beast' task)."]
|
||||||
]
|
]
|
||||||
|
|
|
@ -19,7 +19,7 @@ def recordings = files()
|
||||||
|
|
||||||
allprojects {
|
allprojects {
|
||||||
plugins.withType(JavaPlugin) {
|
plugins.withType(JavaPlugin) {
|
||||||
ext {
|
project.ext {
|
||||||
testOptions += [
|
testOptions += [
|
||||||
[propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."]
|
[propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."]
|
||||||
]
|
]
|
||||||
|
|
|
@ -62,7 +62,7 @@ allprojects {
|
||||||
// Configure test property defaults and their descriptions.
|
// Configure test property defaults and their descriptions.
|
||||||
allprojects {
|
allprojects {
|
||||||
plugins.withType(JavaPlugin) {
|
plugins.withType(JavaPlugin) {
|
||||||
ext {
|
project.ext {
|
||||||
String randomVectorSize = RandomPicks.randomFrom(new Random(projectSeedLong), ["default", "128", "256", "512"])
|
String randomVectorSize = RandomPicks.randomFrom(new Random(projectSeedLong), ["default", "128", "256", "512"])
|
||||||
testOptions += [
|
testOptions += [
|
||||||
// seed, repetition and amplification.
|
// seed, repetition and amplification.
|
||||||
|
@ -135,14 +135,14 @@ allprojects {
|
||||||
}
|
}
|
||||||
|
|
||||||
afterEvaluate {
|
afterEvaluate {
|
||||||
ext.testOptionsResolved = testOptions.findAll { opt ->
|
project.ext.testOptionsResolved = testOptions.findAll { opt ->
|
||||||
propertyOrDefault(opt.propName, opt.value) != null
|
propertyOrDefault(opt.propName, opt.value) != null
|
||||||
}.collectEntries { opt ->
|
}.collectEntries { opt ->
|
||||||
[(opt.propName): Objects.toString(resolvedTestOption(opt.propName))]
|
[(opt.propName): Objects.toString(resolvedTestOption(opt.propName))]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute the "reproduce with" string.
|
// Compute the "reproduce with" string.
|
||||||
ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
|
project.ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
|
||||||
if (opt["includeInReproLine"] == false) {
|
if (opt["includeInReproLine"] == false) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ def allSuites = []
|
||||||
|
|
||||||
allprojects {
|
allprojects {
|
||||||
plugins.withType(JavaPlugin) {
|
plugins.withType(JavaPlugin) {
|
||||||
ext {
|
project.ext {
|
||||||
testOptions += [
|
testOptions += [
|
||||||
[propName: 'tests.slowestTests', value: true, description: "Print the summary of the slowest tests."],
|
[propName: 'tests.slowestTests', value: true, description: "Print the summary of the slowest tests."],
|
||||||
[propName: 'tests.slowestSuites', value: true, description: "Print the summary of the slowest suites."]
|
[propName: 'tests.slowestSuites', value: true, description: "Print the summary of the slowest suites."]
|
||||||
|
|
|
@ -74,21 +74,6 @@ configure(rootProject) {
|
||||||
logger.warn("WARNING: Directory is not a valid git checkout (won't check dirty files): ${rootProject.projectDir}")
|
logger.warn("WARNING: Directory is not a valid git checkout (won't check dirty files): ${rootProject.projectDir}")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// git ignores any folders which are empty (this includes folders with recursively empty sub-folders).
|
|
||||||
def untrackedNonEmptyFolders = status.untrackedFolders.findAll { path ->
|
|
||||||
File location = file("${rootProject.projectDir}/${path}")
|
|
||||||
boolean hasFiles = false
|
|
||||||
Files.walkFileTree(location.toPath(), new SimpleFileVisitor<Path>() {
|
|
||||||
@Override
|
|
||||||
FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
|
|
||||||
hasFiles = true
|
|
||||||
// Terminate early.
|
|
||||||
return FileVisitResult.TERMINATE
|
|
||||||
}
|
|
||||||
})
|
|
||||||
return hasFiles
|
|
||||||
}
|
|
||||||
|
|
||||||
def offenders = [
|
def offenders = [
|
||||||
// Exclude staged changes. These are fine in precommit.
|
// Exclude staged changes. These are fine in precommit.
|
||||||
// "(added)": status.added,
|
// "(added)": status.added,
|
||||||
|
@ -97,8 +82,7 @@ configure(rootProject) {
|
||||||
"(conflicting)": status.conflicting,
|
"(conflicting)": status.conflicting,
|
||||||
"(missing)": status.missing,
|
"(missing)": status.missing,
|
||||||
"(modified)": status.modified,
|
"(modified)": status.modified,
|
||||||
"(untracked)": status.untracked,
|
"(untracked)": status.untracked
|
||||||
"(untracked non-empty dir)": untrackedNonEmptyFolders
|
|
||||||
].collectMany { fileStatus, files ->
|
].collectMany { fileStatus, files ->
|
||||||
files.collect {file -> " - ${file} ${fileStatus}" }
|
files.collect {file -> " - ${file} ${fileStatus}" }
|
||||||
}.sort()
|
}.sort()
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
cb0da6751c2b753a16ac168bb354870ebb1e162e9083f116729cec9c781156b8
|
2db75c40782f5e8ba1fc278a5574bab070adccb2d21ca5a6e5ed840888448046
|
|
@ -1 +1 @@
|
||||||
8.8.0
|
8.10.0
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
|
||||||
networkTimeout=10000
|
networkTimeout=10000
|
||||||
validateDistributionUrl=true
|
validateDistributionUrl=true
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
|
|
@ -112,6 +112,8 @@ API Changes
|
||||||
|
|
||||||
* GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)
|
* GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)
|
||||||
|
|
||||||
|
* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)
|
||||||
|
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
|
@ -167,6 +169,8 @@ Improvements
|
||||||
|
|
||||||
* GITHUB#12172: Update Romanian stopwords list to include the modern unicode forms. (Trey Jones)
|
* GITHUB#12172: Update Romanian stopwords list to include the modern unicode forms. (Trey Jones)
|
||||||
|
|
||||||
|
* GITHUB#13707: Improve Operations.isTotal() to work with non-minimal automata. (Dawid Weiss, Robert Muir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
@ -266,6 +270,8 @@ Build
|
||||||
|
|
||||||
* GITHUB#13649: Fix eclipse ide settings generation #13649 (Uwe Schindler, Dawid Weiss)
|
* GITHUB#13649: Fix eclipse ide settings generation #13649 (Uwe Schindler, Dawid Weiss)
|
||||||
|
|
||||||
|
* GITHUB#13698: Upgrade to gradle 8.10 (Dawid Weiss)
|
||||||
|
|
||||||
======================== Lucene 9.12.0 =======================
|
======================== Lucene 9.12.0 =======================
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
@ -284,10 +290,7 @@ API Changes
|
||||||
* GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
|
* GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
|
||||||
MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)
|
MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)
|
||||||
|
|
||||||
* GITHUB#13568: Add CollectorOwner class that wraps CollectorManager, and handles list of Collectors and results.
|
* GITHUB#13568: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
|
||||||
Add IndexSearcher#search method that takes CollectorOwner. (Egor Potemkin)
|
|
||||||
|
|
||||||
* GITHUB#13568: Add DrillSideways#search method that supports any collector types for any drill-sideways dimensions
|
|
||||||
or drill-down. (Egor Potemkin)
|
or drill-down. (Egor Potemkin)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
@ -408,6 +411,9 @@ Bug Fixes
|
||||||
|
|
||||||
* GITHUB#13691: Fix incorrect exponent value in explain of SigmoidFunction. (Owais Kazi)
|
* GITHUB#13691: Fix incorrect exponent value in explain of SigmoidFunction. (Owais Kazi)
|
||||||
|
|
||||||
|
* GITHUB#13703: Fix bug in LatLonPoint queries where narrow polygons close to latitude 90 don't
|
||||||
|
match any points due to an Integer overflow. (Ignacio Vera)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -793,3 +793,7 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont
|
||||||
Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
|
Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
|
||||||
Scorer.
|
Scorer.
|
||||||
|
|
||||||
|
### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter
|
||||||
|
|
||||||
|
`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class`
|
||||||
|
replaces it by allowing users to load a custom collector manager instead. (Luca Cavanna)
|
|
@ -1490,7 +1490,7 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(approxEquals(actual, expected));
|
assertTrue(approxEquals(actual, expected));
|
||||||
assertTrue(Operations.sameLanguage(actual, expected));
|
assertTrue(AutomatonTestUtil.sameLanguage(actual, expected));
|
||||||
}
|
}
|
||||||
|
|
||||||
a.close();
|
a.close();
|
||||||
|
|
|
@ -363,7 +363,7 @@ public final class GeoEncodingUtils {
|
||||||
*/
|
*/
|
||||||
public boolean test(int lat, int lon) {
|
public boolean test(int lat, int lon) {
|
||||||
final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
|
final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
|
||||||
if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
|
if (lat2 < latBase || lat2 - latBase >= maxLatDelta) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
|
int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
|
||||||
|
@ -411,7 +411,7 @@ public final class GeoEncodingUtils {
|
||||||
*/
|
*/
|
||||||
public boolean test(int lat, int lon) {
|
public boolean test(int lat, int lon) {
|
||||||
final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
|
final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
|
||||||
if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
|
if (lat2 < latBase || lat2 - latBase >= maxLatDelta) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
|
int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
|
||||||
|
|
|
@ -1,78 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.lucene.search;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This class wraps {@link CollectorManager} and owns the collectors the manager creates. It is
|
|
||||||
* convenient that clients of the class don't have to worry about keeping the list of collectors, as
|
|
||||||
* well as about making the collector's type (C) compatible when reduce is called. Instances of this
|
|
||||||
* class cache results of {@link CollectorManager#reduce(Collection)}.
|
|
||||||
*
|
|
||||||
* <p>Note that instance of this class ignores any {@link Collector} created by {@link
|
|
||||||
* CollectorManager#newCollector()} directly, not through {@link #newCollector()}
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public final class CollectorOwner<C extends Collector, T> {
|
|
||||||
|
|
||||||
private final CollectorManager<C, T> manager;
|
|
||||||
|
|
||||||
private T result;
|
|
||||||
private boolean reduced;
|
|
||||||
|
|
||||||
// TODO: For IndexSearcher, the list doesn't have to be synchronized
|
|
||||||
// because we create new collectors sequentially. Drill sideways creates new collectors in
|
|
||||||
// DrillSidewaysQuery#Weight#bulkScorer which is already called concurrently.
|
|
||||||
// I think making the list synchronized here is not a huge concern, at the same time, do we want
|
|
||||||
// to do something about it?
|
|
||||||
// e.g. have boolean property in constructor that makes it threads friendly when set?
|
|
||||||
private final List<C> collectors = Collections.synchronizedList(new ArrayList<>());
|
|
||||||
|
|
||||||
public CollectorOwner(CollectorManager<C, T> manager) {
|
|
||||||
this.manager = manager;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Return a new {@link Collector}. This must return a different instance on each call. */
|
|
||||||
public C newCollector() throws IOException {
|
|
||||||
C collector = manager.newCollector();
|
|
||||||
collectors.add(collector);
|
|
||||||
return collector;
|
|
||||||
}
|
|
||||||
|
|
||||||
public C getCollector(int i) {
|
|
||||||
return collectors.get(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns result of {@link CollectorManager#reduce(Collection)}. The result is cached.
|
|
||||||
*
|
|
||||||
* <p>This method is NOT threadsafe.
|
|
||||||
*/
|
|
||||||
public T getResult() throws IOException {
|
|
||||||
if (reduced == false) {
|
|
||||||
result = manager.reduce(collectors);
|
|
||||||
reduced = true;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -671,53 +671,6 @@ public class IndexSearcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Lower-level search API. Search all leaves using the given {@link CollectorOwner}, without
|
|
||||||
* calling {@link CollectorOwner#getResult()} so that clients can reduce and read results
|
|
||||||
* themselves.
|
|
||||||
*
|
|
||||||
* <p>Note that this method doesn't return anything - users can access results by calling {@link
|
|
||||||
* CollectorOwner#getResult()}
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public <C extends Collector> void search(Query query, CollectorOwner<C, ?> collectorOwner)
|
|
||||||
throws IOException {
|
|
||||||
final C firstCollector = collectorOwner.newCollector();
|
|
||||||
query = rewrite(query, firstCollector.scoreMode().needsScores());
|
|
||||||
final Weight weight = createWeight(query, firstCollector.scoreMode(), 1);
|
|
||||||
search(weight, collectorOwner, firstCollector);
|
|
||||||
}
|
|
||||||
|
|
||||||
private <C extends Collector> void search(
|
|
||||||
Weight weight, CollectorOwner<C, ?> collectorOwner, C firstCollector) throws IOException {
|
|
||||||
final LeafSlice[] leafSlices = getSlices();
|
|
||||||
if (leafSlices.length == 0) {
|
|
||||||
// there are no segments, nothing to offload to the executor
|
|
||||||
assert leafContexts.isEmpty();
|
|
||||||
} else {
|
|
||||||
final ScoreMode scoreMode = firstCollector.scoreMode();
|
|
||||||
for (int i = 1; i < leafSlices.length; ++i) {
|
|
||||||
final C collector = collectorOwner.newCollector();
|
|
||||||
if (scoreMode != collector.scoreMode()) {
|
|
||||||
throw new IllegalStateException(
|
|
||||||
"CollectorManager does not always produce collectors with the same score mode");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
final List<Callable<C>> listTasks = new ArrayList<>(leafSlices.length);
|
|
||||||
for (int i = 0; i < leafSlices.length; ++i) {
|
|
||||||
final LeafReaderContext[] leaves = leafSlices[i].leaves;
|
|
||||||
final C collector = collectorOwner.getCollector(i);
|
|
||||||
listTasks.add(
|
|
||||||
() -> {
|
|
||||||
search(Arrays.asList(leaves), weight, collector);
|
|
||||||
return collector;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
taskExecutor.invokeAll(listTasks);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lower-level search API.
|
* Lower-level search API.
|
||||||
*
|
*
|
||||||
|
|
|
@ -313,7 +313,7 @@ public abstract class MultiTermQuery extends Query {
|
||||||
* Return the number of unique terms contained in this query, if known up-front. If not known, -1
|
* Return the number of unique terms contained in this query, if known up-front. If not known, -1
|
||||||
* will be returned.
|
* will be returned.
|
||||||
*/
|
*/
|
||||||
public long getTermsCount() throws IOException {
|
public long getTermsCount() {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -137,7 +137,7 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getTermsCount() throws IOException {
|
public long getTermsCount() {
|
||||||
return termData.size();
|
return termData.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -339,6 +339,7 @@ public class Automaton implements Accountable, TransitionAccessor {
|
||||||
@Override
|
@Override
|
||||||
public int getNumTransitions(int state) {
|
public int getNumTransitions(int state) {
|
||||||
assert state >= 0;
|
assert state >= 0;
|
||||||
|
assert state < getNumStates();
|
||||||
int count = states[2 * state + 1];
|
int count = states[2 * state + 1];
|
||||||
if (count == -1) {
|
if (count == -1) {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -86,7 +86,7 @@ public class FiniteStringsIterator {
|
||||||
this.emitEmptyString = a.isAccept(0);
|
this.emitEmptyString = a.isAccept(0);
|
||||||
|
|
||||||
// Start iteration with node startState.
|
// Start iteration with node startState.
|
||||||
if (a.getNumTransitions(startState) > 0) {
|
if (a.getNumStates() > startState && a.getNumTransitions(startState) > 0) {
|
||||||
pathStates.set(startState);
|
pathStates.set(startState);
|
||||||
nodes[0].resetState(a, startState);
|
nodes[0].resetState(a, startState);
|
||||||
string.append(startState);
|
string.append(startState);
|
||||||
|
|
|
@ -35,7 +35,6 @@ import java.util.Arrays;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
@ -182,30 +181,65 @@ public final class Operations {
|
||||||
// Repeating the empty automata will still only accept the empty automata.
|
// Repeating the empty automata will still only accept the empty automata.
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (a.isAccept(0) && a.getAcceptStates().cardinality() == 1) {
|
||||||
|
// If state 0 is the only accept state, then this automaton already repeats itself.
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
Automaton.Builder builder = new Automaton.Builder();
|
Automaton.Builder builder = new Automaton.Builder();
|
||||||
|
// Create the initial state, which is accepted
|
||||||
builder.createState();
|
builder.createState();
|
||||||
builder.setAccept(0, true);
|
builder.setAccept(0, true);
|
||||||
builder.copy(a);
|
|
||||||
|
|
||||||
Transition t = new Transition();
|
Transition t = new Transition();
|
||||||
|
|
||||||
|
int[] stateMap = new int[a.getNumStates()];
|
||||||
|
for (int state = 0; state < a.getNumStates(); ++state) {
|
||||||
|
if (a.isAccept(state) == false) {
|
||||||
|
stateMap[state] = builder.createState();
|
||||||
|
} else if (a.getNumTransitions(state) == 0) {
|
||||||
|
// Accept states that have no transitions get merged into state 0.
|
||||||
|
stateMap[state] = 0;
|
||||||
|
} else {
|
||||||
|
int newState = builder.createState();
|
||||||
|
stateMap[state] = newState;
|
||||||
|
builder.setAccept(newState, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now copy the automaton while renumbering states.
|
||||||
|
for (int state = 0; state < a.getNumStates(); ++state) {
|
||||||
|
int src = stateMap[state];
|
||||||
|
int count = a.initTransition(state, t);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
a.getNextTransition(t);
|
||||||
|
int dest = stateMap[t.dest];
|
||||||
|
builder.addTransition(src, dest, t.min, t.max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now copy transitions of the initial state to our new initial state.
|
||||||
int count = a.initTransition(0, t);
|
int count = a.initTransition(0, t);
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
a.getNextTransition(t);
|
a.getNextTransition(t);
|
||||||
builder.addTransition(0, t.dest + 1, t.min, t.max);
|
builder.addTransition(0, stateMap[t.dest], t.min, t.max);
|
||||||
}
|
}
|
||||||
|
|
||||||
int numStates = a.getNumStates();
|
// Now copy transitions of the initial state to final states to make the automaton repeat
|
||||||
for (int s = 0; s < numStates; s++) {
|
// itself.
|
||||||
if (a.isAccept(s)) {
|
for (int s = a.getAcceptStates().nextSetBit(0);
|
||||||
|
s != -1;
|
||||||
|
s = a.getAcceptStates().nextSetBit(s + 1)) {
|
||||||
|
if (stateMap[s] != 0) {
|
||||||
count = a.initTransition(0, t);
|
count = a.initTransition(0, t);
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
a.getNextTransition(t);
|
a.getNextTransition(t);
|
||||||
builder.addTransition(s + 1, t.dest + 1, t.min, t.max);
|
builder.addTransition(stateMap[s], stateMap[t.dest], t.min, t.max);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder.finish();
|
return removeDeadStates(builder.finish());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -374,17 +408,6 @@ public final class Operations {
|
||||||
return removeDeadStates(c);
|
return removeDeadStates(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if these two automata accept exactly the same language. This is a costly
|
|
||||||
* computation! Both automata must be determinized and have no dead states!
|
|
||||||
*/
|
|
||||||
public static boolean sameLanguage(Automaton a1, Automaton a2) {
|
|
||||||
if (a1 == a2) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return subsetOf(a2, a1) && subsetOf(a1, a2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: move to test-framework?
|
// TODO: move to test-framework?
|
||||||
/**
|
/**
|
||||||
* Returns true if this automaton has any states that cannot be reached from the initial state or
|
* Returns true if this automaton has any states that cannot be reached from the initial state or
|
||||||
|
@ -417,73 +440,6 @@ public final class Operations {
|
||||||
return reachableFromAccept.isEmpty() == false;
|
return reachableFromAccept.isEmpty() == false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the language of <code>a1</code> is a subset of the language of <code>a2</code>.
|
|
||||||
* Both automata must be determinized and must have no dead states.
|
|
||||||
*
|
|
||||||
* <p>Complexity: quadratic in number of states.
|
|
||||||
*/
|
|
||||||
public static boolean subsetOf(Automaton a1, Automaton a2) {
|
|
||||||
if (a1.isDeterministic() == false) {
|
|
||||||
throw new IllegalArgumentException("a1 must be deterministic");
|
|
||||||
}
|
|
||||||
if (a2.isDeterministic() == false) {
|
|
||||||
throw new IllegalArgumentException("a2 must be deterministic");
|
|
||||||
}
|
|
||||||
assert hasDeadStatesFromInitial(a1) == false;
|
|
||||||
assert hasDeadStatesFromInitial(a2) == false;
|
|
||||||
if (a1.getNumStates() == 0) {
|
|
||||||
// Empty language is alwyas a subset of any other language
|
|
||||||
return true;
|
|
||||||
} else if (a2.getNumStates() == 0) {
|
|
||||||
return isEmpty(a1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: cutover to iterators instead
|
|
||||||
Transition[][] transitions1 = a1.getSortedTransitions();
|
|
||||||
Transition[][] transitions2 = a2.getSortedTransitions();
|
|
||||||
ArrayDeque<StatePair> worklist = new ArrayDeque<>();
|
|
||||||
HashSet<StatePair> visited = new HashSet<>();
|
|
||||||
StatePair p = new StatePair(0, 0);
|
|
||||||
worklist.add(p);
|
|
||||||
visited.add(p);
|
|
||||||
while (worklist.size() > 0) {
|
|
||||||
p = worklist.removeFirst();
|
|
||||||
if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
Transition[] t1 = transitions1[p.s1];
|
|
||||||
Transition[] t2 = transitions2[p.s2];
|
|
||||||
for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
|
|
||||||
while (b2 < t2.length && t2[b2].max < t1[n1].min) {
|
|
||||||
b2++;
|
|
||||||
}
|
|
||||||
int min1 = t1[n1].min, max1 = t1[n1].max;
|
|
||||||
|
|
||||||
for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) {
|
|
||||||
if (t2[n2].min > min1) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (t2[n2].max < Character.MAX_CODE_POINT) {
|
|
||||||
min1 = t2[n2].max + 1;
|
|
||||||
} else {
|
|
||||||
min1 = Character.MAX_CODE_POINT;
|
|
||||||
max1 = Character.MIN_CODE_POINT;
|
|
||||||
}
|
|
||||||
StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
|
|
||||||
if (!visited.contains(q)) {
|
|
||||||
worklist.add(q);
|
|
||||||
visited.add(q);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (min1 <= max1) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an automaton that accepts the union of the languages of the given automata.
|
* Returns an automaton that accepts the union of the languages of the given automata.
|
||||||
*
|
*
|
||||||
|
@ -857,22 +813,48 @@ public final class Operations {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if the given automaton accepts all strings. The automaton must be minimized. */
|
/**
|
||||||
|
* Returns true if the given automaton accepts all strings.
|
||||||
|
*
|
||||||
|
* <p>The automaton must be deterministic, or this method may return false.
|
||||||
|
*
|
||||||
|
* <p>Complexity: linear in number of states and transitions.
|
||||||
|
*/
|
||||||
public static boolean isTotal(Automaton a) {
|
public static boolean isTotal(Automaton a) {
|
||||||
return isTotal(a, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
return isTotal(a, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the given automaton accepts all strings for the specified min/max range of the
|
* Returns true if the given automaton accepts all strings for the specified min/max range of the
|
||||||
* alphabet. The automaton must be minimized.
|
* alphabet.
|
||||||
|
*
|
||||||
|
* <p>The automaton must be deterministic, or this method may return false.
|
||||||
|
*
|
||||||
|
* <p>Complexity: linear in number of states and transitions.
|
||||||
*/
|
*/
|
||||||
public static boolean isTotal(Automaton a, int minAlphabet, int maxAlphabet) {
|
public static boolean isTotal(Automaton a, int minAlphabet, int maxAlphabet) {
|
||||||
if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
|
BitSet states = getLiveStates(a);
|
||||||
Transition t = new Transition();
|
Transition spare = new Transition();
|
||||||
a.getTransition(0, 0, t);
|
int seenStates = 0;
|
||||||
return t.dest == 0 && t.min == minAlphabet && t.max == maxAlphabet;
|
for (int state = states.nextSetBit(0); state >= 0; state = states.nextSetBit(state + 1)) {
|
||||||
|
// all reachable states must be accept states
|
||||||
|
if (a.isAccept(state) == false) return false;
|
||||||
|
// all reachable states must contain transitions covering minAlphabet-maxAlphabet
|
||||||
|
int previousLabel = minAlphabet - 1;
|
||||||
|
for (int transition = 0; transition < a.getNumTransitions(state); transition++) {
|
||||||
|
a.getTransition(state, transition, spare);
|
||||||
|
// no gaps are allowed
|
||||||
|
if (spare.min > previousLabel + 1) return false;
|
||||||
|
previousLabel = spare.max;
|
||||||
|
}
|
||||||
|
if (previousLabel < maxAlphabet) return false;
|
||||||
|
if (state == Integer.MAX_VALUE) {
|
||||||
|
break; // or (state+1) would overflow
|
||||||
|
}
|
||||||
|
seenStates++;
|
||||||
}
|
}
|
||||||
return false;
|
// we've checked all the states, automaton is either total or empty
|
||||||
|
return seenStates > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1004,6 +986,9 @@ public final class Operations {
|
||||||
public static Automaton removeDeadStates(Automaton a) {
|
public static Automaton removeDeadStates(Automaton a) {
|
||||||
int numStates = a.getNumStates();
|
int numStates = a.getNumStates();
|
||||||
BitSet liveSet = getLiveStates(a);
|
BitSet liveSet = getLiveStates(a);
|
||||||
|
if (liveSet.cardinality() == numStates) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
int[] map = new int[numStates];
|
int[] map = new int[numStates];
|
||||||
|
|
||||||
|
|
|
@ -35,9 +35,14 @@ package org.apache.lucene.util.automaton;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class StatePair {
|
public class StatePair {
|
||||||
|
// only mike knows what it does (do not expose)
|
||||||
int s;
|
int s;
|
||||||
int s1;
|
|
||||||
int s2;
|
/** first state */
|
||||||
|
public final int s1;
|
||||||
|
|
||||||
|
/** second state */
|
||||||
|
public final int s2;
|
||||||
|
|
||||||
StatePair(int s, int s1, int s2) {
|
StatePair(int s, int s1, int s2) {
|
||||||
this.s = s;
|
this.s = s;
|
||||||
|
@ -81,7 +86,7 @@ public class StatePair {
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
// Don't use s1 ^ s2 since it's vulnerable to the case where s1 == s2 always --> hashCode = 0,
|
// Don't use s1 ^ s2 since it's vulnerable to the case where s1 == s2 always --> hashCode = 0,
|
||||||
// e.g. if you call Operations.sameLanguage,
|
// e.g. if you call AutomatonTestUtil.sameLanguage,
|
||||||
// passing the same automaton against itself:
|
// passing the same automaton against itself:
|
||||||
return s1 * 31 + s2;
|
return s1 * 31 + s2;
|
||||||
}
|
}
|
||||||
|
|
|
@ -625,7 +625,7 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
||||||
Operations.removeDeadStates(expected), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Operations.removeDeadStates(expected), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
Automaton actualDet =
|
Automaton actualDet =
|
||||||
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
if (Operations.sameLanguage(expectedDet, actualDet) == false) {
|
if (AutomatonTestUtil.sameLanguage(expectedDet, actualDet) == false) {
|
||||||
Set<String> expectedPaths = toPathStrings(expectedDet);
|
Set<String> expectedPaths = toPathStrings(expectedDet);
|
||||||
Set<String> actualPaths = toPathStrings(actualDet);
|
Set<String> actualPaths = toPathStrings(actualDet);
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
|
|
|
@ -183,7 +183,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
||||||
|
|
||||||
Automaton actual =
|
Automaton actual =
|
||||||
Operations.determinize(Automata.makeStringUnion(found), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Operations.determinize(Automata.makeStringUnion(found), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
assertTrue(Operations.sameLanguage(expected, actual));
|
assertTrue(AutomatonTestUtil.sameLanguage(expected, actual));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -403,12 +403,8 @@ public class TestBoolean2 extends LuceneTestCase {
|
||||||
bigSearcher.count(q3.build()));
|
bigSearcher.count(q3.build()));
|
||||||
|
|
||||||
// test diff (randomized) scorers produce the same results on bigSearcher as well
|
// test diff (randomized) scorers produce the same results on bigSearcher as well
|
||||||
hits1 =
|
hits1 = bigSearcher.search(q1, new TopFieldCollectorManager(sort, mulFactor, 1)).scoreDocs;
|
||||||
bigSearcher.search(q1, new TopFieldCollectorManager(sort, 1000 * mulFactor, 1))
|
hits2 = bigSearcher.search(q1, new TopFieldCollectorManager(sort, mulFactor, 1)).scoreDocs;
|
||||||
.scoreDocs;
|
|
||||||
hits2 =
|
|
||||||
bigSearcher.search(q1, new TopFieldCollectorManager(sort, 1000 * mulFactor, 1))
|
|
||||||
.scoreDocs;
|
|
||||||
CheckHits.checkEqual(q1, hits1, hits2);
|
CheckHits.checkEqual(q1, hits1, hits2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,15 +17,11 @@
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.BitSet;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
@ -35,34 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
|
||||||
public class TestScorerPerf extends LuceneTestCase {
|
public class TestScorerPerf extends LuceneTestCase {
|
||||||
private final boolean validate = true; // set to false when doing performance testing
|
private final boolean validate = true; // set to false when doing performance testing
|
||||||
|
|
||||||
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir)
|
private static FixedBitSet randBitSet(int sz, int numBitsToSet) {
|
||||||
throws Exception {
|
|
||||||
int[] freq = new int[nTerms];
|
|
||||||
Term[] terms = new Term[nTerms];
|
|
||||||
for (int i = 0; i < nTerms; i++) {
|
|
||||||
int f = (nTerms + 1) - i; // make first terms less frequent
|
|
||||||
freq[i] = (int) Math.ceil(Math.pow(f, power));
|
|
||||||
terms[i] = new Term("f", Character.toString((char) ('A' + i)));
|
|
||||||
}
|
|
||||||
|
|
||||||
IndexWriter iw =
|
|
||||||
new IndexWriter(
|
|
||||||
dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
|
|
||||||
for (int i = 0; i < nDocs; i++) {
|
|
||||||
Document d = new Document();
|
|
||||||
for (int j = 0; j < nTerms; j++) {
|
|
||||||
if (random().nextInt(freq[j]) == 0) {
|
|
||||||
d.add(newStringField("f", terms[j].text(), Field.Store.NO));
|
|
||||||
// System.out.println(d);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
iw.addDocument(d);
|
|
||||||
}
|
|
||||||
iw.forceMerge(1);
|
|
||||||
iw.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public FixedBitSet randBitSet(int sz, int numBitsToSet) {
|
|
||||||
FixedBitSet set = new FixedBitSet(sz);
|
FixedBitSet set = new FixedBitSet(sz);
|
||||||
for (int i = 0; i < numBitsToSet; i++) {
|
for (int i = 0; i < numBitsToSet; i++) {
|
||||||
set.set(random().nextInt(sz));
|
set.set(random().nextInt(sz));
|
||||||
|
@ -70,7 +39,7 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
return set;
|
return set;
|
||||||
}
|
}
|
||||||
|
|
||||||
public FixedBitSet[] randBitSets(int numSets, int setSize) {
|
private static FixedBitSet[] randBitSets(int numSets, int setSize) {
|
||||||
FixedBitSet[] sets = new FixedBitSet[numSets];
|
FixedBitSet[] sets = new FixedBitSet[numSets];
|
||||||
for (int i = 0; i < sets.length; i++) {
|
for (int i = 0; i < sets.length; i++) {
|
||||||
sets[i] = randBitSet(setSize, random().nextInt(setSize));
|
sets[i] = randBitSet(setSize, random().nextInt(setSize));
|
||||||
|
@ -81,22 +50,13 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
private static final class CountingHitCollectorManager
|
private static final class CountingHitCollectorManager
|
||||||
implements CollectorManager<CountingHitCollector, CountingHitCollector> {
|
implements CollectorManager<CountingHitCollector, CountingHitCollector> {
|
||||||
|
|
||||||
private final boolean validate;
|
|
||||||
private final FixedBitSet result;
|
|
||||||
|
|
||||||
CountingHitCollectorManager(boolean validate, FixedBitSet result) {
|
|
||||||
this.validate = validate;
|
|
||||||
this.result = result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CountingHitCollector newCollector() {
|
public CountingHitCollector newCollector() {
|
||||||
return validate ? new MatchingHitCollector(result) : new CountingHitCollector();
|
return new CountingHitCollector();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CountingHitCollector reduce(Collection<CountingHitCollector> collectors)
|
public CountingHitCollector reduce(Collection<CountingHitCollector> collectors) {
|
||||||
throws IOException {
|
|
||||||
CountingHitCollector result = new CountingHitCollector();
|
CountingHitCollector result = new CountingHitCollector();
|
||||||
for (CountingHitCollector collector : collectors) {
|
for (CountingHitCollector collector : collectors) {
|
||||||
result.count += collector.count;
|
result.count += collector.count;
|
||||||
|
@ -106,7 +66,7 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class CountingHitCollector extends SimpleCollector {
|
private static class CountingHitCollector extends SimpleCollector {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
protected int docBase = 0;
|
protected int docBase = 0;
|
||||||
|
@ -121,12 +81,8 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getSum() {
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
protected void doSetNextReader(LeafReaderContext context) {
|
||||||
docBase = context.docBase;
|
docBase = context.docBase;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,24 +92,6 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MatchingHitCollector extends CountingHitCollector {
|
|
||||||
FixedBitSet answer;
|
|
||||||
int pos = -1;
|
|
||||||
|
|
||||||
public MatchingHitCollector(FixedBitSet answer) {
|
|
||||||
this.answer = answer;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void collect(int doc, float score) {
|
|
||||||
|
|
||||||
pos = answer.nextSetBit(pos + 1);
|
|
||||||
if (pos != doc + docBase) {
|
|
||||||
throw new RuntimeException("Expected doc " + pos + " but got " + (doc + docBase));
|
|
||||||
}
|
|
||||||
super.collect(doc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class BitSetQuery extends Query {
|
private static class BitSetQuery extends Query {
|
||||||
|
|
||||||
private final FixedBitSet docs;
|
private final FixedBitSet docs;
|
||||||
|
@ -163,11 +101,10 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
|
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
|
||||||
throws IOException {
|
|
||||||
return new ConstantScoreWeight(this, boost) {
|
return new ConstantScoreWeight(this, boost) {
|
||||||
@Override
|
@Override
|
||||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
public ScorerSupplier scorerSupplier(LeafReaderContext context) {
|
||||||
final var scorer =
|
final var scorer =
|
||||||
new ConstantScoreScorer(
|
new ConstantScoreScorer(
|
||||||
score(), scoreMode, new BitSetIterator(docs, docs.approximateCardinality()));
|
score(), scoreMode, new BitSetIterator(docs, docs.approximateCardinality()));
|
||||||
|
@ -200,20 +137,22 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) {
|
private FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) {
|
||||||
final FixedBitSet rnd = sets[random().nextInt(sets.length)];
|
final FixedBitSet rnd = sets[random().nextInt(sets.length)];
|
||||||
Query q = new BitSetQuery(rnd);
|
Query q = new BitSetQuery(rnd);
|
||||||
bq.add(q, BooleanClause.Occur.MUST);
|
bq.add(q, BooleanClause.Occur.MUST);
|
||||||
if (validate) {
|
if (validate) {
|
||||||
if (result == null) result = rnd.clone();
|
if (result == null) {
|
||||||
else result.and(rnd);
|
result = rnd.clone();
|
||||||
|
} else {
|
||||||
|
result.and(rnd);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses)
|
private void doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < iter; i++) {
|
for (int i = 0; i < iter; i++) {
|
||||||
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
|
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
|
||||||
|
@ -222,21 +161,17 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
for (int j = 0; j < nClauses; j++) {
|
for (int j = 0; j < nClauses; j++) {
|
||||||
result = addClause(sets, bq, result);
|
result = addClause(sets, bq, result);
|
||||||
}
|
}
|
||||||
CountingHitCollector hc =
|
CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager());
|
||||||
s.search(bq.build(), new CountingHitCollectorManager(validate, result));
|
|
||||||
ret += hc.getSum();
|
|
||||||
|
|
||||||
if (validate) assertEquals(result.cardinality(), hc.getCount());
|
if (validate) {
|
||||||
// System.out.println(hc.getCount());
|
assertEquals(result.cardinality(), hc.getCount());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int doNestedConjunctions(
|
private void doNestedConjunctions(
|
||||||
IndexSearcher s, FixedBitSet[] sets, int iter, int maxOuterClauses, int maxClauses)
|
IndexSearcher s, FixedBitSet[] sets, int iter, int maxOuterClauses, int maxClauses)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int ret = 0;
|
|
||||||
long nMatches = 0;
|
long nMatches = 0;
|
||||||
|
|
||||||
for (int i = 0; i < iter; i++) {
|
for (int i = 0; i < iter; i++) {
|
||||||
|
@ -255,107 +190,15 @@ public class TestScorerPerf extends LuceneTestCase {
|
||||||
oq.add(bq.build(), BooleanClause.Occur.MUST);
|
oq.add(bq.build(), BooleanClause.Occur.MUST);
|
||||||
} // outer
|
} // outer
|
||||||
|
|
||||||
CountingHitCollector hc =
|
CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager());
|
||||||
s.search(oq.build(), new CountingHitCollectorManager(validate, result));
|
|
||||||
nMatches += hc.getCount();
|
nMatches += hc.getCount();
|
||||||
ret += hc.getSum();
|
if (validate) {
|
||||||
if (validate) assertEquals(result.cardinality(), hc.getCount());
|
assertEquals(result.cardinality(), hc.getCount());
|
||||||
// System.out.println(hc.getCount());
|
|
||||||
}
|
|
||||||
if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int doTermConjunctions(
|
|
||||||
Term[] terms, IndexSearcher s, int termsInIndex, int maxClauses, int iter)
|
|
||||||
throws IOException {
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
long nMatches = 0;
|
|
||||||
for (int i = 0; i < iter; i++) {
|
|
||||||
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
|
|
||||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
|
||||||
BitSet termflag = new BitSet(termsInIndex);
|
|
||||||
for (int j = 0; j < nClauses; j++) {
|
|
||||||
int tnum;
|
|
||||||
// don't pick same clause twice
|
|
||||||
tnum = random().nextInt(termsInIndex);
|
|
||||||
if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum);
|
|
||||||
if (tnum < 0 || tnum >= termsInIndex) tnum = termflag.nextClearBit(0);
|
|
||||||
termflag.set(tnum);
|
|
||||||
Query tq = new TermQuery(terms[tnum]);
|
|
||||||
bq.add(tq, BooleanClause.Occur.MUST);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager(false, null));
|
|
||||||
nMatches += hc.getCount();
|
|
||||||
ret += hc.getSum();
|
|
||||||
}
|
}
|
||||||
if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
|
if (VERBOSE) {
|
||||||
|
System.out.println("Average number of matches=" + (nMatches / iter));
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int doNestedTermConjunctions(
|
|
||||||
IndexSearcher s,
|
|
||||||
Term[] terms,
|
|
||||||
int termsInIndex,
|
|
||||||
int maxOuterClauses,
|
|
||||||
int maxClauses,
|
|
||||||
int iter)
|
|
||||||
throws IOException {
|
|
||||||
int ret = 0;
|
|
||||||
long nMatches = 0;
|
|
||||||
for (int i = 0; i < iter; i++) {
|
|
||||||
int oClauses = random().nextInt(maxOuterClauses - 1) + 2;
|
|
||||||
BooleanQuery.Builder oq = new BooleanQuery.Builder();
|
|
||||||
for (int o = 0; o < oClauses; o++) {
|
|
||||||
|
|
||||||
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
|
|
||||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
|
||||||
BitSet termflag = new BitSet(termsInIndex);
|
|
||||||
for (int j = 0; j < nClauses; j++) {
|
|
||||||
int tnum;
|
|
||||||
// don't pick same clause twice
|
|
||||||
tnum = random().nextInt(termsInIndex);
|
|
||||||
if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum);
|
|
||||||
if (tnum < 0 || tnum >= 25) tnum = termflag.nextClearBit(0);
|
|
||||||
termflag.set(tnum);
|
|
||||||
Query tq = new TermQuery(terms[tnum]);
|
|
||||||
bq.add(tq, BooleanClause.Occur.MUST);
|
|
||||||
} // inner
|
|
||||||
|
|
||||||
oq.add(bq.build(), BooleanClause.Occur.MUST);
|
|
||||||
} // outer
|
|
||||||
|
|
||||||
CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager(false, null));
|
|
||||||
nMatches += hc.getCount();
|
|
||||||
ret += hc.getSum();
|
|
||||||
}
|
}
|
||||||
if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int doSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
|
|
||||||
throws IOException {
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < iter; i++) {
|
|
||||||
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
|
|
||||||
PhraseQuery.Builder builder = new PhraseQuery.Builder();
|
|
||||||
for (int j = 0; j < nClauses; j++) {
|
|
||||||
int tnum = random().nextInt(termsInIndex);
|
|
||||||
builder.add(new Term("f", Character.toString((char) (tnum + 'A'))));
|
|
||||||
}
|
|
||||||
// slop could be random too
|
|
||||||
builder.setSlop(termsInIndex);
|
|
||||||
PhraseQuery q = builder.build();
|
|
||||||
|
|
||||||
CountingHitCollector hc = s.search(q, new CountingHitCollectorManager(false, null));
|
|
||||||
ret += hc.getSum();
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testConjunctions() throws Exception {
|
public void testConjunctions() throws Exception {
|
||||||
|
|
|
@ -87,7 +87,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
Automaton a2 =
|
Automaton a2 =
|
||||||
Operations.removeDeadStates(
|
Operations.removeDeadStates(
|
||||||
Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar")));
|
Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar")));
|
||||||
assertTrue(Operations.sameLanguage(a1, a2));
|
assertTrue(AutomatonTestUtil.sameLanguage(a1, a2));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCommonPrefixString() throws Exception {
|
public void testCommonPrefixString() throws Exception {
|
||||||
|
@ -257,7 +257,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
Automaton a = Automata.makeString("foobar");
|
Automaton a = Automata.makeString("foobar");
|
||||||
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
|
|
||||||
assertTrue(Operations.sameLanguage(a, aMin));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, aMin));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMinimize2() throws Exception {
|
public void testMinimize2() throws Exception {
|
||||||
|
@ -266,7 +266,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
Arrays.asList(Automata.makeString("foobar"), Automata.makeString("boobar")));
|
Arrays.asList(Automata.makeString("foobar"), Automata.makeString("boobar")));
|
||||||
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Operations.determinize(Operations.removeDeadStates(a), DEFAULT_DETERMINIZE_WORK_LIMIT),
|
Operations.determinize(Operations.removeDeadStates(a), DEFAULT_DETERMINIZE_WORK_LIMIT),
|
||||||
aMin));
|
aMin));
|
||||||
}
|
}
|
||||||
|
@ -276,7 +276,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
Automaton ra = Operations.reverse(a);
|
Automaton ra = Operations.reverse(a);
|
||||||
Automaton a2 = Operations.determinize(Operations.reverse(ra), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Automaton a2 = Operations.determinize(Operations.reverse(ra), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
|
|
||||||
assertTrue(Operations.sameLanguage(a, a2));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, a2));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOptional() throws Exception {
|
public void testOptional() throws Exception {
|
||||||
|
@ -401,7 +401,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
Automaton ra = Operations.reverse(a);
|
Automaton ra = Operations.reverse(a);
|
||||||
Automaton rra = Operations.reverse(ra);
|
Automaton rra = Operations.reverse(ra);
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
|
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
|
||||||
Operations.determinize(Operations.removeDeadStates(rra), Integer.MAX_VALUE)));
|
Operations.determinize(Operations.removeDeadStates(rra), Integer.MAX_VALUE)));
|
||||||
}
|
}
|
||||||
|
@ -502,7 +502,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
|
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
|
||||||
Operations.determinize(
|
Operations.determinize(
|
||||||
Operations.removeDeadStates(builder.finish()), Integer.MAX_VALUE)));
|
Operations.removeDeadStates(builder.finish()), Integer.MAX_VALUE)));
|
||||||
|
@ -735,7 +735,8 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
a2.addTransition(0, state, 'a');
|
a2.addTransition(0, state, 'a');
|
||||||
a2.finishState();
|
a2.finishState();
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(Operations.removeDeadStates(a), Operations.removeDeadStates(a2)));
|
AutomatonTestUtil.sameLanguage(
|
||||||
|
Operations.removeDeadStates(a), Operations.removeDeadStates(a2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Automaton randomNoOp(Automaton a) {
|
private Automaton randomNoOp(Automaton a) {
|
||||||
|
@ -1288,7 +1289,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
Automaton a2 =
|
Automaton a2 =
|
||||||
Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE));
|
Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE))));
|
a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE))));
|
||||||
|
|
||||||
// Do same check, in UTF8 space
|
// Do same check, in UTF8 space
|
||||||
|
@ -1613,7 +1614,7 @@ public class TestAutomaton extends LuceneTestCase {
|
||||||
|
|
||||||
public void testAcceptAllEmptyStringMin() throws Exception {
|
public void testAcceptAllEmptyStringMin() throws Exception {
|
||||||
Automaton a = Automata.makeBinaryInterval(newBytesRef(), true, null, true);
|
Automaton a = Automata.makeBinaryInterval(newBytesRef(), true, null, true);
|
||||||
assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
|
assertTrue(AutomatonTestUtil.sameLanguage(Automata.makeAnyBinary(), a));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IntsRef toIntsRef(String s) {
|
private static IntsRef toIntsRef(String s) {
|
||||||
|
|
|
@ -41,7 +41,7 @@ public class TestDeterminism extends LuceneTestCase {
|
||||||
a = AutomatonTestUtil.determinizeSimple(a);
|
a = AutomatonTestUtil.determinizeSimple(a);
|
||||||
Automaton b = Operations.determinize(a, Integer.MAX_VALUE);
|
Automaton b = Operations.determinize(a, Integer.MAX_VALUE);
|
||||||
// TODO: more verifications possible?
|
// TODO: more verifications possible?
|
||||||
assertTrue(Operations.sameLanguage(a, b));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,20 +53,20 @@ public class TestDeterminism extends LuceneTestCase {
|
||||||
Operations.complement(
|
Operations.complement(
|
||||||
Operations.complement(a, DEFAULT_DETERMINIZE_WORK_LIMIT),
|
Operations.complement(a, DEFAULT_DETERMINIZE_WORK_LIMIT),
|
||||||
DEFAULT_DETERMINIZE_WORK_LIMIT);
|
DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));
|
||||||
|
|
||||||
// a union a = a
|
// a union a = a
|
||||||
equivalent =
|
equivalent =
|
||||||
Operations.determinize(
|
Operations.determinize(
|
||||||
Operations.removeDeadStates(Operations.union(a, a)), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Operations.removeDeadStates(Operations.union(a, a)), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));
|
||||||
|
|
||||||
// a intersect a = a
|
// a intersect a = a
|
||||||
equivalent =
|
equivalent =
|
||||||
Operations.determinize(
|
Operations.determinize(
|
||||||
Operations.removeDeadStates(Operations.intersection(a, a)),
|
Operations.removeDeadStates(Operations.intersection(a, a)),
|
||||||
DEFAULT_DETERMINIZE_WORK_LIMIT);
|
DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));
|
||||||
|
|
||||||
// a minus a = empty
|
// a minus a = empty
|
||||||
Automaton empty = Operations.minus(a, a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Automaton empty = Operations.minus(a, a, DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
|
@ -81,7 +81,7 @@ public class TestDeterminism extends LuceneTestCase {
|
||||||
equivalent =
|
equivalent =
|
||||||
Operations.minus(optional, Automata.makeEmptyString(), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
Operations.minus(optional, Automata.makeEmptyString(), DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
// System.out.println("equiv " + equivalent);
|
// System.out.println("equiv " + equivalent);
|
||||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,44 +81,46 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
||||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.subsetOf(
|
AutomatonTestUtil.subsetOf(
|
||||||
Operations.removeDeadStates(automata[n - 1]),
|
Operations.removeDeadStates(automata[n - 1]),
|
||||||
Operations.removeDeadStates(automata[n])));
|
Operations.removeDeadStates(automata[n])));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.subsetOf(
|
AutomatonTestUtil.subsetOf(
|
||||||
Operations.removeDeadStates(automata[n - 1]),
|
Operations.removeDeadStates(automata[n - 1]),
|
||||||
Operations.removeDeadStates(tautomata[n])));
|
Operations.removeDeadStates(tautomata[n])));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.subsetOf(
|
AutomatonTestUtil.subsetOf(
|
||||||
Operations.removeDeadStates(tautomata[n - 1]),
|
Operations.removeDeadStates(tautomata[n - 1]),
|
||||||
Operations.removeDeadStates(automata[n])));
|
Operations.removeDeadStates(automata[n])));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.subsetOf(
|
AutomatonTestUtil.subsetOf(
|
||||||
Operations.removeDeadStates(tautomata[n - 1]),
|
Operations.removeDeadStates(tautomata[n - 1]),
|
||||||
Operations.removeDeadStates(tautomata[n])));
|
Operations.removeDeadStates(tautomata[n])));
|
||||||
assertNotSame(automata[n - 1], automata[n]);
|
assertNotSame(automata[n - 1], automata[n]);
|
||||||
}
|
}
|
||||||
// check that Lev(N) is a subset of LevT(N)
|
// check that Lev(N) is a subset of LevT(N)
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.subsetOf(
|
AutomatonTestUtil.subsetOf(
|
||||||
Operations.removeDeadStates(automata[n]), Operations.removeDeadStates(tautomata[n])));
|
Operations.removeDeadStates(automata[n]), Operations.removeDeadStates(tautomata[n])));
|
||||||
// special checks for specific n
|
// special checks for specific n
|
||||||
switch (n) {
|
switch (n) {
|
||||||
case 0:
|
case 0:
|
||||||
// easy, matches the string itself
|
// easy, matches the string itself
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Automata.makeString(s), Operations.removeDeadStates(automata[0])));
|
Automata.makeString(s), Operations.removeDeadStates(automata[0])));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Automata.makeString(s), Operations.removeDeadStates(tautomata[0])));
|
Automata.makeString(s), Operations.removeDeadStates(tautomata[0])));
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
// generate a lev1 naively, and check the accepted lang is the same.
|
// generate a lev1 naively, and check the accepted lang is the same.
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(naiveLev1(s), Operations.removeDeadStates(automata[1])));
|
AutomatonTestUtil.sameLanguage(
|
||||||
|
naiveLev1(s), Operations.removeDeadStates(automata[1])));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
|
AutomatonTestUtil.sameLanguage(
|
||||||
|
naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assertBruteForce(s, automata[n], n);
|
assertBruteForce(s, automata[n], n);
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class TestMinimize extends LuceneTestCase {
|
||||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||||
Automaton la = Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE);
|
Automaton la = Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE);
|
||||||
Automaton lb = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
|
Automaton lb = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
|
||||||
assertTrue(Operations.sameLanguage(la, lb));
|
assertTrue(AutomatonTestUtil.sameLanguage(la, lb));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ public class TestMinimize extends LuceneTestCase {
|
||||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||||
a = AutomatonTestUtil.minimizeSimple(a);
|
a = AutomatonTestUtil.minimizeSimple(a);
|
||||||
Automaton b = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
|
Automaton b = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
|
||||||
assertTrue(Operations.sameLanguage(a, b));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, b));
|
||||||
assertEquals(a.getNumStates(), b.getNumStates());
|
assertEquals(a.getNumStates(), b.getNumStates());
|
||||||
int numStates = a.getNumStates();
|
int numStates = a.getNumStates();
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ public class TestOperations extends LuceneTestCase {
|
||||||
assertTrue(naiveUnion.isDeterministic());
|
assertTrue(naiveUnion.isDeterministic());
|
||||||
assertFalse(Operations.hasDeadStatesFromInitial(naiveUnion));
|
assertFalse(Operations.hasDeadStatesFromInitial(naiveUnion));
|
||||||
|
|
||||||
assertTrue(Operations.sameLanguage(union, naiveUnion));
|
assertTrue(AutomatonTestUtil.sameLanguage(union, naiveUnion));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Automaton naiveUnion(List<BytesRef> strings) {
|
private static Automaton naiveUnion(List<BytesRef> strings) {
|
||||||
|
@ -116,13 +116,13 @@ public class TestOperations extends LuceneTestCase {
|
||||||
Automaton concat2 = Operations.concatenate(singleton, nfa);
|
Automaton concat2 = Operations.concatenate(singleton, nfa);
|
||||||
assertFalse(concat2.isDeterministic());
|
assertFalse(concat2.isDeterministic());
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Operations.determinize(concat1, 100), Operations.determinize(concat2, 100)));
|
Operations.determinize(concat1, 100), Operations.determinize(concat2, 100)));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Operations.determinize(nfa, 100), Operations.determinize(concat1, 100)));
|
Operations.determinize(nfa, 100), Operations.determinize(concat1, 100)));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
Operations.sameLanguage(
|
AutomatonTestUtil.sameLanguage(
|
||||||
Operations.determinize(nfa, 100), Operations.determinize(concat2, 100)));
|
Operations.determinize(nfa, 100), Operations.determinize(concat2, 100)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,6 +173,42 @@ public class TestOperations extends LuceneTestCase {
|
||||||
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
assertTrue(exc.getMessage().contains("input automaton is too large"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testIsTotal() {
|
||||||
|
// minimal
|
||||||
|
assertFalse(Operations.isTotal(Automata.makeEmpty()));
|
||||||
|
assertFalse(Operations.isTotal(Automata.makeEmptyString()));
|
||||||
|
assertTrue(Operations.isTotal(Automata.makeAnyString()));
|
||||||
|
assertTrue(Operations.isTotal(Automata.makeAnyBinary(), 0, 255));
|
||||||
|
assertFalse(Operations.isTotal(Automata.makeNonEmptyBinary(), 0, 255));
|
||||||
|
// deterministic, but not minimal
|
||||||
|
assertTrue(Operations.isTotal(Operations.repeat(Automata.makeAnyChar())));
|
||||||
|
Automaton tricky =
|
||||||
|
Operations.repeat(
|
||||||
|
Operations.union(
|
||||||
|
Automata.makeCharRange(Character.MIN_CODE_POINT, 100),
|
||||||
|
Automata.makeCharRange(101, Character.MAX_CODE_POINT)));
|
||||||
|
assertTrue(Operations.isTotal(tricky));
|
||||||
|
// not total, but close
|
||||||
|
Automaton tricky2 =
|
||||||
|
Operations.repeat(
|
||||||
|
Operations.union(
|
||||||
|
Automata.makeCharRange(Character.MIN_CODE_POINT + 1, 100),
|
||||||
|
Automata.makeCharRange(101, Character.MAX_CODE_POINT)));
|
||||||
|
assertFalse(Operations.isTotal(tricky2));
|
||||||
|
Automaton tricky3 =
|
||||||
|
Operations.repeat(
|
||||||
|
Operations.union(
|
||||||
|
Automata.makeCharRange(Character.MIN_CODE_POINT, 99),
|
||||||
|
Automata.makeCharRange(101, Character.MAX_CODE_POINT)));
|
||||||
|
assertFalse(Operations.isTotal(tricky3));
|
||||||
|
Automaton tricky4 =
|
||||||
|
Operations.repeat(
|
||||||
|
Operations.union(
|
||||||
|
Automata.makeCharRange(Character.MIN_CODE_POINT, 100),
|
||||||
|
Automata.makeCharRange(101, Character.MAX_CODE_POINT - 1)));
|
||||||
|
assertFalse(Operations.isTotal(tricky4));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the set of all accepted strings.
|
* Returns the set of all accepted strings.
|
||||||
*
|
*
|
||||||
|
@ -254,4 +290,126 @@ public class TestOperations extends LuceneTestCase {
|
||||||
a.finishState();
|
a.finishState();
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRepeat() {
|
||||||
|
Automaton emptyLanguage = Automata.makeEmpty();
|
||||||
|
assertSame(emptyLanguage, Operations.repeat(emptyLanguage));
|
||||||
|
|
||||||
|
Automaton emptyString = Automata.makeEmptyString();
|
||||||
|
assertSame(emptyString, Operations.repeat(emptyString));
|
||||||
|
|
||||||
|
Automaton a = Automata.makeChar('a');
|
||||||
|
Automaton as = new Automaton();
|
||||||
|
as.createState();
|
||||||
|
as.setAccept(0, true);
|
||||||
|
as.addTransition(0, 0, 'a');
|
||||||
|
as.finishState();
|
||||||
|
assertTrue(AutomatonTestUtil.sameLanguage(as, Operations.repeat(a)));
|
||||||
|
assertSame(as, Operations.repeat(as));
|
||||||
|
|
||||||
|
Automaton aOrEmpty = new Automaton();
|
||||||
|
aOrEmpty.createState();
|
||||||
|
aOrEmpty.setAccept(0, true);
|
||||||
|
aOrEmpty.createState();
|
||||||
|
aOrEmpty.setAccept(1, true);
|
||||||
|
aOrEmpty.addTransition(0, 1, 'a');
|
||||||
|
assertTrue(AutomatonTestUtil.sameLanguage(as, Operations.repeat(aOrEmpty)));
|
||||||
|
|
||||||
|
Automaton ab = Automata.makeString("ab");
|
||||||
|
Automaton abs = new Automaton();
|
||||||
|
abs.createState();
|
||||||
|
abs.createState();
|
||||||
|
abs.setAccept(0, true);
|
||||||
|
abs.addTransition(0, 1, 'a');
|
||||||
|
abs.finishState();
|
||||||
|
abs.addTransition(1, 0, 'b');
|
||||||
|
abs.finishState();
|
||||||
|
assertTrue(AutomatonTestUtil.sameLanguage(abs, Operations.repeat(ab)));
|
||||||
|
assertSame(abs, Operations.repeat(abs));
|
||||||
|
|
||||||
|
Automaton absThenC = Operations.concatenate(abs, Automata.makeChar('c'));
|
||||||
|
Automaton absThenCs = new Automaton();
|
||||||
|
absThenCs.createState();
|
||||||
|
absThenCs.createState();
|
||||||
|
absThenCs.createState();
|
||||||
|
absThenCs.setAccept(0, true);
|
||||||
|
absThenCs.addTransition(0, 1, 'a');
|
||||||
|
absThenCs.addTransition(0, 0, 'c');
|
||||||
|
absThenCs.finishState();
|
||||||
|
absThenCs.addTransition(1, 2, 'b');
|
||||||
|
absThenCs.finishState();
|
||||||
|
absThenCs.addTransition(2, 1, 'a');
|
||||||
|
absThenCs.addTransition(2, 0, 'c');
|
||||||
|
absThenCs.finishState();
|
||||||
|
assertTrue(AutomatonTestUtil.sameLanguage(absThenCs, Operations.repeat(absThenC)));
|
||||||
|
assertSame(absThenCs, Operations.repeat(absThenCs));
|
||||||
|
|
||||||
|
Automaton aOrAb = new Automaton();
|
||||||
|
aOrAb.createState();
|
||||||
|
aOrAb.createState();
|
||||||
|
aOrAb.createState();
|
||||||
|
aOrAb.setAccept(1, true);
|
||||||
|
aOrAb.setAccept(2, true);
|
||||||
|
aOrAb.addTransition(0, 1, 'a');
|
||||||
|
aOrAb.finishState();
|
||||||
|
aOrAb.addTransition(1, 2, 'b');
|
||||||
|
aOrAb.finishState();
|
||||||
|
Automaton aOrAbs = new Automaton();
|
||||||
|
aOrAbs.createState();
|
||||||
|
aOrAbs.createState();
|
||||||
|
aOrAbs.setAccept(0, true);
|
||||||
|
aOrAbs.addTransition(0, 0, 'a');
|
||||||
|
aOrAbs.addTransition(0, 1, 'a');
|
||||||
|
aOrAbs.finishState();
|
||||||
|
aOrAbs.addTransition(1, 0, 'b');
|
||||||
|
aOrAbs.finishState();
|
||||||
|
assertTrue(
|
||||||
|
AutomatonTestUtil.sameLanguage(
|
||||||
|
Operations.determinize(aOrAbs, Integer.MAX_VALUE),
|
||||||
|
Operations.determinize(Operations.repeat(aOrAb), Integer.MAX_VALUE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDuelRepeat() {
|
||||||
|
final int iters = atLeast(1_000);
|
||||||
|
for (int iter = 0; iter < iters; ++iter) {
|
||||||
|
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||||
|
Automaton repeat1 = Operations.determinize(Operations.repeat(a), Integer.MAX_VALUE);
|
||||||
|
Automaton repeat2 = Operations.determinize(naiveRepeat(a), Integer.MAX_VALUE);
|
||||||
|
assertTrue(AutomatonTestUtil.sameLanguage(repeat1, repeat2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is the original implementation of Operations#repeat, before we improved it to generate
|
||||||
|
// simpler automata in some common cases.
|
||||||
|
private static Automaton naiveRepeat(Automaton a) {
|
||||||
|
if (a.getNumStates() == 0) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
Automaton.Builder builder = new Automaton.Builder();
|
||||||
|
// Create the initial state, which is accepted
|
||||||
|
builder.createState();
|
||||||
|
builder.setAccept(0, true);
|
||||||
|
builder.copy(a);
|
||||||
|
|
||||||
|
Transition t = new Transition();
|
||||||
|
int count = a.initTransition(0, t);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
a.getNextTransition(t);
|
||||||
|
builder.addTransition(0, t.dest + 1, t.min, t.max);
|
||||||
|
}
|
||||||
|
|
||||||
|
int numStates = a.getNumStates();
|
||||||
|
for (int s = 0; s < numStates; s++) {
|
||||||
|
if (a.isAccept(s)) {
|
||||||
|
count = a.initTransition(0, t);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
a.getNextTransition(t);
|
||||||
|
builder.addTransition(s + 1, t.dest + 1, t.min, t.max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder.finish();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple unit tests for RegExp parsing.
|
* Simple unit tests for RegExp parsing.
|
||||||
|
@ -698,7 +699,7 @@ public class TestRegExpParsing extends LuceneTestCase {
|
||||||
private void assertSameLanguage(Automaton expected, Automaton actual) {
|
private void assertSameLanguage(Automaton expected, Automaton actual) {
|
||||||
expected = Operations.determinize(expected, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
expected = Operations.determinize(expected, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
actual = Operations.determinize(actual, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
actual = Operations.determinize(actual, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
boolean result = Operations.sameLanguage(expected, actual);
|
boolean result = AutomatonTestUtil.sameLanguage(expected, actual);
|
||||||
if (result == false) {
|
if (result == false) {
|
||||||
System.out.println(expected.toDot());
|
System.out.println(expected.toDot());
|
||||||
System.out.println(actual.toDot());
|
System.out.println(actual.toDot());
|
||||||
|
|
|
@ -28,6 +28,7 @@ import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
import org.apache.lucene.tests.util.TestUtil;
|
import org.apache.lucene.tests.util.TestUtil;
|
||||||
|
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
|
@ -158,7 +159,7 @@ public class TestStringsToAutomaton extends LuceneTestCase {
|
||||||
private static void assertSameAutomaton(Automaton a, Automaton b) {
|
private static void assertSameAutomaton(Automaton a, Automaton b) {
|
||||||
assertEquals(a.getNumStates(), b.getNumStates());
|
assertEquals(a.getNumStates(), b.getNumStates());
|
||||||
assertEquals(a.getNumTransitions(), b.getNumTransitions());
|
assertEquals(a.getNumTransitions(), b.getNumTransitions());
|
||||||
assertTrue(Operations.sameLanguage(a, b));
|
assertTrue(AutomatonTestUtil.sameLanguage(a, b));
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<BytesRef> basicTerms() {
|
private List<BytesRef> basicTerms() {
|
||||||
|
|
|
@ -21,7 +21,6 @@ import static org.apache.lucene.sandbox.facet.ComparableUtils.byAggregatedValue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -58,7 +57,7 @@ import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
|
||||||
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
|
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
|
||||||
import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder;
|
import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder;
|
||||||
import org.apache.lucene.sandbox.facet.recorders.Reducer;
|
import org.apache.lucene.sandbox.facet.recorders.Reducer;
|
||||||
import org.apache.lucene.search.CollectorOwner;
|
import org.apache.lucene.search.CollectorManager;
|
||||||
import org.apache.lucene.search.DoubleValuesSource;
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LongValuesSource;
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
|
@ -148,9 +147,10 @@ public class SandboxFacetsExample {
|
||||||
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
|
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
|
||||||
new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder);
|
new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder);
|
||||||
|
|
||||||
//// (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and
|
// (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and
|
||||||
//// ranges, or even two taxonomy facets that use different Category List Field, we can
|
// ranges, or even two taxonomy facets that use different Category List Field, we can
|
||||||
//// use MultiCollectorManager, e.g.:
|
// use MultiCollectorManager, e.g.:
|
||||||
|
//
|
||||||
// TODO: add a demo for it.
|
// TODO: add a demo for it.
|
||||||
// TaxonomyFacetsCutter publishDateCutter = new
|
// TaxonomyFacetsCutter publishDateCutter = new
|
||||||
// TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader);
|
// TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader);
|
||||||
|
@ -563,17 +563,13 @@ public class SandboxFacetsExample {
|
||||||
// FacetFieldCollectorManager anyway, and leaf cutter are not merged or anything like that.
|
// FacetFieldCollectorManager anyway, and leaf cutter are not merged or anything like that.
|
||||||
FacetFieldCollectorManager<CountFacetRecorder> publishDayDimensionCollectorManager =
|
FacetFieldCollectorManager<CountFacetRecorder> publishDayDimensionCollectorManager =
|
||||||
new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder);
|
new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder);
|
||||||
List<CollectorOwner<FacetFieldCollector, CountFacetRecorder>> drillSidewaysOwners =
|
List<CollectorManager<FacetFieldCollector, CountFacetRecorder>> drillSidewaysManagers =
|
||||||
List.of(new CollectorOwner<>(publishDayDimensionCollectorManager));
|
List.of(publishDayDimensionCollectorManager);
|
||||||
|
|
||||||
//// (3) search
|
//// (3) search
|
||||||
// Right now we return the same Recorder we created - so we can ignore results
|
// Right now we return the same Recorder we created - so we can ignore results
|
||||||
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
|
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
|
||||||
// We must wrap list of drill sideways owner with unmodifiableList to make generics work.
|
ds.search(q, drillDownCollectorManager, drillSidewaysManagers);
|
||||||
ds.search(
|
|
||||||
q,
|
|
||||||
new CollectorOwner<>(drillDownCollectorManager),
|
|
||||||
Collections.unmodifiableList(drillSidewaysOwners));
|
|
||||||
|
|
||||||
//// (4) Get top 10 results by count for Author
|
//// (4) Get top 10 results by count for Author
|
||||||
List<FacetResult> facetResults = new ArrayList<>(2);
|
List<FacetResult> facetResults = new ArrayList<>(2);
|
||||||
|
|
|
@ -37,9 +37,9 @@ configure(project(":lucene:distribution")) {
|
||||||
|
|
||||||
// Maven-published submodule JARs are part of the binary distribution.
|
// Maven-published submodule JARs are part of the binary distribution.
|
||||||
// We don't copy their transitive dependencies.
|
// We don't copy their transitive dependencies.
|
||||||
def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p in [
|
def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p.path in [
|
||||||
// Placed in a separate folder (module layer conflicts).
|
// Placed in a separate folder (module layer conflicts).
|
||||||
project(":lucene:test-framework"),
|
":lucene:test-framework",
|
||||||
]) }
|
]) }
|
||||||
for (Project module : binaryModules) {
|
for (Project module : binaryModules) {
|
||||||
jars(module, {
|
jars(module, {
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -31,8 +30,8 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
||||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
||||||
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
|
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
import org.apache.lucene.search.CollectorManager;
|
import org.apache.lucene.search.CollectorManager;
|
||||||
import org.apache.lucene.search.CollectorOwner;
|
|
||||||
import org.apache.lucene.search.FieldDoc;
|
import org.apache.lucene.search.FieldDoc;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
@ -302,25 +301,13 @@ public class DrillSideways {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CallableCollector implements Callable<Void> {
|
private record CallableCollector<R>(
|
||||||
private final IndexSearcher searcher;
|
IndexSearcher searcher, Query query, CollectorManager<?, R> collectorManager)
|
||||||
private final Query query;
|
implements Callable<R> {
|
||||||
private final CollectorOwner<?, ?> collectorOwner;
|
|
||||||
|
|
||||||
private CallableCollector(
|
|
||||||
IndexSearcher searcher, Query query, CollectorOwner<?, ?> collectorOwner) {
|
|
||||||
this.searcher = searcher;
|
|
||||||
this.query = query;
|
|
||||||
this.collectorOwner = collectorOwner;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Void call() throws Exception {
|
public R call() throws Exception {
|
||||||
searcher.search(query, collectorOwner);
|
return searcher.search(query, collectorManager);
|
||||||
// Call getResult to trigger reduce, we don't need to return results because users can access
|
|
||||||
// them directly from collectorOwner
|
|
||||||
collectorOwner.getResult();
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -344,31 +331,30 @@ public class DrillSideways {
|
||||||
// Main query
|
// Main query
|
||||||
FacetsCollectorManager drillDownFacetsCollectorManager =
|
FacetsCollectorManager drillDownFacetsCollectorManager =
|
||||||
createDrillDownFacetsCollectorManager();
|
createDrillDownFacetsCollectorManager();
|
||||||
final CollectorOwner<?, ?> mainCollectorOwner;
|
final CollectorManager<?, ?> mainCollectorManager;
|
||||||
if (drillDownFacetsCollectorManager != null) {
|
if (drillDownFacetsCollectorManager != null) {
|
||||||
// Make sure we populate a facet collector corresponding to the base query if desired:
|
// Make sure we populate a facet collector corresponding to the base query if desired:
|
||||||
mainCollectorOwner =
|
mainCollectorManager =
|
||||||
new CollectorOwner<>(
|
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager);
|
||||||
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager));
|
|
||||||
} else {
|
} else {
|
||||||
mainCollectorOwner = new CollectorOwner<>(hitCollectorManager);
|
mainCollectorManager = hitCollectorManager;
|
||||||
}
|
}
|
||||||
// Drill sideways dimensions
|
// Drill sideways dimensions
|
||||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners;
|
final List<CollectorManager<FacetsCollector, FacetsCollector>> drillSidewaysCollectorManagers;
|
||||||
if (query.getDims().isEmpty() == false) {
|
if (query.getDims().isEmpty() == false) {
|
||||||
drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size());
|
drillSidewaysCollectorManagers = new ArrayList<>(query.getDims().size());
|
||||||
for (int i = 0; i < query.getDims().size(); i++) {
|
for (int i = 0; i < query.getDims().size(); i++) {
|
||||||
drillSidewaysCollectorOwners.add(
|
drillSidewaysCollectorManagers.add(createDrillSidewaysFacetsCollectorManager());
|
||||||
new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager()));
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
drillSidewaysCollectorOwners = null;
|
drillSidewaysCollectorManagers = null;
|
||||||
}
|
}
|
||||||
// Execute query
|
// Execute query
|
||||||
|
final Result<?, FacetsCollector> result;
|
||||||
if (executor != null) {
|
if (executor != null) {
|
||||||
searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners);
|
result = searchConcurrently(query, mainCollectorManager, drillSidewaysCollectorManagers);
|
||||||
} else {
|
} else {
|
||||||
searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners);
|
result = searchSequentially(query, mainCollectorManager, drillSidewaysCollectorManagers);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect results
|
// Collect results
|
||||||
|
@ -377,12 +363,12 @@ public class DrillSideways {
|
||||||
if (drillDownFacetsCollectorManager != null) {
|
if (drillDownFacetsCollectorManager != null) {
|
||||||
// drill down collected using MultiCollector
|
// drill down collected using MultiCollector
|
||||||
// Extract the results:
|
// Extract the results:
|
||||||
Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult();
|
Object[] drillDownResult = (Object[]) result.drillDownResult;
|
||||||
facetsCollectorResult = (FacetsCollector) drillDownResult[0];
|
facetsCollectorResult = (FacetsCollector) drillDownResult[0];
|
||||||
hitCollectorResult = (R) drillDownResult[1];
|
hitCollectorResult = (R) drillDownResult[1];
|
||||||
} else {
|
} else {
|
||||||
facetsCollectorResult = null;
|
facetsCollectorResult = null;
|
||||||
hitCollectorResult = (R) mainCollectorOwner.getResult();
|
hitCollectorResult = (R) result.drillDownResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Getting results for drill sideways dimensions (if any)
|
// Getting results for drill sideways dimensions (if any)
|
||||||
|
@ -391,12 +377,11 @@ public class DrillSideways {
|
||||||
if (query.getDims().isEmpty() == false) {
|
if (query.getDims().isEmpty() == false) {
|
||||||
drillSidewaysDims = query.getDims().keySet().toArray(new String[0]);
|
drillSidewaysDims = query.getDims().keySet().toArray(new String[0]);
|
||||||
int numDims = query.getDims().size();
|
int numDims = query.getDims().size();
|
||||||
assert drillSidewaysCollectorOwners != null;
|
assert drillSidewaysCollectorManagers != null;
|
||||||
assert drillSidewaysCollectorOwners.size() == numDims;
|
assert drillSidewaysCollectorManagers.size() == numDims;
|
||||||
drillSidewaysCollectors = new FacetsCollector[numDims];
|
drillSidewaysCollectors = new FacetsCollector[numDims];
|
||||||
for (int dim = 0; dim < numDims; dim++) {
|
for (int dim = 0; dim < numDims; dim++) {
|
||||||
drillSidewaysCollectors[dim] =
|
drillSidewaysCollectors[dim] = result.drillSidewaysResults.get(dim);
|
||||||
(FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult();
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
drillSidewaysDims = null;
|
drillSidewaysDims = null;
|
||||||
|
@ -414,52 +399,51 @@ public class DrillSideways {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search using DrillDownQuery with custom collectors. This method can be used with any {@link
|
* Search using DrillDownQuery with custom collectors. This method can be used with any {@link
|
||||||
* CollectorOwner}s. It doesn't return anything because it is expected that you read results from
|
* CollectorManager}s.
|
||||||
* provided {@link CollectorOwner}s.
|
|
||||||
*
|
*
|
||||||
* <p>To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill
|
* <p>Note: Use {@link MultiCollectorManager} to collect both hits and facets for the entire query
|
||||||
* sideways dimensions.
|
* and/or for drill-sideways dimensions. You can also use it to wrap different types of {@link
|
||||||
*
|
* CollectorManager} for drill-sideways dimensions.
|
||||||
* <p>Note: use {@link Collections#unmodifiableList(List)} to wrap {@code
|
|
||||||
* drillSidewaysCollectorOwners} to convince compiler that it is safe to use List here.
|
|
||||||
*
|
|
||||||
* <p>Use {@link MultiCollectorManager} wrapped by {@link CollectorOwner} to collect both hits and
|
|
||||||
* facets for the entire query and/or for drill-sideways dimensions.
|
|
||||||
*
|
|
||||||
* <p>TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C,
|
|
||||||
* because we want each dimensions to be able to use their own types. Alternatively, we can use
|
|
||||||
* typesafe heterogeneous container and provide CollectorManager type for each dimension to this
|
|
||||||
* method? I do like CollectorOwner approach as it seems more intuitive?
|
|
||||||
*/
|
*/
|
||||||
public void search(
|
public <C extends Collector, T, K extends Collector, R> Result<T, R> search(
|
||||||
final DrillDownQuery query,
|
DrillDownQuery query,
|
||||||
CollectorOwner<?, ?> drillDownCollectorOwner,
|
CollectorManager<C, T> drillDownCollectorManager,
|
||||||
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
|
List<CollectorManager<K, R>> drillSidewaysCollectorManagers)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (drillDownCollectorOwner == null) {
|
if (drillDownCollectorManager == null) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"This search method requires client to provide drill down collector manager");
|
"This search method requires client to provide drill down collector manager");
|
||||||
}
|
}
|
||||||
if (drillSidewaysCollectorOwners == null) {
|
if (drillSidewaysCollectorManagers == null) {
|
||||||
if (query.getDims().isEmpty() == false) {
|
if (query.getDims().isEmpty() == false) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"The query requires not null drillSidewaysCollectorOwners");
|
"The query requires not null drillSidewaysCollectorManagers");
|
||||||
}
|
}
|
||||||
} else if (drillSidewaysCollectorOwners.size() != query.getDims().size()) {
|
} else if (drillSidewaysCollectorManagers.size() != query.getDims().size()) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"drillSidewaysCollectorOwners size must be equal to number of dimensions in the query.");
|
"drillSidewaysCollectorManagers size must be equal to number of dimensions in the query.");
|
||||||
}
|
}
|
||||||
if (executor != null) {
|
if (executor != null) {
|
||||||
searchConcurrently(query, drillDownCollectorOwner, drillSidewaysCollectorOwners);
|
return searchConcurrently(query, drillDownCollectorManager, drillSidewaysCollectorManagers);
|
||||||
} else {
|
} else {
|
||||||
searchSequentially(query, drillDownCollectorOwner, drillSidewaysCollectorOwners);
|
return searchSequentially(query, drillDownCollectorManager, drillSidewaysCollectorManagers);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void searchSequentially(
|
/**
|
||||||
|
* {@link #search(DrillDownQuery, CollectorManager, List)} result. It doesn't depend on {@link
|
||||||
|
* Facets} to allow users to use any type of {@link CollectorManager} for drill-down or
|
||||||
|
* drill-sideways dimension.
|
||||||
|
*
|
||||||
|
* @param drillDownResult result from drill down (main) {@link CollectorManager}
|
||||||
|
* @param drillSidewaysResults results from drill sideways {@link CollectorManager}s
|
||||||
|
*/
|
||||||
|
public record Result<T, R>(T drillDownResult, List<R> drillSidewaysResults) {}
|
||||||
|
|
||||||
|
private <C extends Collector, T, K extends Collector, R> Result<T, R> searchSequentially(
|
||||||
final DrillDownQuery query,
|
final DrillDownQuery query,
|
||||||
final CollectorOwner<?, ?> drillDownCollectorOwner,
|
final CollectorManager<C, T> drillDownCollectorManager,
|
||||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
|
final List<CollectorManager<K, R>> drillSidewaysCollectorManagers)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
Map<String, Integer> drillDownDims = query.getDims();
|
Map<String, Integer> drillDownDims = query.getDims();
|
||||||
|
@ -467,9 +451,7 @@ public class DrillSideways {
|
||||||
if (drillDownDims.isEmpty()) {
|
if (drillDownDims.isEmpty()) {
|
||||||
// There are no drill-down dims, so there is no
|
// There are no drill-down dims, so there is no
|
||||||
// drill-sideways to compute:
|
// drill-sideways to compute:
|
||||||
searcher.search(query, drillDownCollectorOwner);
|
return new Result<>(searcher.search(query, drillDownCollectorManager), null);
|
||||||
drillDownCollectorOwner.getResult();
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Query baseQuery = query.getBaseQuery();
|
Query baseQuery = query.getBaseQuery();
|
||||||
|
@ -480,59 +462,60 @@ public class DrillSideways {
|
||||||
}
|
}
|
||||||
Query[] drillDownQueries = query.getDrillDownQueries();
|
Query[] drillDownQueries = query.getDrillDownQueries();
|
||||||
|
|
||||||
DrillSidewaysQuery dsq =
|
DrillSidewaysQuery<K, R> dsq =
|
||||||
new DrillSidewaysQuery(
|
new DrillSidewaysQuery<>(
|
||||||
baseQuery,
|
baseQuery, drillSidewaysCollectorManagers, drillDownQueries, scoreSubDocsAtOnce());
|
||||||
// drillDownCollectorOwner,
|
|
||||||
// Don't pass drill down collector because drill down is collected by IndexSearcher
|
|
||||||
// itself.
|
|
||||||
// TODO: deprecate drillDown collection in DrillSidewaysQuery?
|
|
||||||
null,
|
|
||||||
drillSidewaysCollectorOwners,
|
|
||||||
drillDownQueries,
|
|
||||||
scoreSubDocsAtOnce());
|
|
||||||
|
|
||||||
searcher.search(dsq, drillDownCollectorOwner);
|
T collectorResult = searcher.search(dsq, drillDownCollectorManager);
|
||||||
// This method doesn't return results as each dimension might have its own result type.
|
List<R> drillSidewaysResults = new ArrayList<>(drillDownDims.size());
|
||||||
// But we call getResult to trigger results reducing, so that users don't have to worry about
|
assert drillSidewaysCollectorManagers != null
|
||||||
// it.
|
: "Case without drill sideways dimensions is handled above";
|
||||||
drillDownCollectorOwner.getResult();
|
int numSlices = dsq.managedDrillSidewaysCollectors.size();
|
||||||
if (drillSidewaysCollectorOwners != null) {
|
for (int dim = 0; dim < drillDownDims.size(); dim++) {
|
||||||
for (CollectorOwner<?, ?> sidewaysOwner : drillSidewaysCollectorOwners) {
|
List<K> collectorsForDim = new ArrayList<>(numSlices);
|
||||||
sidewaysOwner.getResult();
|
for (int slice = 0; slice < numSlices; slice++) {
|
||||||
|
collectorsForDim.add(dsq.managedDrillSidewaysCollectors.get(slice).get(dim));
|
||||||
}
|
}
|
||||||
|
drillSidewaysResults.add(
|
||||||
|
dim, drillSidewaysCollectorManagers.get(dim).reduce(collectorsForDim));
|
||||||
}
|
}
|
||||||
|
return new Result<>(collectorResult, drillSidewaysResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void searchConcurrently(
|
private <C extends Collector, T, K extends Collector, R> Result<T, R> searchConcurrently(
|
||||||
final DrillDownQuery query,
|
final DrillDownQuery query,
|
||||||
final CollectorOwner<?, ?> drillDownCollectorOwner,
|
final CollectorManager<C, T> drillDownCollectorManager,
|
||||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
|
final List<CollectorManager<K, R>> drillSidewaysCollectorManagers) {
|
||||||
throws IOException {
|
|
||||||
|
|
||||||
final Map<String, Integer> drillDownDims = query.getDims();
|
final Map<String, Integer> drillDownDims = query.getDims();
|
||||||
final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1);
|
final CallableCollector<T> drillDownCallableCollector =
|
||||||
|
new CallableCollector<>(searcher, query, drillDownCollectorManager);
|
||||||
|
final List<CallableCollector<R>> drillSidewaysCallableCollectors =
|
||||||
|
new ArrayList<>(drillDownDims.size());
|
||||||
|
|
||||||
callableCollectors.add(new CallableCollector(searcher, query, drillDownCollectorOwner));
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
final Query[] filters = query.getDrillDownQueries();
|
final Query[] filters = query.getDrillDownQueries();
|
||||||
for (String dim : drillDownDims.keySet()) {
|
for (String dim : drillDownDims.keySet()) {
|
||||||
callableCollectors.add(
|
drillSidewaysCallableCollectors.add(
|
||||||
new CallableCollector(
|
new CallableCollector<>(
|
||||||
searcher,
|
searcher,
|
||||||
getDrillDownQuery(query, filters, dim),
|
getDrillDownQuery(query, filters, dim),
|
||||||
drillSidewaysCollectorOwners.get(i)));
|
drillSidewaysCollectorManagers.get(i)));
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Run the query pool
|
final Future<T> drillDownFuture = executor.submit(drillDownCallableCollector);
|
||||||
final List<Future<Void>> futures = executor.invokeAll(callableCollectors);
|
final List<Future<R>> drillSidewaysFutures =
|
||||||
|
executor.invokeAll(drillSidewaysCallableCollectors);
|
||||||
|
|
||||||
// Wait for results. We don't read the results as they are collected by CollectorOwners
|
T collectorResult = drillDownFuture.get();
|
||||||
for (i = 0; i < futures.size(); i++) {
|
List<R> drillSidewaysResults = new ArrayList<>(drillDownDims.size());
|
||||||
futures.get(i).get();
|
|
||||||
|
for (i = 0; i < drillSidewaysFutures.size(); i++) {
|
||||||
|
drillSidewaysResults.add(i, drillSidewaysFutures.get(i).get());
|
||||||
}
|
}
|
||||||
|
return new Result<>(collectorResult, drillSidewaysResults);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new ThreadInterruptedException(e);
|
throw new ThreadInterruptedException(e);
|
||||||
} catch (ExecutionException e) {
|
} catch (ExecutionException e) {
|
||||||
|
|
|
@ -17,19 +17,20 @@
|
||||||
package org.apache.lucene.facet;
|
package org.apache.lucene.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.BulkScorer;
|
import org.apache.lucene.search.BulkScorer;
|
||||||
import org.apache.lucene.search.Collector;
|
import org.apache.lucene.search.Collector;
|
||||||
import org.apache.lucene.search.CollectorOwner;
|
import org.apache.lucene.search.CollectorManager;
|
||||||
import org.apache.lucene.search.ConstantScoreScorer;
|
import org.apache.lucene.search.ConstantScoreScorer;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LeafCollector;
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryVisitor;
|
import org.apache.lucene.search.QueryVisitor;
|
||||||
import org.apache.lucene.search.ScoreMode;
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
@ -41,12 +42,12 @@ import org.apache.lucene.search.Weight;
|
||||||
|
|
||||||
// TODO change the way DrillSidewaysScorer is used, this query does not work
|
// TODO change the way DrillSidewaysScorer is used, this query does not work
|
||||||
// with filter caching
|
// with filter caching
|
||||||
class DrillSidewaysQuery extends Query {
|
class DrillSidewaysQuery<K extends Collector, R> extends Query {
|
||||||
|
|
||||||
final Query baseQuery;
|
final Query baseQuery;
|
||||||
|
|
||||||
final CollectorOwner<?, ?> drillDownCollectorOwner;
|
final List<CollectorManager<K, R>> drillSidewaysCollectorManagers;
|
||||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners;
|
final List<List<K>> managedDrillSidewaysCollectors;
|
||||||
|
|
||||||
final Query[] drillDownQueries;
|
final Query[] drillDownQueries;
|
||||||
|
|
||||||
|
@ -58,15 +59,36 @@ class DrillSidewaysQuery extends Query {
|
||||||
*/
|
*/
|
||||||
DrillSidewaysQuery(
|
DrillSidewaysQuery(
|
||||||
Query baseQuery,
|
Query baseQuery,
|
||||||
CollectorOwner<?, ?> drillDownCollectorOwner,
|
List<CollectorManager<K, R>> drillSidewaysCollectorManagers,
|
||||||
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners,
|
Query[] drillDownQueries,
|
||||||
|
boolean scoreSubDocsAtOnce) {
|
||||||
|
// Note that the "managed" collector lists are synchronized here since bulkScorer()
|
||||||
|
// can be invoked concurrently and needs to remain thread-safe. We're OK with synchronizing
|
||||||
|
// on the whole list as contention is expected to remain very low:
|
||||||
|
this(
|
||||||
|
baseQuery,
|
||||||
|
drillSidewaysCollectorManagers,
|
||||||
|
Collections.synchronizedList(new ArrayList<>()),
|
||||||
|
drillDownQueries,
|
||||||
|
scoreSubDocsAtOnce);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Needed for {@link Query#rewrite(IndexSearcher)}. Ensures the same "managed" lists get used
|
||||||
|
* since {@link DrillSideways} accesses references to these through the original {@code
|
||||||
|
* DrillSidewaysQuery}.
|
||||||
|
*/
|
||||||
|
private DrillSidewaysQuery(
|
||||||
|
Query baseQuery,
|
||||||
|
List<CollectorManager<K, R>> drillSidewaysCollectorManagers,
|
||||||
|
List<List<K>> managedDrillSidewaysCollectors,
|
||||||
Query[] drillDownQueries,
|
Query[] drillDownQueries,
|
||||||
boolean scoreSubDocsAtOnce) {
|
boolean scoreSubDocsAtOnce) {
|
||||||
this.baseQuery = Objects.requireNonNull(baseQuery);
|
this.baseQuery = Objects.requireNonNull(baseQuery);
|
||||||
this.drillDownCollectorOwner = drillDownCollectorOwner;
|
this.drillSidewaysCollectorManagers = drillSidewaysCollectorManagers;
|
||||||
this.drillSidewaysCollectorOwners = drillSidewaysCollectorOwners;
|
|
||||||
this.drillDownQueries = drillDownQueries;
|
this.drillDownQueries = drillDownQueries;
|
||||||
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
||||||
|
this.managedDrillSidewaysCollectors = managedDrillSidewaysCollectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -87,10 +109,10 @@ class DrillSidewaysQuery extends Query {
|
||||||
if (newQuery == baseQuery) {
|
if (newQuery == baseQuery) {
|
||||||
return super.rewrite(indexSearcher);
|
return super.rewrite(indexSearcher);
|
||||||
} else {
|
} else {
|
||||||
return new DrillSidewaysQuery(
|
return new DrillSidewaysQuery<>(
|
||||||
newQuery,
|
newQuery,
|
||||||
drillDownCollectorOwner,
|
drillSidewaysCollectorManagers,
|
||||||
drillSidewaysCollectorOwners,
|
managedDrillSidewaysCollectors,
|
||||||
drillDownQueries,
|
drillDownQueries,
|
||||||
scoreSubDocsAtOnce);
|
scoreSubDocsAtOnce);
|
||||||
}
|
}
|
||||||
|
@ -124,14 +146,8 @@ class DrillSidewaysQuery extends Query {
|
||||||
|
|
||||||
int drillDownCount = drillDowns.length;
|
int drillDownCount = drillDowns.length;
|
||||||
|
|
||||||
Collector drillDownCollector;
|
List<K> sidewaysCollectors = new ArrayList<>(drillDownCount);
|
||||||
final LeafCollector drillDownLeafCollector;
|
managedDrillSidewaysCollectors.add(sidewaysCollectors);
|
||||||
if (drillDownCollectorOwner != null) {
|
|
||||||
drillDownCollector = drillDownCollectorOwner.newCollector();
|
|
||||||
drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
|
|
||||||
} else {
|
|
||||||
drillDownLeafCollector = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
DrillSidewaysScorer.DocsAndCost[] dims =
|
DrillSidewaysScorer.DocsAndCost[] dims =
|
||||||
new DrillSidewaysScorer.DocsAndCost[drillDownCount];
|
new DrillSidewaysScorer.DocsAndCost[drillDownCount];
|
||||||
|
@ -144,7 +160,8 @@ class DrillSidewaysQuery extends Query {
|
||||||
scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
|
scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
Collector sidewaysCollector = drillSidewaysCollectorOwners.get(dim).newCollector();
|
K sidewaysCollector = drillSidewaysCollectorManagers.get(dim).newCollector();
|
||||||
|
sidewaysCollectors.add(dim, sidewaysCollector);
|
||||||
|
|
||||||
dims[dim] =
|
dims[dim] =
|
||||||
new DrillSidewaysScorer.DocsAndCost(
|
new DrillSidewaysScorer.DocsAndCost(
|
||||||
|
@ -155,9 +172,6 @@ class DrillSidewaysQuery extends Query {
|
||||||
// a null scorer in this case, but we need to make sure #finish gets called on all facet
|
// a null scorer in this case, but we need to make sure #finish gets called on all facet
|
||||||
// collectors since IndexSearcher won't handle this for us:
|
// collectors since IndexSearcher won't handle this for us:
|
||||||
if (baseScorerSupplier == null || nullCount > 1) {
|
if (baseScorerSupplier == null || nullCount > 1) {
|
||||||
if (drillDownLeafCollector != null) {
|
|
||||||
drillDownLeafCollector.finish();
|
|
||||||
}
|
|
||||||
for (DrillSidewaysScorer.DocsAndCost dim : dims) {
|
for (DrillSidewaysScorer.DocsAndCost dim : dims) {
|
||||||
dim.sidewaysLeafCollector.finish();
|
dim.sidewaysLeafCollector.finish();
|
||||||
}
|
}
|
||||||
|
@ -177,11 +191,7 @@ class DrillSidewaysQuery extends Query {
|
||||||
@Override
|
@Override
|
||||||
public BulkScorer bulkScorer() throws IOException {
|
public BulkScorer bulkScorer() throws IOException {
|
||||||
return new DrillSidewaysScorer(
|
return new DrillSidewaysScorer(
|
||||||
context,
|
context, baseScorerSupplier.get(Long.MAX_VALUE), dims, scoreSubDocsAtOnce);
|
||||||
baseScorerSupplier.get(Long.MAX_VALUE),
|
|
||||||
drillDownLeafCollector,
|
|
||||||
dims,
|
|
||||||
scoreSubDocsAtOnce);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -212,9 +222,8 @@ class DrillSidewaysQuery extends Query {
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
int result = classHash();
|
int result = classHash();
|
||||||
result = prime * result + Objects.hashCode(baseQuery);
|
result = prime * result + Objects.hashCode(baseQuery);
|
||||||
result = prime * result + Objects.hashCode(drillDownCollectorOwner);
|
|
||||||
result = prime * result + Arrays.hashCode(drillDownQueries);
|
result = prime * result + Arrays.hashCode(drillDownQueries);
|
||||||
result = prime * result + Objects.hashCode(drillSidewaysCollectorOwners);
|
result = prime * result + Objects.hashCode(drillSidewaysCollectorManagers);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -223,10 +232,9 @@ class DrillSidewaysQuery extends Query {
|
||||||
return sameClassAs(other) && equalsTo(getClass().cast(other));
|
return sameClassAs(other) && equalsTo(getClass().cast(other));
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean equalsTo(DrillSidewaysQuery other) {
|
private boolean equalsTo(DrillSidewaysQuery<?, ?> other) {
|
||||||
return Objects.equals(baseQuery, other.baseQuery)
|
return Objects.equals(baseQuery, other.baseQuery)
|
||||||
&& Objects.equals(drillDownCollectorOwner, other.drillDownCollectorOwner)
|
|
||||||
&& Arrays.equals(drillDownQueries, other.drillDownQueries)
|
&& Arrays.equals(drillDownQueries, other.drillDownQueries)
|
||||||
&& Objects.equals(drillSidewaysCollectorOwners, other.drillSidewaysCollectorOwners);
|
&& Objects.equals(drillSidewaysCollectorManagers, other.drillSidewaysCollectorManagers);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,8 +45,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
|
|
||||||
// private static boolean DEBUG = false;
|
// private static boolean DEBUG = false;
|
||||||
|
|
||||||
private final LeafCollector drillDownLeafCollector;
|
|
||||||
|
|
||||||
private final DocsAndCost[] dims;
|
private final DocsAndCost[] dims;
|
||||||
|
|
||||||
// DrillDown DocsEnums:
|
// DrillDown DocsEnums:
|
||||||
|
@ -68,7 +66,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
DrillSidewaysScorer(
|
DrillSidewaysScorer(
|
||||||
LeafReaderContext context,
|
LeafReaderContext context,
|
||||||
Scorer baseScorer,
|
Scorer baseScorer,
|
||||||
LeafCollector drillDownLeafCollector,
|
|
||||||
DocsAndCost[] dims,
|
DocsAndCost[] dims,
|
||||||
boolean scoreSubDocsAtOnce) {
|
boolean scoreSubDocsAtOnce) {
|
||||||
this.dims = dims;
|
this.dims = dims;
|
||||||
|
@ -81,7 +78,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
} else {
|
} else {
|
||||||
this.baseApproximation = baseIterator;
|
this.baseApproximation = baseIterator;
|
||||||
}
|
}
|
||||||
this.drillDownLeafCollector = drillDownLeafCollector;
|
|
||||||
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -709,9 +705,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
collector.collect(collectDocID);
|
collector.collect(collectDocID);
|
||||||
if (drillDownLeafCollector != null) {
|
|
||||||
drillDownLeafCollector.collect(collectDocID);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: we could "fix" faceting of the sideways counts
|
// TODO: we could "fix" faceting of the sideways counts
|
||||||
// to do this "union" (of the drill down hits) in the
|
// to do this "union" (of the drill down hits) in the
|
||||||
|
@ -725,9 +718,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
|
|
||||||
private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException {
|
private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException {
|
||||||
collector.collect(collectDocID);
|
collector.collect(collectDocID);
|
||||||
if (drillDownLeafCollector != null) {
|
|
||||||
drillDownLeafCollector.collect(collectDocID);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tally sideways count:
|
// Tally sideways count:
|
||||||
dim.sidewaysLeafCollector.collect(collectDocID);
|
dim.sidewaysLeafCollector.collect(collectDocID);
|
||||||
|
@ -735,9 +725,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
|
|
||||||
private void collectHit(LeafCollector collector, List<DocsAndCost> dims) throws IOException {
|
private void collectHit(LeafCollector collector, List<DocsAndCost> dims) throws IOException {
|
||||||
collector.collect(collectDocID);
|
collector.collect(collectDocID);
|
||||||
if (drillDownLeafCollector != null) {
|
|
||||||
drillDownLeafCollector.collect(collectDocID);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tally sideways counts:
|
// Tally sideways counts:
|
||||||
for (DocsAndCost dim : dims) {
|
for (DocsAndCost dim : dims) {
|
||||||
|
@ -756,9 +743,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
// Note: We _only_ call #finish on the facets collectors we're managing here, but not the
|
// Note: We _only_ call #finish on the facets collectors we're managing here, but not the
|
||||||
// "main" collector. This is because IndexSearcher handles calling #finish on the main
|
// "main" collector. This is because IndexSearcher handles calling #finish on the main
|
||||||
// collector.
|
// collector.
|
||||||
if (drillDownLeafCollector != null) {
|
|
||||||
drillDownLeafCollector.finish();
|
|
||||||
}
|
|
||||||
for (DocsAndCost dim : dims) {
|
for (DocsAndCost dim : dims) {
|
||||||
dim.sidewaysLeafCollector.finish();
|
dim.sidewaysLeafCollector.finish();
|
||||||
}
|
}
|
||||||
|
@ -766,9 +750,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||||
|
|
||||||
private void setScorer(LeafCollector mainCollector, Scorable scorer) throws IOException {
|
private void setScorer(LeafCollector mainCollector, Scorable scorer) throws IOException {
|
||||||
mainCollector.setScorer(scorer);
|
mainCollector.setScorer(scorer);
|
||||||
if (drillDownLeafCollector != null) {
|
|
||||||
drillDownLeafCollector.setScorer(scorer);
|
|
||||||
}
|
|
||||||
for (DocsAndCost dim : dims) {
|
for (DocsAndCost dim : dims) {
|
||||||
dim.sidewaysLeafCollector.setScorer(scorer);
|
dim.sidewaysLeafCollector.setScorer(scorer);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,15 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.facet.taxonomy.writercache;
|
package org.apache.lucene.facet.taxonomy.writercache;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.ObjectInputStream;
|
|
||||||
import java.io.ObjectOutputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.util.SuppressForbidden;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Similar to {@link StringBuilder}, but with a more efficient growing strategy. This class uses
|
* Similar to {@link StringBuilder}, but with a more efficient growing strategy. This class uses
|
||||||
|
@ -32,15 +25,11 @@ import org.apache.lucene.util.SuppressForbidden;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
class CharBlockArray implements Appendable, Serializable, CharSequence {
|
class CharBlockArray implements Appendable, CharSequence {
|
||||||
|
|
||||||
private static final long serialVersionUID = 1L;
|
|
||||||
|
|
||||||
private static final int DefaultBlockSize = 32 * 1024; // 32 KB default size
|
private static final int DefaultBlockSize = 32 * 1024; // 32 KB default size
|
||||||
|
|
||||||
static final class Block implements Serializable, Cloneable {
|
static final class Block implements Cloneable {
|
||||||
private static final long serialVersionUID = 1L;
|
|
||||||
|
|
||||||
final char[] chars;
|
final char[] chars;
|
||||||
int length;
|
int length;
|
||||||
|
|
||||||
|
@ -185,34 +174,4 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressForbidden(
|
|
||||||
reason = "TODO: don't use java serialization here, inefficient and unnecessary")
|
|
||||||
void flush(OutputStream out) throws IOException {
|
|
||||||
ObjectOutputStream oos = null;
|
|
||||||
try {
|
|
||||||
oos = new ObjectOutputStream(out);
|
|
||||||
oos.writeObject(this);
|
|
||||||
oos.flush();
|
|
||||||
} finally {
|
|
||||||
if (oos != null) {
|
|
||||||
oos.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressForbidden(
|
|
||||||
reason = "TODO: don't use java serialization here, inefficient and unnecessary")
|
|
||||||
public static CharBlockArray open(InputStream in) throws IOException, ClassNotFoundException {
|
|
||||||
ObjectInputStream ois = null;
|
|
||||||
try {
|
|
||||||
ois = new ObjectInputStream(in);
|
|
||||||
CharBlockArray a = (CharBlockArray) ois.readObject();
|
|
||||||
return a;
|
|
||||||
} finally {
|
|
||||||
if (ois != null) {
|
|
||||||
ois.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -284,7 +284,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||||
Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f);
|
Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f);
|
||||||
Scorer dimScorer = dimWeight.scorer(ctx);
|
Scorer dimScorer = dimWeight.scorer(ctx);
|
||||||
|
|
||||||
FacetsCollector baseFC = new FacetsCollector();
|
|
||||||
FacetsCollector dimFC = new FacetsCollector();
|
FacetsCollector dimFC = new FacetsCollector();
|
||||||
DrillSidewaysScorer.DocsAndCost docsAndCost =
|
DrillSidewaysScorer.DocsAndCost docsAndCost =
|
||||||
new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC.getLeafCollector(ctx));
|
new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC.getLeafCollector(ctx));
|
||||||
|
@ -311,7 +310,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||||
new DrillSidewaysScorer(
|
new DrillSidewaysScorer(
|
||||||
ctx,
|
ctx,
|
||||||
baseScorer,
|
baseScorer,
|
||||||
baseFC.getLeafCollector(ctx),
|
|
||||||
new DrillSidewaysScorer.DocsAndCost[] {docsAndCost},
|
new DrillSidewaysScorer.DocsAndCost[] {docsAndCost},
|
||||||
scoreSubDocsAtOnce);
|
scoreSubDocsAtOnce);
|
||||||
expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null));
|
expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null));
|
||||||
|
@ -321,7 +319,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||||
// both our base and sideways dim facets collectors. What we really want to test here is
|
// both our base and sideways dim facets collectors. What we really want to test here is
|
||||||
// that the matching docs are still correctly present and populated after an early
|
// that the matching docs are still correctly present and populated after an early
|
||||||
// termination occurs (i.e., #finish is properly called in that scenario):
|
// termination occurs (i.e., #finish is properly called in that scenario):
|
||||||
assertEquals(1, baseFC.getMatchingDocs().size());
|
|
||||||
assertEquals(1, dimFC.getMatchingDocs().size());
|
assertEquals(1, dimFC.getMatchingDocs().size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -242,10 +242,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||||
|
|
||||||
public void testIntAssociationRandom() throws Exception {
|
public void testIntAssociationRandom() throws Exception {
|
||||||
|
|
||||||
FacetsCollector fc = new FacetsCollector();
|
|
||||||
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
searcher.search(new TermQuery(new Term("match", "yes")), fc);
|
FacetsCollector fc =
|
||||||
|
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
|
||||||
|
|
||||||
Map<String, Integer> expected;
|
Map<String, Integer> expected;
|
||||||
Facets facets;
|
Facets facets;
|
||||||
|
@ -332,10 +331,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||||
|
|
||||||
public void testFloatAssociationRandom() throws Exception {
|
public void testFloatAssociationRandom() throws Exception {
|
||||||
|
|
||||||
FacetsCollector fc = new FacetsCollector();
|
|
||||||
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
searcher.search(new TermQuery(new Term("match", "yes")), fc);
|
FacetsCollector fc =
|
||||||
|
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
|
||||||
|
|
||||||
Map<String, Float> expected;
|
Map<String, Float> expected;
|
||||||
Facets facets;
|
Facets facets;
|
||||||
|
|
|
@ -16,14 +16,10 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.facet.taxonomy.writercache;
|
package org.apache.lucene.facet.taxonomy.writercache;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
|
||||||
import java.io.BufferedOutputStream;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.charset.CharsetDecoder;
|
import java.nio.charset.CharsetDecoder;
|
||||||
import java.nio.charset.CodingErrorAction;
|
import java.nio.charset.CodingErrorAction;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
|
|
||||||
public class TestCharBlockArray extends FacetTestCase {
|
public class TestCharBlockArray extends FacetTestCase {
|
||||||
|
@ -89,19 +85,6 @@ public class TestCharBlockArray extends FacetTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);
|
assertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);
|
||||||
|
|
||||||
Path tempDir = createTempDir("growingchararray");
|
|
||||||
Path f = tempDir.resolve("GrowingCharArrayTest.tmp");
|
|
||||||
BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(f));
|
|
||||||
array.flush(out);
|
|
||||||
out.flush();
|
|
||||||
out.close();
|
|
||||||
|
|
||||||
BufferedInputStream in = new BufferedInputStream(Files.newInputStream(f));
|
|
||||||
array = CharBlockArray.open(in);
|
|
||||||
assertEqualsInternal(
|
|
||||||
"GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array);
|
|
||||||
in.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void assertEqualsInternal(
|
private static void assertEqualsInternal(
|
||||||
|
|
|
@ -95,7 +95,7 @@ class TermsQuery extends MultiTermQuery implements Accountable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getTermsCount() throws IOException {
|
public long getTermsCount() {
|
||||||
return terms.size();
|
return terms.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -238,6 +238,7 @@ public final class Intervals {
|
||||||
*/
|
*/
|
||||||
public static IntervalsSource regexp(BytesRef regexp, int maxExpansions) {
|
public static IntervalsSource regexp(BytesRef regexp, int maxExpansions) {
|
||||||
Automaton automaton = new RegExp(new Term("", regexp).text()).toAutomaton();
|
Automaton automaton = new RegExp(new Term("", regexp).text()).toAutomaton();
|
||||||
|
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, true, false);
|
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, true, false);
|
||||||
return new MultiTermIntervalsSource(ca, maxExpansions, regexp.utf8ToString());
|
return new MultiTermIntervalsSource(ca, maxExpansions, regexp.utf8ToString());
|
||||||
}
|
}
|
||||||
|
|
|
@ -447,4 +447,24 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||||
field, or(term("XXX"), containing(extend(term("message"), 0, 10), term("intend"))));
|
field, or(term("XXX"), containing(extend(term("message"), 0, 10), term("intend"))));
|
||||||
checkHits(q, new int[] {});
|
checkHits(q, new int[] {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testEquality() {
|
||||||
|
assertEquals(
|
||||||
|
new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo"))),
|
||||||
|
new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo"))));
|
||||||
|
assertEquals(
|
||||||
|
new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1)),
|
||||||
|
new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1)));
|
||||||
|
assertEquals(
|
||||||
|
new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1)),
|
||||||
|
new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1)));
|
||||||
|
assertEquals(
|
||||||
|
new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt"))),
|
||||||
|
new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt"))));
|
||||||
|
assertEquals(
|
||||||
|
new IntervalQuery(
|
||||||
|
"f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)),
|
||||||
|
new IntervalQuery(
|
||||||
|
"f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1187,4 +1187,27 @@ public class TestIntervals extends LuceneTestCase {
|
||||||
|
|
||||||
checkVisits(source, 1);
|
checkVisits(source, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// basic test for equality and inequality of instances created by the factories
|
||||||
|
public void testEquality() {
|
||||||
|
assertEquals(Intervals.term("wibble"), Intervals.term("wibble"));
|
||||||
|
assertEquals(Intervals.prefix(new BytesRef("p"), 1), Intervals.prefix(new BytesRef("p"), 1));
|
||||||
|
assertEquals(Intervals.fuzzyTerm("kot", 1), Intervals.fuzzyTerm("kot", 1));
|
||||||
|
assertEquals(Intervals.regexp(new BytesRef(".*ot")), Intervals.regexp(new BytesRef(".*ot")));
|
||||||
|
assertEquals(
|
||||||
|
Intervals.wildcard(new BytesRef("*.txt")), Intervals.wildcard(new BytesRef("*.txt")));
|
||||||
|
assertEquals(
|
||||||
|
Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true),
|
||||||
|
Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true));
|
||||||
|
|
||||||
|
assertNotEquals(Intervals.term("wibble"), Intervals.term("wobble"));
|
||||||
|
assertNotEquals(Intervals.prefix(new BytesRef("p"), 1), Intervals.prefix(new BytesRef("b"), 1));
|
||||||
|
assertNotEquals(Intervals.fuzzyTerm("kot", 1), Intervals.fuzzyTerm("kof", 1));
|
||||||
|
assertNotEquals(Intervals.regexp(new BytesRef(".*ot")), Intervals.regexp(new BytesRef(".*at")));
|
||||||
|
assertNotEquals(
|
||||||
|
Intervals.wildcard(new BytesRef("*.txt")), Intervals.wildcard(new BytesRef("*.tat")));
|
||||||
|
assertNotEquals(
|
||||||
|
Intervals.range(new BytesRef("warm"), new BytesRef("hot"), true, true),
|
||||||
|
Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,6 @@ import org.apache.lucene.sandbox.facet.cutters.ranges.LongRangeFacetCutter;
|
||||||
import org.apache.lucene.sandbox.facet.labels.OrdToLabel;
|
import org.apache.lucene.sandbox.facet.labels.OrdToLabel;
|
||||||
import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel;
|
import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel;
|
||||||
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
|
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
|
||||||
import org.apache.lucene.search.CollectorOwner;
|
|
||||||
import org.apache.lucene.search.DoubleValues;
|
import org.apache.lucene.search.DoubleValues;
|
||||||
import org.apache.lucene.search.DoubleValuesSource;
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
|
@ -538,7 +537,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||||
|
|
||||||
////// First search, no drill-downs:
|
////// First search, no drill-downs:
|
||||||
DrillDownQuery ddq = new DrillDownQuery(config);
|
DrillDownQuery ddq = new DrillDownQuery(config);
|
||||||
ds.search(ddq, new CollectorOwner<>(collectorManager), List.of());
|
ds.search(ddq, collectorManager, List.of());
|
||||||
|
|
||||||
// assertEquals(100, dsr.hits.totalHits.value);
|
// assertEquals(100, dsr.hits.totalHits.value);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@ -556,10 +555,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||||
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
|
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
|
||||||
ddq = new DrillDownQuery(config);
|
ddq = new DrillDownQuery(config);
|
||||||
ddq.add("dim", "b");
|
ddq.add("dim", "b");
|
||||||
ds.search(
|
ds.search(ddq, fieldCollectorManager, List.of(dimCollectorManager));
|
||||||
ddq,
|
|
||||||
new CollectorOwner<>(fieldCollectorManager),
|
|
||||||
List.of(new CollectorOwner<>(dimCollectorManager)));
|
|
||||||
|
|
||||||
// assertEquals(75, dsr.hits.totalHits.value);
|
// assertEquals(75, dsr.hits.totalHits.value);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@ -577,10 +573,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||||
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
|
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
|
||||||
ddq = new DrillDownQuery(config);
|
ddq = new DrillDownQuery(config);
|
||||||
ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L));
|
ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L));
|
||||||
ds.search(
|
ds.search(ddq, dimCollectorManager, List.of(fieldCollectorManager));
|
||||||
ddq,
|
|
||||||
new CollectorOwner<>(dimCollectorManager),
|
|
||||||
List.of(new CollectorOwner<>(fieldCollectorManager)));
|
|
||||||
|
|
||||||
// assertEquals(11, dsr.hits.totalHits.value);
|
// assertEquals(11, dsr.hits.totalHits.value);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@ -1629,14 +1622,12 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||||
|
|
||||||
countRecorder = new CountFacetRecorder();
|
countRecorder = new CountFacetRecorder();
|
||||||
|
|
||||||
CollectorOwner<DummyTotalHitCountCollector, Integer> totalHitsCollectorOwner =
|
DrillSideways.Result<Integer, CountFacetRecorder> result =
|
||||||
new CollectorOwner<>(DummyTotalHitCountCollector.createManager());
|
ds.search(
|
||||||
CollectorOwner<FacetFieldCollector, CountFacetRecorder> drillSidewaysCollectorOwner =
|
ddq,
|
||||||
new CollectorOwner<>(
|
DummyTotalHitCountCollector.createManager(),
|
||||||
new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder));
|
List.of(new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder)));
|
||||||
ds.search(ddq, totalHitsCollectorOwner, List.of(drillSidewaysCollectorOwner));
|
assertEquals(1, result.drillDownResult().intValue());
|
||||||
assertEquals(1, totalHitsCollectorOwner.getResult().intValue());
|
|
||||||
drillSidewaysCollectorOwner.getResult();
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"dim=field path=[] value=-2147483648 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
|
"dim=field path=[] value=-2147483648 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
|
||||||
getAllSortByOrd(getRangeOrdinals(ranges), countRecorder, "field", ordToLabel).toString());
|
getAllSortByOrd(getRangeOrdinals(ranges), countRecorder, "field", ordToLabel).toString());
|
||||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.geo.Circle;
|
import org.apache.lucene.geo.Circle;
|
||||||
import org.apache.lucene.geo.Component2D;
|
import org.apache.lucene.geo.Component2D;
|
||||||
|
import org.apache.lucene.geo.GeoEncodingUtils;
|
||||||
import org.apache.lucene.geo.GeoUtils;
|
import org.apache.lucene.geo.GeoUtils;
|
||||||
import org.apache.lucene.geo.LatLonGeometry;
|
import org.apache.lucene.geo.LatLonGeometry;
|
||||||
import org.apache.lucene.geo.Polygon;
|
import org.apache.lucene.geo.Polygon;
|
||||||
|
@ -1751,4 +1752,41 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
||||||
newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20);
|
newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20);
|
||||||
assertEquals(3, td.totalHits.value);
|
assertEquals(3, td.totalHits.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNarrowPolygonCloseToNorthPole() throws Exception {
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||||
|
iwc.setMergeScheduler(new SerialMergeScheduler());
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
|
||||||
|
// index a point close to latitude 90
|
||||||
|
Document doc = new Document();
|
||||||
|
final int base = Integer.MAX_VALUE;
|
||||||
|
addPointToDoc(
|
||||||
|
FIELD_NAME,
|
||||||
|
doc,
|
||||||
|
GeoEncodingUtils.decodeLatitude(base - 2),
|
||||||
|
GeoEncodingUtils.decodeLongitude(base - 2));
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.flush();
|
||||||
|
|
||||||
|
// query testing
|
||||||
|
final IndexReader reader = DirectoryReader.open(w);
|
||||||
|
final IndexSearcher s = newSearcher(reader);
|
||||||
|
|
||||||
|
double minLat = GeoEncodingUtils.decodeLatitude(base - 3);
|
||||||
|
double maxLat = GeoEncodingUtils.decodeLatitude(base);
|
||||||
|
double minLon = GeoEncodingUtils.decodeLongitude(base - 3);
|
||||||
|
double maxLon = GeoEncodingUtils.decodeLongitude(base);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
newPolygonQuery(
|
||||||
|
FIELD_NAME,
|
||||||
|
new Polygon(
|
||||||
|
new double[] {minLat, minLat, maxLat, maxLat, minLat},
|
||||||
|
new double[] {minLon, maxLon, maxLon, minLon, minLon}));
|
||||||
|
|
||||||
|
assertEquals(1, s.count(query));
|
||||||
|
IOUtils.close(w, reader, dir);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.tests.util.automaton;
|
package org.apache.lucene.tests.util.automaton;
|
||||||
|
|
||||||
|
import java.util.ArrayDeque;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -33,6 +34,7 @@ import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.Operations;
|
import org.apache.lucene.util.automaton.Operations;
|
||||||
import org.apache.lucene.util.automaton.RegExp;
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
|
import org.apache.lucene.util.automaton.StatePair;
|
||||||
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
|
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
|
||||||
import org.apache.lucene.util.automaton.Transition;
|
import org.apache.lucene.util.automaton.Transition;
|
||||||
|
|
||||||
|
@ -533,4 +535,82 @@ public class AutomatonTestUtil {
|
||||||
assert a.isDeterministic() == true;
|
assert a.isDeterministic() == true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if these two automata accept exactly the same language. This is a costly
|
||||||
|
* computation! Both automata must be determinized and have no dead states!
|
||||||
|
*/
|
||||||
|
public static boolean sameLanguage(Automaton a1, Automaton a2) {
|
||||||
|
if (a1 == a2) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return subsetOf(a2, a1) && subsetOf(a1, a2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the language of <code>a1</code> is a subset of the language of <code>a2</code>.
|
||||||
|
* Both automata must be determinized and must have no dead states.
|
||||||
|
*
|
||||||
|
* <p>Complexity: quadratic in number of states.
|
||||||
|
*/
|
||||||
|
public static boolean subsetOf(Automaton a1, Automaton a2) {
|
||||||
|
if (a1.isDeterministic() == false) {
|
||||||
|
throw new IllegalArgumentException("a1 must be deterministic");
|
||||||
|
}
|
||||||
|
if (a2.isDeterministic() == false) {
|
||||||
|
throw new IllegalArgumentException("a2 must be deterministic");
|
||||||
|
}
|
||||||
|
assert Operations.hasDeadStatesFromInitial(a1) == false;
|
||||||
|
assert Operations.hasDeadStatesFromInitial(a2) == false;
|
||||||
|
if (a1.getNumStates() == 0) {
|
||||||
|
// The empty language is always a subset of any other language
|
||||||
|
return true;
|
||||||
|
} else if (a2.getNumStates() == 0) {
|
||||||
|
return Operations.isEmpty(a1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: cutover to iterators instead
|
||||||
|
Transition[][] transitions1 = a1.getSortedTransitions();
|
||||||
|
Transition[][] transitions2 = a2.getSortedTransitions();
|
||||||
|
ArrayDeque<StatePair> worklist = new ArrayDeque<>();
|
||||||
|
HashSet<StatePair> visited = new HashSet<>();
|
||||||
|
StatePair p = new StatePair(0, 0);
|
||||||
|
worklist.add(p);
|
||||||
|
visited.add(p);
|
||||||
|
while (worklist.size() > 0) {
|
||||||
|
p = worklist.removeFirst();
|
||||||
|
if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Transition[] t1 = transitions1[p.s1];
|
||||||
|
Transition[] t2 = transitions2[p.s2];
|
||||||
|
for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
|
||||||
|
while (b2 < t2.length && t2[b2].max < t1[n1].min) {
|
||||||
|
b2++;
|
||||||
|
}
|
||||||
|
int min1 = t1[n1].min, max1 = t1[n1].max;
|
||||||
|
|
||||||
|
for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) {
|
||||||
|
if (t2[n2].min > min1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (t2[n2].max < Character.MAX_CODE_POINT) {
|
||||||
|
min1 = t2[n2].max + 1;
|
||||||
|
} else {
|
||||||
|
min1 = Character.MAX_CODE_POINT;
|
||||||
|
max1 = Character.MIN_CODE_POINT;
|
||||||
|
}
|
||||||
|
StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
|
||||||
|
if (!visited.contains(q)) {
|
||||||
|
worklist.add(q);
|
||||||
|
visited.add(q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (min1 <= max1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,17 +9,17 @@ errorprone = "2.18.0"
|
||||||
flexmark = "0.61.24"
|
flexmark = "0.61.24"
|
||||||
# @keep This is GJF version for spotless/ tidy.
|
# @keep This is GJF version for spotless/ tidy.
|
||||||
googleJavaFormat = "1.23.0"
|
googleJavaFormat = "1.23.0"
|
||||||
groovy = "3.0.21"
|
groovy = "4.0.22"
|
||||||
hamcrest = "2.2"
|
hamcrest = "2.2"
|
||||||
icu4j = "74.2"
|
icu4j = "74.2"
|
||||||
javacc = "7.0.12"
|
javacc = "7.0.12"
|
||||||
jflex = "1.8.2"
|
jflex = "1.8.2"
|
||||||
jgit = "5.13.1.202206130422-r"
|
jgit = "6.10.0.202406032230-r"
|
||||||
jmh = "1.37"
|
jmh = "1.37"
|
||||||
jts = "1.17.0"
|
jts = "1.17.0"
|
||||||
junit = "4.13.1"
|
junit = "4.13.1"
|
||||||
# @keep Minimum gradle version to run the build
|
# @keep Minimum gradle version to run the build
|
||||||
minGradle = "8.8"
|
minGradle = "8.10"
|
||||||
# @keep This is the minimum required Java version.
|
# @keep This is the minimum required Java version.
|
||||||
minJava = "21"
|
minJava = "21"
|
||||||
morfologik = "2.1.9"
|
morfologik = "2.1.9"
|
||||||
|
@ -49,7 +49,7 @@ flexmark-ext-abbreviation = { module = "com.vladsch.flexmark:flexmark-ext-abbrev
|
||||||
flexmark-ext-attributes = { module = "com.vladsch.flexmark:flexmark-ext-attributes", version.ref = "flexmark" }
|
flexmark-ext-attributes = { module = "com.vladsch.flexmark:flexmark-ext-attributes", version.ref = "flexmark" }
|
||||||
flexmark-ext-autolink = { module = "com.vladsch.flexmark:flexmark-ext-autolink", version.ref = "flexmark" }
|
flexmark-ext-autolink = { module = "com.vladsch.flexmark:flexmark-ext-autolink", version.ref = "flexmark" }
|
||||||
flexmark-ext-tables = { module = "com.vladsch.flexmark:flexmark-ext-tables", version.ref = "flexmark" }
|
flexmark-ext-tables = { module = "com.vladsch.flexmark:flexmark-ext-tables", version.ref = "flexmark" }
|
||||||
groovy = { module = "org.codehaus.groovy:groovy-all", version.ref = "groovy" }
|
groovy = { module = "org.apache.groovy:groovy-all", version.ref = "groovy" }
|
||||||
hamcrest = { module = "org.hamcrest:hamcrest", version.ref = "hamcrest" }
|
hamcrest = { module = "org.hamcrest:hamcrest", version.ref = "hamcrest" }
|
||||||
icu4j = { module = "com.ibm.icu:icu4j", version.ref = "icu4j" }
|
icu4j = { module = "com.ibm.icu:icu4j", version.ref = "icu4j" }
|
||||||
javacc = { module = "net.java.dev.javacc:javacc", version.ref = "javacc" }
|
javacc = { module = "net.java.dev.javacc:javacc", version.ref = "javacc" }
|
||||||
|
|
Loading…
Reference in New Issue