mirror of https://github.com/apache/lucene.git

commit 73b4ced245: Merge branch 'main' into optimize_prefix_query
@@ -30,7 +30,7 @@ jobs:
  strategy:
    matrix:
      os: [ ubuntu-latest ]
      java-version: [ '22' ]
      java-version: [ '23-ea' ]
      uses-alt-java: [ true, false ]

  runs-on: ${{ matrix.os }}

@@ -61,7 +61,16 @@ jobs:
        # https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-environment-variable
        echo "RUNTIME_JAVA_HOME=${{ env.ALT_JAVA_DIR }}" >> "$GITHUB_ENV"

    - run: ./gradlew -p lucene/core check -x test
    - name: ./gradlew tidy
      run: |
        ./gradlew tidy
        if [ ! -z "$(git status --porcelain)" ]; then
          echo ":warning: **tidy left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
          git status --porcelain >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
          git reset --hard && git clean -xfd .
        fi

    - name: ./gradlew regenerate
      run: |
@@ -69,7 +78,7 @@ jobs:
        sudo apt-get install libwww-perl
        ./gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
        if [ ! -z "$(git status --porcelain)" ]; then
          echo ":warning: **regenerateleft local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
          echo ":warning: **regenerate left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
          git status --porcelain >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY

@@ -79,8 +88,7 @@ jobs:
    - run: ./gradlew testOpts
    - run: ./gradlew helpWorkflow
    - run: ./gradlew licenses updateLicenses
    - run: ./gradlew tidy
    - run: ./gradlew check -x test
    - run: ./gradlew check -x test -Pvalidation.git.failOnModified=false
    - run: ./gradlew assembleRelease mavenToLocal

    # Conserve resources: only run these in non-alt-java mode.

@@ -18,7 +18,7 @@ jobs:
  strategy:
    matrix:
      os: [ ubuntu-latest ]
      java-version: [ '21', '22' ]
      java-version: [ '21', '22', '23-ea' ]

  runs-on: ${{ matrix.os }}

@@ -72,3 +72,4 @@ jobs:
        name: smoke-tester-logs-jdk-${{ matrix.java-version }}
        path: |
          ${{ env.TMP_DIR }}/**/*.log
          /tmp/release.log
@@ -41,7 +41,7 @@ import jdk.jfr.consumer.RecordingFile;
 */
public class ProfileResults {
  /** Formats a frame to a formatted line. This is deduplicated on! */
  static String frameToString(RecordedFrame frame, boolean lineNumbers) {
  static String frameToString(RecordedFrame frame, boolean lineNumbers, boolean frameTypes) {
    StringBuilder builder = new StringBuilder();
    RecordedMethod method = frame.getMethod();
    RecordedClass clazz = method.getType();

@@ -55,13 +55,14 @@ public class ProfileResults {
    builder.append("#");
    builder.append(method.getName());
    builder.append("()");
    if (lineNumbers) {
    if (lineNumbers && frame.getLineNumber() != -1) {
      builder.append(":");
      if (frame.getLineNumber() == -1) {
        builder.append("(" + frame.getType() + " code)");
      } else {
        builder.append(frame.getLineNumber());
      }
    }
    if (clazz != null && frameTypes) {
      builder.append(" [");
      builder.append(frame.getType());
      builder.append(" code]");
    }
    return builder.toString();
  }

@@ -77,6 +78,8 @@ public class ProfileResults {
  public static final String COUNT_DEFAULT = "10";
  public static final String LINENUMBERS_KEY = "tests.profile.linenumbers";
  public static final String LINENUMBERS_DEFAULT = "false";
  public static final String FRAMETYPES_KEY = "tests.profile.frametypes";
  public static final String FRAMETYPES_DEFAULT = "true";

  /**
   * Driver method, for testing standalone.

@@ -92,7 +95,8 @@ public class ProfileResults {
        System.getProperty(MODE_KEY, MODE_DEFAULT),
        Integer.parseInt(System.getProperty(STACKSIZE_KEY, STACKSIZE_DEFAULT)),
        Integer.parseInt(System.getProperty(COUNT_KEY, COUNT_DEFAULT)),
        Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)));
        Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)),
        Boolean.parseBoolean(System.getProperty(FRAMETYPES_KEY, FRAMETYPES_DEFAULT)));
  }

  /** true if we care about this event */

@@ -152,7 +156,12 @@ public class ProfileResults {

  /** Process all the JFR files passed in args and print a merged summary. */
  public static void printReport(
      List<String> files, String mode, int stacksize, int count, boolean lineNumbers)
      List<String> files,
      String mode,
      int stacksize,
      int count,
      boolean lineNumbers,
      boolean frameTypes)
      throws IOException {
    if (!"cpu".equals(mode) && !"heap".equals(mode)) {
      throw new IllegalArgumentException("tests.profile.mode must be one of (cpu,heap)");

@@ -181,7 +190,7 @@ public class ProfileResults {
      if (stack.length() > 0) {
        stack.append("\n").append(framePadding).append(" at ");
      }
      stack.append(frameToString(trace.getFrames().get(i), lineNumbers));
      stack.append(frameToString(trace.getFrames().get(i), lineNumbers, frameTypes));
    }
    String line = stack.toString();
    SimpleEntry<String, Long> entry =

@@ -60,8 +60,8 @@ public class WrapperDownloader {

  public static void checkVersion() {
    int major = Runtime.version().feature();
    if (major != 21 && major != 22) {
      throw new IllegalStateException("java version must be 21 or 22, your version: " + major);
    if (major != 21 && major != 22 && major != 23) {
      throw new IllegalStateException("java version must be 21, 22 or 23, your version: " + major);
    }
  }
build.gradle

@@ -80,6 +80,9 @@ ext {
  // Minimum Java version required to compile and run Lucene.
  minJavaVersion = JavaVersion.toVersion(deps.versions.minJava.get())

  // also change this in extractor tool: ExtractForeignAPI
  vectorIncubatorJavaVersions = [ JavaVersion.VERSION_21, JavaVersion.VERSION_22, JavaVersion.VERSION_23 ] as Set

  // snapshot build marker used in scripts.
  snapshotBuild = version.contains("SNAPSHOT")

@@ -117,10 +120,6 @@ apply from: file('gradle/generation/local-settings.gradle')
// Make sure the build environment is consistent.
apply from: file('gradle/validation/check-environment.gradle')

// IDE support, settings and specials.
apply from: file('gradle/ide/intellij-idea.gradle')
apply from: file('gradle/ide/eclipse.gradle')

// Set up defaults and configure aspects for certain modules or functionality
// (java, tests)
apply from: file('gradle/java/folder-layout.gradle')

@@ -133,6 +132,10 @@ apply from: file('gradle/testing/alternative-jdk-support.gradle')
apply from: file('gradle/java/jar-manifest.gradle')
apply from: file('gradle/java/modules.gradle')

// IDE support, settings and specials.
apply from: file('gradle/ide/intellij-idea.gradle')
apply from: file('gradle/ide/eclipse.gradle')

// Maven artifact publishing.
apply from: file('gradle/maven/publications.gradle')
@@ -112,8 +112,8 @@ def prepare(root, version, pause_before_sign, gpg_key_id, gpg_password, gpg_home
  checkDOAPfiles(version)

  if not dev_mode:
    print(' ./gradlew --no-daemon clean check')
    run('./gradlew --no-daemon clean check')
    print(' ./gradlew --stacktrace --no-daemon clean check')
    run('./gradlew --stacktrace --no-daemon clean check')
  else:
    print(' skipping precommit check due to dev-mode')

@@ -121,7 +121,7 @@ def prepare(root, version, pause_before_sign, gpg_key_id, gpg_password, gpg_home
  input("Tests complete! Please press ENTER to proceed to assembleRelease: ")

  print(' prepare-release')
  cmd = './gradlew --no-daemon assembleRelease' \
  cmd = './gradlew --stacktrace --no-daemon assembleRelease' \
        ' -Dversion.release=%s' % version
  if dev_mode:
    cmd += ' -Pvalidation.git.failOnModified=false'
@ -32,7 +32,7 @@ allprojects {
|
|||
missingdoclet "org.apache.lucene.tools:missing-doclet"
|
||||
}
|
||||
|
||||
ext {
|
||||
project.ext {
|
||||
relativeDocPath = project.path.replaceFirst(/:\w+:/, "").replace(':', '/')
|
||||
}
|
||||
|
||||
|
|
|
@ -17,13 +17,6 @@
|
|||
|
||||
def resources = scriptResources(buildscript)
|
||||
|
||||
configure(rootProject) {
|
||||
ext {
|
||||
// also change this in extractor tool: ExtractForeignAPI
|
||||
vectorIncubatorJavaVersions = [ JavaVersion.VERSION_21, JavaVersion.VERSION_22 ] as Set
|
||||
}
|
||||
}
|
||||
|
||||
configure(project(":lucene:core")) {
|
||||
ext {
|
||||
apijars = layout.projectDirectory.dir("src/generated/jdk")
|
||||
|
|
|
@ -43,6 +43,31 @@ configure(project(":lucene:core")) {
|
|||
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
|
||||
mustRunBefore: [ "compileJava" ]
|
||||
])
|
||||
|
||||
task generateForDeltaUtilInternal() {
|
||||
description "Regenerate gen_ForDeltaUtil.py"
|
||||
group "generation"
|
||||
|
||||
def genDir = file("src/java/org/apache/lucene/codecs/lucene912")
|
||||
def genScript = file("${genDir}/gen_ForDeltaUtil.py")
|
||||
def genOutput = file("${genDir}/ForDeltaUtil.java")
|
||||
|
||||
inputs.file genScript
|
||||
outputs.file genOutput
|
||||
|
||||
doLast {
|
||||
quietExec {
|
||||
workingDir genDir
|
||||
executable project.externalTool("python3")
|
||||
args = [ '-B', genScript ]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
regenerate.dependsOn wrapWithPersistentChecksums(generateForDeltaUtilInternal, [
|
||||
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
|
||||
mustRunBefore: [ "compileJava" ]
|
||||
])
|
||||
}
|
||||
|
||||
configure(project(":lucene:backward-codecs")) {
|
||||
|
|
|
@ -65,10 +65,8 @@ configure(project(":lucene:analysis:icu")) {
|
|||
icupkg = file("${icuBinDir}/icupkg")
|
||||
}
|
||||
|
||||
// Resolve version lazily (can't resolve at configuration time).
|
||||
def icu4jVersionProvider = project.provider { getVersion('com.ibm.icu', 'icu4j') }
|
||||
// lazy gstring with ICU version.
|
||||
def icu4jVersion = "${-> icu4jVersionProvider.get()}"
|
||||
def icu4jVersion = deps.icu4j.get().version
|
||||
|
||||
def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ configure(project(":lucene:analysis:kuromoji")) {
|
|||
apply plugin: deps.plugins.undercouch.download.get().pluginId
|
||||
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
project.ext {
|
||||
targetDir = file("src/resources")
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ configure(project(":lucene:analysis:nori")) {
|
|||
apply plugin: deps.plugins.undercouch.download.get().pluginId
|
||||
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
project.ext {
|
||||
targetDir = file("src/resources")
|
||||
}
|
||||
|
||||
|
|
|
@ -22,10 +22,11 @@ import org.gradle.plugins.ide.eclipse.model.ClasspathEntry
|
|||
def resources = scriptResources(buildscript)
|
||||
|
||||
configure(rootProject) {
|
||||
plugins.withType(JavaPlugin) {
|
||||
apply plugin: "eclipse"
|
||||
if (gradle.startParameter.taskNames.contains("eclipse")) {
|
||||
project.pluginManager.apply("java-base")
|
||||
project.pluginManager.apply("eclipse")
|
||||
|
||||
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", rootProject.minJavaVersion)
|
||||
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", deps.versions.minJava.get())
|
||||
def relativize = { other -> rootProject.rootDir.relativePath(other).toString() }
|
||||
|
||||
eclipse {
|
||||
|
@ -107,7 +108,7 @@ configure(rootProject) {
|
|||
|
||||
eclipseJdt {
|
||||
enabled = false
|
||||
dependsOn 'luceneEclipse'
|
||||
dependsOn 'luceneEclipseJdt'
|
||||
}
|
||||
|
||||
eclipseClasspath {
|
||||
|
|
|
@ -27,7 +27,7 @@ def beastingMode = gradle.startParameter.taskNames.any{ name -> name == 'beast'
|
|||
|
||||
allprojects {
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
project.ext {
|
||||
testOptions += [
|
||||
[propName: 'tests.dups', value: 0, description: "Reiterate runs of entire test suites ('beast' task)."]
|
||||
]
|
||||
|
|
|
@ -19,7 +19,7 @@ def recordings = files()
|
|||
|
||||
allprojects {
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
project.ext {
|
||||
testOptions += [
|
||||
[propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."]
|
||||
]
|
||||
|
|
|
@ -62,7 +62,7 @@ allprojects {
|
|||
// Configure test property defaults and their descriptions.
|
||||
allprojects {
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
project.ext {
|
||||
String randomVectorSize = RandomPicks.randomFrom(new Random(projectSeedLong), ["default", "128", "256", "512"])
|
||||
testOptions += [
|
||||
// seed, repetition and amplification.
|
||||
|
@ -135,14 +135,14 @@ allprojects {
|
|||
}
|
||||
|
||||
afterEvaluate {
|
||||
ext.testOptionsResolved = testOptions.findAll { opt ->
|
||||
project.ext.testOptionsResolved = testOptions.findAll { opt ->
|
||||
propertyOrDefault(opt.propName, opt.value) != null
|
||||
}.collectEntries { opt ->
|
||||
[(opt.propName): Objects.toString(resolvedTestOption(opt.propName))]
|
||||
}
|
||||
|
||||
// Compute the "reproduce with" string.
|
||||
ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
|
||||
project.ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
|
||||
if (opt["includeInReproLine"] == false) {
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ def allSuites = []
|
|||
|
||||
allprojects {
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
project.ext {
|
||||
testOptions += [
|
||||
[propName: 'tests.slowestTests', value: true, description: "Print the summary of the slowest tests."],
|
||||
[propName: 'tests.slowestSuites', value: true, description: "Print the summary of the slowest suites."]
|
||||
|
|
|
@ -75,6 +75,18 @@ configure(rootProject) {
|
|||
it.dependsOn(":versionCatalogFormatDeps")
|
||||
}
|
||||
|
||||
// correct crlf/ default encoding after version catalog formatting finishes.
|
||||
tasks.matching {
|
||||
it.path in [
|
||||
":versionCatalogFormatDeps"
|
||||
]
|
||||
}.configureEach {
|
||||
it.doLast {
|
||||
ant.fixcrlf(file: it.catalogFile.get().asFile,
|
||||
eol: "lf", fixlast: "true", encoding: "UTF-8")
|
||||
}
|
||||
}
|
||||
|
||||
tasks.matching {
|
||||
it.path in [
|
||||
":versionCatalogUpdateDeps"
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
@defaultMessage Spawns threads with vague names; use a custom thread factory (Lucene's NamedThreadFactory, Solr's SolrNamedThreadFactory) and name threads so that you can tell (by its name) which executor it is associated with
|
||||
@defaultMessage Spawns threads with vague names; use a custom thread factory (Lucene's NamedThreadFactory) and name threads so that you can tell (by its name) which executor it is associated with
|
||||
java.util.concurrent.Executors#newFixedThreadPool(int)
|
||||
java.util.concurrent.Executors#newSingleThreadExecutor()
|
||||
java.util.concurrent.Executors#newCachedThreadPool()
|
||||
|
|
|
@ -74,21 +74,6 @@ configure(rootProject) {
|
|||
logger.warn("WARNING: Directory is not a valid git checkout (won't check dirty files): ${rootProject.projectDir}")
|
||||
}
|
||||
} else {
|
||||
// git ignores any folders which are empty (this includes folders with recursively empty sub-folders).
|
||||
def untrackedNonEmptyFolders = status.untrackedFolders.findAll { path ->
|
||||
File location = file("${rootProject.projectDir}/${path}")
|
||||
boolean hasFiles = false
|
||||
Files.walkFileTree(location.toPath(), new SimpleFileVisitor<Path>() {
|
||||
@Override
|
||||
FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
|
||||
hasFiles = true
|
||||
// Terminate early.
|
||||
return FileVisitResult.TERMINATE
|
||||
}
|
||||
})
|
||||
return hasFiles
|
||||
}
|
||||
|
||||
def offenders = [
|
||||
// Exclude staged changes. These are fine in precommit.
|
||||
// "(added)": status.added,
|
||||
|
@ -97,8 +82,7 @@ configure(rootProject) {
|
|||
"(conflicting)": status.conflicting,
|
||||
"(missing)": status.missing,
|
||||
"(modified)": status.modified,
|
||||
"(untracked)": status.untracked,
|
||||
"(untracked non-empty dir)": untrackedNonEmptyFolders
|
||||
"(untracked)": status.untracked
|
||||
].collectMany { fileStatus, files ->
|
||||
files.collect {file -> " - ${file} ${fileStatus}" }
|
||||
}.sort()
|
||||
|
|
|
@ -20,6 +20,10 @@
|
|||
// 2) notice file
|
||||
// 3) checksum validation/ generation.
|
||||
|
||||
// WARNING: The tasks in this file share internal state between tasks without using files.
|
||||
// Because of this all tasks here must always execute together, so they cannot define task outputs.
|
||||
// TODO: Rewrite the internal state to use state files containing the ext.jarInfos and its referencedFiles
|
||||
|
||||
// This should be false only for debugging.
|
||||
def failOnError = true
|
||||
|
||||
|
@ -194,13 +198,6 @@ subprojects {
|
|||
description = "Validate license and notice files of dependencies"
|
||||
dependsOn collectJarInfos
|
||||
|
||||
def outputFileName = 'validateJarLicenses'
|
||||
inputs.dir(file(project.rootDir.path + '/lucene/licenses'))
|
||||
.withPropertyName('licenses')
|
||||
.withPathSensitivity(PathSensitivity.RELATIVE)
|
||||
outputs.file(layout.buildDirectory.file(outputFileName))
|
||||
.withPropertyName('validateJarLicensesResult')
|
||||
|
||||
doLast {
|
||||
def errors = []
|
||||
jarInfos.each { dep ->
|
||||
|
@ -246,9 +243,7 @@ subprojects {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Required to take advantage of incremental building and the build cache
|
||||
def f = new File(project.buildDir.path + "/" + outputFileName)
|
||||
f.write(errors.toString(), "UTF-8")
|
||||
|
||||
if (errors) {
|
||||
def msg = "Certain license/ notice files are missing:\n - " + errors.join("\n - ")
|
||||
if (failOnError) {
|
||||
|
|
|
@ -1 +1 @@
|
|||
cb0da6751c2b753a16ac168bb354870ebb1e162e9083f116729cec9c781156b8
|
||||
2db75c40782f5e8ba1fc278a5574bab070adccb2d21ca5a6e5ed840888448046
|
|
@ -1 +1 @@
|
|||
8.8.0
|
||||
8.10.0
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
|
||||
networkTimeout=10000
|
||||
validateDistributionUrl=true
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
|
|
|
@@ -112,6 +112,16 @@ API Changes

* GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)

* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)

* GITHUB#13733: Move FacetsCollector#search utility methods to `FacetsCollectorManager`, replace the `Collector`
  argument with a `FacetsCollectorManager` and update the return type to include both `TopDocs` results as well as
  facets results. (Luca Cavanna)

* GITHUB#13328: Convert many basic Lucene classes to record classes, including CollectionStatistics, TermStatistics and LeafMetadata. (Shubham Chaudhary)

* GITHUB#13780: Remove `IndexSearcher#search(List<LeafReaderContext>, Weight, Collector)` in favour of the newly
  introduced `IndexSearcher#search(LeafReaderContextPartition[], Weight, Collector)`

New Features
---------------------

@@ -141,6 +151,19 @@ New Features

* GITHUB#13604: Add Kmeans clustering on vectors (Mayya Sharipova, Jim Ferenczi, Tom Veasey)

* GITHUB#13592: Take advantage of the doc value skipper when it is primary sort in SortedNumericDocValuesRangeQuery
  and SortedSetDocValuesRangeQuery. (Ignacio Vera)

* GITHUB#13542: Add initial support for intra-segment concurrency. IndexSearcher now supports searching across leaf
  reader partitions concurrently. This is useful to max out available resource usage especially with force merged
  indices or big segments. There is still a performance penalty for queries that require segment-level computation
  ahead of time, such as points/range queries. This is an implementation limitation that we expect to improve in
  future releases, and that's why intra-segment slicing is not enabled by default, but leveraged in tests when the
  searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally
  create slices that target segment partitions. (Luca Cavanna)

* GITHUB#13741: Implement Accountable for NFARunAutomaton, fix hashCode implementation of CompiledAutomaton. (Patrick Zhai)

Improvements
---------------------

@@ -164,6 +187,8 @@ Improvements

* GITHUB#12172: Update Romanian stopwords list to include the modern unicode forms. (Trey Jones)

* GITHUB#13707: Improve Operations.isTotal() to work with non-minimal automata. (Dawid Weiss, Robert Muir)

Optimizations
---------------------

@@ -176,6 +201,8 @@ Optimizations

* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)

* GITHUB#13672: Leverage doc value skip lists in DocValuesRewriteMethod if indexed. (Greg Miller)

Bug Fixes
---------------------

@@ -213,6 +240,9 @@ Changes in Backwards Compatibility Policy

* GITHUB#13230: Remove the Kp and Lovins snowball algorithms which are not supported
  or intended for general use. (Robert Muir)

* GITHUB#13602: SearchWithCollectorTask no longer supports the `collector.class` config parameter to load a custom
  collector implementation. `collector.manager.class` allows users to load a collector manager instead. (Luca Cavanna)

Other
---------------------

@@ -253,6 +283,13 @@ Other

* GITHUB#13499: Remove usage of TopScoreDocCollector + TopFieldCollector deprecated methods (#create, #createSharedManager) (Jakub Slowinski)

Build
---------------------

* GITHUB#13649: Fix eclipse ide settings generation #13649 (Uwe Schindler, Dawid Weiss)

* GITHUB#13698: Upgrade to gradle 8.10 (Dawid Weiss)

======================== Lucene 9.12.0 =======================

API Changes

@@ -268,6 +305,12 @@ API Changes

* GITHUB#13559: Add BitSet#nextSetBit(int, int) to get the index of the first set bit in range. (Egor Potemkin)

* GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
  MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)

* GITHUB#13568, GITHUB#13750: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
  or drill-down. (Egor Potemkin)

New Features
---------------------

@@ -280,8 +323,21 @@ New Features
  and LogByteSizeMergePolicy via a new #setTargetConcurrency setter.
  (Adrien Grand)

* GITHUB#13568: Add sandbox facets module to compute facets while collecting. (Egor Potemkin, Shradha Shankar)

* GITHUB#13678: Add support for JDK 23 to the Panama Vectorization Provider. (Chris Hegarty)

* GITHUB#13689: Add a new faceting feature, dynamic range facets, which automatically picks a balanced set of numeric
  ranges based on the distribution of values that occur across all hits. For use cases that have a highly variable
  numeric doc values field, such as "price" in an e-commerce application, this facet method is powerful as it allows the
  presented ranges to adapt depending on what hits the query actually matches. This is in contrast to existing range
  faceting that requires the application to provide the specific fixed ranges up front. (Yuting Gan, Greg Miller,
  Stefan Vodita)

Improvements
---------------------
* GITHUB#13475: Re-enable intra-merge parallelism except for terms, norms, and doc values.
  Related to GITHUB#13478. (Ben Trent)

* GITHUB#13548: Refactor and javadoc update for KNN vector writer classes. (Patrick Zhai)

@@ -299,6 +355,11 @@ Improvements

* GITHUB#13201: Better cost estimation on MultiTermQuery over few terms. (Michael Froh)

* GITHUB#13735: Migrate monitor package usage of deprecated IndexSearcher#search(Query, Collector)
  to IndexSearcher#search(Query, CollectorManager). (Greg Miller)

* GITHUB#13746: Introduce ProfilerCollectorManager to parallelize search when using ProfilerCollector. (Luca Cavanna)

Optimizations
---------------------

@@ -330,8 +391,14 @@ Optimizations
  Closing many individual index files can potentially lead to a degradation in execution performance.
  Index files are mmapped one-to-one with the JDK's foreign shared Arena. The JVM deoptimizes the top
  few frames of all threads when closing a shared Arena (see JDK-8335480). We mitigate this situation
  by 1) using a confined Arena where appropriate, and 2) grouping files from the same segment to a
  single shared Arena. (Chris Hegarty, Michael Gibney, Uwe Schindler)
  when running with JDK 21 and greater, by 1) using a confined Arena where appropriate, and 2) grouping
  files from the same segment to a single shared Arena.
  A system property has been added that allows to control the total maximum number of mmapped files
  that may be associated with a single shared Arena. For example, to set the max number of permits to
  256, pass the following on the command line
  -Dorg.apache.lucene.store.MMapDirectory.sharedArenaMaxPermits=256. Setting a value of 1 associates
  a single file to a single shared arena.
  (Chris Hegarty, Michael Gibney, Uwe Schindler)

* GITHUB#13585: Lucene912PostingsFormat, the new default postings format, now
  only has 2 levels of skip data, which are inlined into postings instead of

@@ -341,6 +408,22 @@ Optimizations

* GITHUB#13581: OnHeapHnswGraph no longer allocates a lock for every graph node (Mike Sokolov)

* GITHUB#13636, GITHUB#13658: Optimizations to the decoding logic of blocks of
  postings. (Adrien Grand, Uwe Schindler, Greg Miller)

* GITHUB#13644: Improve NumericComparator competitive iterator logic by comparing the missing value with the top
  value even after the hit queue is full (Pan Guixin)

* GITHUB#13587: Use Max WAND optimizations with ToParentBlockJoinQuery when using ScoreMode.Max (Mike Pellegrini)

* GITHUB#13742: Reorder checks in LRUQueryCache#count (Shubham Chaudhary)

* GITHUB#13686: Replace Map<String,Object> with IntObjectHashMap for DV producer (Pan Guixin)

* GITHUB#13697: Add a bulk scorer to ToParentBlockJoinQuery, which delegates to the bulk scorer of the child query.
  This should speed up query evaluation when the child query has a specialized bulk scorer, such as disjunctive queries.
  (Mike Pellegrini)

Changes in runtime behavior
---------------------

@@ -366,9 +449,38 @@ Bug Fixes

* GITHUB#13627: Fix race condition on flush for DWPT seqNo generation. (Ben Trent, Ao Li)

* GITHUB#13691: Fix incorrect exponent value in explain of SigmoidFunction. (Owais Kazi)

* GITHUB#13703: Fix bug in LatLonPoint queries where narrow polygons close to latitude 90 don't
  match any points due to an Integer overflow. (Ignacio Vera)

* GITHUB#13641: Unify how KnnFormats handle missing fields and correctly handle missing vector fields when
  merging segments. (Ben Trent)

* GITHUB#13519: 8 bit scalar vector quantization is no longer
  supported: it was buggy starting in 9.11 (GITHUB#13197). 4 and 7
  bit quantization are still supported. Existing (9.x) Lucene indices
  that previously used 8 bit quantization can still be read/searched
  but the results from `KNN*VectorQuery` are silently buggy. Further
  8 bit quantized vector indexing into such (9.11) indices is not
  permitted, so your path forward if you wish to continue using the
  same 9.11 index is to index additional vectors into the same field
  with either 4 or 7 bit quantization (or no quantization), and ensure
  all older (9.11 written) segments are rewritten either via
  `IndexWriter.forceMerge` or
  `IndexWriter.addIndexes(CodecReader...)`, or reindexing entirely.

Build
---------------------

* GITHUB#13695, GITHUB#13696: Fix Gradle build sometimes gives spurious "unreferenced license file" warnings.
  (Uwe Schindler)

Other
--------------------
(No changes)

* GITHUB#13720: Add float comparison based on unit of least precision and use it to stop test failures caused by float
  summation not being associative in IEEE 754. (Alex Herbert, Stefan Vodita)

======================== Lucene 9.11.1 =======================
@@ -80,9 +80,22 @@ behaviour as 9.x, clone `PersianAnalyzer` in 9.x or create custom analyzer by us

### AutomatonQuery/CompiledAutomaton/RunAutomaton/RegExp no longer determinize (LUCENE-10010)

These classes no longer take a `determinizeWorkLimit` and no longer determinize
behind the scenes. It is the responsibility of the caller to to call
behind the scenes. It is the responsibility of the caller to call
`Operations.determinize()` for DFA execution.
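
For illustration, a minimal sketch of the explicit determinization that callers are now responsible for (the helper class is hypothetical; it assumes the `org.apache.lucene.util.automaton` classes named in this section):

```java
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;

class DeterminizeExample {
  // Callers now determinize explicitly before DFA-based execution.
  static Automaton toDfa(String regex) {
    Automaton automaton = new RegExp(regex).toAutomaton();
    return Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
  }
}
```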

### RegExp optional complement syntax has been deprecated

Support for the optional complement syntax (`~`) has been deprecated.
The `COMPLEMENT` syntax flag has been removed and replaced by the
`DEPRECATED_COMPLEMENT` flag. Users wanting to enable the deprecated
complement support can do so by explicitly passing syntax flags that
include `DEPRECATED_COMPLEMENT` when creating a `RegExp`. For example:
`new RegExp("~(foo)", RegExp.DEPRECATED_COMPLEMENT)`.

Alternatively, and quite commonly, a more simple _complement bracket expression_,
`[^...]`, may be a suitable replacement. For example, `[^fo]` matches any
character that is not an `f` or `o`.

### DocValuesFieldExistsQuery, NormsFieldExistsQuery and KnnVectorFieldExistsQuery removed in favor of FieldExistsQuery (LUCENE-10436)

These classes have been removed and consolidated into `FieldExistsQuery`. To migrate, callers simply replace those classes

@@ -180,6 +193,9 @@ access the members using method calls instead of field accesses. Affected classe

- `IOContext`, `MergeInfo`, and `FlushInfo` (GITHUB#13205)
- `BooleanClause` (GITHUB#13261)
- `TotalHits` (GITHUB#13762)
- `TermAndVector` (GITHUB#13772)
- Many basic Lucene classes, including `CollectionStatistics`, `TermStatistics` and `LeafMetadata` (GITHUB#13328)
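
For illustration, a hedged sketch of the field-to-accessor change, using `TotalHits` as the example (the surrounding method is hypothetical; the same pattern applies to the other classes listed above):

```java
import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;

class RecordAccessorExample {
  static long hitCount(IndexSearcher searcher, Query query) throws IOException {
    TopDocs topDocs = searcher.search(query, 10);
    TotalHits totalHits = topDocs.totalHits;
    // 9.x field accesses (totalHits.value, totalHits.relation) become accessor calls:
    assert totalHits.relation() == TotalHits.Relation.EQUAL_TO
        || totalHits.relation() == TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
    return totalHits.value();
  }
}
```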

### Boolean flags on IOContext replaced with a new ReadAdvice enum.

@@ -248,6 +264,11 @@ ConcurrentMergeScheduler now disables auto I/O throttling by default. There is s
happening at the CPU level, since ConcurrentMergeScheduler has a maximum number of threads it can
use, which is only a fraction of the total number of threads of the host by default.

### FieldInfos#hasVectors and FieldInfo#hasVectors renamed to hasTermVectors

To reduce confusion between term vectors and numeric vectors, `hasVectors` has been renamed to
`hasTermVectors`.

## Migration from Lucene 9.0 to Lucene 9.1

### Test framework package migration and module (LUCENE-10301)

@@ -793,3 +814,77 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont
Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
Scorer.

### `FacetsCollector#search` utility methods moved and updated

The static `search` methods exposed by `FacetsCollector` have been moved to `FacetsCollectorManager`.
Furthermore, they take a `FacetsCollectorManager` last argument in place of a `Collector` so that they support
intra query concurrency. The return type has also been updated to `FacetsCollectorManager.FacetsResult`, which includes
both `TopDocs` as well as facets results included in a reduced `FacetsCollector` instance.
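
For illustration, a hedged sketch of calling the relocated utility; the exact accessor names on `FacetsCollectorManager.FacetsResult` are assumptions here, so check the class itself before relying on them:

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

class FacetsSearchExample {
  static void searchWithFacets(IndexSearcher searcher, Query query) throws IOException {
    // The last argument is now a FacetsCollectorManager rather than a Collector.
    FacetsCollectorManager.FacetsResult result =
        FacetsCollectorManager.search(searcher, query, 10, new FacetsCollectorManager());
    TopDocs hits = result.topDocs();                             // assumed accessor name
    FacetsCollector facetsCollector = result.facetsCollector();  // assumed accessor name
  }
}
```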

### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter

`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class`
replaces it by allowing users to load a custom collector manager instead.

### BulkScorer#score(LeafCollector collector, Bits acceptDocs) removed

Use `BulkScorer#score(LeafCollector collector, Bits acceptDocs, int min, int max)` instead. In order to score the
entire leaf, provide `0` as min and `DocIdSetIterator.NO_MORE_DOCS` as max. `BulkScorer` subclasses that override
the removed method need to instead override the variant that also takes the doc id range as arguments.
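
For illustration, a minimal sketch of scoring an entire leaf with the remaining four-argument method (the wrapper class is hypothetical):

```java
import java.io.IOException;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.util.Bits;

class ScoreWholeLeaf {
  static void scoreAll(BulkScorer scorer, LeafCollector collector, Bits acceptDocs)
      throws IOException {
    // min = 0 and max = NO_MORE_DOCS cover the whole doc id range of the leaf.
    scorer.score(collector, acceptDocs, 0, DocIdSetIterator.NO_MORE_DOCS);
  }
}
```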

### CollectorManager#newCollector and Collector#getLeafCollector contract

With the introduction of intra-segment query concurrency support, multiple `LeafCollector`s may be requested for the
same `LeafReaderContext` via `Collector#getLeafCollector(LeafReaderContext)` across the different `Collector` instances
returned by multiple `CollectorManager#newCollector` calls. Any logic or computation that needs to happen
once per segment requires specific handling in the collector manager implementation. See `TotalHitCountCollectorManager`
as an example. Individual collectors don't need to be adapted, as a specific `Collector` instance will still see a given
`LeafReaderContext` once, given that it is not possible to add more than one partition of the same segment to the same
leaf slice.

### Weight#scorer, Weight#bulkScorer and Weight#scorerSupplier contract

With the introduction of intra-segment query concurrency support, multiple `Scorer`s, `ScorerSupplier`s or `BulkScorer`s
may be requested for the same `LeafReaderContext` instance as part of a single search call. That may happen concurrently
from separate threads, each searching a specific doc id range of the segment. `Weight` implementations that rely on the
assumption that a scorer, bulk scorer or scorer supplier for a given `LeafReaderContext` is requested once per search
need updating.

### Signature of IndexSearcher#searchLeaf changed

With the introduction of intra-segment query concurrency support, the `IndexSearcher#searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector)`
method now accepts two additional int arguments to identify the min/max range of doc ids that will be searched in this
leaf partition: `IndexSearcher#searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Weight weight, Collector collector)`.
Subclasses of `IndexSearcher` that call or override the `searchLeaf` method need to be updated accordingly.
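
For illustration, a hedged sketch of adapting an `IndexSearcher` subclass to the new signature (the subclass name is illustrative):

```java
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;

class MySearcher extends IndexSearcher {
  MySearcher(IndexReader reader) {
    super(reader);
  }

  @Override
  protected void searchLeaf(
      LeafReaderContext ctx, int minDocId, int maxDocId, Weight weight, Collector collector)
      throws IOException {
    // Per-partition logic would go here; the doc id range bounds this partition.
    super.searchLeaf(ctx, minDocId, maxDocId, weight, collector);
  }
}
```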

### Signature of static IndexSearcher#slices method changed

The static `IndexSearcher#slices(List<LeafReaderContext> leaves, int maxDocsPerSlice, int maxSegmentsPerSlice)`
method now supports an additional 4th and last argument to optionally enable creating segment partitions:
`IndexSearcher#slices(List<LeafReaderContext> leaves, int maxDocsPerSlice, int maxSegmentsPerSlice, boolean allowSegmentPartitions)`
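
For illustration, a hedged one-liner showing the new flag; the slice limits used here (250,000 docs, 5 segments) are illustrative values, not prescribed defaults:

```java
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;

class SlicesExample {
  static IndexSearcher.LeafSlice[] partitionedSlices(List<LeafReaderContext> leaves) {
    // Same doc/segment limits as the 3-argument form, plus allowSegmentPartitions = true.
    return IndexSearcher.slices(leaves, 250_000, 5, true);
  }
}
```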

### TotalHitCountCollectorManager constructor

`TotalHitCountCollectorManager` now requires that an array of `LeafSlice`s, retrieved via `IndexSearcher#getSlices`,
is provided to its constructor. Depending on whether segment partitions are present among slices, the manager can
optimize the type of collectors it creates and exposes via `newCollector`.
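
For illustration, a hedged sketch of constructing the manager from the searcher's slices (the helper method is hypothetical):

```java
import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TotalHitCountCollectorManager;

class HitCountExample {
  static int countHits(IndexSearcher searcher, Query query) throws IOException {
    // The manager inspects the slices to decide which collector type to create.
    TotalHitCountCollectorManager manager =
        new TotalHitCountCollectorManager(searcher.getSlices());
    return searcher.search(query, manager);
  }
}
```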

### `IndexSearcher#search(List<LeafReaderContext>, Weight, Collector)` removed

The protected `IndexSearcher#search(List<LeafReaderContext> leaves, Weight weight, Collector collector)` method has been
removed in favour of the newly introduced `search(LeafReaderContextPartition[] partitions, Weight weight, Collector collector)`.
`IndexSearcher` subclasses that override this method need to instead override the new method.

### Indexing vectors with 8 bit scalar quantization is no longer supported but 7 and 4 bit quantization still work (GITHUB#13519)

8 bit scalar vector quantization is no longer supported: it was buggy
starting in 9.11 (GITHUB#13197). 4 and 7 bit quantization are still
supported. Existing (9.11) Lucene indices that previously used 8 bit
quantization can still be read/searched but the results from
`KNN*VectorQuery` are silently buggy. Further 8 bit quantized vector
indexing into such (9.11) indices is not permitted, so your path
forward if you wish to continue using the same 9.11 index is to index
additional vectors into the same field with either 4 or 7 bit
quantization (or no quantization), and ensure all older (9.x written)
segments are rewritten either via `IndexWriter.forceMerge` or
`IndexWriter.addIndexes(CodecReader...)`, or reindexing entirely.
@ -1,5 +1,5 @@
|
|||
{
|
||||
"gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
|
||||
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "1f7a446f3483326385eef257cea8366c27da0850",
|
||||
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "e62dcd8c25219d8f5d783823b228ffe38d2bacde",
|
||||
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex": "f52109bb7d5701979fde90aeeeda726246a8d5fd"
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
|
||||
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "ac298e08bc5b96202efca0c01f9f0376fda976bd",
|
||||
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "2b5df5ff35543a6380c82f298225eb5fa06e4453",
|
||||
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex": "0b8c7774b98e8237702013e82c352d4711509bd0"
|
||||
}
|
|
@ -52,7 +52,7 @@ class CheckCompoundPattern {
|
|||
|
||||
boolean prohibitsCompounding(CharsRef word, int breakPos, Root<?> rootBefore, Root<?> rootAfter) {
|
||||
if (isNonAffixedPattern(endChars)) {
|
||||
if (!charsMatch(word, breakPos - rootBefore.word.length(), rootBefore.word)) {
|
||||
if (!charsMatch(word, breakPos - rootBefore.word().length(), rootBefore.word())) {
|
||||
return false;
|
||||
}
|
||||
} else if (!charsMatch(word, breakPos - endChars.length(), endChars)) {
|
||||
|
@ -60,7 +60,7 @@ class CheckCompoundPattern {
|
|||
}
|
||||
|
||||
if (isNonAffixedPattern(beginChars)) {
|
||||
if (!charsMatch(word, breakPos, rootAfter.word)) {
|
||||
if (!charsMatch(word, breakPos, rootAfter.word())) {
|
||||
return false;
|
||||
}
|
||||
} else if (!charsMatch(word, breakPos, beginChars)) {
|
||||
|
@ -84,7 +84,7 @@ class CheckCompoundPattern {
|
|||
|
||||
private boolean hasAllFlags(Root<?> root, char[] flags) {
|
||||
for (char flag : flags) {
|
||||
if (!dictionary.hasFlag(root.entryId, flag)) {
|
||||
if (!dictionary.hasFlag(root.entryId(), flag)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.ArrayList;
|
|||
import java.util.Comparator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
@ -62,8 +61,7 @@ class GeneratingSuggester {
|
|||
|
||||
private List<Weighted<Root<String>>> findSimilarDictionaryEntries(
|
||||
String word, WordCase originalCase) {
|
||||
Comparator<Weighted<Root<String>>> natural = Comparator.naturalOrder();
|
||||
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(natural.reversed());
|
||||
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(Comparator.reverseOrder());
|
||||
|
||||
char[] excludeFlags = dictionary.allNonSuggestibleFlags();
|
||||
FlagEnumerator.Lookup flagLookup = dictionary.flagLookup;
|
||||
|
@ -111,7 +109,7 @@ class GeneratingSuggester {
|
|||
|
||||
private static boolean isWorseThan(int score, CharsRef candidate, Weighted<Root<String>> root) {
|
||||
return score < root.score
|
||||
|| score == root.score && CharSequence.compare(candidate, root.word.word) > 0;
|
||||
|| score == root.score && CharSequence.compare(candidate, root.word.word()) > 0;
|
||||
}
|
||||
|
||||
private void processSuggestibleWords(
|
||||
|
@ -162,11 +160,11 @@ class GeneratingSuggester {
|
|||
List<char[]> crossProducts = new ArrayList<>();
|
||||
Set<String> result = new LinkedHashSet<>();
|
||||
|
||||
if (!dictionary.hasFlag(root.entryId, dictionary.needaffix)) {
|
||||
result.add(root.word);
|
||||
if (!dictionary.hasFlag(root.entryId(), dictionary.needaffix)) {
|
||||
result.add(root.word());
|
||||
}
|
||||
|
||||
char[] wordChars = root.word.toCharArray();
|
||||
char[] wordChars = root.word().toCharArray();
|
||||
|
||||
// suffixes
|
||||
processAffixes(
|
||||
|
@ -180,7 +178,7 @@ class GeneratingSuggester {
|
|||
}
|
||||
|
||||
String suffix = misspelled.substring(misspelled.length() - suffixLength);
|
||||
String withSuffix = root.word.substring(0, root.word.length() - stripLength) + suffix;
|
||||
String withSuffix = root.word().substring(0, root.word().length() - stripLength) + suffix;
|
||||
result.add(withSuffix);
|
||||
if (dictionary.isCrossProduct(suffixId)) {
|
||||
crossProducts.add(withSuffix.toCharArray());
|
||||
|
@ -192,7 +190,7 @@ class GeneratingSuggester {
|
|||
true,
|
||||
misspelled,
|
||||
(prefixLength, prefixId) -> {
|
||||
if (!dictionary.hasFlag(root.entryId, dictionary.affixData(prefixId, AFFIX_FLAG))
|
||||
if (!dictionary.hasFlag(root.entryId(), dictionary.affixData(prefixId, AFFIX_FLAG))
|
||||
|| !dictionary.isCrossProduct(prefixId)) {
|
||||
return;
|
||||
}
|
||||
|
@ -217,7 +215,7 @@ class GeneratingSuggester {
|
|||
if (hasCompatibleFlags(root, prefixId)
|
||||
&& checkAffixCondition(prefixId, wordChars, stripLength, stemLength)) {
|
||||
String prefix = misspelled.substring(0, prefixLength);
|
||||
result.add(prefix + root.word.substring(stripLength));
|
||||
result.add(prefix + root.word().substring(stripLength));
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -263,7 +261,7 @@ class GeneratingSuggester {
|
|||
}
|
||||
|
||||
private boolean hasCompatibleFlags(Root<?> root, int affixId) {
|
||||
if (!dictionary.hasFlag(root.entryId, dictionary.affixData(affixId, AFFIX_FLAG))) {
|
||||
if (!dictionary.hasFlag(root.entryId(), dictionary.affixData(affixId, AFFIX_FLAG))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -447,28 +445,8 @@ class GeneratingSuggester {
|
|||
return commonScore;
|
||||
}
|
||||
|
||||
private static class Weighted<T extends Comparable<T>> implements Comparable<Weighted<T>> {
|
||||
final T word;
|
||||
final int score;
|
||||
|
||||
Weighted(T word, int score) {
|
||||
this.word = word;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof Weighted)) return false;
|
||||
@SuppressWarnings("unchecked")
|
||||
Weighted<T> that = (Weighted<T>) o;
|
||||
return score == that.score && word.equals(that.word);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(word, score);
|
||||
}
|
||||
private record Weighted<T extends Comparable<T>>(T word, int score)
|
||||
implements Comparable<Weighted<T>> {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -132,7 +132,7 @@ public class Hunspell {
|
|||
Boolean checkSimpleWord(char[] wordChars, int length, WordCase originalCase) {
|
||||
Root<CharsRef> entry = findStem(wordChars, 0, length, originalCase, SIMPLE_WORD);
|
||||
if (entry != null) {
|
||||
return !dictionary.hasFlag(entry.entryId, dictionary.forbiddenword);
|
||||
return !dictionary.hasFlag(entry.entryId(), dictionary.forbiddenword);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -229,7 +229,7 @@ public class Hunspell {
|
|||
stem = findStem(word.chars, word.offset, breakPos + 1, originalCase, context);
|
||||
}
|
||||
if (stem != null
|
||||
&& !dictionary.hasFlag(stem.entryId, dictionary.forbiddenword)
|
||||
&& !dictionary.hasFlag(stem.entryId(), dictionary.forbiddenword)
|
||||
&& (prev == null || prev.mayCompound(stem, breakPos, originalCase))) {
|
||||
CompoundPart part = new CompoundPart(prev, word, breakPos, stem, null);
|
||||
if (checkCompoundsAfter(originalCase, part)) {
|
||||
|
@ -274,7 +274,7 @@ public class Hunspell {
|
|||
Root<CharsRef> lastRoot =
|
||||
findStem(word.chars, breakOffset, remainingLength, originalCase, COMPOUND_END);
|
||||
if (lastRoot != null
|
||||
&& !dictionary.hasFlag(lastRoot.entryId, dictionary.forbiddenword)
|
||||
&& !dictionary.hasFlag(lastRoot.entryId(), dictionary.forbiddenword)
|
||||
&& !(dictionary.checkCompoundDup && prev.root.equals(lastRoot))
|
||||
&& !hasForceUCaseProblem(lastRoot, originalCase, word.chars)
|
||||
&& prev.mayCompound(lastRoot, remainingLength, originalCase)) {
|
||||
|
@ -288,7 +288,7 @@ public class Hunspell {
|
|||
private boolean hasForceUCaseProblem(Root<?> root, WordCase originalCase, char[] wordChars) {
|
||||
if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false;
|
||||
if (originalCase == null && Character.isUpperCase(wordChars[0])) return false;
|
||||
return dictionary.hasFlag(root.entryId, dictionary.forceUCase);
|
||||
return dictionary.hasFlag(root.entryId(), dictionary.forceUCase);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -117,7 +115,16 @@ public final class HunspellStemFilter extends TokenFilter {
|
|||
}
|
||||
|
||||
if (longestOnly && buffer.size() > 1) {
|
||||
Collections.sort(buffer, lengthComparator);
|
||||
buffer.sort(
|
||||
(o1, o2) -> {
|
||||
int cmp = Integer.compare(o2.length, o1.length);
|
||||
if (cmp == 0) {
|
||||
// tie break on text
|
||||
return o2.compareTo(o1);
|
||||
} else {
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
CharsRef stem = buffer.remove(0);
|
||||
|
@ -139,18 +146,4 @@ public final class HunspellStemFilter extends TokenFilter {
|
|||
super.reset();
|
||||
buffer = null;
|
||||
}
|
||||
|
||||
static final Comparator<CharsRef> lengthComparator =
|
||||
new Comparator<CharsRef>() {
|
||||
@Override
|
||||
public int compare(CharsRef o1, CharsRef o2) {
|
||||
int cmp = Integer.compare(o2.length, o1.length);
|
||||
if (cmp == 0) {
|
||||
// tie break on text
|
||||
return o2.compareTo(o1);
|
||||
} else {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -16,36 +16,13 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
class Root<T extends CharSequence> implements Comparable<Root<T>> {
|
||||
final T word;
|
||||
final int entryId;
|
||||
|
||||
Root(T word, int entryId) {
|
||||
this.word = word;
|
||||
this.entryId = entryId;
|
||||
}
|
||||
record Root<T extends CharSequence>(T word, int entryId) implements Comparable<Root<T>> {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return word.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof Root)) return false;
|
||||
@SuppressWarnings("unchecked")
|
||||
Root<T> root = (Root<T>) o;
|
||||
return entryId == root.entryId && word.equals(root.word);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(word, entryId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Root<T> o) {
|
||||
return CharSequence.compare(word, o.word);
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -147,9 +146,7 @@ public class FingerprintFilter extends TokenFilter {
|
|||
|
||||
Arrays.sort(
|
||||
items,
|
||||
new Comparator<Object>() {
|
||||
@Override
|
||||
public int compare(Object o1, Object o2) {
|
||||
(o1, o2) -> {
|
||||
char[] v1 = (char[]) o1;
|
||||
char[] v2 = (char[]) o2;
|
||||
int len1 = v1.length;
|
||||
|
@ -166,7 +163,6 @@ public class FingerprintFilter extends TokenFilter {
|
|||
k++;
|
||||
}
|
||||
return len1 - len2;
|
||||
}
|
||||
});
|
||||
|
||||
// TODO lets append directly to termAttribute?
|
||||
|
|
|
@ -59,12 +59,12 @@ public class PatternTypingFilter extends TokenFilter {
|
|||
public final boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
for (PatternTypingRule rule : replacementAndFlagByPattern) {
|
||||
Matcher matcher = rule.getPattern().matcher(termAtt);
|
||||
Matcher matcher = rule.pattern().matcher(termAtt);
|
||||
if (matcher.find()) {
|
||||
// allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string
|
||||
// creation
|
||||
typeAtt.setType(matcher.replaceFirst(rule.getTypeTemplate()));
|
||||
flagAtt.setFlags(rule.getFlags());
|
||||
typeAtt.setType(matcher.replaceFirst(rule.typeTemplate()));
|
||||
flagAtt.setFlags(rule.flags());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -74,27 +74,5 @@ public class PatternTypingFilter extends TokenFilter {
|
|||
}
|
||||
|
||||
/** Value holding class for pattern typing rules. */
|
||||
public static class PatternTypingRule {
|
||||
private final Pattern pattern;
|
||||
private final int flags;
|
||||
private final String typeTemplate;
|
||||
|
||||
public PatternTypingRule(Pattern pattern, int flags, String typeTemplate) {
|
||||
this.pattern = pattern;
|
||||
this.flags = flags;
|
||||
this.typeTemplate = typeTemplate;
|
||||
}
|
||||
|
||||
public Pattern getPattern() {
|
||||
return pattern;
|
||||
}
|
||||
|
||||
public int getFlags() {
|
||||
return flags;
|
||||
}
|
||||
|
||||
public String getTypeTemplate() {
|
||||
return typeTemplate;
|
||||
}
|
||||
}
|
||||
public record PatternTypingRule(Pattern pattern, int flags, String typeTemplate) {}
|
||||
}
|
||||
|
|
|
@ -142,22 +142,10 @@ public final class SynonymGraphFilter extends TokenFilter {
|
|||
}
|
||||
}
|
||||
|
||||
static class BufferedOutputToken {
|
||||
final String term;
|
||||
|
||||
// Non-null if this was an incoming token:
|
||||
final State state;
|
||||
|
||||
final int startNode;
|
||||
final int endNode;
|
||||
|
||||
public BufferedOutputToken(State state, String term, int startNode, int endNode) {
|
||||
this.state = state;
|
||||
this.term = term;
|
||||
this.startNode = startNode;
|
||||
this.endNode = endNode;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @param state Non-null if this was an incoming token:
|
||||
*/
|
||||
record BufferedOutputToken(State state, String term, int startNode, int endNode) {}
|
||||
|
||||
/**
|
||||
* Apply previously built synonyms to incoming tokens.
|
||||
|
|
|
@ -18,17 +18,15 @@ package org.apache.lucene.analysis.synonym.word2vec;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Wraps a term and boost */
|
||||
public class TermAndBoost {
|
||||
/** the term */
|
||||
public final BytesRef term;
|
||||
|
||||
/** the boost */
|
||||
public final float boost;
|
||||
|
||||
/**
|
||||
* Wraps a term and boost
|
||||
*
|
||||
* @param term the term
|
||||
* @param boost the boost
|
||||
*/
|
||||
public record TermAndBoost(BytesRef term, float boost) {
|
||||
/** Creates a new TermAndBoost */
|
||||
public TermAndBoost(BytesRef term, float boost) {
|
||||
this.term = BytesRef.deepCopyOf(term);
|
||||
this.boost = boost;
|
||||
public TermAndBoost {
|
||||
term = BytesRef.deepCopyOf(term);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,25 +56,25 @@ public class Word2VecModel implements RandomAccessVectorValues.Floats {
|
|||
}
|
||||
|
||||
public void addTermAndVector(TermAndVector modelEntry) {
|
||||
modelEntry.normalizeVector();
|
||||
modelEntry = modelEntry.normalizeVector();
|
||||
this.termsAndVectors[loadedCount++] = modelEntry;
|
||||
this.word2Vec.add(modelEntry.getTerm());
|
||||
this.word2Vec.add(modelEntry.term());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue(int targetOrd) {
|
||||
return termsAndVectors[targetOrd].getVector();
|
||||
return termsAndVectors[targetOrd].vector();
|
||||
}
|
||||
|
||||
public float[] vectorValue(BytesRef term) {
|
||||
int termOrd = this.word2Vec.find(term);
|
||||
if (termOrd < 0) return null;
|
||||
TermAndVector entry = this.termsAndVectors[termOrd];
|
||||
return (entry == null) ? null : entry.getVector();
|
||||
return (entry == null) ? null : entry.vector();
|
||||
}
|
||||
|
||||
public BytesRef termValue(int targetOrd) {
|
||||
return termsAndVectors[targetOrd].getTerm();
|
||||
return termsAndVectors[targetOrd].term();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -80,7 +80,7 @@ public final class Word2VecSynonymFilter extends TokenFilter {
|
|||
clearAttributes();
|
||||
restoreState(this.lastState);
|
||||
termAtt.setEmpty();
|
||||
termAtt.append(synonym.term.utf8ToString());
|
||||
termAtt.append(synonym.term().utf8ToString());
|
||||
typeAtt.setType(SynonymGraphFilter.TYPE_SYNONYM);
|
||||
posLenAtt.setPositionLength(1);
|
||||
posIncrementAtt.setPositionIncrement(0);
|
||||
|
|
|
@ -1490,7 +1490,7 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
assertTrue(approxEquals(actual, expected));
|
||||
assertTrue(Operations.sameLanguage(actual, expected));
|
||||
assertTrue(AutomatonTestUtil.sameLanguage(actual, expected));
|
||||
}
|
||||
|
||||
a.close();
|
||||
|
|
|
@ -64,7 +64,7 @@ public class TestWord2VecSynonymProvider extends LuceneTestCase {
|
|||
|
||||
assertEquals(4, actualSynonymsResults.size());
|
||||
for (int i = 0; i < expectedSynonyms.length; i++) {
|
||||
assertEquals(new BytesRef(expectedSynonyms[i]), actualSynonymsResults.get(i).term);
|
||||
assertEquals(new BytesRef(expectedSynonyms[i]), actualSynonymsResults.get(i).term());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,8 +83,8 @@ public class TestWord2VecSynonymProvider extends LuceneTestCase {
|
|||
|
||||
BytesRef expectedFirstSynonymTerm = new BytesRef("b");
|
||||
double expectedFirstSynonymBoost = 1.0;
|
||||
assertEquals(expectedFirstSynonymTerm, actualSynonymsResults.get(0).term);
|
||||
assertEquals(expectedFirstSynonymBoost, actualSynonymsResults.get(0).boost, 0.001f);
|
||||
assertEquals(expectedFirstSynonymTerm, actualSynonymsResults.get(0).term());
|
||||
assertEquals(expectedFirstSynonymBoost, actualSynonymsResults.get(0).boost(), 0.001f);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -120,8 +120,8 @@ public class TestWord2VecSynonymProvider extends LuceneTestCase {
|
|||
@Test
|
||||
public void normalizedVector_shouldReturnModule1() {
|
||||
TermAndVector synonymTerm = new TermAndVector(new BytesRef("a"), new float[] {10, 10});
|
||||
synonymTerm.normalizeVector();
|
||||
float[] vector = synonymTerm.getVector();
|
||||
synonymTerm = synonymTerm.normalizeVector();
|
||||
float[] vector = synonymTerm.vector();
|
||||
float len = 0;
|
||||
for (int i = 0; i < vector.length; i++) {
|
||||
len += vector[i] * vector[i];
|
||||
|
|
|
@ -139,19 +139,7 @@ public final class JapaneseCompletionFilter extends TokenFilter {
|
|||
}
|
||||
}
|
||||
|
||||
private static class CompletionToken {
|
||||
final String term;
|
||||
final boolean isFirst;
|
||||
final int startOffset;
|
||||
final int endOffset;
|
||||
|
||||
CompletionToken(String term, boolean isFirst, int startOffset, int endOffset) {
|
||||
this.term = term;
|
||||
this.isFirst = isFirst;
|
||||
this.startOffset = startOffset;
|
||||
this.endOffset = endOffset;
|
||||
}
|
||||
}
|
||||
private record CompletionToken(String term, boolean isFirst, int startOffset, int endOffset) {}
|
||||
|
||||
private static class CompletionTokenGenerator implements Iterator<CompletionToken> {
|
||||
|
||||
|
|
|
@ -180,13 +180,5 @@ public class KatakanaRomanizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
private static class MatchedKeystroke {
|
||||
final int keystrokeLen;
|
||||
final int keystrokeIndex;
|
||||
|
||||
MatchedKeystroke(int keystrokeLen, int keystrokeIndex) {
|
||||
this.keystrokeLen = keystrokeLen;
|
||||
this.keystrokeIndex = keystrokeIndex;
|
||||
}
|
||||
}
|
||||
private record MatchedKeystroke(int keystrokeLen, int keystrokeIndex) {}
|
||||
}
|
||||
|
|
|
@@ -20,8 +20,6 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.morph.Dictionary;
@@ -83,14 +81,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
// TODO: should we allow multiple segmentations per input 'phrase'?
// the old treemap didn't support this either, and i'm not sure if it's needed/useful?

Collections.sort(
featureEntries,
new Comparator<String[]>() {
@Override
public int compare(String[] left, String[] right) {
return left[0].compareTo(right[0]);
}
});
featureEntries.sort((left, right) -> left[0].compareTo(right[0]));

List<String> data = new ArrayList<>(featureEntries.size());
List<int[]> segmentations = new ArrayList<>(featureEntries.size());
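
The anonymous Comparator<String[]> above collapses into a lambda on List.sort, which also makes the Collections and Comparator imports removable. An equivalent spelling with Comparator.comparing, shown only as an alternative form of the same sort (the demo class is made up for illustration):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

class SortDemo {
  public static void main(String[] args) {
    List<String[]> featureEntries = new ArrayList<>();
    featureEntries.add(new String[] {"b", "x"});
    featureEntries.add(new String[] {"a", "y"});
    // Same ordering as (left, right) -> left[0].compareTo(right[0])
    featureEntries.sort(Comparator.comparing((String[] entry) -> entry[0]));
    System.out.println(featureEntries.get(0)[0]); // prints a
  }
}
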
@ -268,19 +268,19 @@ final class Viterbi
|
|||
final KoMorphData.Morpheme morpheme = morphemes[i];
|
||||
final Token compoundToken;
|
||||
if (token.getPOSType() == POS.Type.COMPOUND) {
|
||||
assert endOffset - morpheme.surfaceForm.length() >= 0;
|
||||
assert endOffset - morpheme.surfaceForm().length() >= 0;
|
||||
compoundToken =
|
||||
new DecompoundToken(
|
||||
morpheme.posTag,
|
||||
morpheme.surfaceForm,
|
||||
endOffset - morpheme.surfaceForm.length(),
|
||||
morpheme.posTag(),
|
||||
morpheme.surfaceForm(),
|
||||
endOffset - morpheme.surfaceForm().length(),
|
||||
endOffset,
|
||||
backType);
|
||||
} else {
|
||||
compoundToken =
|
||||
new DecompoundToken(
|
||||
morpheme.posTag,
|
||||
morpheme.surfaceForm,
|
||||
morpheme.posTag(),
|
||||
morpheme.surfaceForm(),
|
||||
token.getStartOffset(),
|
||||
token.getEndOffset(),
|
||||
backType);
|
||||
|
@ -289,7 +289,7 @@ final class Viterbi
|
|||
compoundToken.setPositionIncrement(0);
|
||||
}
|
||||
++posLen;
|
||||
endOffset -= morpheme.surfaceForm.length();
|
||||
endOffset -= morpheme.surfaceForm().length();
|
||||
pending.add(compoundToken);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" add token=" + pending.get(pending.size() - 1));
|
||||
|
|
|
@ -22,15 +22,7 @@ import org.apache.lucene.analysis.morph.MorphData;
|
|||
/** Represents Korean morphological information. */
|
||||
public interface KoMorphData extends MorphData {
|
||||
/** A morpheme extracted from a compound token. */
|
||||
class Morpheme {
|
||||
public final POS.Tag posTag;
|
||||
public final String surfaceForm;
|
||||
|
||||
public Morpheme(POS.Tag posTag, String surfaceForm) {
|
||||
this.posTag = posTag;
|
||||
this.surfaceForm = surfaceForm;
|
||||
}
|
||||
}
|
||||
record Morpheme(POS.Tag posTag, String surfaceForm) {}
|
||||
|
||||
/**
|
||||
* Get the {@link org.apache.lucene.analysis.ko.POS.Type} of specified word (morpheme, compound,
|
||||
|
|
|
@ -150,13 +150,13 @@ class TokenInfoDictionaryEntryWriter extends DictionaryEntryWriter {
|
|||
int compoundOffset = 0;
|
||||
for (KoMorphData.Morpheme morpheme : morphemes) {
|
||||
if (hasSinglePOS == false) {
|
||||
buffer.put((byte) morpheme.posTag.ordinal());
|
||||
buffer.put((byte) morpheme.posTag().ordinal());
|
||||
}
|
||||
if (posType != POS.Type.INFLECT) {
|
||||
buffer.put((byte) morpheme.surfaceForm.length());
|
||||
compoundOffset += morpheme.surfaceForm.length();
|
||||
buffer.put((byte) morpheme.surfaceForm().length());
|
||||
compoundOffset += morpheme.surfaceForm().length();
|
||||
} else {
|
||||
writeString(morpheme.surfaceForm);
|
||||
writeString(morpheme.surfaceForm());
|
||||
}
|
||||
assert compoundOffset <= entry[0].length() : Arrays.toString(entry);
|
||||
}
|
||||
|
|
|
@ -86,11 +86,11 @@ public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSp
|
|||
builder.append("+");
|
||||
}
|
||||
builder
|
||||
.append(morpheme.surfaceForm)
|
||||
.append(morpheme.surfaceForm())
|
||||
.append('/')
|
||||
.append(morpheme.posTag.name())
|
||||
.append(morpheme.posTag().name())
|
||||
.append('(')
|
||||
.append(morpheme.posTag.description())
|
||||
.append(morpheme.posTag().description())
|
||||
.append(')');
|
||||
}
|
||||
return builder.toString();
|
||||
|
|
|
@ -170,14 +170,14 @@ public class TestTokenInfoDictionary extends LuceneTestCase {
|
|||
if (decompound != null) {
|
||||
int offset = 0;
|
||||
for (KoMorphData.Morpheme morph : decompound) {
|
||||
assertTrue(UnicodeUtil.validUTF16String(morph.surfaceForm));
|
||||
assertFalse(morph.surfaceForm.isEmpty());
|
||||
assertEquals(morph.surfaceForm.trim(), morph.surfaceForm);
|
||||
assertTrue(UnicodeUtil.validUTF16String(morph.surfaceForm()));
|
||||
assertFalse(morph.surfaceForm().isEmpty());
|
||||
assertEquals(morph.surfaceForm().trim(), morph.surfaceForm());
|
||||
if (type != POS.Type.INFLECT) {
|
||||
assertEquals(
|
||||
morph.surfaceForm,
|
||||
surfaceForm.substring(offset, offset + morph.surfaceForm.length()));
|
||||
offset += morph.surfaceForm.length();
|
||||
morph.surfaceForm(),
|
||||
surfaceForm.substring(offset, offset + morph.surfaceForm().length()));
|
||||
offset += morph.surfaceForm().length();
|
||||
}
|
||||
}
|
||||
assertTrue(offset <= surfaceForm.length());
|
||||
|
|
|
@ -43,10 +43,10 @@ public class TestUserDictionary extends LuceneTestCase {
|
|||
dictionary.getMorphAttributes().getMorphemes(wordIds.get(1), sArray, 0, s.length());
|
||||
assertNotNull(decompound);
|
||||
assertEquals(2, decompound.length);
|
||||
assertEquals(decompound[0].posTag, POS.Tag.NNG);
|
||||
assertEquals(decompound[0].surfaceForm, "세종");
|
||||
assertEquals(decompound[1].posTag, POS.Tag.NNG);
|
||||
assertEquals(decompound[1].surfaceForm, "시");
|
||||
assertEquals(decompound[0].posTag(), POS.Tag.NNG);
|
||||
assertEquals(decompound[0].surfaceForm(), "세종");
|
||||
assertEquals(decompound[1].posTag(), POS.Tag.NNG);
|
||||
assertEquals(decompound[1].surfaceForm(), "시");
|
||||
|
||||
s = "c++";
|
||||
sArray = s.toCharArray();
|
||||
|
|
|
@ -103,19 +103,20 @@ final class ForUtil {
|
|||
for (int bpv = 1; bpv <= 32; ++bpv) {
|
||||
final FormatAndBits formatAndBits =
|
||||
PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv, acceptableOverheadRatio);
|
||||
assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
|
||||
assert formatAndBits.bitsPerValue <= 32;
|
||||
assert formatAndBits.format().isSupported(formatAndBits.bitsPerValue());
|
||||
assert formatAndBits.bitsPerValue() <= 32;
|
||||
encodedSizes[bpv] =
|
||||
encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
|
||||
encodedSize(
|
||||
formatAndBits.format(), PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue());
|
||||
encoders[bpv] =
|
||||
PackedInts.getEncoder(
|
||||
formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
|
||||
formatAndBits.format(), PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue());
|
||||
decoders[bpv] =
|
||||
PackedInts.getDecoder(
|
||||
formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
|
||||
formatAndBits.format(), PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue());
|
||||
iterations[bpv] = computeIterations(decoders[bpv]);
|
||||
|
||||
out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1));
|
||||
out.writeVInt(formatAndBits.format().getId() << 5 | (formatAndBits.bitsPerValue() - 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValuesSkipIndexType;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
|
@ -209,7 +210,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
|
|||
storePayloads,
|
||||
indexOptions,
|
||||
docValuesType,
|
||||
false,
|
||||
DocValuesSkipIndexType.NONE,
|
||||
dvGen,
|
||||
attributes,
|
||||
pointDataDimensionCount,
|
||||
|
@ -347,7 +348,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
|
|||
output.writeVInt(fi.number);
|
||||
|
||||
byte bits = 0x0;
|
||||
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
|
||||
if (fi.hasTermVectors()) bits |= STORE_TERMVECTOR;
|
||||
if (fi.omitsNorms()) bits |= OMIT_NORMS;
|
||||
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
|
||||
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
package org.apache.lucene.backward_codecs.lucene80;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader;
|
||||
import org.apache.lucene.backward_codecs.packed.LegacyDirectReader;
|
||||
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
|
||||
|
@ -41,6 +39,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.internal.hppc.IntObjectHashMap;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
|
@@ -53,11 +52,11 @@ import org.apache.lucene.util.compress.LZ4;

/** reader for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
private final IntObjectHashMap<NumericEntry> numerics = new IntObjectHashMap<>();
private final IntObjectHashMap<BinaryEntry> binaries = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedEntry> sorted = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedSetEntry> sortedSets = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedNumericEntry> sortedNumerics = new IntObjectHashMap<>();
private final IndexInput data;
private final int maxDoc;
private int version = -1;
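
The per-field entry maps switch from String field names to the field's numeric id, and the hunks that follow change the readFields loop and the get* lookups from info.name/field.name to info.number/field.number accordingly. A rough sketch of the lookup pattern using a plain HashMap<Integer, ...> stand-in (the real code uses Lucene's internal IntObjectHashMap so the int key is never boxed; FieldMeta here is an illustrative placeholder, not the codec's entry types):

import java.util.HashMap;
import java.util.Map;

class FieldMetaRegistry {
  record FieldMeta(int number, String name, long dataOffset) {}

  // Keyed by FieldInfo.number; boxing disappears once this is an IntObjectHashMap.
  private final Map<Integer, FieldMeta> byNumber = new HashMap<>();

  void register(FieldMeta meta) {
    byNumber.put(meta.number(), meta);
  }

  FieldMeta lookup(int fieldNumber) {
    return byNumber.get(fieldNumber); // O(1) by numeric id, no string hashing
  }
}
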
@ -139,7 +138,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
}
|
||||
byte type = meta.readByte();
|
||||
if (type == Lucene80DocValuesFormat.NUMERIC) {
|
||||
numerics.put(info.name, readNumeric(meta));
|
||||
numerics.put(info.number, readNumeric(meta));
|
||||
} else if (type == Lucene80DocValuesFormat.BINARY) {
|
||||
final boolean compressed;
|
||||
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
|
||||
|
@ -158,13 +157,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
} else {
|
||||
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
|
||||
}
|
||||
binaries.put(info.name, readBinary(meta, compressed));
|
||||
binaries.put(info.number, readBinary(meta, compressed));
|
||||
} else if (type == Lucene80DocValuesFormat.SORTED) {
|
||||
sorted.put(info.name, readSorted(meta));
|
||||
sorted.put(info.number, readSorted(meta));
|
||||
} else if (type == Lucene80DocValuesFormat.SORTED_SET) {
|
||||
sortedSets.put(info.name, readSortedSet(meta));
|
||||
sortedSets.put(info.number, readSortedSet(meta));
|
||||
} else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) {
|
||||
sortedNumerics.put(info.name, readSortedNumeric(meta));
|
||||
sortedNumerics.put(info.number, readSortedNumeric(meta));
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid type: " + type, meta);
|
||||
}
|
||||
|
@ -426,7 +425,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
|
||||
NumericEntry entry = numerics.get(field.name);
|
||||
NumericEntry entry = numerics.get(field.number);
|
||||
return getNumeric(entry);
|
||||
}
|
||||
|
||||
|
@ -915,7 +914,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
|
||||
BinaryEntry entry = binaries.get(field.name);
|
||||
BinaryEntry entry = binaries.get(field.number);
|
||||
if (entry.compressed) {
|
||||
return getCompressedBinary(entry);
|
||||
} else {
|
||||
|
@ -973,7 +972,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public SortedDocValues getSorted(FieldInfo field) throws IOException {
|
||||
SortedEntry entry = sorted.get(field.name);
|
||||
SortedEntry entry = sorted.get(field.number);
|
||||
return getSorted(entry);
|
||||
}
|
||||
|
||||
|
@ -1407,7 +1406,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
||||
SortedNumericEntry entry = sortedNumerics.get(field.name);
|
||||
SortedNumericEntry entry = sortedNumerics.get(field.number);
|
||||
if (entry.numValues == entry.numDocsWithField) {
|
||||
return DocValues.singleton(getNumeric(entry));
|
||||
}
|
||||
|
@ -1543,7 +1542,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
||||
SortedSetEntry entry = sortedSets.get(field.name);
|
||||
SortedSetEntry entry = sortedSets.get(field.number);
|
||||
if (entry.singleValueEntry != null) {
|
||||
return DocValues.singleton(getSorted(entry.singleValueEntry));
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValuesSkipIndexType;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
|
@ -186,7 +187,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
|
|||
storePayloads,
|
||||
indexOptions,
|
||||
docValuesType,
|
||||
false,
|
||||
DocValuesSkipIndexType.NONE,
|
||||
dvGen,
|
||||
attributes,
|
||||
pointDataDimensionCount,
|
||||
|
@ -333,7 +334,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
|
|||
output.writeVInt(fi.number);
|
||||
|
||||
byte bits = 0x0;
|
||||
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
|
||||
if (fi.hasTermVectors()) bits |= STORE_TERMVECTOR;
|
||||
if (fi.omitsNorms()) bits |= OMIT_NORMS;
|
||||
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
|
||||
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
|
||||
|
|
|
@ -224,6 +224,9 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
|||
@Override
|
||||
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
return getOffHeapVectorValues(fieldEntry);
|
||||
}
|
||||
|
||||
|
|
|
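
Each getFloatVectorValues/getByteVectorValues override above (and in the Lucene91/92/94/95 readers that follow) gains the same guard: fail fast with an IllegalArgumentException when the requested field has no entry, rather than letting a null fieldEntry surface later as a NullPointerException. A minimal sketch of the guard with hypothetical names:

import java.util.Map;

class VectorReaderSketch {
  private final Map<String, Object> fields;

  VectorReaderSketch(Map<String, Object> fields) {
    this.fields = fields;
  }

  Object fieldEntryOrThrow(String field) {
    Object fieldEntry = fields.get(field);
    if (fieldEntry == null) {
      // Same message shape as the patch: make the unknown-field case explicit.
      throw new IllegalArgumentException("field=\"" + field + "\" not found");
    }
    return fieldEntry;
  }
}
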
@ -218,6 +218,9 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
@Override
|
||||
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
return getOffHeapVectorValues(fieldEntry);
|
||||
}
|
||||
|
||||
|
|
|
@ -215,6 +215,9 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
|||
@Override
|
||||
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
return OffHeapFloatVectorValues.load(fieldEntry, vectorData);
|
||||
}
|
||||
|
||||
|
|
|
@ -233,6 +233,9 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
@Override
|
||||
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
if (fieldEntry.vectorEncoding != VectorEncoding.FLOAT32) {
|
||||
throw new IllegalArgumentException(
|
||||
"field=\""
|
||||
|
@ -248,6 +251,9 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
@Override
|
||||
public ByteVectorValues getByteVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
if (fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
|
||||
throw new IllegalArgumentException(
|
||||
"field=\""
|
||||
|
|
|
@ -241,6 +241,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
|
|||
@Override
|
||||
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
if (fieldEntry.vectorEncoding != VectorEncoding.FLOAT32) {
|
||||
throw new IllegalArgumentException(
|
||||
"field=\""
|
||||
|
@ -264,6 +267,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
|
|||
@Override
|
||||
public ByteVectorValues getByteVectorValues(String field) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" not found");
|
||||
}
|
||||
if (fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
|
||||
throw new IllegalArgumentException(
|
||||
"field=\""
|
||||
|
|
|
@ -46,10 +46,10 @@ import org.apache.lucene.store.IndexOutput;
|
|||
* uptos(position, payload). 4. start offset.
|
||||
*/
|
||||
public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
|
||||
private int[] lastSkipDoc;
|
||||
private long[] lastSkipDocPointer;
|
||||
private long[] lastSkipPosPointer;
|
||||
private long[] lastSkipPayPointer;
|
||||
private final int[] lastSkipDoc;
|
||||
private final long[] lastSkipDocPointer;
|
||||
private final long[] lastSkipPosPointer;
|
||||
private final long[] lastSkipPayPointer;
|
||||
|
||||
private final IndexOutput docOut;
|
||||
private final IndexOutput posOut;
|
||||
|
@ -61,7 +61,7 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
|
|||
private long curPayPointer;
|
||||
private int curPosBufferUpto;
|
||||
private int curPayloadByteUpto;
|
||||
private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
|
||||
private final CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
|
||||
private boolean fieldHasPositions;
|
||||
private boolean fieldHasOffsets;
|
||||
private boolean fieldHasPayloads;
|
||||
|
@ -85,7 +85,12 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
|
|||
lastSkipPosPointer = new long[maxSkipLevels];
|
||||
if (payOut != null) {
|
||||
lastSkipPayPointer = new long[maxSkipLevels];
|
||||
} else {
|
||||
lastSkipPayPointer = null;
|
||||
}
|
||||
} else {
|
||||
lastSkipPosPointer = null;
|
||||
lastSkipPayPointer = null;
|
||||
}
|
||||
curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
|
||||
for (int i = 0; i < maxSkipLevels; ++i) {
|
||||
|
|
|
@ -642,13 +642,13 @@ public class BKDWriter60 implements Closeable {
|
|||
throws IOException {
|
||||
assert docMaps == null || readers.size() == docMaps.size();
|
||||
|
||||
BKDMergeQueue queue = new BKDMergeQueue(config.bytesPerDim, readers.size());
|
||||
BKDMergeQueue queue = new BKDMergeQueue(config.bytesPerDim(), readers.size());
|
||||
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
PointValues pointValues = readers.get(i);
|
||||
assert pointValues.getNumDimensions() == config.numDims
|
||||
&& pointValues.getBytesPerDimension() == config.bytesPerDim
|
||||
&& pointValues.getNumIndexDimensions() == config.numIndexDims;
|
||||
assert pointValues.getNumDimensions() == config.numDims()
|
||||
&& pointValues.getBytesPerDimension() == config.bytesPerDim()
|
||||
&& pointValues.getNumIndexDimensions() == config.numIndexDims();
|
||||
MergeState.DocMap docMap;
|
||||
if (docMaps == null) {
|
||||
docMap = null;
|
||||
|
@ -1931,7 +1931,7 @@ public class BKDWriter60 implements Closeable {
|
|||
private void computePackedValueBounds(
|
||||
BKDRadixSelector.PathSlice slice, byte[] minPackedValue, byte[] maxPackedValue)
|
||||
throws IOException {
|
||||
try (PointReader reader = slice.writer.getReader(slice.start, slice.count)) {
|
||||
try (PointReader reader = slice.writer().getReader(slice.start(), slice.count())) {
|
||||
if (reader.next() == false) {
|
||||
return;
|
||||
}
|
||||
|
@ -1995,16 +1995,16 @@ public class BKDWriter60 implements Closeable {
|
|||
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more
|
||||
// efficient
|
||||
HeapPointWriter heapSource;
|
||||
if (points.writer instanceof HeapPointWriter == false) {
|
||||
if (points.writer() instanceof HeapPointWriter == false) {
|
||||
// Adversarial cases can cause this, e.g. merging big segments with most of the points
|
||||
// deleted
|
||||
heapSource = switchToHeap(points.writer);
|
||||
heapSource = switchToHeap(points.writer());
|
||||
} else {
|
||||
heapSource = (HeapPointWriter) points.writer;
|
||||
heapSource = (HeapPointWriter) points.writer();
|
||||
}
|
||||
|
||||
int from = Math.toIntExact(points.start);
|
||||
int to = Math.toIntExact(points.start + points.count);
|
||||
int from = Math.toIntExact(points.start());
|
||||
int to = Math.toIntExact(points.start() + points.count());
|
||||
// we store common prefix on scratch1
|
||||
computeCommonPrefixLength(heapSource, scratch1, from, to);
|
||||
|
||||
|
@ -2107,8 +2107,8 @@ public class BKDWriter60 implements Closeable {
|
|||
: "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
|
||||
|
||||
// How many points will be in the left tree:
|
||||
long rightCount = points.count / 2;
|
||||
long leftCount = points.count - rightCount;
|
||||
long rightCount = points.count() / 2;
|
||||
long leftCount = points.count() - rightCount;
|
||||
|
||||
BKDRadixSelector.PathSlice[] slices = new BKDRadixSelector.PathSlice[2];
|
||||
|
||||
|
@ -2128,9 +2128,9 @@ public class BKDWriter60 implements Closeable {
|
|||
radixSelector.select(
|
||||
points,
|
||||
slices,
|
||||
points.start,
|
||||
points.start + points.count,
|
||||
points.start + leftCount,
|
||||
points.start(),
|
||||
points.start() + points.count(),
|
||||
points.start() + leftCount,
|
||||
splitDim,
|
||||
commonPrefixLen);
|
||||
|
||||
|
|
|
@ -78,4 +78,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testEmptyByteVectorData() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMergingWithDifferentByteKnnFields() {
|
||||
// unimplemented
|
||||
}
|
||||
}
|
||||
|
|
|
@ -77,4 +77,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testEmptyByteVectorData() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMergingWithDifferentByteKnnFields() {
|
||||
// unimplemented
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,4 +67,9 @@ public class TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testEmptyByteVectorData() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMergingWithDifferentByteKnnFields() {
|
||||
// unimplemented
|
||||
}
|
||||
}
|
||||
|
|
|
@ -388,10 +388,14 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
// write the vector data to a temporary file
|
||||
DocsWithFieldSet docsWithField =
|
||||
switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> writeByteVectorData(
|
||||
tempVectorData, MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
|
||||
case FLOAT32 -> writeVectorData(
|
||||
tempVectorData, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
|
||||
case BYTE ->
|
||||
writeByteVectorData(
|
||||
tempVectorData,
|
||||
MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
|
||||
case FLOAT32 ->
|
||||
writeVectorData(
|
||||
tempVectorData,
|
||||
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
|
||||
};
|
||||
CodecUtil.writeFooter(tempVectorData);
|
||||
IOUtils.close(tempVectorData);
|
||||
|
@ -638,13 +642,15 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
throws IOException {
|
||||
int dim = fieldInfo.getVectorDimension();
|
||||
return switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
case BYTE ->
|
||||
new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
@Override
|
||||
public byte[] copyValue(byte[] value) {
|
||||
return ArrayUtil.copyOfSubArray(value, 0, dim);
|
||||
}
|
||||
};
|
||||
case FLOAT32 -> new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
case FLOAT32 ->
|
||||
new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
@Override
|
||||
public float[] copyValue(float[] value) {
|
||||
return ArrayUtil.copyOfSubArray(value, 0, dim);
|
||||
|
@ -663,10 +669,12 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
DefaultFlatVectorScorer defaultFlatVectorScorer = new DefaultFlatVectorScorer();
|
||||
RandomVectorScorerSupplier scorerSupplier =
|
||||
switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
case BYTE ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
|
||||
case FLOAT32 -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
case FLOAT32 ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
|
||||
};
|
||||
|
@ -693,9 +701,9 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
lastDocID = docID;
|
||||
}
|
||||
|
||||
OnHeapHnswGraph getGraph() {
|
||||
OnHeapHnswGraph getGraph() throws IOException {
|
||||
if (vectors.size() > 0) {
|
||||
return hnswGraphBuilder.getGraph();
|
||||
return hnswGraphBuilder.getCompletedGraph();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
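
The Lucene94/95 writer hunks above and below are re-wraps of switch expressions only: when an arrow case's body does not fit on one line, the body now starts on the following line, with no change in behaviour. For reference, a small self-contained switch expression in the same style (the enum and method are made up for the example):

class SwitchStyleDemo {
  enum Encoding { BYTE, FLOAT32 }

  static int bytesPerVector(Encoding encoding, int dimension) {
    return switch (encoding) {
      case BYTE ->
          // body moved to its own line, matching the reformatted writer code
          dimension;
      case FLOAT32 ->
          dimension * Float.BYTES;
    };
  }

  public static void main(String[] args) {
    System.out.println(bytesPerVector(Encoding.FLOAT32, 128)); // prints 512
  }
}
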
@ -18,6 +18,7 @@
|
|||
package org.apache.lucene.backward_codecs.lucene95;
|
||||
|
||||
import static org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
|
||||
import static org.apache.lucene.codecs.KnnVectorsWriter.MergedVectorValues.hasVectorValues;
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -414,10 +415,14 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
// write the vector data to a temporary file
|
||||
DocsWithFieldSet docsWithField =
|
||||
switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> writeByteVectorData(
|
||||
tempVectorData, MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
|
||||
case FLOAT32 -> writeVectorData(
|
||||
tempVectorData, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
|
||||
case BYTE ->
|
||||
writeByteVectorData(
|
||||
tempVectorData,
|
||||
MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
|
||||
case FLOAT32 ->
|
||||
writeVectorData(
|
||||
tempVectorData,
|
||||
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
|
||||
};
|
||||
CodecUtil.writeFooter(tempVectorData);
|
||||
IOUtils.close(tempVectorData);
|
||||
|
@ -472,14 +477,18 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
IncrementalHnswGraphMerger merger =
|
||||
new IncrementalHnswGraphMerger(fieldInfo, scorerSupplier, M, beamWidth);
|
||||
for (int i = 0; i < mergeState.liveDocs.length; i++) {
|
||||
if (hasVectorValues(mergeState.fieldInfos[i], fieldInfo.name)) {
|
||||
merger.addReader(
|
||||
mergeState.knnVectorsReaders[i], mergeState.docMaps[i], mergeState.liveDocs[i]);
|
||||
}
|
||||
}
|
||||
DocIdSetIterator mergedVectorIterator = null;
|
||||
switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> mergedVectorIterator =
|
||||
case BYTE ->
|
||||
mergedVectorIterator =
|
||||
KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
|
||||
case FLOAT32 -> mergedVectorIterator =
|
||||
case FLOAT32 ->
|
||||
mergedVectorIterator =
|
||||
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
|
||||
}
|
||||
graph =
|
||||
|
@ -680,13 +689,15 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
throws IOException {
|
||||
int dim = fieldInfo.getVectorDimension();
|
||||
return switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
case BYTE ->
|
||||
new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
@Override
|
||||
public byte[] copyValue(byte[] value) {
|
||||
return ArrayUtil.copyOfSubArray(value, 0, dim);
|
||||
}
|
||||
};
|
||||
case FLOAT32 -> new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
case FLOAT32 ->
|
||||
new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
|
||||
@Override
|
||||
public float[] copyValue(float[] value) {
|
||||
return ArrayUtil.copyOfSubArray(value, 0, dim);
|
||||
|
@ -704,10 +715,12 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
vectors = new ArrayList<>();
|
||||
RandomVectorScorerSupplier scorerSupplier =
|
||||
switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
case BYTE ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
|
||||
case FLOAT32 -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
case FLOAT32 ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
|
||||
};
|
||||
|
@ -732,9 +745,9 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
lastDocID = docID;
|
||||
}
|
||||
|
||||
OnHeapHnswGraph getGraph() {
|
||||
OnHeapHnswGraph getGraph() throws IOException {
|
||||
if (vectors.size() > 0) {
|
||||
return hnswGraphBuilder.getGraph();
|
||||
return hnswGraphBuilder.getCompletedGraph();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -72,7 +72,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
|||
final Sort sort;
|
||||
try (DirectoryReader reader = DirectoryReader.open(directory)) {
|
||||
assertEquals(1, reader.leaves().size());
|
||||
sort = reader.leaves().get(0).reader().getMetaData().getSort();
|
||||
sort = reader.leaves().get(0).reader().getMetaData().sort();
|
||||
assertNotNull(sort);
|
||||
searchExampleIndex(reader);
|
||||
}
|
||||
|
@ -125,8 +125,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
|||
.add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST)
|
||||
.build(),
|
||||
2);
|
||||
assertEquals(2, children.totalHits.value);
|
||||
assertEquals(1, parents.totalHits.value);
|
||||
assertEquals(2, children.totalHits.value());
|
||||
assertEquals(1, parents.totalHits.value());
|
||||
// make sure it's sorted
|
||||
assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc);
|
||||
assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc);
|
||||
|
@ -140,7 +140,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
|||
public void testSortedIndex() throws Exception {
|
||||
try (DirectoryReader reader = DirectoryReader.open(directory)) {
|
||||
assertEquals(1, reader.leaves().size());
|
||||
Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
|
||||
Sort sort = reader.leaves().get(0).reader().getMetaData().sort();
|
||||
assertNotNull(sort);
|
||||
assertEquals("<long: \"dateDV\">!", sort.toString());
|
||||
// This will confirm the docs are really sorted
|
||||
|
@ -195,28 +195,28 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
|||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
TopDocs topDocs = searcher.search(new FieldExistsQuery("titleTokenized"), 10);
|
||||
assertEquals(50, topDocs.totalHits.value);
|
||||
assertEquals(50, topDocs.totalHits.value());
|
||||
|
||||
topDocs = searcher.search(new FieldExistsQuery("titleDV"), 10);
|
||||
assertEquals(50, topDocs.totalHits.value);
|
||||
assertEquals(50, topDocs.totalHits.value());
|
||||
|
||||
topDocs =
|
||||
searcher.search(
|
||||
IntPoint.newRangeQuery("docid_int", 42, 44),
|
||||
10,
|
||||
new Sort(new SortField("docid_intDV", SortField.Type.INT)));
|
||||
assertEquals(3, topDocs.totalHits.value);
|
||||
assertEquals(3, topDocs.totalHits.value());
|
||||
assertEquals(3, topDocs.scoreDocs.length);
|
||||
assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
|
||||
assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
|
||||
assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
|
||||
|
||||
topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5);
|
||||
assertTrue(topDocs.totalHits.value > 0);
|
||||
assertTrue(topDocs.totalHits.value() > 0);
|
||||
topDocs =
|
||||
searcher.search(
|
||||
new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG)));
|
||||
assertEquals(50, topDocs.totalHits.value);
|
||||
assertEquals(50, topDocs.totalHits.value());
|
||||
assertEquals(5, topDocs.scoreDocs.length);
|
||||
long firstDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0];
|
||||
long lastDate = (Long) ((FieldDoc) topDocs.scoreDocs[4]).fields[0];
|
||||
|
|
|
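
This backwards-compatibility test (and several tests below) moves from reading totalHits.value and totalHits.relation as fields to calling value() and relation(), matching the accessor-based TotalHits used elsewhere in the commit. The call-site shape in isolation, as a sketch that assumes an IndexSearcher and Query already exist:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;

class TotalHitsAccessorDemo {
  // After the change: value() and relation() instead of direct field reads.
  static long hitCount(IndexSearcher searcher, Query query) throws IOException {
    TopDocs topDocs = searcher.search(query, 10);
    if (topDocs.totalHits.relation() == TotalHits.Relation.EQUAL_TO) {
      return topDocs.totalHits.value(); // exact count
    }
    return topDocs.totalHits.value(); // lower bound when counting stopped early
  }
}
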
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.jmh;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.lucene912.ForDeltaUtil;
import org.apache.lucene.codecs.lucene912.ForUtil;
import org.apache.lucene.codecs.lucene912.PostingIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.IOUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(
value = 3,
jvmArgsAppend = {"-Xmx1g", "-Xms1g", "-XX:+AlwaysPreTouch"})
public class PostingIndexInputBenchmark {

private Path path;
private Directory dir;
private IndexInput in;
private PostingIndexInput postingIn;
private final ForUtil forUtil = new ForUtil();
private final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
private final long[] values = new long[128];

@Param({"2", "3", "4", "5", "6", "7", "8", "9", "10"})
public int bpv;

@Setup(Level.Trial)
public void setup() throws Exception {
path = Files.createTempDirectory("forUtil");
dir = MMapDirectory.open(path);
try (IndexOutput out = dir.createOutput("docs", IOContext.DEFAULT)) {
Random r = new Random(0);
// Write enough random data to not reach EOF while decoding
for (int i = 0; i < 100; ++i) {
out.writeLong(r.nextLong());
}
}
in = dir.openInput("docs", IOContext.DEFAULT);
postingIn = new PostingIndexInput(in, forUtil, forDeltaUtil);
}

@TearDown(Level.Trial)
public void tearDown() throws Exception {
if (dir != null) {
dir.deleteFile("docs");
}
IOUtils.close(in, dir);
in = null;
dir = null;
Files.deleteIfExists(path);
}

@Benchmark
public void decode(Blackhole bh) throws IOException {
in.seek(3); // random unaligned offset
postingIn.decode(bpv, values);
bh.consume(values);
}

@Benchmark
public void decodeAndPrefixSum(Blackhole bh) throws IOException {
in.seek(3); // random unaligned offset
postingIn.decodeAndPrefixSum(bpv, 100, values);
bh.consume(values);
}
}
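
PostingIndexInputBenchmark above is a standard JMH benchmark, so beyond whatever JMH task the build already exposes it can be launched through the plain JMH Runner API. A sketch of such a launcher (the launcher class itself is an assumption for illustration and would live in the same package as the benchmark):

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class PostingIndexInputBenchmarkLauncher {
  public static void main(String[] args) throws RunnerException {
    Options options =
        new OptionsBuilder()
            .include(PostingIndexInputBenchmark.class.getSimpleName())
            .param("bpv", "4", "8") // restrict the sweep to two bit widths for a quick run
            .build();
    new Runner(options).run();
  }
}
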
@@ -17,11 +17,10 @@
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.

# collector.class can be:
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDoc
# collector.manager.class can be:
# Fully Qualified Class Name of a CollectorManager with a empty constructor
# topScoreDoc - Creates a TopScoreDocCollectorManager
collector.manager.class=coll:topScoreDoc

analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
@@ -17,11 +17,10 @@
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.

# collector.class can be:
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDoc
# collector.manager.class can be:
# Fully Qualified Class Name of a CollectorManager with a empty constructor
# topScoreDoc - Creates a TopScoreDocCollectorManager
collector.manager.class=coll:topScoreDoc

analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
@@ -24,7 +24,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@@ -119,9 +119,7 @@ public abstract class ReadTask extends PerfTask {
hits = searcher.search(q, numHits);
}
} else {
Collector collector = createCollector();

searcher.search(q, collector);
searcher.search(q, createCollectorManager());
// hits = collector.topDocs();
}

@@ -184,9 +182,8 @@ public abstract class ReadTask extends PerfTask {
return res;
}

protected Collector createCollector() throws Exception {
return new TopScoreDocCollectorManager(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1)
.newCollector();
protected CollectorManager<?, ?> createCollectorManager() throws Exception {
return new TopScoreDocCollectorManager(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
}

protected Document retrieveDoc(StoredFields storedFields, int id) throws IOException {
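
ReadTask now builds a CollectorManager instead of a single Collector and hands it straight to IndexSearcher.search(Query, CollectorManager), which lets the searcher run one collector per leaf slice and reduce the partial results. A small usage sketch with TopScoreDocCollectorManager; the searcher and query are assumed to exist already:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollectorManager;

class CollectorManagerDemo {
  static TopDocs topHits(IndexSearcher searcher, Query query, int numHits) throws IOException {
    // numHits top documents; Integer.MAX_VALUE threshold keeps hit counts exact.
    TopScoreDocCollectorManager manager =
        new TopScoreDocCollectorManager(numHits, Integer.MAX_VALUE);
    // search(Query, CollectorManager) creates one collector per slice and reduces them.
    return searcher.search(query, manager);
  }
}
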
@ -19,7 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
|||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.TopScoreDocCollectorManager;
|
||||
|
||||
/** Does search w/ a custom collector */
|
||||
|
@ -37,7 +37,11 @@ public class SearchWithCollectorTask extends SearchTask {
|
|||
// check to make sure either the doc is being stored
|
||||
PerfRunData runData = getRunData();
|
||||
Config config = runData.getConfig();
|
||||
clnName = config.get("collector.class", "");
|
||||
if (config.get("collector.class", null) != null) {
|
||||
throw new IllegalArgumentException(
|
||||
"collector.class is no longer supported as a config parameter, use collector.manager.class instead to provide a CollectorManager class name");
|
||||
}
|
||||
clnName = config.get("collector.manager.class", "");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -46,18 +50,17 @@ public class SearchWithCollectorTask extends SearchTask {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected Collector createCollector() throws Exception {
|
||||
Collector collector = null;
|
||||
protected CollectorManager<?, ?> createCollectorManager() throws Exception {
|
||||
CollectorManager<?, ?> collectorManager;
|
||||
if (clnName.equalsIgnoreCase("topScoreDoc") == true) {
|
||||
collector =
|
||||
new TopScoreDocCollectorManager(numHits(), null, Integer.MAX_VALUE, false).newCollector();
|
||||
collectorManager = new TopScoreDocCollectorManager(numHits(), Integer.MAX_VALUE);
|
||||
} else if (clnName.length() > 0) {
|
||||
collector = Class.forName(clnName).asSubclass(Collector.class).getConstructor().newInstance();
|
||||
|
||||
collectorManager =
|
||||
Class.forName(clnName).asSubclass(CollectorManager.class).getConstructor().newInstance();
|
||||
} else {
|
||||
collector = super.createCollector();
|
||||
collectorManager = super.createCollectorManager();
|
||||
}
|
||||
return collector;
|
||||
return collectorManager;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -91,7 +91,7 @@ public class TestDocMaker extends BenchmarkTestCase {
|
|||
IndexReader reader = DirectoryReader.open(runData.getDirectory());
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
TopDocs td = searcher.search(new TermQuery(new Term("key", "value")), 10);
|
||||
assertEquals(numExpectedResults, td.totalHits.value);
|
||||
assertEquals(numExpectedResults, td.totalHits.value());
|
||||
reader.close();
|
||||
}
|
||||
|
||||
|
|
|
@ -160,7 +160,7 @@ public class TestLineDocSource extends BenchmarkTestCase {
|
|||
reader = DirectoryReader.open(runData.getDirectory());
|
||||
searcher = newSearcher(reader);
|
||||
TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10);
|
||||
assertEquals(numAdds, td.totalHits.value);
|
||||
assertEquals(numAdds, td.totalHits.value());
|
||||
assertNotNull(td.scoreDocs[0]);
|
||||
|
||||
if (storedField == null) {
|
||||
|
|
|
@ -151,13 +151,13 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
|
|||
if (!assignedClasses.isEmpty()) {
|
||||
Collections.sort(assignedClasses);
|
||||
// this is a negative number closest to 0 = a
|
||||
double smax = assignedClasses.get(0).getScore();
|
||||
double smax = assignedClasses.get(0).score();
|
||||
|
||||
double sumLog = 0;
|
||||
// log(sum(exp(x_n-a)))
|
||||
for (ClassificationResult<BytesRef> cr : assignedClasses) {
|
||||
// getScore-smax <=0 (both negative, smax is the smallest abs()
|
||||
sumLog += Math.exp(cr.getScore() - smax);
|
||||
sumLog += Math.exp(cr.score() - smax);
|
||||
}
|
||||
// loga=a+log(sum(exp(x_n-a))) = log(sum(exp(x_n)))
|
||||
double loga = smax;
|
||||
|
@ -165,8 +165,8 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
|
|||
|
||||
// 1/sum*x = exp(log(x))*1/sum = exp(log(x)-log(sum))
|
||||
for (ClassificationResult<BytesRef> cr : assignedClasses) {
|
||||
double scoreDiff = cr.getScore() - loga;
|
||||
returnList.add(new ClassificationResult<>(cr.getAssignedClass(), Math.exp(scoreDiff)));
|
||||
double scoreDiff = cr.score() - loga;
|
||||
returnList.add(new ClassificationResult<>(cr.assignedClass(), Math.exp(scoreDiff)));
|
||||
}
|
||||
}
|
||||
return returnList;
|
||||
|
@ -216,7 +216,7 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
|
|||
builder.add(query, BooleanClause.Occur.MUST);
|
||||
}
|
||||
TopDocs search = indexSearcher.search(builder.build(), 1);
|
||||
return search.totalHits.value > 0 ? search.scoreDocs[0].score : 1;
|
||||
return search.totalHits.value() > 0 ? search.scoreDocs[0].score : 1;
|
||||
}
|
||||
|
||||
private double calculateLogPrior(Term term) throws IOException {
|
||||
|
@ -227,6 +227,6 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
|
|||
bq.add(query, BooleanClause.Occur.MUST);
|
||||
}
|
||||
TopDocs topDocs = indexSearcher.search(bq.build(), 1);
|
||||
return topDocs.totalHits.value > 0 ? Math.log(topDocs.scoreDocs[0].score) : 0;
|
||||
return topDocs.totalHits.value() > 0 ? Math.log(topDocs.scoreDocs[0].score) : 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -148,7 +148,7 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
|
|||
if (textField != null && classField != null) {
|
||||
// assign class to the doc
|
||||
ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
|
||||
Boolean assignedClass = classificationResult.getAssignedClass();
|
||||
Boolean assignedClass = classificationResult.assignedClass();
|
||||
|
||||
Boolean correctClass = Boolean.valueOf(classField.stringValue());
|
||||
double modifier = Math.signum(correctClass.compareTo(assignedClass));
|
||||
|
|
|
@ -126,7 +126,7 @@ public class CachingNaiveBayesClassifier extends SimpleNaiveBayesClassifier {
|
|||
int removeIdx = -1;
|
||||
int i = 0;
|
||||
for (ClassificationResult<BytesRef> cr : ret) {
|
||||
if (cr.getAssignedClass().equals(cclass)) {
|
||||
if (cr.assignedClass().equals(cclass)) {
|
||||
removeIdx = i;
|
||||
break;
|
||||
}
|
||||
|
@ -137,7 +137,7 @@ public class CachingNaiveBayesClassifier extends SimpleNaiveBayesClassifier {
|
|||
ClassificationResult<BytesRef> toRemove = ret.get(removeIdx);
|
||||
ret.add(
|
||||
new ClassificationResult<>(
|
||||
toRemove.getAssignedClass(), toRemove.getScore() + Math.log(wordProbability)));
|
||||
toRemove.assignedClass(), toRemove.score() + Math.log(wordProbability)));
|
||||
ret.remove(removeIdx);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -20,44 +20,15 @@ package org.apache.lucene.classification;
* The result of a call to {@link Classifier#assignClass(String)} holding an assigned class of type
* <code>T</code> and a score.
*
* @param assignedClass the class <code>T</code> assigned by a {@link Classifier}
* @param score score the score for the assignedClass as a <code>double</code>
* @lucene.experimental
*/
public class ClassificationResult<T> implements Comparable<ClassificationResult<T>> {

private final T assignedClass;
private final double score;

/**
* Constructor
*
* @param assignedClass the class <code>T</code> assigned by a {@link Classifier}
* @param score the score for the assignedClass as a <code>double</code>
*/
public ClassificationResult(T assignedClass, double score) {
this.assignedClass = assignedClass;
this.score = score;
}

/**
* retrieve the result class
*
* @return a <code>T</code> representing an assigned class
*/
public T getAssignedClass() {
return assignedClass;
}

/**
* retrieve the result score
*
* @return a <code>double</code> representing a result score
*/
public double getScore() {
return score;
}
public record ClassificationResult<T>(T assignedClass, double score)
implements Comparable<ClassificationResult<T>> {

@Override
public int compareTo(ClassificationResult<T> o) {
return Double.compare(o.getScore(), this.getScore());
return Double.compare(o.score(), this.score());
}
}
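
With ClassificationResult now a record, the classifiers updated below switch from getAssignedClass()/getScore() to the generated assignedClass()/score() accessors, and the descending compareTo keeps Collections.sort ordering results best-first. A small usage sketch of the record as defined above:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.classification.ClassificationResult;

class ClassificationResultDemo {
  public static void main(String[] args) {
    List<ClassificationResult<String>> results = new ArrayList<>();
    results.add(new ClassificationResult<>("sports", 0.2));
    results.add(new ClassificationResult<>("politics", 0.7));
    Collections.sort(results); // compareTo sorts by descending score
    ClassificationResult<String> best = results.get(0);
    System.out.println(best.assignedClass() + " " + best.score()); // politics 0.7
  }
}
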
@ -108,9 +108,9 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
|
|||
ClassificationResult<BytesRef> assignedClass = null;
|
||||
double maxscore = -Double.MAX_VALUE;
|
||||
for (ClassificationResult<BytesRef> cl : assignedClasses) {
|
||||
if (cl.getScore() > maxscore) {
|
||||
if (cl.score() > maxscore) {
|
||||
assignedClass = cl;
|
||||
maxscore = cl.getScore();
|
||||
maxscore = cl.score();
|
||||
}
|
||||
}
|
||||
return assignedClass;
|
||||
|
@ -159,7 +159,7 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
|
|||
Map<BytesRef, Integer> classCounts = new HashMap<>();
|
||||
Map<BytesRef, Double> classBoosts =
|
||||
new HashMap<>(); // this is a boost based on class ranking positions in topDocs
|
||||
float maxScore = topDocs.totalHits.value == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
|
||||
float maxScore = topDocs.totalHits.value() == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
|
||||
StoredFields storedFields = indexSearcher.storedFields();
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
IndexableField storableField = storedFields.document(scoreDoc.doc).getField(classFieldName);
|
||||
|
@ -193,7 +193,7 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
|
|||
if (sumdoc < k) {
|
||||
for (ClassificationResult<BytesRef> cr : temporaryList) {
|
||||
returnList.add(
|
||||
new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc));
|
||||
new ClassificationResult<>(cr.assignedClass(), cr.score() * k / (double) sumdoc));
|
||||
}
|
||||
} else {
|
||||
returnList = temporaryList;
|
||||
|
|
|
@ -129,9 +129,9 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
|
|||
ClassificationResult<BytesRef> assignedClass = null;
|
||||
double maxscore = -Double.MAX_VALUE;
|
||||
for (ClassificationResult<BytesRef> cl : assignedClasses) {
|
||||
if (cl.getScore() > maxscore) {
|
||||
if (cl.score() > maxscore) {
|
||||
assignedClass = cl;
|
||||
maxscore = cl.getScore();
|
||||
maxscore = cl.score();
|
||||
}
|
||||
}
|
||||
return assignedClass;
|
||||
|
@ -192,7 +192,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
|
|||
Map<BytesRef, Integer> classCounts = new HashMap<>();
|
||||
Map<BytesRef, Double> classBoosts =
|
||||
new HashMap<>(); // this is a boost based on class ranking positions in topDocs
|
||||
float maxScore = topDocs.totalHits.value == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
|
||||
float maxScore = topDocs.totalHits.value() == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
|
||||
StoredFields storedFields = indexSearcher.storedFields();
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
IndexableField[] storableFields =
|
||||
|
@ -229,7 +229,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
|
|||
if (sumdoc < k) {
|
||||
for (ClassificationResult<BytesRef> cr : temporaryList) {
|
||||
returnList.add(
|
||||
new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc));
|
||||
new ClassificationResult<>(cr.assignedClass(), cr.score() * k / (double) sumdoc));
|
||||
}
|
||||
} else {
|
||||
returnList = temporaryList;
|
||||
|
|
|
@@ -105,9 +105,9 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
     ClassificationResult<BytesRef> assignedClass = null;
     double maxscore = -Double.MAX_VALUE;
     for (ClassificationResult<BytesRef> c : assignedClasses) {
-      if (c.getScore() > maxscore) {
+      if (c.score() > maxscore) {
         assignedClass = c;
-        maxscore = c.getScore();
+        maxscore = c.score();
       }
     }
     return assignedClass;
@@ -297,13 +297,13 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
     if (!assignedClasses.isEmpty()) {
       Collections.sort(assignedClasses);
       // this is a negative number closest to 0 = a
-      double smax = assignedClasses.get(0).getScore();
+      double smax = assignedClasses.get(0).score();

       double sumLog = 0;
       // log(sum(exp(x_n-a)))
       for (ClassificationResult<BytesRef> cr : assignedClasses) {
         // getScore-smax <=0 (both negative, smax is the smallest abs()
-        sumLog += Math.exp(cr.getScore() - smax);
+        sumLog += Math.exp(cr.score() - smax);
       }
       // loga=a+log(sum(exp(x_n-a))) = log(sum(exp(x_n)))
       double loga = smax;
@@ -311,8 +311,8 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {

       // 1/sum*x = exp(log(x))*1/sum = exp(log(x)-log(sum))
       for (ClassificationResult<BytesRef> cr : assignedClasses) {
-        double scoreDiff = cr.getScore() - loga;
-        returnList.add(new ClassificationResult<>(cr.getAssignedClass(), Math.exp(scoreDiff)));
+        double scoreDiff = cr.score() - loga;
+        returnList.add(new ClassificationResult<>(cr.assignedClass(), Math.exp(scoreDiff)));
       }
     }
     return returnList;
@@ -80,9 +80,9 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi
     ClassificationResult<BytesRef> assignedClass = null;
     double maxscore = -Double.MAX_VALUE;
     for (ClassificationResult<BytesRef> c : assignedClasses) {
-      if (c.getScore() > maxscore) {
+      if (c.score() > maxscore) {
         assignedClass = c;
-        maxscore = c.getScore();
+        maxscore = c.score();
       }
     }
     return assignedClass;
@@ -107,7 +107,7 @@ public class ConfusionMatrixGenerator {
           time += end - start;

           if (result != null) {
-            T assignedClass = result.getAssignedClass();
+            T assignedClass = result.assignedClass();
             if (assignedClass != null) {
               counter++;
               String classified =
@@ -138,13 +138,13 @@ public class DatasetSplitter {
     // iterate over existing documents
     StoredFields storedFields = originalIndex.storedFields();
     for (GroupDocs<Object> group : topGroups.groups) {
-      assert group.totalHits.relation == TotalHits.Relation.EQUAL_TO;
-      long totalHits = group.totalHits.value;
+      assert group.totalHits().relation() == TotalHits.Relation.EQUAL_TO;
+      long totalHits = group.totalHits().value();
       double testSize = totalHits * testRatio;
       int tc = 0;
       double cvSize = totalHits * crossValidationRatio;
       int cvc = 0;
-      for (ScoreDoc scoreDoc : group.scoreDocs) {
+      for (ScoreDoc scoreDoc : group.scoreDocs()) {

         // create a new document for indexing
         Document doc = createNewDoc(storedFields, ft, scoreDoc, fieldNames);
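
Note: DatasetSplitter and the classifiers above now read hit counts through accessors (totalHits().value(), relation(), scoreDocs()) rather than public fields. A minimal sketch of the same empty-result guard written against that accessor style, assuming an IndexSearcher and the post-change TopDocs/TotalHits signatures shown in this diff:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;

final class TotalHitsAccessorSketch {
  // Top score for a query, or NaN when nothing matched; mirrors the maxScore
  // guard used by the classifiers above, written against the accessor API.
  static float topScore(IndexSearcher searcher, Query query) throws IOException {
    TopDocs topDocs = searcher.search(query, 1);
    if (topDocs.totalHits.value() == 0) {
      return Float.NaN;
    }
    return topDocs.scoreDocs[0].score;
  }

  // True when the reported hit count is exact rather than a lower bound.
  static boolean isExact(TopDocs topDocs) {
    return topDocs.totalHits.relation() == TotalHits.Relation.EQUAL_TO;
  }
}
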
@@ -91,7 +91,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
       Classifier<T> classifier, String inputDoc, T expectedResult) throws Exception {
     ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
     assertNotNull(classificationResult);
-    T assignedClass = classificationResult.getAssignedClass();
+    T assignedClass = classificationResult.assignedClass();
     assertNotNull(assignedClass);
     assertEquals(
         "got an assigned class of " + assignedClass,
@@ -101,7 +101,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
         assignedClass instanceof BytesRef
             ? ((BytesRef) assignedClass).utf8ToString()
             : assignedClass);
-    double score = classificationResult.getScore();
+    double score = classificationResult.score();
     assertTrue("score should be between 0 and 1, got:" + score, score <= 1 && score >= 0);
     return classificationResult;
   }
@@ -130,18 +130,17 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
         getSampleIndex(analyzer);

     ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
-    assertNotNull(classificationResult.getAssignedClass());
+    assertNotNull(classificationResult.assignedClass());
     assertEquals(
-        "got an assigned class of " + classificationResult.getAssignedClass(),
+        "got an assigned class of " + classificationResult.assignedClass(),
         expectedResult,
-        classificationResult.getAssignedClass());
-    double score = classificationResult.getScore();
+        classificationResult.assignedClass());
+    double score = classificationResult.score();
     assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
     updateSampleIndex();
     ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
-    assertEquals(
-        classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
-    assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
+    assertEquals(classificationResult.assignedClass(), secondClassificationResult.assignedClass());
+    assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.score()));
   }

   protected LeafReader getSampleIndex(Analyzer analyzer) throws IOException {
@@ -88,7 +88,7 @@ public class TestKNearestNeighborClassifier extends ClassificationTestBase<Bytes
                   textFieldName),
               TECHNOLOGY_INPUT,
               TECHNOLOGY_RESULT);
-      assertTrue(resultDS.getScore() != resultLMS.getScore());
+      assertTrue(resultDS.score() != resultLMS.score());
     } finally {
       IOUtils.close(leafReader);
     }
@@ -113,7 +113,7 @@ public class TestKNearestNeighborClassifier extends ClassificationTestBase<Bytes
               leafReader, null, analyzer, null, 6, 1, 1, categoryFieldName, textFieldName);
       List<ClassificationResult<BytesRef>> classes =
           knnClassifier.getClasses(STRONG_TECHNOLOGY_INPUT);
-      assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
+      assertTrue(classes.get(0).score() > classes.get(1).score());
       checkCorrectClassification(knnClassifier, STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
     } finally {
       IOUtils.close(leafReader);
@@ -139,7 +139,7 @@ public class TestKNearestNeighborClassifier extends ClassificationTestBase<Bytes
               leafReader, null, analyzer, null, 3, 1, 1, categoryFieldName, textFieldName);
       List<ClassificationResult<BytesRef>> classes =
           knnClassifier.getClasses(SUPER_STRONG_TECHNOLOGY_INPUT);
-      assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
+      assertTrue(classes.get(0).score() > classes.get(1).score());
       checkCorrectClassification(knnClassifier, SUPER_STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
     } finally {
       IOUtils.close(leafReader);
@@ -58,12 +58,12 @@ public abstract class DocumentClassificationTestBase<T> extends ClassificationTe
   protected double checkCorrectDocumentClassification(
       DocumentClassifier<T> classifier, Document inputDoc, T expectedResult) throws Exception {
     ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
-    assertNotNull(classificationResult.getAssignedClass());
+    assertNotNull(classificationResult.assignedClass());
     assertEquals(
-        "got an assigned class of " + classificationResult.getAssignedClass(),
+        "got an assigned class of " + classificationResult.assignedClass(),
         expectedResult,
-        classificationResult.getAssignedClass());
-    double score = classificationResult.getScore();
+        classificationResult.assignedClass());
+    double score = classificationResult.score();
     assertTrue("score should be between 0 and 1, got:" + score, score <= 1 && score >= 0);
     return score;
   }
@@ -69,28 +69,15 @@ public class BlockTermsWriter extends FieldsConsumer {
   private final TermsIndexWriterBase termsIndexWriter;
   private final int maxDoc;

-  private static class FieldMetaData {
-    public final FieldInfo fieldInfo;
-    public final long numTerms;
-    public final long termsStartPointer;
-    public final long sumTotalTermFreq;
-    public final long sumDocFreq;
-    public final int docCount;
-
-    public FieldMetaData(
+  private record FieldMetaData(
       FieldInfo fieldInfo,
       long numTerms,
       long termsStartPointer,
       long sumTotalTermFreq,
       long sumDocFreq,
       int docCount) {
+    private FieldMetaData {
       assert numTerms > 0;
-      this.fieldInfo = fieldInfo;
-      this.termsStartPointer = termsStartPointer;
-      this.numTerms = numTerms;
-      this.sumTotalTermFreq = sumTotalTermFreq;
-      this.sumDocFreq = sumDocFreq;
-      this.docCount = docCount;
     }
   }

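
Note: the FieldMetaData change above is the recurring pattern in this commit: a private holder class with final fields and an assigning constructor collapses into a record, and any invariant moves into a compact constructor. A stand-alone sketch of that pattern with illustrative names (not the actual Lucene types):

// Before: fields, constructor, and assignments written by hand.
final class FieldStatsClass {
  final long numTerms;
  final long sumDocFreq;
  final int docCount;

  FieldStatsClass(long numTerms, long sumDocFreq, int docCount) {
    assert numTerms > 0;
    this.numTerms = numTerms;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
  }
}

// After: the record declares the components once; the compact constructor
// keeps the invariant, and the implicit assignments replace the boilerplate.
record FieldStats(long numTerms, long sumDocFreq, int docCount) {
  FieldStats {
    assert numTerms > 0;
  }
}
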
@@ -127,7 +127,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {

     @Override
     public boolean isIndexTerm(BytesRef term, TermStats stats) {
-      if (stats.docFreq >= docFreqThresh || count >= interval) {
+      if (stats.docFreq() >= docFreqThresh || count >= interval) {
        count = 1;
        return true;
      } else {
@@ -34,19 +34,14 @@ final class FSTOrdsOutputs extends Outputs<FSTOrdsOutputs.Output> {

   private static final BytesRef NO_BYTES = new BytesRef();

-  public static final class Output {
-    public final BytesRef bytes;
-    // Inclusive:
-    public final long startOrd;
-    // Inclusive:
-    public final long endOrd;
-
-    public Output(BytesRef bytes, long startOrd, long endOrd) {
+  /**
+   * @param startOrd Inclusive:
+   * @param endOrd Inclusive:
+   */
+  public record Output(BytesRef bytes, long startOrd, long endOrd) {
+    public Output {
       assert startOrd >= 0 : "startOrd=" + startOrd;
       assert endOrd >= 0 : "endOrd=" + endOrd;
-      this.bytes = bytes;
-      this.startOrd = startOrd;
-      this.endOrd = endOrd;
     }

     @Override
@@ -60,24 +55,6 @@ final class FSTOrdsOutputs extends Outputs<FSTOrdsOutputs.Output> {
       }
       return startOrd + " to " + x;
     }
-
-    @Override
-    public int hashCode() {
-      int hash = bytes.hashCode();
-      hash = (int) (hash ^ startOrd);
-      hash = (int) (hash ^ endOrd);
-      return hash;
-    }
-
-    @Override
-    public boolean equals(Object _other) {
-      if (_other instanceof Output) {
-        Output other = (Output) _other;
-        return bytes.equals(other.bytes) && startOrd == other.startOrd && endOrd == other.endOrd;
-      } else {
-        return false;
-      }
-    }
   }

   @Override
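
Note: converting Output to a record also allows the hand-written hashCode() and equals() above to be deleted, since a record derives both from its components (delegating to each component's own equals/hashCode, e.g. BytesRef's). A small sketch with illustrative names:

// Illustrative only: records get component-wise equals() and hashCode().
record OrdRange(long startOrd, long endOrd) {}

class OrdRangeDemo {
  public static void main(String[] args) {
    OrdRange a = new OrdRange(3, 7);
    OrdRange b = new OrdRange(3, 7);
    System.out.println(a.equals(b));                  // true: same components
    System.out.println(a.hashCode() == b.hashCode()); // true: consistent with equals
    System.out.println(a);                            // OrdRange[startOrd=3, endOrd=7]
  }
}
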
@@ -139,18 +139,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
   final PostingsWriterBase postingsWriter;
   final FieldInfos fieldInfos;

-  private static class FieldMetaData {
-    public final FieldInfo fieldInfo;
-    public final Output rootCode;
-    public final long numTerms;
-    public final long indexStartFP;
-    public final long sumTotalTermFreq;
-    public final long sumDocFreq;
-    public final int docCount;
-    public final BytesRef minTerm;
-    public final BytesRef maxTerm;
-
-    public FieldMetaData(
+  private record FieldMetaData(
       FieldInfo fieldInfo,
       Output rootCode,
       long numTerms,
@@ -160,17 +149,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
       int docCount,
       BytesRef minTerm,
       BytesRef maxTerm) {
+    private FieldMetaData {
       assert numTerms > 0;
-      this.fieldInfo = fieldInfo;
       assert rootCode != null : "field=" + fieldInfo.name + " numTerms=" + numTerms;
-      this.rootCode = rootCode;
-      this.indexStartFP = indexStartFP;
-      this.numTerms = numTerms;
-      this.sumTotalTermFreq = sumTotalTermFreq;
-      this.sumDocFreq = sumDocFreq;
-      this.docCount = docCount;
-      this.minTerm = minTerm;
-      this.maxTerm = maxTerm;
     }
   }

@@ -293,15 +274,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
     }
   }

-  private static final class SubIndex {
-    public final FST<Output> index;
-    public final long termOrdStart;
-
-    public SubIndex(FST<Output> index, long termOrdStart) {
-      this.index = index;
-      this.termOrdStart = termOrdStart;
-    }
-  }
+  private record SubIndex(FST<Output> index, long termOrdStart) {}

   private static final class PendingBlock extends PendingEntry {
     public final BytesRef prefix;
@@ -438,7 +411,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
       // long blockTermCount = output.endOrd - output.startOrd + 1;
       Output newOutput =
           FST_OUTPUTS.newOutput(
-              output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
+              output.bytes(), termOrdOffset + output.startOrd(), output.endOrd() - termOrdOffset);
       // System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
       // " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
       // + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
@@ -969,9 +942,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
       out.writeVInt(field.fieldInfo.number);
       assert field.numTerms > 0;
       out.writeVLong(field.numTerms);
-      out.writeVInt(field.rootCode.bytes.length);
+      out.writeVInt(field.rootCode.bytes().length);
       out.writeBytes(
-          field.rootCode.bytes.bytes, field.rootCode.bytes.offset, field.rootCode.bytes.length);
+          field.rootCode.bytes().bytes,
+          field.rootCode.bytes().offset,
+          field.rootCode.bytes().length);
       if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
         out.writeVLong(field.sumTotalTermFreq);
       }
@@ -79,7 +79,8 @@ final class OrdsFieldReader extends Terms {
     // }

     rootBlockFP =
-        (new ByteArrayDataInput(rootCode.bytes.bytes, rootCode.bytes.offset, rootCode.bytes.length))
+        (new ByteArrayDataInput(
+                rootCode.bytes().bytes, rootCode.bytes().offset, rootCode.bytes().length))
             .readVLong()
             >>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

@@ -142,8 +142,8 @@ final class OrdsIntersectTermsEnumFrame {
     // + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state="
     // + state));

-    if (output != null && output.bytes != null && transitionCount != 0) {
-      BytesRef frameIndexData = output.bytes;
+    if (output != null && output.bytes() != null && transitionCount != 0) {
+      BytesRef frameIndexData = output.bytes();

       // Floor frame
       if (floorData.length < frameIndexData.length) {
@ -149,7 +149,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
// Pushes a frame we seek'd to
|
||||
OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, Output frameData, int length)
|
||||
throws IOException {
|
||||
scratchReader.reset(frameData.bytes.bytes, frameData.bytes.offset, frameData.bytes.length);
|
||||
scratchReader.reset(
|
||||
frameData.bytes().bytes, frameData.bytes().offset, frameData.bytes().length);
|
||||
final long code = scratchReader.readVLong();
|
||||
final long fpSeek = code >>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
|
||||
// System.out.println(" fpSeek=" + fpSeek);
|
||||
|
@ -160,11 +161,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Must setFloorData before pushFrame in case pushFrame tries to rewind:
|
||||
if (f.isFloor) {
|
||||
f.termOrdOrig = frameData.startOrd;
|
||||
f.setFloorData(scratchReader, frameData.bytes);
|
||||
f.termOrdOrig = frameData.startOrd();
|
||||
f.setFloorData(scratchReader, frameData.bytes());
|
||||
}
|
||||
|
||||
pushFrame(arc, fpSeek, length, frameData.startOrd);
|
||||
pushFrame(arc, fpSeek, length, frameData.startOrd());
|
||||
|
||||
return f;
|
||||
}
|
||||
|
@ -891,7 +892,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
} else if (isSeekFrame && !f.isFloor) {
|
||||
final ByteArrayDataInput reader =
|
||||
new ByteArrayDataInput(
|
||||
output.bytes.bytes, output.bytes.offset, output.bytes.length);
|
||||
output.bytes().bytes, output.bytes().offset, output.bytes().length);
|
||||
final long codeOrig = reader.readVLong();
|
||||
final long code =
|
||||
(f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
|
||||
|
@ -1210,7 +1211,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput());
|
||||
// System.out.println(" isFinal: " + finalOutput.startOrd + "-" +
|
||||
// (Long.MAX_VALUE-finalOutput.endOrd));
|
||||
if (targetOrd >= finalOutput.startOrd && targetOrd <= Long.MAX_VALUE - finalOutput.endOrd) {
|
||||
if (targetOrd >= finalOutput.startOrd()
|
||||
&& targetOrd <= Long.MAX_VALUE - finalOutput.endOrd()) {
|
||||
// Only one range should match across all arc leaving this node
|
||||
// assert bestOutput == null;
|
||||
bestOutput = finalOutput;
|
||||
|
@ -1247,9 +1249,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
// System.out.println(" cycle mid=" + mid + " targetOrd=" + targetOrd + " output=" +
|
||||
// minArcOutput.startOrd + "-" + (Long.MAX_VALUE-minArcOutput.endOrd));
|
||||
if (targetOrd > Long.MAX_VALUE - minArcOutput.endOrd) {
|
||||
if (targetOrd > Long.MAX_VALUE - minArcOutput.endOrd()) {
|
||||
low = mid + 1;
|
||||
} else if (targetOrd < minArcOutput.startOrd) {
|
||||
} else if (targetOrd < minArcOutput.startOrd()) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
// System.out.println(" found!!");
|
||||
|
@ -1282,10 +1284,10 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
// this arc:
|
||||
final Output minArcOutput =
|
||||
OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
long endOrd = Long.MAX_VALUE - minArcOutput.endOrd;
|
||||
long endOrd = Long.MAX_VALUE - minArcOutput.endOrd();
|
||||
// System.out.println(" endOrd=" + endOrd + " targetOrd=" + targetOrd);
|
||||
|
||||
if (targetOrd >= minArcOutput.startOrd && targetOrd <= endOrd) {
|
||||
if (targetOrd >= minArcOutput.startOrd() && targetOrd <= endOrd) {
|
||||
// Recurse on this arc:
|
||||
output = minArcOutput;
|
||||
result.setIntAt(upto++, arc.label());
|
||||
|
|
|
@ -71,8 +71,8 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
this.pointCount = pointCount;
|
||||
this.docCount = docCount;
|
||||
this.version = SimpleTextBKDWriter.VERSION_CURRENT;
|
||||
assert minPackedValue.length == config.packedIndexBytesLength;
|
||||
assert maxPackedValue.length == config.packedIndexBytesLength;
|
||||
assert minPackedValue.length == config.packedIndexBytesLength();
|
||||
assert maxPackedValue.length == config.packedIndexBytesLength();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -99,8 +99,8 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
private SimpleTextPointTree(
|
||||
IndexInput in, int nodeID, int level, byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
this.in = in;
|
||||
this.scratchDocIDs = new int[config.maxPointsInLeafNode];
|
||||
this.scratchPackedValue = new byte[config.packedBytesLength];
|
||||
this.scratchDocIDs = new int[config.maxPointsInLeafNode()];
|
||||
this.scratchPackedValue = new byte[config.packedBytesLength()];
|
||||
this.nodeID = nodeID;
|
||||
this.rootNode = nodeID;
|
||||
this.level = level;
|
||||
|
@ -145,38 +145,39 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
private void pushLeft() {
|
||||
int address = nodeID * bytesPerIndexEntry;
|
||||
// final int splitDimPos;
|
||||
if (config.numIndexDims == 1) {
|
||||
if (config.numIndexDims() == 1) {
|
||||
splitDims[level] = 0;
|
||||
} else {
|
||||
splitDims[level] = (splitPackedValues[address++] & 0xff);
|
||||
}
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim;
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim();
|
||||
if (splitDimValueStack[level] == null) {
|
||||
splitDimValueStack[level] = new byte[config.bytesPerDim];
|
||||
splitDimValueStack[level] = new byte[config.bytesPerDim()];
|
||||
}
|
||||
// save the dimension we are going to change
|
||||
System.arraycopy(
|
||||
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
|
||||
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
|
||||
assert Arrays.compareUnsigned(
|
||||
maxPackedValue,
|
||||
splitDimPos,
|
||||
splitDimPos + config.bytesPerDim,
|
||||
splitDimPos + config.bytesPerDim(),
|
||||
splitPackedValues,
|
||||
address,
|
||||
address + config.bytesPerDim)
|
||||
address + config.bytesPerDim())
|
||||
>= 0
|
||||
: "config.bytesPerDim="
|
||||
+ config.bytesPerDim
|
||||
: "config.bytesPerDim()="
|
||||
+ config.bytesPerDim()
|
||||
+ " splitDim="
|
||||
+ splitDims[level]
|
||||
+ " config.numIndexDims="
|
||||
+ config.numIndexDims
|
||||
+ " config.numIndexDims()="
|
||||
+ config.numIndexDims()
|
||||
+ " config.numDims="
|
||||
+ config.numDims;
|
||||
+ config.numDims();
|
||||
nodeID *= 2;
|
||||
level++;
|
||||
// add the split dim value:
|
||||
System.arraycopy(splitPackedValues, address, maxPackedValue, splitDimPos, config.bytesPerDim);
|
||||
System.arraycopy(
|
||||
splitPackedValues, address, maxPackedValue, splitDimPos, config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -191,37 +192,38 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
|
||||
private void pushRight() {
|
||||
int address = nodeID * bytesPerIndexEntry;
|
||||
if (config.numIndexDims == 1) {
|
||||
if (config.numIndexDims() == 1) {
|
||||
splitDims[level] = 0;
|
||||
} else {
|
||||
splitDims[level] = (splitPackedValues[address++] & 0xff);
|
||||
}
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim;
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim();
|
||||
// we should have already visit the left node
|
||||
assert splitDimValueStack[level] != null;
|
||||
// save the dimension we are going to change
|
||||
System.arraycopy(
|
||||
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
|
||||
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
|
||||
assert Arrays.compareUnsigned(
|
||||
minPackedValue,
|
||||
splitDimPos,
|
||||
splitDimPos + config.bytesPerDim,
|
||||
splitDimPos + config.bytesPerDim(),
|
||||
splitPackedValues,
|
||||
address,
|
||||
address + config.bytesPerDim)
|
||||
address + config.bytesPerDim())
|
||||
<= 0
|
||||
: "config.bytesPerDim="
|
||||
+ config.bytesPerDim
|
||||
: "config.bytesPerDim()="
|
||||
+ config.bytesPerDim()
|
||||
+ " splitDim="
|
||||
+ splitDims[level]
|
||||
+ " config.numIndexDims="
|
||||
+ config.numIndexDims
|
||||
+ " config.numIndexDims()="
|
||||
+ config.numIndexDims()
|
||||
+ " config.numDims="
|
||||
+ config.numDims;
|
||||
+ config.numDims();
|
||||
nodeID = 2 * nodeID + 1;
|
||||
level++;
|
||||
// add the split dim value:
|
||||
System.arraycopy(splitPackedValues, address, minPackedValue, splitDimPos, config.bytesPerDim);
|
||||
System.arraycopy(
|
||||
splitPackedValues, address, minPackedValue, splitDimPos, config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -242,16 +244,16 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
splitDimValueStack[level],
|
||||
0,
|
||||
maxPackedValue,
|
||||
splitDims[level] * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDims[level] * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
} else {
|
||||
|
||||
System.arraycopy(
|
||||
splitDimValueStack[level],
|
||||
0,
|
||||
minPackedValue,
|
||||
splitDims[level] * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDims[level] * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -290,7 +292,7 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
private long sizeFromBalancedTree(int leftMostLeafNode, int rightMostLeafNode) {
|
||||
// number of points that need to be distributed between leaves, one per leaf
|
||||
final int extraPoints =
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode * leafNodeOffset) - pointCount);
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode() * leafNodeOffset) - pointCount);
|
||||
assert extraPoints < leafNodeOffset : "point excess should be lower than leafNodeOffset";
|
||||
// offset where we stop adding one point to the leaves
|
||||
final int nodeOffset = leafNodeOffset - extraPoints;
|
||||
|
@ -298,9 +300,9 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
for (int node = leftMostLeafNode; node <= rightMostLeafNode; node++) {
|
||||
// offsetPosition provides which extra point will be added to this node
|
||||
if (balanceTreeNodePosition(0, leafNodeOffset, node - leafNodeOffset, 0, 0) < nodeOffset) {
|
||||
count += config.maxPointsInLeafNode;
|
||||
count += config.maxPointsInLeafNode();
|
||||
} else {
|
||||
count += config.maxPointsInLeafNode - 1;
|
||||
count += config.maxPointsInLeafNode() - 1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
|
@ -376,14 +378,14 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
// Again, this time reading values and checking with the visitor
|
||||
visitor.grow(count);
|
||||
// NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
|
||||
assert scratchPackedValue.length == config.packedBytesLength;
|
||||
assert scratchPackedValue.length == config.packedBytesLength();
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
for (int i = 0; i < count; i++) {
|
||||
readLine(in, scratch);
|
||||
assert startsWith(scratch, BLOCK_VALUE);
|
||||
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(scratch, BLOCK_VALUE));
|
||||
assert br.length == config.packedBytesLength;
|
||||
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, config.packedBytesLength);
|
||||
assert br.length == config.packedBytesLength();
|
||||
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, config.packedBytesLength());
|
||||
visitor.visit(scratchDocIDs[i], scratchPackedValue);
|
||||
}
|
||||
} else {
|
||||
|
@ -443,17 +445,17 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
|
||||
@Override
|
||||
public int getNumDimensions() throws IOException {
|
||||
return config.numDims;
|
||||
return config.numDims();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumIndexDimensions() throws IOException {
|
||||
return config.numIndexDims;
|
||||
return config.numIndexDims();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension() throws IOException {
|
||||
return config.bytesPerDim;
|
||||
return config.bytesPerDim();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -144,28 +144,28 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
this.maxDoc = maxDoc;
|
||||
docsSeen = new FixedBitSet(maxDoc);
|
||||
|
||||
scratchDiff = new byte[config.bytesPerDim];
|
||||
scratch1 = new byte[config.packedBytesLength];
|
||||
scratch2 = new byte[config.packedBytesLength];
|
||||
commonPrefixLengths = new int[config.numDims];
|
||||
scratchDiff = new byte[config.bytesPerDim()];
|
||||
scratch1 = new byte[config.packedBytesLength()];
|
||||
scratch2 = new byte[config.packedBytesLength()];
|
||||
commonPrefixLengths = new int[config.numDims()];
|
||||
|
||||
minPackedValue = new byte[config.packedIndexBytesLength];
|
||||
maxPackedValue = new byte[config.packedIndexBytesLength];
|
||||
minPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
maxPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
|
||||
// Maximum number of points we hold in memory at any time
|
||||
maxPointsSortInHeap =
|
||||
(int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc * config.numDims));
|
||||
(int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc() * config.numDims()));
|
||||
|
||||
// Finally, we must be able to hold at least the leaf node in heap during build:
|
||||
if (maxPointsSortInHeap < config.maxPointsInLeafNode) {
|
||||
if (maxPointsSortInHeap < config.maxPointsInLeafNode()) {
|
||||
throw new IllegalArgumentException(
|
||||
"maxMBSortInHeap="
|
||||
+ maxMBSortInHeap
|
||||
+ " only allows for maxPointsSortInHeap="
|
||||
+ maxPointsSortInHeap
|
||||
+ ", but this is less than config.maxPointsInLeafNode="
|
||||
+ config.maxPointsInLeafNode
|
||||
+ "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode");
|
||||
+ ", but this is less than config.maxPointsInLeafNode()="
|
||||
+ config.maxPointsInLeafNode()
|
||||
+ "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode()");
|
||||
}
|
||||
|
||||
this.maxMBSortInHeap = maxMBSortInHeap;
|
||||
|
@ -183,10 +183,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
public void add(byte[] packedValue, int docID) throws IOException {
|
||||
if (packedValue.length != config.packedBytesLength) {
|
||||
if (packedValue.length != config.packedBytesLength()) {
|
||||
throw new IllegalArgumentException(
|
||||
"packedValue should be length="
|
||||
+ config.packedBytesLength
|
||||
+ config.packedBytesLength()
|
||||
+ " (got: "
|
||||
+ packedValue.length
|
||||
+ ")");
|
||||
|
@ -209,30 +209,30 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
} else {
|
||||
pointWriter = new HeapPointWriter(config, Math.toIntExact(totalPointCount));
|
||||
}
|
||||
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength());
|
||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
} else {
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim,
|
||||
offset + config.bytesPerDim(),
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
< 0) {
|
||||
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim);
|
||||
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim());
|
||||
}
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim,
|
||||
offset + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim);
|
||||
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -254,7 +254,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
*/
|
||||
public long writeField(IndexOutput out, String fieldName, MutablePointTree reader)
|
||||
throws IOException {
|
||||
if (config.numIndexDims == 1) {
|
||||
if (config.numIndexDims() == 1) {
|
||||
return writeField1Dim(out, fieldName, reader);
|
||||
} else {
|
||||
return writeFieldNDims(out, fieldName, reader);
|
||||
|
@ -280,7 +280,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
long countPerLeaf = pointCount = values.size();
|
||||
long innerNodeCount = 1;
|
||||
|
||||
while (countPerLeaf > config.maxPointsInLeafNode) {
|
||||
while (countPerLeaf > config.maxPointsInLeafNode()) {
|
||||
countPerLeaf = (countPerLeaf + 1) / 2;
|
||||
innerNodeCount *= 2;
|
||||
}
|
||||
|
@ -289,7 +289,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
checkMaxLeafNodeCount(numLeaves);
|
||||
|
||||
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim + 1)];
|
||||
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim() + 1)];
|
||||
final long[] leafBlockFPs = new long[numLeaves];
|
||||
|
||||
// compute the min/max for this slice
|
||||
|
@ -297,37 +297,37 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
Arrays.fill(maxPackedValue, (byte) 0);
|
||||
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
|
||||
values.getValue(i, scratchBytesRef1);
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
if (Arrays.compareUnsigned(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim(),
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
< 0) {
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
minPackedValue,
|
||||
offset,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
}
|
||||
if (Arrays.compareUnsigned(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
maxPackedValue,
|
||||
offset,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -345,7 +345,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
maxPackedValue,
|
||||
splitPackedValues,
|
||||
leafBlockFPs,
|
||||
new int[config.maxPointsInLeafNode]);
|
||||
new int[config.maxPointsInLeafNode()]);
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues, Math.toIntExact(countPerLeaf));
|
||||
|
@ -387,15 +387,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final IndexOutput out;
|
||||
final List<Long> leafBlockFPs = new ArrayList<>();
|
||||
final List<byte[]> leafBlockStartValues = new ArrayList<>();
|
||||
final byte[] leafValues = new byte[config.maxPointsInLeafNode * config.packedBytesLength];
|
||||
final int[] leafDocs = new int[config.maxPointsInLeafNode];
|
||||
final byte[] leafValues = new byte[config.maxPointsInLeafNode() * config.packedBytesLength()];
|
||||
final int[] leafDocs = new int[config.maxPointsInLeafNode()];
|
||||
long valueCount;
|
||||
int leafCount;
|
||||
|
||||
OneDimensionBKDWriter(IndexOutput out) {
|
||||
if (config.numIndexDims != 1) {
|
||||
if (config.numIndexDims() != 1) {
|
||||
throw new UnsupportedOperationException(
|
||||
"config.numIndexDims must be 1 but got " + config.numIndexDims);
|
||||
"config.numIndexDims() must be 1 but got " + config.numIndexDims());
|
||||
}
|
||||
if (pointCount != 0) {
|
||||
throw new IllegalStateException("cannot mix add and merge");
|
||||
|
@ -411,7 +411,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
this.out = out;
|
||||
|
||||
lastPackedValue = new byte[config.packedBytesLength];
|
||||
lastPackedValue = new byte[config.packedBytesLength()];
|
||||
}
|
||||
|
||||
// for asserts
|
||||
|
@ -426,8 +426,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
packedValue,
|
||||
0,
|
||||
leafValues,
|
||||
leafCount * config.packedBytesLength,
|
||||
config.packedBytesLength);
|
||||
leafCount * config.packedBytesLength(),
|
||||
config.packedBytesLength());
|
||||
leafDocs[leafCount] = docID;
|
||||
docsSeen.set(docID);
|
||||
leafCount++;
|
||||
|
@ -441,7 +441,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
+ " values");
|
||||
}
|
||||
|
||||
if (leafCount == config.maxPointsInLeafNode) {
|
||||
if (leafCount == config.maxPointsInLeafNode()) {
|
||||
// We write a block once we hit exactly the max count ... this is different from
|
||||
// when we flush a new segment, where we write between max/2 and max per leaf block,
|
||||
// so merged segments will behave differently from newly flushed segments:
|
||||
|
@ -471,43 +471,44 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
// System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts="
|
||||
// + leafBlockStartValues.size());
|
||||
|
||||
byte[] index = new byte[(1 + numInnerNodes) * (1 + config.bytesPerDim)];
|
||||
byte[] index = new byte[(1 + numInnerNodes) * (1 + config.bytesPerDim())];
|
||||
rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
|
||||
long[] arr = new long[leafBlockFPs.size()];
|
||||
for (int i = 0; i < leafBlockFPs.size(); i++) {
|
||||
arr[i] = leafBlockFPs.get(i);
|
||||
}
|
||||
writeIndex(out, arr, index, config.maxPointsInLeafNode);
|
||||
writeIndex(out, arr, index, config.maxPointsInLeafNode());
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
private void writeLeafBlock() throws IOException {
|
||||
assert leafCount != 0;
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength());
|
||||
}
|
||||
System.arraycopy(
|
||||
leafValues,
|
||||
(leafCount - 1) * config.packedBytesLength,
|
||||
(leafCount - 1) * config.packedBytesLength(),
|
||||
maxPackedValue,
|
||||
0,
|
||||
config.packedIndexBytesLength);
|
||||
config.packedIndexBytesLength());
|
||||
|
||||
valueCount += leafCount;
|
||||
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
// Save the first (minimum) value in each leaf block except the first, to build the split
|
||||
// value index in the end:
|
||||
leafBlockStartValues.add(ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength));
|
||||
leafBlockStartValues.add(
|
||||
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength()));
|
||||
}
|
||||
leafBlockFPs.add(out.getFilePointer());
|
||||
checkMaxLeafNodeCount(leafBlockFPs.size());
|
||||
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
|
||||
// Find per-dim common prefix:
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
int offset1 = dim * config.bytesPerDim;
|
||||
int offset2 = (leafCount - 1) * config.packedBytesLength + offset1;
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int offset1 = dim * config.bytesPerDim();
|
||||
int offset2 = (leafCount - 1) * config.packedBytesLength() + offset1;
|
||||
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
|
||||
if (leafValues[offset1 + j] != leafValues[offset2 + j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
|
@ -523,24 +524,24 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = config.packedBytesLength;
|
||||
scratch.length = config.packedBytesLength();
|
||||
scratch.bytes = leafValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
scratch.offset = config.packedBytesLength * i;
|
||||
scratch.offset = config.packedBytesLength() * i;
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(
|
||||
leafCount,
|
||||
0,
|
||||
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength),
|
||||
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength()),
|
||||
ArrayUtil.copyOfSubArray(
|
||||
leafValues,
|
||||
(leafCount - 1) * config.packedBytesLength,
|
||||
leafCount * config.packedBytesLength),
|
||||
(leafCount - 1) * config.packedBytesLength(),
|
||||
leafCount * config.packedBytesLength()),
|
||||
packedValues,
|
||||
leafDocs,
|
||||
0);
|
||||
|
@ -552,7 +553,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
private void rotateToTree(
|
||||
int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
|
||||
// System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + "
|
||||
// bpd=" + config.bytesPerDim + " index.length=" + index.length);
|
||||
// bpd=" + config.bytesPerDim() + " index.length=" + index.length);
|
||||
if (count == 1) {
|
||||
// Leaf index node
|
||||
// System.out.println(" leaf index node");
|
||||
|
@ -561,8 +562,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
leafBlockStartValues.get(offset),
|
||||
0,
|
||||
index,
|
||||
nodeID * (1 + config.bytesPerDim) + 1,
|
||||
config.bytesPerDim);
|
||||
nodeID * (1 + config.bytesPerDim()) + 1,
|
||||
config.bytesPerDim());
|
||||
} else if (count > 1) {
|
||||
// Internal index node: binary partition of count
|
||||
int countAtLevel = 1;
|
||||
|
@ -587,8 +588,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
leafBlockStartValues.get(rootOffset),
|
||||
0,
|
||||
index,
|
||||
nodeID * (1 + config.bytesPerDim) + 1,
|
||||
config.bytesPerDim);
|
||||
nodeID * (1 + config.bytesPerDim()) + 1,
|
||||
config.bytesPerDim());
|
||||
// System.out.println(" index[" + nodeID + "] = blockStartValues[" + rootOffset + "]");
|
||||
|
||||
// TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
|
||||
|
@ -611,10 +612,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
private void checkMaxLeafNodeCount(int numLeaves) {
|
||||
if ((1 + config.bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
|
||||
if ((1 + config.bytesPerDim()) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
|
||||
throw new IllegalStateException(
|
||||
"too many nodes; increase config.maxPointsInLeafNode (currently "
|
||||
+ config.maxPointsInLeafNode
|
||||
"too many nodes; increase config.maxPointsInLeafNode() (currently "
|
||||
+ config.maxPointsInLeafNode()
|
||||
+ ") and reindex");
|
||||
}
|
||||
}
|
||||
|
@ -652,7 +653,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
long countPerLeaf = pointCount;
|
||||
long innerNodeCount = 1;
|
||||
|
||||
while (countPerLeaf > config.maxPointsInLeafNode) {
|
||||
while (countPerLeaf > config.maxPointsInLeafNode()) {
|
||||
countPerLeaf = (countPerLeaf + 1) / 2;
|
||||
innerNodeCount *= 2;
|
||||
}
|
||||
|
@ -667,20 +668,20 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
// Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each
|
||||
// recursion says which dim we split on.
|
||||
byte[] splitPackedValues = new byte[Math.multiplyExact(numLeaves, 1 + config.bytesPerDim)];
|
||||
byte[] splitPackedValues = new byte[Math.multiplyExact(numLeaves, 1 + config.bytesPerDim())];
|
||||
|
||||
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g.
|
||||
// 7)
|
||||
long[] leafBlockFPs = new long[numLeaves];
|
||||
|
||||
// Make sure the math above "worked":
|
||||
assert pointCount / numLeaves <= config.maxPointsInLeafNode
|
||||
assert pointCount / numLeaves <= config.maxPointsInLeafNode()
|
||||
: "pointCount="
|
||||
+ pointCount
|
||||
+ " numLeaves="
|
||||
+ numLeaves
|
||||
+ " config.maxPointsInLeafNode="
|
||||
+ config.maxPointsInLeafNode;
|
||||
+ " config.maxPointsInLeafNode()="
|
||||
+ config.maxPointsInLeafNode();
|
||||
|
||||
// We re-use the selector so we do not need to create an object every time.
|
||||
BKDRadixSelector radixSelector =
|
||||
|
@ -699,7 +700,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
maxPackedValue,
|
||||
splitPackedValues,
|
||||
leafBlockFPs,
|
||||
new int[config.maxPointsInLeafNode]);
|
||||
new int[config.maxPointsInLeafNode()]);
|
||||
|
||||
// If no exception, we should have cleaned everything up:
|
||||
assert tempDir.getCreatedFiles().isEmpty();
|
||||
|
@ -724,15 +725,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode)
|
||||
throws IOException {
|
||||
write(out, NUM_DATA_DIMS);
|
||||
writeInt(out, config.numDims);
|
||||
writeInt(out, config.numDims());
|
||||
newline(out);
|
||||
|
||||
write(out, NUM_INDEX_DIMS);
|
||||
writeInt(out, config.numIndexDims);
|
||||
writeInt(out, config.numIndexDims());
|
||||
newline(out);
|
||||
|
||||
write(out, BYTES_PER_DIM);
|
||||
writeInt(out, config.bytesPerDim);
|
||||
writeInt(out, config.bytesPerDim());
|
||||
newline(out);
|
||||
|
||||
write(out, MAX_LEAF_POINTS);
|
||||
|
@ -767,8 +768,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
newline(out);
|
||||
}
|
||||
|
||||
assert (splitPackedValues.length % (1 + config.bytesPerDim)) == 0;
|
||||
int count = splitPackedValues.length / (1 + config.bytesPerDim);
|
||||
assert (splitPackedValues.length % (1 + config.bytesPerDim())) == 0;
|
||||
int count = splitPackedValues.length / (1 + config.bytesPerDim());
|
||||
assert count == leafBlockFPs.length;
|
||||
|
||||
write(out, SPLIT_COUNT);
|
||||
|
@ -777,10 +778,12 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
for (int i = 0; i < count; i++) {
|
||||
write(out, SPLIT_DIM);
|
||||
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim)] & 0xff);
|
||||
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim())] & 0xff);
|
||||
newline(out);
|
||||
write(out, SPLIT_VALUE);
|
||||
br = new BytesRef(splitPackedValues, 1 + (i * (1 + config.bytesPerDim)), config.bytesPerDim);
|
||||
br =
|
||||
new BytesRef(
|
||||
splitPackedValues, 1 + (i * (1 + config.bytesPerDim())), config.bytesPerDim());
|
||||
write(out, br.toString());
|
||||
newline(out);
|
||||
}
|
||||
|
@ -852,25 +855,25 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
/** Called only in assert */
|
||||
private boolean valueInBounds(
|
||||
BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
int offset = config.bytesPerDim * dim;
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int offset = config.bytesPerDim() * dim;
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim,
|
||||
packedValue.offset + offset + config.bytesPerDim(),
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
< 0) {
|
||||
return false;
|
||||
}
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim,
|
||||
packedValue.offset + offset + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
> 0) {
|
||||
return false;
|
||||
}
|
||||
|
@ -882,13 +885,13 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
// Find which dim has the largest span so we can split on it:
|
||||
int splitDim = -1;
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
NumericUtils.subtract(config.bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
NumericUtils.subtract(config.bytesPerDim(), dim, maxPackedValue, minPackedValue, scratchDiff);
|
||||
if (splitDim == -1
|
||||
|| Arrays.compareUnsigned(
|
||||
scratchDiff, 0, config.bytesPerDim, scratch1, 0, config.bytesPerDim)
|
||||
scratchDiff, 0, config.bytesPerDim(), scratch1, 0, config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim);
|
||||
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim());
|
||||
splitDim = dim;
|
||||
}
|
||||
}
|
||||
|
@ -931,15 +934,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
if (nodeID >= leafNodeOffset) {
|
||||
// leaf node
|
||||
final int count = to - from;
|
||||
assert count <= config.maxPointsInLeafNode;
|
||||
assert count <= config.maxPointsInLeafNode();
|
||||
|
||||
// Compute common prefixes
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
reader.getValue(i, scratchBytesRef2);
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
final int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
final int offset = dim * config.bytesPerDim();
|
||||
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
|
||||
if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j]
|
||||
!= scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
|
||||
|
@ -951,23 +954,23 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim) {
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
|
||||
usedBytes[dim] = new FixedBitSet(256);
|
||||
}
|
||||
}
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
if (usedBytes[dim] != null) {
|
||||
byte b = reader.getByteAt(i, dim * config.bytesPerDim + commonPrefixLengths[dim]);
|
||||
byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
|
||||
usedBytes[dim].set(Byte.toUnsignedInt(b));
|
||||
}
|
||||
}
|
||||
}
|
||||
int sortedDim = 0;
|
||||
int sortedDimCardinality = Integer.MAX_VALUE;
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
if (usedBytes[dim] != null) {
|
||||
final int cardinality = usedBytes[dim].cardinality();
|
||||
if (cardinality < sortedDimCardinality) {
|
||||
|
@ -1001,7 +1004,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
// Write the common prefixes:
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength);
|
||||
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength());
|
||||
|
||||
// Write the full values:
|
||||
IntFunction<BytesRef> packedValues =
|
||||
|
@ -1023,10 +1026,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final int splitDim = split(minPackedValue, maxPackedValue);
|
||||
final int mid = (from + to + 1) >>> 1;
|
||||
|
||||
int commonPrefixLen = config.bytesPerDim;
|
||||
for (int i = 0; i < config.bytesPerDim; ++i) {
|
||||
if (minPackedValue[splitDim * config.bytesPerDim + i]
|
||||
!= maxPackedValue[splitDim * config.bytesPerDim + i]) {
|
||||
int commonPrefixLen = config.bytesPerDim();
|
||||
for (int i = 0; i < config.bytesPerDim(); ++i) {
|
||||
if (minPackedValue[splitDim * config.bytesPerDim() + i]
|
||||
!= maxPackedValue[splitDim * config.bytesPerDim() + i]) {
|
||||
commonPrefixLen = i;
|
||||
break;
|
||||
}
|
||||
|
@ -1044,32 +1047,32 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
scratchBytesRef2);
|
||||
|
||||
// set the split value
|
||||
final int address = nodeID * (1 + config.bytesPerDim);
|
||||
final int address = nodeID * (1 + config.bytesPerDim());
|
||||
splitPackedValues[address] = (byte) splitDim;
|
||||
reader.getValue(mid, scratchBytesRef1);
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
|
||||
splitPackedValues,
|
||||
address + 1,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
|
||||
byte[] minSplitPackedValue =
|
||||
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength);
|
||||
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
|
||||
byte[] maxSplitPackedValue =
|
||||
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
|
||||
minSplitPackedValue,
|
||||
splitDim * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
|
||||
maxSplitPackedValue,
|
||||
splitDim * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
|
||||
// recurse
|
||||
build(
|
||||
|
@ -1121,33 +1124,33 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more
|
||||
// efficient
|
||||
HeapPointWriter heapSource;
|
||||
if (points.writer instanceof HeapPointWriter == false) {
|
||||
if (points.writer() instanceof HeapPointWriter == false) {
|
||||
// Adversarial cases can cause this, e.g. merging big segments with most of the points
|
||||
// deleted
|
||||
heapSource = switchToHeap(points.writer);
|
||||
heapSource = switchToHeap(points.writer());
|
||||
} else {
|
||||
heapSource = (HeapPointWriter) points.writer;
|
||||
heapSource = (HeapPointWriter) points.writer();
|
||||
}
|
||||
|
||||
int from = Math.toIntExact(points.start);
|
||||
int to = Math.toIntExact(points.start + points.count);
|
||||
int from = Math.toIntExact(points.start());
|
||||
int to = Math.toIntExact(points.start() + points.count());
|
||||
|
||||
// we store common prefix on scratch1
|
||||
computeCommonPrefixLength(heapSource, scratch1);
|
||||
|
||||
int sortedDim = 0;
|
||||
int sortedDimCardinality = Integer.MAX_VALUE;
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim) {
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
|
||||
usedBytes[dim] = new FixedBitSet(256);
|
||||
}
|
||||
}
|
||||
// Find the dimension to compress
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
if (prefix < config.bytesPerDim) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
if (prefix < config.bytesPerDim()) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
for (int i = 0; i < heapSource.count(); ++i) {
|
||||
PointValue value = heapSource.getPackedValueSlice(i);
|
||||
BytesRef packedValue = value.packedValue();
|
||||
|
@ -1190,7 +1193,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = config.packedBytesLength;
|
||||
scratch.length = config.packedBytesLength();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -1207,7 +1210,7 @@ final class SimpleTextBKDWriter implements Closeable {
     // Inner node: partition/recurse

     int splitDim;
-    if (config.numIndexDims > 1) {
+    if (config.numIndexDims() > 1) {
       splitDim = split(minPackedValue, maxPackedValue);
     } else {
       splitDim = 0;
@@ -1217,19 +1220,19 @@ final class SimpleTextBKDWriter implements Closeable {
         : "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;

     // How many points will be in the left tree:
-    long rightCount = points.count / 2;
-    long leftCount = points.count - rightCount;
+    long rightCount = points.count() / 2;
+    long leftCount = points.count() - rightCount;

     int commonPrefixLen =
         Arrays.mismatch(
             minPackedValue,
-            splitDim * config.bytesPerDim,
-            splitDim * config.bytesPerDim + config.bytesPerDim,
+            splitDim * config.bytesPerDim(),
+            splitDim * config.bytesPerDim() + config.bytesPerDim(),
             maxPackedValue,
-            splitDim * config.bytesPerDim,
-            splitDim * config.bytesPerDim + config.bytesPerDim);
+            splitDim * config.bytesPerDim(),
+            splitDim * config.bytesPerDim() + config.bytesPerDim());
     if (commonPrefixLen == -1) {
-      commonPrefixLen = config.bytesPerDim;
+      commonPrefixLen = config.bytesPerDim();
     }

     BKDRadixSelector.PathSlice[] pathSlices = new BKDRadixSelector.PathSlice[2];
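A note on the `commonPrefixLen == -1` branch above: `Arrays.mismatch` over two byte ranges returns the index of the first differing byte relative to the start of the ranges, or -1 when the ranges are fully equal, so -1 here means the minimum and maximum values share the entire `bytesPerDim()`-wide prefix in the split dimension. A minimal, self-contained illustration (the array contents are made up for the example):

    import java.util.Arrays;

    public class MismatchDemo {
      public static void main(String[] args) {
        byte[] min = {1, 2, 3, 4};
        byte[] max = {1, 2, 9, 9};

        // First differing index within the compared ranges: prints 2.
        System.out.println(Arrays.mismatch(min, 0, 4, max, 0, 4));

        // Fully equal ranges yield -1, which the writer maps back to bytesPerDim().
        System.out.println(Arrays.mismatch(min, 0, 2, max, 0, 2));
      }
    }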
@@ -1238,26 +1241,34 @@ final class SimpleTextBKDWriter implements Closeable {
     radixSelector.select(
         points,
         pathSlices,
-        points.start,
-        points.start + points.count,
-        points.start + leftCount,
+        points.start(),
+        points.start() + points.count(),
+        points.start() + leftCount,
         splitDim,
         commonPrefixLen);

-    int address = nodeID * (1 + config.bytesPerDim);
+    int address = nodeID * (1 + config.bytesPerDim());
     splitPackedValues[address] = (byte) splitDim;
-    System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim);
+    System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim());

-    byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength];
-    System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength);
+    byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength()];
+    System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength());

-    byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength];
-    System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength);
+    byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength()];
+    System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength());

     System.arraycopy(
-        splitValue, 0, minSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
+        splitValue,
+        0,
+        minSplitPackedValue,
+        splitDim * config.bytesPerDim(),
+        config.bytesPerDim());
     System.arraycopy(
-        splitValue, 0, maxSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
+        splitValue,
+        0,
+        maxSplitPackedValue,
+        splitDim * config.bytesPerDim(),
+        config.bytesPerDim());

     // Recurse on left tree:
     build(
@@ -1289,30 +1300,30 @@ final class SimpleTextBKDWriter implements Closeable {
   }

   private void computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix) {
-    Arrays.fill(commonPrefixLengths, config.bytesPerDim);
+    Arrays.fill(commonPrefixLengths, config.bytesPerDim());
     PointValue value = heapPointWriter.getPackedValueSlice(0);
     BytesRef packedValue = value.packedValue();
-    for (int dim = 0; dim < config.numDims; dim++) {
+    for (int dim = 0; dim < config.numDims(); dim++) {
       System.arraycopy(
           packedValue.bytes,
-          packedValue.offset + dim * config.bytesPerDim,
+          packedValue.offset + dim * config.bytesPerDim(),
           commonPrefix,
-          dim * config.bytesPerDim,
-          config.bytesPerDim);
+          dim * config.bytesPerDim(),
+          config.bytesPerDim());
     }
     for (int i = 1; i < heapPointWriter.count(); i++) {
       value = heapPointWriter.getPackedValueSlice(i);
       packedValue = value.packedValue();
-      for (int dim = 0; dim < config.numDims; dim++) {
+      for (int dim = 0; dim < config.numDims(); dim++) {
         if (commonPrefixLengths[dim] != 0) {
           int j =
               Arrays.mismatch(
                   commonPrefix,
-                  dim * config.bytesPerDim,
-                  dim * config.bytesPerDim + commonPrefixLengths[dim],
+                  dim * config.bytesPerDim(),
+                  dim * config.bytesPerDim() + commonPrefixLengths[dim],
                   packedValue.bytes,
-                  packedValue.offset + dim * config.bytesPerDim,
-                  packedValue.offset + dim * config.bytesPerDim + commonPrefixLengths[dim]);
+                  packedValue.offset + dim * config.bytesPerDim(),
+                  packedValue.offset + dim * config.bytesPerDim() + commonPrefixLengths[dim]);
           if (j != -1) {
             commonPrefixLengths[dim] = j;
           }
@@ -1331,11 +1342,11 @@ final class SimpleTextBKDWriter implements Closeable {
       int[] docs,
       int docsOffset)
       throws IOException {
-    byte[] lastPackedValue = new byte[config.packedBytesLength];
+    byte[] lastPackedValue = new byte[config.packedBytesLength()];
     int lastDoc = -1;
     for (int i = 0; i < count; i++) {
       BytesRef packedValue = values.apply(i);
-      assert packedValue.length == config.packedBytesLength;
+      assert packedValue.length == config.packedBytesLength();
       assert valueInOrder(
           i,
           sortedDim,
@@ -1361,43 +1372,43 @@ final class SimpleTextBKDWriter implements Closeable {
       int packedValueOffset,
       int doc,
       int lastDoc) {
-    int dimOffset = sortedDim * config.bytesPerDim;
+    int dimOffset = sortedDim * config.bytesPerDim();
     if (ord > 0) {
       int cmp =
           Arrays.compareUnsigned(
               lastPackedValue,
               dimOffset,
-              dimOffset + config.bytesPerDim,
+              dimOffset + config.bytesPerDim(),
               packedValue,
               packedValueOffset + dimOffset,
-              packedValueOffset + dimOffset + config.bytesPerDim);
+              packedValueOffset + dimOffset + config.bytesPerDim());
       if (cmp > 0) {
         throw new AssertionError(
             "values out of order: last value="
                 + new BytesRef(lastPackedValue)
                 + " current value="
-                + new BytesRef(packedValue, packedValueOffset, config.packedBytesLength)
+                + new BytesRef(packedValue, packedValueOffset, config.packedBytesLength())
                 + " ord="
                 + ord
                 + " sortedDim="
                 + sortedDim);
       }
-      if (cmp == 0 && config.numDims > config.numIndexDims) {
-        int dataOffset = config.numIndexDims * config.bytesPerDim;
+      if (cmp == 0 && config.numDims() > config.numIndexDims()) {
+        int dataOffset = config.numIndexDims() * config.bytesPerDim();
         cmp =
             Arrays.compareUnsigned(
                 lastPackedValue,
                 dataOffset,
-                config.packedBytesLength,
+                config.packedBytesLength(),
                 packedValue,
                 packedValueOffset + dataOffset,
-                packedValueOffset + config.packedBytesLength);
+                packedValueOffset + config.packedBytesLength());
         if (cmp > 0) {
           throw new AssertionError(
               "data values out of order: last value="
                   + new BytesRef(lastPackedValue)
                   + " current value="
-                  + new BytesRef(packedValue, packedValueOffset, config.packedBytesLength)
+                  + new BytesRef(packedValue, packedValueOffset, config.packedBytesLength())
                   + " ord="
                   + ord);
         }
@@ -1414,7 +1425,8 @@ final class SimpleTextBKDWriter implements Closeable {
               + sortedDim);
       }
     }
-    System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength);
+    System.arraycopy(
+        packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength());
     return true;
   }

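The mechanical change running through the `SimpleTextBKDWriter` hunks above is that configuration and slice fields (`config.bytesPerDim`, `config.numDims`, `points.writer`, `points.start`, `points.count`, ...) are now read through accessor methods. That is the call-site pattern produced when a carrier of public final fields is replaced by a Java record; the sketch below uses hypothetical names and is only meant to show why every call site gains trailing parentheses, not to reproduce the actual `BKDConfig`/`PathSlice` declarations:

    // Before: a plain holder, read as holder.bytesPerDim (no parentheses).
    final class ConfigAsClass {
      public final int numDims;
      public final int bytesPerDim;

      ConfigAsClass(int numDims, int bytesPerDim) {
        this.numDims = numDims;
        this.bytesPerDim = bytesPerDim;
      }
    }

    // After: a record generates numDims() and bytesPerDim() accessors,
    // so callers switch from field reads to method calls.
    record ConfigAsRecord(int numDims, int bytesPerDim) {
      // Derived values can still be exposed as ordinary methods.
      int packedBytesLength() {
        return numDims * bytesPerDim;
      }
    }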
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -125,8 +126,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {

         SimpleTextUtil.readLine(input, scratch);
         assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
-        boolean docValueSkipper =
-            Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
+        DocValuesSkipIndexType docValueSkipper =
+            docValuesSkipIndexType(readString(DOCVALUES_SKIP_INDEX.length, scratch));

         SimpleTextUtil.readLine(input, scratch);
         assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
@@ -221,6 +222,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
     return DocValuesType.valueOf(dvType);
   }

+  public DocValuesSkipIndexType docValuesSkipIndexType(String dvSkipIndexType) {
+    return DocValuesSkipIndexType.valueOf(dvSkipIndexType);
+  }
+
   public VectorEncoding vectorEncoding(String vectorEncoding) {
     return VectorEncoding.valueOf(vectorEncoding);
   }
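The new `docValuesSkipIndexType(String)` hook follows the same shape as the neighbouring `docValuesType` and `vectorEncoding` helpers: the writer emits the enum constant's name and the reader restores it with `valueOf`. A small sketch of that round trip, using a placeholder enum rather than the real `DocValuesSkipIndexType` constants:

    enum SkipIndexKind { NONE, RANGE }

    class EnumRoundTrip {
      static String write(SkipIndexKind kind) {
        return kind.toString(); // e.g. "RANGE"
      }

      static SkipIndexKind read(String name) {
        // valueOf throws IllegalArgumentException for names it does not know.
        return SkipIndexKind.valueOf(name);
      }

      public static void main(String[] args) {
        System.out.println(read(write(SkipIndexKind.RANGE))); // RANGE
      }
    }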
@@ -268,7 +273,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
       SimpleTextUtil.writeNewline(out);

       SimpleTextUtil.write(out, STORETV);
-      SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
+      SimpleTextUtil.write(out, Boolean.toString(fi.hasTermVectors()), scratch);
       SimpleTextUtil.writeNewline(out);

       SimpleTextUtil.write(out, PAYLOADS);
@@ -284,7 +289,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
       SimpleTextUtil.writeNewline(out);

       SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
-      SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
+      SimpleTextUtil.write(out, getDocValuesSkipIndexType(fi.docValuesSkipIndexType()), scratch);
       SimpleTextUtil.writeNewline(out);

       SimpleTextUtil.write(out, DOCVALUES_GEN);
@@ -355,4 +360,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
   private static String getDocValuesType(DocValuesType type) {
     return type.toString();
   }
+
+  private static String getDocValuesSkipIndexType(DocValuesSkipIndexType type) {
+    return type.toString();
+  }
 }

@@ -144,14 +144,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
   }

   // read-only
-  static class SimpleTextBits implements Bits {
-    final BitSet bits;
-    final int size;
-
-    SimpleTextBits(BitSet bits, int size) {
-      this.bits = bits;
-      this.size = size;
-    }
+  record SimpleTextBits(BitSet bits, int size) implements Bits {

     @Override
     public boolean get(int index) {

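The record form of `SimpleTextBits` drops the explicit fields and constructor but keeps the `Bits` overrides: record components only give you `bits()` and `size()` accessors, so interface methods such as `get(int)` (and a length-style method, if the interface requires one) still need bodies. A rough sketch of the same shape against a stand-in interface, not the real `org.apache.lucene.util.Bits`:

    import java.util.BitSet;

    interface ReadOnlyBits {
      boolean get(int index);
      int length();
    }

    record TextBits(BitSet bits, int size) implements ReadOnlyBits {
      @Override
      public boolean get(int index) {
        return bits.get(index); // delegate to the backing BitSet
      }

      @Override
      public int length() {
        return size; // the record component doubles as the reported length
      }
    }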
@@ -25,13 +25,4 @@ import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
  *
  * @lucene.experimental
  */
-public class FieldMetadataTermState {
-
-  public final FieldMetadata fieldMetadata;
-  public final BlockTermState state;
-
-  public FieldMetadataTermState(FieldMetadata fieldMetadata, BlockTermState state) {
-    this.fieldMetadata = fieldMetadata;
-    this.state = state;
-  }
-}
+public record FieldMetadataTermState(FieldMetadata fieldMetadata, BlockTermState state) {}

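Collapsing `FieldMetadataTermState` into a one-line record removes the constructor and field boilerplate and also picks up generated `equals`, `hashCode`, and `toString`; construction is unchanged, while reads become accessor calls, which is exactly what the `STBlockLine` hunks below adjust. A tiny usage sketch with stand-in component types, not the Lucene ones:

    record MetadataAndState(String fieldMetadata, long state) {}

    class MetadataAndStateDemo {
      public static void main(String[] args) {
        MetadataAndState s = new MetadataAndState("title", 42L); // canonical constructor
        System.out.println(s.fieldMetadata()); // accessor call instead of a bare field read
        System.out.println(s); // generated toString: MetadataAndState[fieldMetadata=title, state=42]
      }
    }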
@@ -57,7 +57,7 @@ public class STBlockLine extends BlockLine {
    */
   public void collectFields(Collection<FieldMetadata> collector) {
     for (FieldMetadataTermState fieldTermState : termStates) {
-      collector.add(fieldTermState.fieldMetadata);
+      collector.add(fieldTermState.fieldMetadata());
     }
   }

@@ -82,13 +82,13 @@ public class STBlockLine extends BlockLine {
     assert size > 0 : "not valid block line with :" + size + " lines.";
     if (size == 1) {
       // When there is only 1 field, write its id as negative, followed by the field TermState.
-      int fieldID = line.termStates.get(0).fieldMetadata.getFieldInfo().number;
+      int fieldID = line.termStates.get(0).fieldMetadata().getFieldInfo().number;
       termStatesOutput.writeZInt(-fieldID);
       fieldMetadataTermState = line.termStates.get(0);
       encoder.writeTermState(
           termStatesOutput,
-          fieldMetadataTermState.fieldMetadata.getFieldInfo(),
-          fieldMetadataTermState.state);
+          fieldMetadataTermState.fieldMetadata().getFieldInfo(),
+          fieldMetadataTermState.state());
       return;
     }

@@ -96,15 +96,15 @@ public class STBlockLine extends BlockLine {
     // First iteration writes the fields ids.
     for (int i = 0; i < size; i++) {
       fieldMetadataTermState = line.termStates.get(i);
-      termStatesOutput.writeVInt(fieldMetadataTermState.fieldMetadata.getFieldInfo().number);
+      termStatesOutput.writeVInt(fieldMetadataTermState.fieldMetadata().getFieldInfo().number);
     }
     // Second iteration writes the corresponding field TermStates.
     for (int i = 0; i < size; i++) {
       fieldMetadataTermState = line.termStates.get(i);
       encoder.writeTermState(
           termStatesOutput,
-          fieldMetadataTermState.fieldMetadata.getFieldInfo(),
-          fieldMetadataTermState.state);
+          fieldMetadataTermState.fieldMetadata().getFieldInfo(),
+          fieldMetadataTermState.state());
     }
   }

Some files were not shown because too many files have changed in this diff.