Merge branch 'main' into optimize_prefix_query

zhouhui 2024-09-20 17:17:21 +08:00
commit 73b4ced245
718 changed files with 20858 additions and 7946 deletions

View File

@ -30,7 +30,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
java-version: [ '22' ]
java-version: [ '23-ea' ]
uses-alt-java: [ true, false ]
runs-on: ${{ matrix.os }}
@ -61,7 +61,16 @@ jobs:
# https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-environment-variable
echo "RUNTIME_JAVA_HOME=${{ env.ALT_JAVA_DIR }}" >> "$GITHUB_ENV"
- run: ./gradlew -p lucene/core check -x test
- name: ./gradlew tidy
run: |
./gradlew tidy
if [ ! -z "$(git status --porcelain)" ]; then
echo ":warning: **tidy left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
git status --porcelain >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
git reset --hard && git clean -xfd .
fi
- name: ./gradlew regenerate
run: |
@ -69,7 +78,7 @@ jobs:
sudo apt-get install libwww-perl
./gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
if [ ! -z "$(git status --porcelain)" ]; then
echo ":warning: **regenerateleft local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
echo ":warning: **regenerate left local checkout in modified state**" >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
git status --porcelain >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
@ -79,8 +88,7 @@ jobs:
- run: ./gradlew testOpts
- run: ./gradlew helpWorkflow
- run: ./gradlew licenses updateLicenses
- run: ./gradlew tidy
- run: ./gradlew check -x test
- run: ./gradlew check -x test -Pvalidation.git.failOnModified=false
- run: ./gradlew assembleRelease mavenToLocal
# Conserve resources: only run these in non-alt-java mode.

View File

@ -18,7 +18,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
java-version: [ '21', '22' ]
java-version: [ '21', '22', '23-ea' ]
runs-on: ${{ matrix.os }}
@ -72,3 +72,4 @@ jobs:
name: smoke-tester-logs-jdk-${{ matrix.java-version }}
path: |
${{ env.TMP_DIR }}/**/*.log
/tmp/release.log

View File

@ -41,7 +41,7 @@ import jdk.jfr.consumer.RecordingFile;
*/
public class ProfileResults {
/** Formats a frame to a formatted line. This is deduplicated on! */
static String frameToString(RecordedFrame frame, boolean lineNumbers) {
static String frameToString(RecordedFrame frame, boolean lineNumbers, boolean frameTypes) {
StringBuilder builder = new StringBuilder();
RecordedMethod method = frame.getMethod();
RecordedClass clazz = method.getType();
@ -55,13 +55,14 @@ public class ProfileResults {
builder.append("#");
builder.append(method.getName());
builder.append("()");
if (lineNumbers) {
if (lineNumbers && frame.getLineNumber() != -1) {
builder.append(":");
if (frame.getLineNumber() == -1) {
builder.append("(" + frame.getType() + " code)");
} else {
builder.append(frame.getLineNumber());
}
if (clazz != null && frameTypes) {
builder.append(" [");
builder.append(frame.getType());
builder.append(" code]");
}
return builder.toString();
}
@ -77,6 +78,8 @@ public class ProfileResults {
public static final String COUNT_DEFAULT = "10";
public static final String LINENUMBERS_KEY = "tests.profile.linenumbers";
public static final String LINENUMBERS_DEFAULT = "false";
public static final String FRAMETYPES_KEY = "tests.profile.frametypes";
public static final String FRAMETYPES_DEFAULT = "true";
/**
* Driver method, for testing standalone.
@ -92,7 +95,8 @@ public class ProfileResults {
System.getProperty(MODE_KEY, MODE_DEFAULT),
Integer.parseInt(System.getProperty(STACKSIZE_KEY, STACKSIZE_DEFAULT)),
Integer.parseInt(System.getProperty(COUNT_KEY, COUNT_DEFAULT)),
Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)));
Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)),
Boolean.parseBoolean(System.getProperty(FRAMETYPES_KEY, FRAMETYPES_DEFAULT)));
}
/** true if we care about this event */
@ -152,7 +156,12 @@ public class ProfileResults {
/** Process all the JFR files passed in args and print a merged summary. */
public static void printReport(
List<String> files, String mode, int stacksize, int count, boolean lineNumbers)
List<String> files,
String mode,
int stacksize,
int count,
boolean lineNumbers,
boolean frameTypes)
throws IOException {
if (!"cpu".equals(mode) && !"heap".equals(mode)) {
throw new IllegalArgumentException("tests.profile.mode must be one of (cpu,heap)");
@ -181,7 +190,7 @@ public class ProfileResults {
if (stack.length() > 0) {
stack.append("\n").append(framePadding).append(" at ");
}
stack.append(frameToString(trace.getFrames().get(i), lineNumbers));
stack.append(frameToString(trace.getFrames().get(i), lineNumbers, frameTypes));
}
String line = stack.toString();
SimpleEntry<String, Long> entry =
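A minimal sketch of the formatting rules introduced above, with plain values standing in for the JFR RecordedFrame data (the helper name is hypothetical): a line number is appended only when JFR actually recorded one, and the frame type goes into a trailing bracket controlled by the new tests.profile.frametypes property (default true).

    // Illustrative only -- mirrors the new frameToString() behavior with plain inputs.
    static String formatFrame(
        String qualifiedMethod, int lineNumber, String frameType,
        boolean lineNumbers, boolean frameTypes) {
      StringBuilder b = new StringBuilder(qualifiedMethod);
      if (lineNumbers && lineNumber != -1) {
        // only append ":NN" when a real line number was recorded
        b.append(":").append(lineNumber);
      }
      if (frameTypes && frameType != null) {
        // e.g. "org.example.Foo#bar() [JIT compiled code]"
        b.append(" [").append(frameType).append(" code]");
      }
      return b.toString();
    }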

View File

@ -60,8 +60,8 @@ public class WrapperDownloader {
public static void checkVersion() {
int major = Runtime.version().feature();
if (major != 21 && major != 22) {
throw new IllegalStateException("java version must be 21 or 22, your version: " + major);
if (major != 21 && major != 22 && major != 23) {
throw new IllegalStateException("java version must be 21, 22 or 23, your version: " + major);
}
}

View File

@ -80,6 +80,9 @@ ext {
// Minimum Java version required to compile and run Lucene.
minJavaVersion = JavaVersion.toVersion(deps.versions.minJava.get())
// also change this in extractor tool: ExtractForeignAPI
vectorIncubatorJavaVersions = [ JavaVersion.VERSION_21, JavaVersion.VERSION_22, JavaVersion.VERSION_23 ] as Set
// snapshot build marker used in scripts.
snapshotBuild = version.contains("SNAPSHOT")
@ -117,10 +120,6 @@ apply from: file('gradle/generation/local-settings.gradle')
// Make sure the build environment is consistent.
apply from: file('gradle/validation/check-environment.gradle')
// IDE support, settings and specials.
apply from: file('gradle/ide/intellij-idea.gradle')
apply from: file('gradle/ide/eclipse.gradle')
// Set up defaults and configure aspects for certain modules or functionality
// (java, tests)
apply from: file('gradle/java/folder-layout.gradle')
@ -133,6 +132,10 @@ apply from: file('gradle/testing/alternative-jdk-support.gradle')
apply from: file('gradle/java/jar-manifest.gradle')
apply from: file('gradle/java/modules.gradle')
// IDE support, settings and specials.
apply from: file('gradle/ide/intellij-idea.gradle')
apply from: file('gradle/ide/eclipse.gradle')
// Maven artifact publishing.
apply from: file('gradle/maven/publications.gradle')

View File

@ -112,8 +112,8 @@ def prepare(root, version, pause_before_sign, gpg_key_id, gpg_password, gpg_home
checkDOAPfiles(version)
if not dev_mode:
print(' ./gradlew --no-daemon clean check')
run('./gradlew --no-daemon clean check')
print(' ./gradlew --stacktrace --no-daemon clean check')
run('./gradlew --stacktrace --no-daemon clean check')
else:
print(' skipping precommit check due to dev-mode')
@ -121,7 +121,7 @@ def prepare(root, version, pause_before_sign, gpg_key_id, gpg_password, gpg_home
input("Tests complete! Please press ENTER to proceed to assembleRelease: ")
print(' prepare-release')
cmd = './gradlew --no-daemon assembleRelease' \
cmd = './gradlew --stacktrace --no-daemon assembleRelease' \
' -Dversion.release=%s' % version
if dev_mode:
cmd += ' -Pvalidation.git.failOnModified=false'

View File

@ -32,7 +32,7 @@ allprojects {
missingdoclet "org.apache.lucene.tools:missing-doclet"
}
ext {
project.ext {
relativeDocPath = project.path.replaceFirst(/:\w+:/, "").replace(':', '/')
}

View File

@ -17,13 +17,6 @@
def resources = scriptResources(buildscript)
configure(rootProject) {
ext {
// also change this in extractor tool: ExtractForeignAPI
vectorIncubatorJavaVersions = [ JavaVersion.VERSION_21, JavaVersion.VERSION_22 ] as Set
}
}
configure(project(":lucene:core")) {
ext {
apijars = layout.projectDirectory.dir("src/generated/jdk")

View File

@ -43,6 +43,31 @@ configure(project(":lucene:core")) {
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
mustRunBefore: [ "compileJava" ]
])
task generateForDeltaUtilInternal() {
description "Regenerate gen_ForDeltaUtil.py"
group "generation"
def genDir = file("src/java/org/apache/lucene/codecs/lucene912")
def genScript = file("${genDir}/gen_ForDeltaUtil.py")
def genOutput = file("${genDir}/ForDeltaUtil.java")
inputs.file genScript
outputs.file genOutput
doLast {
quietExec {
workingDir genDir
executable project.externalTool("python3")
args = [ '-B', genScript ]
}
}
}
regenerate.dependsOn wrapWithPersistentChecksums(generateForDeltaUtilInternal, [
andThenTasks: ["spotlessJava", "spotlessJavaApply"],
mustRunBefore: [ "compileJava" ]
])
}
configure(project(":lucene:backward-codecs")) {

View File

@ -65,10 +65,8 @@ configure(project(":lucene:analysis:icu")) {
icupkg = file("${icuBinDir}/icupkg")
}
// Resolve version lazily (can't resolve at configuration time).
def icu4jVersionProvider = project.provider { getVersion('com.ibm.icu', 'icu4j') }
// lazy gstring with ICU version.
def icu4jVersion = "${-> icu4jVersionProvider.get()}"
def icu4jVersion = deps.icu4j.get().version
def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"

View File

@ -33,7 +33,7 @@ configure(project(":lucene:analysis:kuromoji")) {
apply plugin: deps.plugins.undercouch.download.get().pluginId
plugins.withType(JavaPlugin) {
ext {
project.ext {
targetDir = file("src/resources")
}

View File

@ -33,7 +33,7 @@ configure(project(":lucene:analysis:nori")) {
apply plugin: deps.plugins.undercouch.download.get().pluginId
plugins.withType(JavaPlugin) {
ext {
project.ext {
targetDir = file("src/resources")
}

View File

@ -22,10 +22,11 @@ import org.gradle.plugins.ide.eclipse.model.ClasspathEntry
def resources = scriptResources(buildscript)
configure(rootProject) {
plugins.withType(JavaPlugin) {
apply plugin: "eclipse"
if (gradle.startParameter.taskNames.contains("eclipse")) {
project.pluginManager.apply("java-base")
project.pluginManager.apply("eclipse")
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", rootProject.minJavaVersion)
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", deps.versions.minJava.get())
def relativize = { other -> rootProject.rootDir.relativePath(other).toString() }
eclipse {
@ -107,7 +108,7 @@ configure(rootProject) {
eclipseJdt {
enabled = false
dependsOn 'luceneEclipse'
dependsOn 'luceneEclipseJdt'
}
eclipseClasspath {

View File

@ -27,7 +27,7 @@ def beastingMode = gradle.startParameter.taskNames.any{ name -> name == 'beast'
allprojects {
plugins.withType(JavaPlugin) {
ext {
project.ext {
testOptions += [
[propName: 'tests.dups', value: 0, description: "Reiterate runs of entire test suites ('beast' task)."]
]

View File

@ -19,7 +19,7 @@ def recordings = files()
allprojects {
plugins.withType(JavaPlugin) {
ext {
project.ext {
testOptions += [
[propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."]
]

View File

@ -62,7 +62,7 @@ allprojects {
// Configure test property defaults and their descriptions.
allprojects {
plugins.withType(JavaPlugin) {
ext {
project.ext {
String randomVectorSize = RandomPicks.randomFrom(new Random(projectSeedLong), ["default", "128", "256", "512"])
testOptions += [
// seed, repetition and amplification.
@ -135,14 +135,14 @@ allprojects {
}
afterEvaluate {
ext.testOptionsResolved = testOptions.findAll { opt ->
project.ext.testOptionsResolved = testOptions.findAll { opt ->
propertyOrDefault(opt.propName, opt.value) != null
}.collectEntries { opt ->
[(opt.propName): Objects.toString(resolvedTestOption(opt.propName))]
}
// Compute the "reproduce with" string.
ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
project.ext.testOptionsForReproduceLine = testOptions.findAll { opt ->
if (opt["includeInReproLine"] == false) {
return false
}

View File

@ -22,7 +22,7 @@ def allSuites = []
allprojects {
plugins.withType(JavaPlugin) {
ext {
project.ext {
testOptions += [
[propName: 'tests.slowestTests', value: true, description: "Print the summary of the slowest tests."],
[propName: 'tests.slowestSuites', value: true, description: "Print the summary of the slowest suites."]

View File

@ -75,6 +75,18 @@ configure(rootProject) {
it.dependsOn(":versionCatalogFormatDeps")
}
// correct crlf/ default encoding after version catalog formatting finishes.
tasks.matching {
it.path in [
":versionCatalogFormatDeps"
]
}.configureEach {
it.doLast {
ant.fixcrlf(file: it.catalogFile.get().asFile,
eol: "lf", fixlast: "true", encoding: "UTF-8")
}
}
tasks.matching {
it.path in [
":versionCatalogUpdateDeps"

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
@defaultMessage Spawns threads with vague names; use a custom thread factory (Lucene's NamedThreadFactory, Solr's SolrNamedThreadFactory) and name threads so that you can tell (by its name) which executor it is associated with
@defaultMessage Spawns threads with vague names; use a custom thread factory (Lucene's NamedThreadFactory) and name threads so that you can tell (by its name) which executor it is associated with
java.util.concurrent.Executors#newFixedThreadPool(int)
java.util.concurrent.Executors#newSingleThreadExecutor()
java.util.concurrent.Executors#newCachedThreadPool()

View File

@ -74,21 +74,6 @@ configure(rootProject) {
logger.warn("WARNING: Directory is not a valid git checkout (won't check dirty files): ${rootProject.projectDir}")
}
} else {
// git ignores any folders which are empty (this includes folders with recursively empty sub-folders).
def untrackedNonEmptyFolders = status.untrackedFolders.findAll { path ->
File location = file("${rootProject.projectDir}/${path}")
boolean hasFiles = false
Files.walkFileTree(location.toPath(), new SimpleFileVisitor<Path>() {
@Override
FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
hasFiles = true
// Terminate early.
return FileVisitResult.TERMINATE
}
})
return hasFiles
}
def offenders = [
// Exclude staged changes. These are fine in precommit.
// "(added)": status.added,
@ -97,8 +82,7 @@ configure(rootProject) {
"(conflicting)": status.conflicting,
"(missing)": status.missing,
"(modified)": status.modified,
"(untracked)": status.untracked,
"(untracked non-empty dir)": untrackedNonEmptyFolders
"(untracked)": status.untracked
].collectMany { fileStatus, files ->
files.collect {file -> " - ${file} ${fileStatus}" }
}.sort()

View File

@ -20,6 +20,10 @@
// 2) notice file
// 3) checksum validation/ generation.
// WARNING: The tasks in this file share internal state between tasks without using files.
// Because of this all tasks here must always execute together, so they cannot define task outputs.
// TODO: Rewrite the internal state to use state files containing the ext.jarInfos and its referencedFiles
// This should be false only for debugging.
def failOnError = true
@ -194,13 +198,6 @@ subprojects {
description = "Validate license and notice files of dependencies"
dependsOn collectJarInfos
def outputFileName = 'validateJarLicenses'
inputs.dir(file(project.rootDir.path + '/lucene/licenses'))
.withPropertyName('licenses')
.withPathSensitivity(PathSensitivity.RELATIVE)
outputs.file(layout.buildDirectory.file(outputFileName))
.withPropertyName('validateJarLicensesResult')
doLast {
def errors = []
jarInfos.each { dep ->
@ -246,9 +243,7 @@ subprojects {
}
}
}
// Required to take advantage of incremental building and the build cache
def f = new File(project.buildDir.path + "/" + outputFileName)
f.write(errors.toString(), "UTF-8")
if (errors) {
def msg = "Certain license/ notice files are missing:\n - " + errors.join("\n - ")
if (failOnError) {

View File

@ -1 +1 @@
cb0da6751c2b753a16ac168bb354870ebb1e162e9083f116729cec9c781156b8
2db75c40782f5e8ba1fc278a5574bab070adccb2d21ca5a6e5ed840888448046

View File

@ -1 +1 @@
8.8.0
8.10.0

View File

@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME

View File

@ -112,6 +112,16 @@ API Changes
* GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)
* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)
* GITHUB#13733: Move FacetsCollector#search utility methods to `FacetsCollectorManager`, replace the `Collector`
argument with a `FacetsCollectorManager` and update the return type to include both `TopDocs` results as well as
facets results. (Luca Cavanna)
* GITHUB#13328: Convert many basic Lucene classes to record classes, including CollectionStatistics, TermStatistics and LeafMetadata. (Shubham Chaudhary)
* GITHUB#13780: Remove `IndexSearcher#search(List<LeafReaderContext>, Weight, Collector)` in favour of the newly
introduced `IndexSearcher#search(LeafReaderContextPartition[], Weight, Collector)`
New Features
---------------------
@ -141,6 +151,19 @@ New Features
* GITHUB#13604: Add Kmeans clustering on vectors (Mayya Sharipova, Jim Ferenczi, Tom Veasey)
* GITHUB#13592: Take advantage of the doc value skipper when it is primary sort in SortedNumericDocValuesRangeQuery
and SortedSetDocValuesRangeQuery. (Ignacio Vera)
* GITHUB#13542: Add initial support for intra-segment concurrency. IndexSearcher now supports searching across leaf
reader partitions concurrently. This is useful to max out available resource usage especially with force merged
indices or big segments. There is still a performance penalty for queries that require segment-level computation
ahead of time, such as points/range queries. This is an implementation limitation that we expect to improve in
future releases, and that's why intra-segment slicing is not enabled by default, but leveraged in tests when the
searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally
create slices that target segment partitions. (Luca Cavanna)
* GITHUB#13741: Implement Accountable for NFARunAutomaton, fix hashCode implementation of CompiledAutomaton. (Patrick Zhai)
Improvements
---------------------
@ -164,6 +187,8 @@ Improvements
* GITHUB#12172: Update Romanian stopwords list to include the modern unicode forms. (Trey Jones)
* GITHUB#13707: Improve Operations.isTotal() to work with non-minimal automata. (Dawid Weiss, Robert Muir)
Optimizations
---------------------
@ -176,6 +201,8 @@ Optimizations
* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)
* GITHUB#13672: Leverage doc value skip lists in DocValuesRewriteMethod if indexed. (Greg Miller)
Bug Fixes
---------------------
@ -213,6 +240,9 @@ Changes in Backwards Compatibility Policy
* GITHUB#13230: Remove the Kp and Lovins snowball algorithms which are not supported
or intended for general use. (Robert Muir)
* GITHUB#13602: SearchWithCollectorTask no longer supports the `collector.class` config parameter to load a custom
collector implementation. `collector.manager.class` allows users to load a collector manager instead. (Luca Cavanna)
Other
---------------------
@ -253,6 +283,13 @@ Other
* GITHUB#13499: Remove usage of TopScoreDocCollector + TopFieldCollector deprecated methods (#create, #createSharedManager) (Jakub Slowinski)
Build
---------------------
* GITHUB#13649: Fix eclipse ide settings generation #13649 (Uwe Schindler, Dawid Weiss)
* GITHUB#13698: Upgrade to gradle 8.10 (Dawid Weiss)
======================== Lucene 9.12.0 =======================
API Changes
@ -268,6 +305,12 @@ API Changes
* GITHUB#13559: Add BitSet#nextSetBit(int, int) to get the index of the first set bit in range. (Egor Potemkin)
* GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)
* GITHUB#13568, GITHUB#13750: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
or drill-down. (Egor Potemkin)
New Features
---------------------
@ -280,8 +323,21 @@ New Features
and LogByteSizeMergePolicy via a new #setTargetConcurrency setter.
(Adrien Grand)
* GITHUB#13568: Add sandbox facets module to compute facets while collecting. (Egor Potemkin, Shradha Shankar)
* GITHUB#13678: Add support for JDK 23 to the Panama Vectorization Provider. (Chris Hegarty)
* GITHUB#13689: Add a new faceting feature, dynamic range facets, which automatically picks a balanced set of numeric
ranges based on the distribution of values that occur across all hits. For use cases that have a highly variable
numeric doc values field, such as "price" in an e-commerce application, this facet method is powerful as it allows the
presented ranges to adapt depending on what hits the query actually matches. This is in contrast to existing range
faceting that requires the application to provide the specific fixed ranges up front. (Yuting Gan, Greg Miller,
Stefan Vodita)
Improvements
---------------------
* GITHUB#13475: Re-enable intra-merge parallelism except for terms, norms, and doc values.
Related to GITHUB#13478. (Ben Trent)
* GITHUB#13548: Refactor and javadoc update for KNN vector writer classes. (Patrick Zhai)
@ -299,6 +355,11 @@ Improvements
* GITHUB#13201: Better cost estimation on MultiTermQuery over few terms. (Michael Froh)
* GITHUB#13735: Migrate monitor package usage of deprecated IndexSearcher#search(Query, Collector)
to IndexSearcher#search(Query, CollectorManager). (Greg Miller)
* GITHUB#13746: Introduce ProfilerCollectorManager to parallelize search when using ProfilerCollector. (Luca Cavanna)
Optimizations
---------------------
@ -330,8 +391,14 @@ Optimizations
Closing many individual index files can potentially lead to a degradation in execution performance.
Index files are mmapped one-to-one with the JDK's foreign shared Arena. The JVM deoptimizes the top
few frames of all threads when closing a shared Arena (see JDK-8335480). We mitigate this situation
by 1) using a confined Arena where appropriate, and 2) grouping files from the same segment to a
single shared Arena. (Chris Hegarty, Michael Gibney, Uwe Schindler)
when running with JDK 21 and greater, by 1) using a confined Arena where appropriate, and 2) grouping
files from the same segment to a single shared Arena.
A system property has been added that allows controlling the total maximum number of mmapped files
that may be associated with a single shared Arena. For example, to set the max number of permits to
256, pass the following on the command line
-Dorg.apache.lucene.store.MMapDirectory.sharedArenaMaxPermits=256. Setting a value of 1 associates
a single file to a single shared arena.
(Chris Hegarty, Michael Gibney, Uwe Schindler)
* GITHUB#13585: Lucene912PostingsFormat, the new default postings format, now
only has 2 levels of skip data, which are inlined into postings instead of
@ -341,6 +408,22 @@ Optimizations
* GITHUB#13581: OnHeapHnswGraph no longer allocates a lock for every graph node (Mike Sokolov)
* GITHUB#13636, GITHUB#13658: Optimizations to the decoding logic of blocks of
postings. (Adrien Grand, Uwe Schindler, Greg Miller)
* GITHUB#13644: Improve NumericComparator competitive iterator logic by comparing the missing value with the top
value even after the hit queue is full (Pan Guixin)
* GITHUB#13587: Use Max WAND optimizations with ToParentBlockJoinQuery when using ScoreMode.Max (Mike Pellegrini)
* GITHUB#13742: Reorder checks in LRUQueryCache#count (Shubham Chaudhary)
* GITHUB#13686: Replace Map<String,Object> with IntObjectHashMap for DV producer (Pan Guixin)
* GITHUB#13697: Add a bulk scorer to ToParentBlockJoinQuery, which delegates to the bulk scorer of the child query.
This should speed up query evaluation when the child query has a specialized bulk scorer, such as disjunctive queries.
(Mike Pellegrini)
Changes in runtime behavior
---------------------
@ -366,9 +449,38 @@ Bug Fixes
* GITHUB#13627: Fix race condition on flush for DWPT seqNo generation. (Ben Trent, Ao Li)
* GITHUB#13691: Fix incorrect exponent value in explain of SigmoidFunction. (Owais Kazi)
* GITHUB#13703: Fix bug in LatLonPoint queries where narrow polygons close to latitude 90 don't
match any points due to an Integer overflow. (Ignacio Vera)
* GITHUB#13641: Unify how KnnFormats handle missing fields and correctly handle missing vector fields when
merging segments. (Ben Trent)
* GITHUB#13519: 8 bit scalar vector quantization is no longer
supported: it was buggy starting in 9.11 (GITHUB#13197). 4 and 7
bit quantization are still supported. Existing (9.x) Lucene indices
that previously used 8 bit quantization can still be read/searched
but the results from `KNN*VectorQuery` are silently buggy. Further
8 bit quantized vector indexing into such (9.11) indices is not
permitted, so your path forward if you wish to continue using the
same 9.11 index is to index additional vectors into the same field
with either 4 or 7 bit quantization (or no quantization), and ensure
all older (9.11 written) segments are rewritten either via
`IndexWriter.forceMerge` or
`IndexWriter.addIndexes(CodecReader...)`, or reindexing entirely.
Build
---------------------
* GITHUB#13695, GITHUB#13696: Fix Gradle build sometimes gives spurious "unreferenced license file" warnings.
(Uwe Schindler)
Other
--------------------
(No changes)
* GITHUB#13720: Add float comparison based on unit of least precision and use it to stop test failures caused by float
summation not being associative in IEEE 754. (Alex Herbert, Stefan Vodita)
======================== Lucene 9.11.1 =======================

View File

@ -80,9 +80,22 @@ behaviour as 9.x, clone `PersianAnalyzer` in 9.x or create custom analyzer by us
### AutomatonQuery/CompiledAutomaton/RunAutomaton/RegExp no longer determinize (LUCENE-10010)
These classes no longer take a `determinizeWorkLimit` and no longer determinize
behind the scenes. It is the responsibility of the caller to to call
behind the scenes. It is the responsibility of the caller to call
`Operations.determinize()` for DFA execution.
### RegExp optional complement syntax has been deprecated
Support for the optional complement syntax (`~`) has been deprecated.
The `COMPLEMENT` syntax flag has been removed and replaced by the
`DEPRECATED_COMPLEMENT` flag. Users wanting to enable the deprecated
complement support can do so by explicitly passing syntax flags that
include `DEPRECATED_COMPLEMENT` when creating a `RegExp`. For example:
`new RegExp("~(foo)", RegExp.DEPRECATED_COMPLEMENT)`.
Alternatively, and quite commonly, a simpler _complement bracket expression_,
`[^...]`, may be a suitable replacement. For example, `[^fo]` matches any
character that is not an `f` or `o`.
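A short sketch of the two options described above (the class is `org.apache.lucene.util.automaton.RegExp`; the patterns are just examples):

    import org.apache.lucene.util.automaton.RegExp;

    // Explicit opt-in to the deprecated `~` complement syntax:
    RegExp legacy = new RegExp("~(foo)", RegExp.DEPRECATED_COMPLEMENT);
    // Often a simpler replacement: a complement bracket expression.
    RegExp bracket = new RegExp("[^fo]"); // any single character that is not 'f' or 'o'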
### DocValuesFieldExistsQuery, NormsFieldExistsQuery and KnnVectorFieldExistsQuery removed in favor of FieldExistsQuery (LUCENE-10436)
These classes have been removed and consolidated into `FieldExistsQuery`. To migrate, caller simply replace those classes
@ -180,6 +193,9 @@ access the members using method calls instead of field accesses. Affected classe
- `IOContext`, `MergeInfo`, and `FlushInfo` (GITHUB#13205)
- `BooleanClause` (GITHUB#13261)
- `TotalHits` (GITHUB#13762)
- `TermAndVector` (GITHUB#13772)
- Many basic Lucene classes, including `CollectionStatistics`, `TermStatistics` and `LeafMetadata` (GITHUB#13328)
### Boolean flags on IOContext replaced with a new ReadAdvice enum.
@ -248,6 +264,11 @@ ConcurrentMergeScheduler now disables auto I/O throttling by default. There is s
happening at the CPU level, since ConcurrentMergeScheduler has a maximum number of threads it can
use, which is only a fraction of the total number of threads of the host by default.
### FieldInfos#hasVectors and FieldInfo#hasVectors renamed to hasTermVectors
To reduce confusion between term vectors and numeric vectors, `hasVectors` has been renamed to
`hasTermVectors`.
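In code the rename is mechanical; a hedged one-liner, assuming an existing `FieldInfo fi`:

    boolean hasTv = fi.hasTermVectors(); // previously fi.hasVectors()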
## Migration from Lucene 9.0 to Lucene 9.1
### Test framework package migration and module (LUCENE-10301)
@ -793,3 +814,77 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont
Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
Scorer.
### `FacetsCollector#search` utility methods moved and updated
The static `search` methods exposed by `FacetsCollector` have been moved to `FacetsCollectorManager`.
Furthermore, they take a `FacetsCollectorManager` as their last argument in place of a `Collector` so that they support
intra-query concurrency. The return type has also been updated to `FacetsCollectorManager.FacetsResult`, which includes
both the `TopDocs` and the facets results, the latter exposed through a reduced `FacetsCollector` instance.
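A hedged sketch of the relocated utility, following the note above; `searcher` and `query` are assumed to exist, and the `FacetsResult` accessor names are assumptions rather than confirmed signatures:

    FacetsCollectorManager fcm = new FacetsCollectorManager();
    FacetsCollectorManager.FacetsResult result =
        FacetsCollectorManager.search(searcher, query, 10, fcm); // manager passed where the Collector used to go
    TopDocs hits = result.topDocs();                     // assumed accessor
    FacetsCollector reduced = result.facetsCollector();  // assumed accessor for the reduced collector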
### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter
`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class`
replaces it by allowing users to load a custom collector manager instead.
### BulkScorer#score(LeafCollector collector, Bits acceptDocs) removed
Use `BulkScorer#score(LeafCollector collector, Bits acceptDocs, int min, int max)` instead. In order to score the
entire leaf, provide `0` as min and `DocIdSetIterator.NO_MORE_DOCS` as max. `BulkScorer` subclasses that override
this method need to instead override the variant that takes the doc id range as additional arguments.
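A minimal sketch of scoring a whole leaf with the remaining overload, per the note above (`scorer`, `leafCollector` and `acceptDocs` are assumed to exist):

    import org.apache.lucene.search.DocIdSetIterator;

    // Full doc id range = score the entire leaf.
    scorer.score(leafCollector, acceptDocs, 0, DocIdSetIterator.NO_MORE_DOCS);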
### CollectorManager#newCollector and Collector#getLeafCollector contract
With the introduction of intra-segment query concurrency support, multiple `LeafCollector`s may be requested for the
same `LeafReaderContext` via `Collector#getLeafCollector(LeafReaderContext)` across the different `Collector` instances
returned by multiple `CollectorManager#newCollector` calls. Any logic or computation that needs to happen
once per segment requires specific handling in the collector manager implementation. See `TotalHitCountCollectorManager`
as an example. Individual collectors don't need to be adapted as a specific `Collector` instance will still see a given
`LeafReaderContext` once, given that it is not possible to add more than one partition of the same segment to the same
leaf slice.
### Weight#scorer, Weight#bulkScorer and Weight#scorerSupplier contract
With the introduction of intra-segment query concurrency support, multiple `Scorer`s, `ScorerSupplier`s or `BulkScorer`s
may be requested for the same `LeafReaderContext` instance as part of a single search call. That may happen concurrently
from separate threads each searching a specific doc id range of the segment. `Weight` implementations that rely on the
assumption that a scorer, bulk scorer or scorer supplier for a given `LeafReaderContext` is requested once per search
need updating.
### Signature of IndexSearcher#searchLeaf changed
With the introduction of intra-segment query concurrency support, the `IndexSearcher#searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector)`
method now accepts two additional int arguments to identify the min/max range of doc ids that will be searched in this
leaf partition: `IndexSearcher#searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Weight weight, Collector collector)`.
Subclasses of `IndexSearcher` that call or override the `searchLeaf` method need to be updated accordingly.
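A hedged sketch of adapting an `IndexSearcher` subclass to the new `searchLeaf` signature described above; the override body is illustrative only:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Weight;

    public class MySearcher extends IndexSearcher {
      public MySearcher(IndexReader reader) {
        super(reader);
      }

      @Override
      protected void searchLeaf(
          LeafReaderContext ctx, int minDocId, int maxDocId, Weight weight, Collector collector)
          throws IOException {
        // per-partition hook (hypothetical); delegate to the default implementation
        super.searchLeaf(ctx, minDocId, maxDocId, weight, collector);
      }
    }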
### Signature of static IndexSearcher#slices method changed
The static `IndexSearcher#slices(List<LeafReaderContext> leaves, int maxDocsPerSlice, int maxSegmentsPerSlice)`
method now accepts an additional fourth, final argument that optionally enables creating segment partitions:
`IndexSearcher#slices(List<LeafReaderContext> leaves, int maxDocsPerSlice, int maxSegmentsPerSlice, boolean allowSegmentPartitions)`
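For example, under the new signature a caller could opt into segment partitions like this (`reader` is assumed to exist; the doc and segment limits are arbitrary example values):

    IndexSearcher.LeafSlice[] slices =
        IndexSearcher.slices(reader.leaves(), 250_000, 5, true); // true = allowSegmentPartitions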
### TotalHitCountCollectorManager constructor
`TotalHitCountCollectorManager` now requires that an array of `LeafSlice`s, retrieved via `IndexSearcher#getSlices`,
is provided to its constructor. Depending on whether segment partitions are present among slices, the manager can
optimize the type of collectors it creates and exposes via `newCollector`.
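A short sketch following the note above (`searcher` and `query` are assumed to exist):

    TotalHitCountCollectorManager manager =
        new TotalHitCountCollectorManager(searcher.getSlices()); // slices now required up front
    int totalHits = searcher.search(query, manager);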
### `IndexSearcher#search(List<LeafReaderContext>, Weight, Collector)` removed
The protected `IndexSearcher#search(List<LeafReaderContext> leaves, Weight weight, Collector collector)` method has been
removed in favour of the newly introduced `search(LeafReaderContextPartition[] partitions, Weight weight, Collector collector)`.
`IndexSearcher` subclasses that override this method need to instead override the new method.
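A hedged sketch of the corresponding override change inside an `IndexSearcher` subclass (fragment only; the partition-based method is the one named above):

    @Override
    protected void search(LeafReaderContextPartition[] partitions, Weight weight, Collector collector)
        throws IOException {
      // replaces the old override of search(List<LeafReaderContext>, Weight, Collector)
      super.search(partitions, weight, collector);
    }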
### Indexing vectors with 8 bit scalar quantization is no longer supported but 7 and 4 bit quantization still work (GITHUB#13519)
8 bit scalar vector quantization is no longer supported: it was buggy
starting in 9.11 (GITHUB#13197). 4 and 7 bit quantization are still
supported. Existing (9.11) Lucene indices that previously used 8 bit
quantization can still be read/searched but the results from
`KNN*VectorQuery` are silently buggy. Further 8 bit quantized vector
indexing into such (9.11) indices is not permitted, so your path
forward if you wish to continue using the same 9.11 index is to index
additional vectors into the same field with either 4 or 7 bit
quantization (or no quantization), and ensure all older (9.x written)
segments are rewritten either via `IndexWriter.forceMerge` or
`IndexWriter.addIndexes(CodecReader...)`, or reindexing entirely.

View File

@ -1,5 +1,5 @@
{
"gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "1f7a446f3483326385eef257cea8366c27da0850",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "e62dcd8c25219d8f5d783823b228ffe38d2bacde",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex": "f52109bb7d5701979fde90aeeeda726246a8d5fd"
}

View File

@ -1,5 +1,5 @@
{
"gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "ac298e08bc5b96202efca0c01f9f0376fda976bd",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "2b5df5ff35543a6380c82f298225eb5fa06e4453",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex": "0b8c7774b98e8237702013e82c352d4711509bd0"
}

View File

@ -52,7 +52,7 @@ class CheckCompoundPattern {
boolean prohibitsCompounding(CharsRef word, int breakPos, Root<?> rootBefore, Root<?> rootAfter) {
if (isNonAffixedPattern(endChars)) {
if (!charsMatch(word, breakPos - rootBefore.word.length(), rootBefore.word)) {
if (!charsMatch(word, breakPos - rootBefore.word().length(), rootBefore.word())) {
return false;
}
} else if (!charsMatch(word, breakPos - endChars.length(), endChars)) {
@ -60,7 +60,7 @@ class CheckCompoundPattern {
}
if (isNonAffixedPattern(beginChars)) {
if (!charsMatch(word, breakPos, rootAfter.word)) {
if (!charsMatch(word, breakPos, rootAfter.word())) {
return false;
}
} else if (!charsMatch(word, breakPos, beginChars)) {
@ -84,7 +84,7 @@ class CheckCompoundPattern {
private boolean hasAllFlags(Root<?> root, char[] flags) {
for (char flag : flags) {
if (!dictionary.hasFlag(root.entryId, flag)) {
if (!dictionary.hasFlag(root.entryId(), flag)) {
return false;
}
}

View File

@ -24,7 +24,6 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.TreeSet;
@ -62,8 +61,7 @@ class GeneratingSuggester {
private List<Weighted<Root<String>>> findSimilarDictionaryEntries(
String word, WordCase originalCase) {
Comparator<Weighted<Root<String>>> natural = Comparator.naturalOrder();
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(natural.reversed());
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(Comparator.reverseOrder());
char[] excludeFlags = dictionary.allNonSuggestibleFlags();
FlagEnumerator.Lookup flagLookup = dictionary.flagLookup;
@ -111,7 +109,7 @@ class GeneratingSuggester {
private static boolean isWorseThan(int score, CharsRef candidate, Weighted<Root<String>> root) {
return score < root.score
|| score == root.score && CharSequence.compare(candidate, root.word.word) > 0;
|| score == root.score && CharSequence.compare(candidate, root.word.word()) > 0;
}
private void processSuggestibleWords(
@ -162,11 +160,11 @@ class GeneratingSuggester {
List<char[]> crossProducts = new ArrayList<>();
Set<String> result = new LinkedHashSet<>();
if (!dictionary.hasFlag(root.entryId, dictionary.needaffix)) {
result.add(root.word);
if (!dictionary.hasFlag(root.entryId(), dictionary.needaffix)) {
result.add(root.word());
}
char[] wordChars = root.word.toCharArray();
char[] wordChars = root.word().toCharArray();
// suffixes
processAffixes(
@ -180,7 +178,7 @@ class GeneratingSuggester {
}
String suffix = misspelled.substring(misspelled.length() - suffixLength);
String withSuffix = root.word.substring(0, root.word.length() - stripLength) + suffix;
String withSuffix = root.word().substring(0, root.word().length() - stripLength) + suffix;
result.add(withSuffix);
if (dictionary.isCrossProduct(suffixId)) {
crossProducts.add(withSuffix.toCharArray());
@ -192,7 +190,7 @@ class GeneratingSuggester {
true,
misspelled,
(prefixLength, prefixId) -> {
if (!dictionary.hasFlag(root.entryId, dictionary.affixData(prefixId, AFFIX_FLAG))
if (!dictionary.hasFlag(root.entryId(), dictionary.affixData(prefixId, AFFIX_FLAG))
|| !dictionary.isCrossProduct(prefixId)) {
return;
}
@ -217,7 +215,7 @@ class GeneratingSuggester {
if (hasCompatibleFlags(root, prefixId)
&& checkAffixCondition(prefixId, wordChars, stripLength, stemLength)) {
String prefix = misspelled.substring(0, prefixLength);
result.add(prefix + root.word.substring(stripLength));
result.add(prefix + root.word().substring(stripLength));
}
});
@ -263,7 +261,7 @@ class GeneratingSuggester {
}
private boolean hasCompatibleFlags(Root<?> root, int affixId) {
if (!dictionary.hasFlag(root.entryId, dictionary.affixData(affixId, AFFIX_FLAG))) {
if (!dictionary.hasFlag(root.entryId(), dictionary.affixData(affixId, AFFIX_FLAG))) {
return false;
}
@ -447,28 +445,8 @@ class GeneratingSuggester {
return commonScore;
}
private static class Weighted<T extends Comparable<T>> implements Comparable<Weighted<T>> {
final T word;
final int score;
Weighted(T word, int score) {
this.word = word;
this.score = score;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Weighted)) return false;
@SuppressWarnings("unchecked")
Weighted<T> that = (Weighted<T>) o;
return score == that.score && word.equals(that.word);
}
@Override
public int hashCode() {
return Objects.hash(word, score);
}
private record Weighted<T extends Comparable<T>>(T word, int score)
implements Comparable<Weighted<T>> {
@Override
public String toString() {

View File

@ -132,7 +132,7 @@ public class Hunspell {
Boolean checkSimpleWord(char[] wordChars, int length, WordCase originalCase) {
Root<CharsRef> entry = findStem(wordChars, 0, length, originalCase, SIMPLE_WORD);
if (entry != null) {
return !dictionary.hasFlag(entry.entryId, dictionary.forbiddenword);
return !dictionary.hasFlag(entry.entryId(), dictionary.forbiddenword);
}
return null;
@ -229,7 +229,7 @@ public class Hunspell {
stem = findStem(word.chars, word.offset, breakPos + 1, originalCase, context);
}
if (stem != null
&& !dictionary.hasFlag(stem.entryId, dictionary.forbiddenword)
&& !dictionary.hasFlag(stem.entryId(), dictionary.forbiddenword)
&& (prev == null || prev.mayCompound(stem, breakPos, originalCase))) {
CompoundPart part = new CompoundPart(prev, word, breakPos, stem, null);
if (checkCompoundsAfter(originalCase, part)) {
@ -274,7 +274,7 @@ public class Hunspell {
Root<CharsRef> lastRoot =
findStem(word.chars, breakOffset, remainingLength, originalCase, COMPOUND_END);
if (lastRoot != null
&& !dictionary.hasFlag(lastRoot.entryId, dictionary.forbiddenword)
&& !dictionary.hasFlag(lastRoot.entryId(), dictionary.forbiddenword)
&& !(dictionary.checkCompoundDup && prev.root.equals(lastRoot))
&& !hasForceUCaseProblem(lastRoot, originalCase, word.chars)
&& prev.mayCompound(lastRoot, remainingLength, originalCase)) {
@ -288,7 +288,7 @@ public class Hunspell {
private boolean hasForceUCaseProblem(Root<?> root, WordCase originalCase, char[] wordChars) {
if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false;
if (originalCase == null && Character.isUpperCase(wordChars[0])) return false;
return dictionary.hasFlag(root.entryId, dictionary.forceUCase);
return dictionary.hasFlag(root.entryId(), dictionary.forceUCase);
}
/**

View File

@ -17,8 +17,6 @@
package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -117,7 +115,16 @@ public final class HunspellStemFilter extends TokenFilter {
}
if (longestOnly && buffer.size() > 1) {
Collections.sort(buffer, lengthComparator);
buffer.sort(
(o1, o2) -> {
int cmp = Integer.compare(o2.length, o1.length);
if (cmp == 0) {
// tie break on text
return o2.compareTo(o1);
} else {
return cmp;
}
});
}
CharsRef stem = buffer.remove(0);
@ -139,18 +146,4 @@ public final class HunspellStemFilter extends TokenFilter {
super.reset();
buffer = null;
}
static final Comparator<CharsRef> lengthComparator =
new Comparator<CharsRef>() {
@Override
public int compare(CharsRef o1, CharsRef o2) {
int cmp = Integer.compare(o2.length, o1.length);
if (cmp == 0) {
// tie break on text
return o2.compareTo(o1);
} else {
return cmp;
}
}
};
}

View File

@ -16,36 +16,13 @@
*/
package org.apache.lucene.analysis.hunspell;
import java.util.Objects;
class Root<T extends CharSequence> implements Comparable<Root<T>> {
final T word;
final int entryId;
Root(T word, int entryId) {
this.word = word;
this.entryId = entryId;
}
record Root<T extends CharSequence>(T word, int entryId) implements Comparable<Root<T>> {
@Override
public String toString() {
return word.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Root)) return false;
@SuppressWarnings("unchecked")
Root<T> root = (Root<T>) o;
return entryId == root.entryId && word.equals(root.word);
}
@Override
public int hashCode() {
return Objects.hash(word, entryId);
}
@Override
public int compareTo(Root<T> o) {
return CharSequence.compare(word, o.word);

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -147,9 +146,7 @@ public class FingerprintFilter extends TokenFilter {
Arrays.sort(
items,
new Comparator<Object>() {
@Override
public int compare(Object o1, Object o2) {
(o1, o2) -> {
char[] v1 = (char[]) o1;
char[] v2 = (char[]) o2;
int len1 = v1.length;
@ -166,7 +163,6 @@ public class FingerprintFilter extends TokenFilter {
k++;
}
return len1 - len2;
}
});
// TODO lets append directly to termAttribute?

View File

@ -59,12 +59,12 @@ public class PatternTypingFilter extends TokenFilter {
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
for (PatternTypingRule rule : replacementAndFlagByPattern) {
Matcher matcher = rule.getPattern().matcher(termAtt);
Matcher matcher = rule.pattern().matcher(termAtt);
if (matcher.find()) {
// allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string
// creation
typeAtt.setType(matcher.replaceFirst(rule.getTypeTemplate()));
flagAtt.setFlags(rule.getFlags());
typeAtt.setType(matcher.replaceFirst(rule.typeTemplate()));
flagAtt.setFlags(rule.flags());
return true;
}
}
@ -74,27 +74,5 @@ public class PatternTypingFilter extends TokenFilter {
}
/** Value holding class for pattern typing rules. */
public static class PatternTypingRule {
private final Pattern pattern;
private final int flags;
private final String typeTemplate;
public PatternTypingRule(Pattern pattern, int flags, String typeTemplate) {
this.pattern = pattern;
this.flags = flags;
this.typeTemplate = typeTemplate;
}
public Pattern getPattern() {
return pattern;
}
public int getFlags() {
return flags;
}
public String getTypeTemplate() {
return typeTemplate;
}
}
public record PatternTypingRule(Pattern pattern, int flags, String typeTemplate) {}
}

View File

@ -142,22 +142,10 @@ public final class SynonymGraphFilter extends TokenFilter {
}
}
static class BufferedOutputToken {
final String term;
// Non-null if this was an incoming token:
final State state;
final int startNode;
final int endNode;
public BufferedOutputToken(State state, String term, int startNode, int endNode) {
this.state = state;
this.term = term;
this.startNode = startNode;
this.endNode = endNode;
}
}
/**
* @param state Non-null if this was an incoming token:
*/
record BufferedOutputToken(State state, String term, int startNode, int endNode) {}
/**
* Apply previously built synonyms to incoming tokens.

View File

@ -18,17 +18,15 @@ package org.apache.lucene.analysis.synonym.word2vec;
import org.apache.lucene.util.BytesRef;
/** Wraps a term and boost */
public class TermAndBoost {
/** the term */
public final BytesRef term;
/** the boost */
public final float boost;
/**
* Wraps a term and boost
*
* @param term the term
* @param boost the boost
*/
public record TermAndBoost(BytesRef term, float boost) {
/** Creates a new TermAndBoost */
public TermAndBoost(BytesRef term, float boost) {
this.term = BytesRef.deepCopyOf(term);
this.boost = boost;
public TermAndBoost {
term = BytesRef.deepCopyOf(term);
}
}

View File

@ -56,25 +56,25 @@ public class Word2VecModel implements RandomAccessVectorValues.Floats {
}
public void addTermAndVector(TermAndVector modelEntry) {
modelEntry.normalizeVector();
modelEntry = modelEntry.normalizeVector();
this.termsAndVectors[loadedCount++] = modelEntry;
this.word2Vec.add(modelEntry.getTerm());
this.word2Vec.add(modelEntry.term());
}
@Override
public float[] vectorValue(int targetOrd) {
return termsAndVectors[targetOrd].getVector();
return termsAndVectors[targetOrd].vector();
}
public float[] vectorValue(BytesRef term) {
int termOrd = this.word2Vec.find(term);
if (termOrd < 0) return null;
TermAndVector entry = this.termsAndVectors[termOrd];
return (entry == null) ? null : entry.getVector();
return (entry == null) ? null : entry.vector();
}
public BytesRef termValue(int targetOrd) {
return termsAndVectors[targetOrd].getTerm();
return termsAndVectors[targetOrd].term();
}
@Override

View File

@ -80,7 +80,7 @@ public final class Word2VecSynonymFilter extends TokenFilter {
clearAttributes();
restoreState(this.lastState);
termAtt.setEmpty();
termAtt.append(synonym.term.utf8ToString());
termAtt.append(synonym.term().utf8ToString());
typeAtt.setType(SynonymGraphFilter.TYPE_SYNONYM);
posLenAtt.setPositionLength(1);
posIncrementAtt.setPositionIncrement(0);

View File

@ -1490,7 +1490,7 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
}
assertTrue(approxEquals(actual, expected));
assertTrue(Operations.sameLanguage(actual, expected));
assertTrue(AutomatonTestUtil.sameLanguage(actual, expected));
}
a.close();

View File

@ -64,7 +64,7 @@ public class TestWord2VecSynonymProvider extends LuceneTestCase {
assertEquals(4, actualSynonymsResults.size());
for (int i = 0; i < expectedSynonyms.length; i++) {
assertEquals(new BytesRef(expectedSynonyms[i]), actualSynonymsResults.get(i).term);
assertEquals(new BytesRef(expectedSynonyms[i]), actualSynonymsResults.get(i).term());
}
}
@ -83,8 +83,8 @@ public class TestWord2VecSynonymProvider extends LuceneTestCase {
BytesRef expectedFirstSynonymTerm = new BytesRef("b");
double expectedFirstSynonymBoost = 1.0;
assertEquals(expectedFirstSynonymTerm, actualSynonymsResults.get(0).term);
assertEquals(expectedFirstSynonymBoost, actualSynonymsResults.get(0).boost, 0.001f);
assertEquals(expectedFirstSynonymTerm, actualSynonymsResults.get(0).term());
assertEquals(expectedFirstSynonymBoost, actualSynonymsResults.get(0).boost(), 0.001f);
}
@Test
@ -120,8 +120,8 @@ public class TestWord2VecSynonymProvider extends LuceneTestCase {
@Test
public void normalizedVector_shouldReturnModule1() {
TermAndVector synonymTerm = new TermAndVector(new BytesRef("a"), new float[] {10, 10});
synonymTerm.normalizeVector();
float[] vector = synonymTerm.getVector();
synonymTerm = synonymTerm.normalizeVector();
float[] vector = synonymTerm.vector();
float len = 0;
for (int i = 0; i < vector.length; i++) {
len += vector[i] * vector[i];

View File

@ -139,19 +139,7 @@ public final class JapaneseCompletionFilter extends TokenFilter {
}
}
private static class CompletionToken {
final String term;
final boolean isFirst;
final int startOffset;
final int endOffset;
CompletionToken(String term, boolean isFirst, int startOffset, int endOffset) {
this.term = term;
this.isFirst = isFirst;
this.startOffset = startOffset;
this.endOffset = endOffset;
}
}
private record CompletionToken(String term, boolean isFirst, int startOffset, int endOffset) {}
private static class CompletionTokenGenerator implements Iterator<CompletionToken> {

View File

@ -180,13 +180,5 @@ public class KatakanaRomanizer {
return null;
}
private static class MatchedKeystroke {
final int keystrokeLen;
final int keystrokeIndex;
MatchedKeystroke(int keystrokeLen, int keystrokeIndex) {
this.keystrokeLen = keystrokeLen;
this.keystrokeIndex = keystrokeIndex;
}
}
private record MatchedKeystroke(int keystrokeLen, int keystrokeIndex) {}
}

View File

@ -20,8 +20,6 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.morph.Dictionary;
@ -83,14 +81,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
// TODO: should we allow multiple segmentations per input 'phrase'?
// the old treemap didn't support this either, and i'm not sure if it's needed/useful?
Collections.sort(
featureEntries,
new Comparator<String[]>() {
@Override
public int compare(String[] left, String[] right) {
return left[0].compareTo(right[0]);
}
});
featureEntries.sort((left, right) -> left[0].compareTo(right[0]));
List<String> data = new ArrayList<>(featureEntries.size());
List<int[]> segmentations = new ArrayList<>(featureEntries.size());

View File

@ -268,19 +268,19 @@ final class Viterbi
final KoMorphData.Morpheme morpheme = morphemes[i];
final Token compoundToken;
if (token.getPOSType() == POS.Type.COMPOUND) {
assert endOffset - morpheme.surfaceForm.length() >= 0;
assert endOffset - morpheme.surfaceForm().length() >= 0;
compoundToken =
new DecompoundToken(
morpheme.posTag,
morpheme.surfaceForm,
endOffset - morpheme.surfaceForm.length(),
morpheme.posTag(),
morpheme.surfaceForm(),
endOffset - morpheme.surfaceForm().length(),
endOffset,
backType);
} else {
compoundToken =
new DecompoundToken(
morpheme.posTag,
morpheme.surfaceForm,
morpheme.posTag(),
morpheme.surfaceForm(),
token.getStartOffset(),
token.getEndOffset(),
backType);
@ -289,7 +289,7 @@ final class Viterbi
compoundToken.setPositionIncrement(0);
}
++posLen;
endOffset -= morpheme.surfaceForm.length();
endOffset -= morpheme.surfaceForm().length();
pending.add(compoundToken);
if (VERBOSE) {
System.out.println(" add token=" + pending.get(pending.size() - 1));

View File

@ -22,15 +22,7 @@ import org.apache.lucene.analysis.morph.MorphData;
/** Represents Korean morphological information. */
public interface KoMorphData extends MorphData {
/** A morpheme extracted from a compound token. */
class Morpheme {
public final POS.Tag posTag;
public final String surfaceForm;
public Morpheme(POS.Tag posTag, String surfaceForm) {
this.posTag = posTag;
this.surfaceForm = surfaceForm;
}
}
record Morpheme(POS.Tag posTag, String surfaceForm) {}
/**
* Get the {@link org.apache.lucene.analysis.ko.POS.Type} of specified word (morpheme, compound,

View File

@ -150,13 +150,13 @@ class TokenInfoDictionaryEntryWriter extends DictionaryEntryWriter {
int compoundOffset = 0;
for (KoMorphData.Morpheme morpheme : morphemes) {
if (hasSinglePOS == false) {
buffer.put((byte) morpheme.posTag.ordinal());
buffer.put((byte) morpheme.posTag().ordinal());
}
if (posType != POS.Type.INFLECT) {
buffer.put((byte) morpheme.surfaceForm.length());
compoundOffset += morpheme.surfaceForm.length();
buffer.put((byte) morpheme.surfaceForm().length());
compoundOffset += morpheme.surfaceForm().length();
} else {
writeString(morpheme.surfaceForm);
writeString(morpheme.surfaceForm());
}
assert compoundOffset <= entry[0].length() : Arrays.toString(entry);
}

View File

@ -86,11 +86,11 @@ public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSp
builder.append("+");
}
builder
.append(morpheme.surfaceForm)
.append(morpheme.surfaceForm())
.append('/')
.append(morpheme.posTag.name())
.append(morpheme.posTag().name())
.append('(')
.append(morpheme.posTag.description())
.append(morpheme.posTag().description())
.append(')');
}
return builder.toString();

View File

@ -170,14 +170,14 @@ public class TestTokenInfoDictionary extends LuceneTestCase {
if (decompound != null) {
int offset = 0;
for (KoMorphData.Morpheme morph : decompound) {
assertTrue(UnicodeUtil.validUTF16String(morph.surfaceForm));
assertFalse(morph.surfaceForm.isEmpty());
assertEquals(morph.surfaceForm.trim(), morph.surfaceForm);
assertTrue(UnicodeUtil.validUTF16String(morph.surfaceForm()));
assertFalse(morph.surfaceForm().isEmpty());
assertEquals(morph.surfaceForm().trim(), morph.surfaceForm());
if (type != POS.Type.INFLECT) {
assertEquals(
morph.surfaceForm,
surfaceForm.substring(offset, offset + morph.surfaceForm.length()));
offset += morph.surfaceForm.length();
morph.surfaceForm(),
surfaceForm.substring(offset, offset + morph.surfaceForm().length()));
offset += morph.surfaceForm().length();
}
}
assertTrue(offset <= surfaceForm.length());

View File

@ -43,10 +43,10 @@ public class TestUserDictionary extends LuceneTestCase {
dictionary.getMorphAttributes().getMorphemes(wordIds.get(1), sArray, 0, s.length());
assertNotNull(decompound);
assertEquals(2, decompound.length);
assertEquals(decompound[0].posTag, POS.Tag.NNG);
assertEquals(decompound[0].surfaceForm, "세종");
assertEquals(decompound[1].posTag, POS.Tag.NNG);
assertEquals(decompound[1].surfaceForm, "");
assertEquals(decompound[0].posTag(), POS.Tag.NNG);
assertEquals(decompound[0].surfaceForm(), "세종");
assertEquals(decompound[1].posTag(), POS.Tag.NNG);
assertEquals(decompound[1].surfaceForm(), "");
s = "c++";
sArray = s.toCharArray();

View File

@ -103,19 +103,20 @@ final class ForUtil {
for (int bpv = 1; bpv <= 32; ++bpv) {
final FormatAndBits formatAndBits =
PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv, acceptableOverheadRatio);
assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
assert formatAndBits.bitsPerValue <= 32;
assert formatAndBits.format().isSupported(formatAndBits.bitsPerValue());
assert formatAndBits.bitsPerValue() <= 32;
encodedSizes[bpv] =
encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
encodedSize(
formatAndBits.format(), PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue());
encoders[bpv] =
PackedInts.getEncoder(
formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
formatAndBits.format(), PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue());
decoders[bpv] =
PackedInts.getDecoder(
formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
formatAndBits.format(), PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue());
iterations[bpv] = computeIterations(decoders[bpv]);
out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1));
out.writeVInt(formatAndBits.format().getId() << 5 | (formatAndBits.bitsPerValue() - 1));
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -209,7 +210,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
DocValuesSkipIndexType.NONE,
dvGen,
attributes,
pointDataDimensionCount,
@ -347,7 +348,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
output.writeVInt(fi.number);
byte bits = 0x0;
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
if (fi.hasTermVectors()) bits |= STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= OMIT_NORMS;
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;

View File

@ -17,8 +17,6 @@
package org.apache.lucene.backward_codecs.lucene80;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader;
import org.apache.lucene.backward_codecs.packed.LegacyDirectReader;
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
@ -41,6 +39,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@ -53,11 +52,11 @@ import org.apache.lucene.util.compress.LZ4;
/** reader for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
private final IntObjectHashMap<NumericEntry> numerics = new IntObjectHashMap<>();
private final IntObjectHashMap<BinaryEntry> binaries = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedEntry> sorted = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedSetEntry> sortedSets = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedNumericEntry> sortedNumerics = new IntObjectHashMap<>();
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@ -139,7 +138,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
}
byte type = meta.readByte();
if (type == Lucene80DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
numerics.put(info.number, readNumeric(meta));
} else if (type == Lucene80DocValuesFormat.BINARY) {
final boolean compressed;
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
@ -158,13 +157,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
} else {
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
binaries.put(info.name, readBinary(meta, compressed));
binaries.put(info.number, readBinary(meta, compressed));
} else if (type == Lucene80DocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta));
sorted.put(info.number, readSorted(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_SET) {
sortedSets.put(info.name, readSortedSet(meta));
sortedSets.put(info.number, readSortedSet(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) {
sortedNumerics.put(info.name, readSortedNumeric(meta));
sortedNumerics.put(info.number, readSortedNumeric(meta));
} else {
throw new CorruptIndexException("invalid type: " + type, meta);
}
@ -426,7 +425,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.name);
NumericEntry entry = numerics.get(field.number);
return getNumeric(entry);
}
@ -915,7 +914,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.name);
BinaryEntry entry = binaries.get(field.number);
if (entry.compressed) {
return getCompressedBinary(entry);
} else {
@ -973,7 +972,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
SortedEntry entry = sorted.get(field.name);
SortedEntry entry = sorted.get(field.number);
return getSorted(entry);
}
@ -1407,7 +1406,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
SortedNumericEntry entry = sortedNumerics.get(field.name);
SortedNumericEntry entry = sortedNumerics.get(field.number);
if (entry.numValues == entry.numDocsWithField) {
return DocValues.singleton(getNumeric(entry));
}
@ -1543,7 +1542,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry entry = sortedSets.get(field.name);
SortedSetEntry entry = sortedSets.get(field.number);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
}
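Editor's note: the change above swaps String-keyed HashMaps for the primitive-keyed IntObjectHashMap, keying each entry by FieldInfo.number so lookups avoid boxing and String hashing. A minimal sketch of that map's contract (illustrative values; the class lives in the internal hppc package, so this is only usable from code that can see it):

import org.apache.lucene.internal.hppc.IntObjectHashMap;

public class IntKeyedLookupDemo {
  public static void main(String[] args) {
    IntObjectHashMap<String> entries = new IntObjectHashMap<>();
    entries.put(3, "entry for field #3");   // key is the int field number
    System.out.println(entries.get(3));     // -> entry for field #3
    System.out.println(entries.get(7));     // -> null when the key is absent
  }
}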

View File

@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -186,7 +187,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
DocValuesSkipIndexType.NONE,
dvGen,
attributes,
pointDataDimensionCount,
@ -333,7 +334,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
output.writeVInt(fi.number);
byte bits = 0x0;
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
if (fi.hasTermVectors()) bits |= STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= OMIT_NORMS;
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;

View File

@ -224,6 +224,9 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
return getOffHeapVectorValues(fieldEntry);
}

View File

@ -218,6 +218,9 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
return getOffHeapVectorValues(fieldEntry);
}

View File

@ -215,6 +215,9 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
return OffHeapFloatVectorValues.load(fieldEntry, vectorData);
}

View File

@ -233,6 +233,9 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
if (fieldEntry.vectorEncoding != VectorEncoding.FLOAT32) {
throw new IllegalArgumentException(
"field=\""
@ -248,6 +251,9 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
@Override
public ByteVectorValues getByteVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
if (fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
throw new IllegalArgumentException(
"field=\""

View File

@ -241,6 +241,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
if (fieldEntry.vectorEncoding != VectorEncoding.FLOAT32) {
throw new IllegalArgumentException(
"field=\""
@ -264,6 +267,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
@Override
public ByteVectorValues getByteVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null) {
throw new IllegalArgumentException("field=\"" + field + "\" not found");
}
if (fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
throw new IllegalArgumentException(
"field=\""

View File

@ -46,10 +46,10 @@ import org.apache.lucene.store.IndexOutput;
* uptos(position, payload). 4. start offset.
*/
public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
private int[] lastSkipDoc;
private long[] lastSkipDocPointer;
private long[] lastSkipPosPointer;
private long[] lastSkipPayPointer;
private final int[] lastSkipDoc;
private final long[] lastSkipDocPointer;
private final long[] lastSkipPosPointer;
private final long[] lastSkipPayPointer;
private final IndexOutput docOut;
private final IndexOutput posOut;
@ -61,7 +61,7 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
private long curPayPointer;
private int curPosBufferUpto;
private int curPayloadByteUpto;
private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
private final CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
@ -85,7 +85,12 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
lastSkipPosPointer = new long[maxSkipLevels];
if (payOut != null) {
lastSkipPayPointer = new long[maxSkipLevels];
} else {
lastSkipPayPointer = null;
}
} else {
lastSkipPosPointer = null;
lastSkipPayPointer = null;
}
curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
for (int i = 0; i < maxSkipLevels; ++i) {

View File

@ -642,13 +642,13 @@ public class BKDWriter60 implements Closeable {
throws IOException {
assert docMaps == null || readers.size() == docMaps.size();
BKDMergeQueue queue = new BKDMergeQueue(config.bytesPerDim, readers.size());
BKDMergeQueue queue = new BKDMergeQueue(config.bytesPerDim(), readers.size());
for (int i = 0; i < readers.size(); i++) {
PointValues pointValues = readers.get(i);
assert pointValues.getNumDimensions() == config.numDims
&& pointValues.getBytesPerDimension() == config.bytesPerDim
&& pointValues.getNumIndexDimensions() == config.numIndexDims;
assert pointValues.getNumDimensions() == config.numDims()
&& pointValues.getBytesPerDimension() == config.bytesPerDim()
&& pointValues.getNumIndexDimensions() == config.numIndexDims();
MergeState.DocMap docMap;
if (docMaps == null) {
docMap = null;
@ -1931,7 +1931,7 @@ public class BKDWriter60 implements Closeable {
private void computePackedValueBounds(
BKDRadixSelector.PathSlice slice, byte[] minPackedValue, byte[] maxPackedValue)
throws IOException {
try (PointReader reader = slice.writer.getReader(slice.start, slice.count)) {
try (PointReader reader = slice.writer().getReader(slice.start(), slice.count())) {
if (reader.next() == false) {
return;
}
@ -1995,16 +1995,16 @@ public class BKDWriter60 implements Closeable {
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more
// efficient
HeapPointWriter heapSource;
if (points.writer instanceof HeapPointWriter == false) {
if (points.writer() instanceof HeapPointWriter == false) {
// Adversarial cases can cause this, e.g. merging big segments with most of the points
// deleted
heapSource = switchToHeap(points.writer);
heapSource = switchToHeap(points.writer());
} else {
heapSource = (HeapPointWriter) points.writer;
heapSource = (HeapPointWriter) points.writer();
}
int from = Math.toIntExact(points.start);
int to = Math.toIntExact(points.start + points.count);
int from = Math.toIntExact(points.start());
int to = Math.toIntExact(points.start() + points.count());
// we store common prefix on scratch1
computeCommonPrefixLength(heapSource, scratch1, from, to);
@ -2107,8 +2107,8 @@ public class BKDWriter60 implements Closeable {
: "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
// How many points will be in the left tree:
long rightCount = points.count / 2;
long leftCount = points.count - rightCount;
long rightCount = points.count() / 2;
long leftCount = points.count() - rightCount;
BKDRadixSelector.PathSlice[] slices = new BKDRadixSelector.PathSlice[2];
@ -2128,9 +2128,9 @@ public class BKDWriter60 implements Closeable {
radixSelector.select(
points,
slices,
points.start,
points.start + points.count,
points.start + leftCount,
points.start(),
points.start() + points.count(),
points.start() + leftCount,
splitDim,
commonPrefixLen);

View File

@ -78,4 +78,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testEmptyByteVectorData() {
// unimplemented
}
@Override
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}
}

View File

@ -77,4 +77,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testEmptyByteVectorData() {
// unimplemented
}
@Override
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}
}

View File

@ -67,4 +67,9 @@ public class TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testEmptyByteVectorData() {
// unimplemented
}
@Override
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}
}

View File

@ -388,10 +388,14 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
// write the vector data to a temporary file
DocsWithFieldSet docsWithField =
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> writeByteVectorData(
tempVectorData, MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
case FLOAT32 -> writeVectorData(
tempVectorData, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
case BYTE ->
writeByteVectorData(
tempVectorData,
MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
case FLOAT32 ->
writeVectorData(
tempVectorData,
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
};
CodecUtil.writeFooter(tempVectorData);
IOUtils.close(tempVectorData);
@ -638,13 +642,15 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
throws IOException {
int dim = fieldInfo.getVectorDimension();
return switch (fieldInfo.getVectorEncoding()) {
case BYTE -> new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
case BYTE ->
new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
@Override
public byte[] copyValue(byte[] value) {
return ArrayUtil.copyOfSubArray(value, 0, dim);
}
};
case FLOAT32 -> new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
case FLOAT32 ->
new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
@Override
public float[] copyValue(float[] value) {
return ArrayUtil.copyOfSubArray(value, 0, dim);
@ -663,10 +669,12 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
DefaultFlatVectorScorer defaultFlatVectorScorer = new DefaultFlatVectorScorer();
RandomVectorScorerSupplier scorerSupplier =
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
case BYTE ->
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
case FLOAT32 -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
case FLOAT32 ->
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
};
@ -693,9 +701,9 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
lastDocID = docID;
}
OnHeapHnswGraph getGraph() {
OnHeapHnswGraph getGraph() throws IOException {
if (vectors.size() > 0) {
return hnswGraphBuilder.getGraph();
return hnswGraphBuilder.getCompletedGraph();
} else {
return null;
}

View File

@ -18,6 +18,7 @@
package org.apache.lucene.backward_codecs.lucene95;
import static org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.KnnVectorsWriter.MergedVectorValues.hasVectorValues;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
@ -414,10 +415,14 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
// write the vector data to a temporary file
DocsWithFieldSet docsWithField =
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> writeByteVectorData(
tempVectorData, MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
case FLOAT32 -> writeVectorData(
tempVectorData, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
case BYTE ->
writeByteVectorData(
tempVectorData,
MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
case FLOAT32 ->
writeVectorData(
tempVectorData,
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
};
CodecUtil.writeFooter(tempVectorData);
IOUtils.close(tempVectorData);
@ -472,14 +477,18 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
IncrementalHnswGraphMerger merger =
new IncrementalHnswGraphMerger(fieldInfo, scorerSupplier, M, beamWidth);
for (int i = 0; i < mergeState.liveDocs.length; i++) {
if (hasVectorValues(mergeState.fieldInfos[i], fieldInfo.name)) {
merger.addReader(
mergeState.knnVectorsReaders[i], mergeState.docMaps[i], mergeState.liveDocs[i]);
}
}
DocIdSetIterator mergedVectorIterator = null;
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> mergedVectorIterator =
case BYTE ->
mergedVectorIterator =
KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
case FLOAT32 -> mergedVectorIterator =
case FLOAT32 ->
mergedVectorIterator =
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
}
graph =
@ -680,13 +689,15 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
throws IOException {
int dim = fieldInfo.getVectorDimension();
return switch (fieldInfo.getVectorEncoding()) {
case BYTE -> new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
case BYTE ->
new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
@Override
public byte[] copyValue(byte[] value) {
return ArrayUtil.copyOfSubArray(value, 0, dim);
}
};
case FLOAT32 -> new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
case FLOAT32 ->
new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
@Override
public float[] copyValue(float[] value) {
return ArrayUtil.copyOfSubArray(value, 0, dim);
@ -704,10 +715,12 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
vectors = new ArrayList<>();
RandomVectorScorerSupplier scorerSupplier =
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
case BYTE ->
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
case FLOAT32 -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
case FLOAT32 ->
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
};
@ -732,9 +745,9 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
lastDocID = docID;
}
OnHeapHnswGraph getGraph() {
OnHeapHnswGraph getGraph() throws IOException {
if (vectors.size() > 0) {
return hnswGraphBuilder.getGraph();
return hnswGraphBuilder.getCompletedGraph();
} else {
return null;
}

View File

@ -72,7 +72,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
final Sort sort;
try (DirectoryReader reader = DirectoryReader.open(directory)) {
assertEquals(1, reader.leaves().size());
sort = reader.leaves().get(0).reader().getMetaData().getSort();
sort = reader.leaves().get(0).reader().getMetaData().sort();
assertNotNull(sort);
searchExampleIndex(reader);
}
@ -125,8 +125,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
.add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST)
.build(),
2);
assertEquals(2, children.totalHits.value);
assertEquals(1, parents.totalHits.value);
assertEquals(2, children.totalHits.value());
assertEquals(1, parents.totalHits.value());
// make sure it's sorted
assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc);
assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc);
@ -140,7 +140,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
public void testSortedIndex() throws Exception {
try (DirectoryReader reader = DirectoryReader.open(directory)) {
assertEquals(1, reader.leaves().size());
Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
Sort sort = reader.leaves().get(0).reader().getMetaData().sort();
assertNotNull(sort);
assertEquals("<long: \"dateDV\">!", sort.toString());
// This will confirm the docs are really sorted
@ -195,28 +195,28 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
IndexSearcher searcher = newSearcher(reader);
TopDocs topDocs = searcher.search(new FieldExistsQuery("titleTokenized"), 10);
assertEquals(50, topDocs.totalHits.value);
assertEquals(50, topDocs.totalHits.value());
topDocs = searcher.search(new FieldExistsQuery("titleDV"), 10);
assertEquals(50, topDocs.totalHits.value);
assertEquals(50, topDocs.totalHits.value());
topDocs =
searcher.search(
IntPoint.newRangeQuery("docid_int", 42, 44),
10,
new Sort(new SortField("docid_intDV", SortField.Type.INT)));
assertEquals(3, topDocs.totalHits.value);
assertEquals(3, topDocs.totalHits.value());
assertEquals(3, topDocs.scoreDocs.length);
assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5);
assertTrue(topDocs.totalHits.value > 0);
assertTrue(topDocs.totalHits.value() > 0);
topDocs =
searcher.search(
new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG)));
assertEquals(50, topDocs.totalHits.value);
assertEquals(50, topDocs.totalHits.value());
assertEquals(5, topDocs.scoreDocs.length);
long firstDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0];
long lastDate = (Long) ((FieldDoc) topDocs.scoreDocs[4]).fields[0];
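Editor's note: these test changes follow TotalHits exposing accessor methods, so hit counts are now read via value() and relation() rather than public fields. A minimal sketch of the call pattern (assumes a searcher and query set up elsewhere):

TopDocs td = searcher.search(query, 10);
long hitCount = td.totalHits.value();               // hit count, or a lower bound
TotalHits.Relation rel = td.totalHits.relation();   // EQUAL_TO or GREATER_THAN_OR_EQUAL_TO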

View File

@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.jmh;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.lucene912.ForDeltaUtil;
import org.apache.lucene.codecs.lucene912.ForUtil;
import org.apache.lucene.codecs.lucene912.PostingIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.IOUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(
value = 3,
jvmArgsAppend = {"-Xmx1g", "-Xms1g", "-XX:+AlwaysPreTouch"})
public class PostingIndexInputBenchmark {
private Path path;
private Directory dir;
private IndexInput in;
private PostingIndexInput postingIn;
private final ForUtil forUtil = new ForUtil();
private final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
private final long[] values = new long[128];
@Param({"2", "3", "4", "5", "6", "7", "8", "9", "10"})
public int bpv;
@Setup(Level.Trial)
public void setup() throws Exception {
path = Files.createTempDirectory("forUtil");
dir = MMapDirectory.open(path);
try (IndexOutput out = dir.createOutput("docs", IOContext.DEFAULT)) {
Random r = new Random(0);
// Write enough random data to not reach EOF while decoding
for (int i = 0; i < 100; ++i) {
out.writeLong(r.nextLong());
}
}
in = dir.openInput("docs", IOContext.DEFAULT);
postingIn = new PostingIndexInput(in, forUtil, forDeltaUtil);
}
@TearDown(Level.Trial)
public void tearDown() throws Exception {
if (dir != null) {
dir.deleteFile("docs");
}
IOUtils.close(in, dir);
in = null;
dir = null;
Files.deleteIfExists(path);
}
@Benchmark
public void decode(Blackhole bh) throws IOException {
in.seek(3); // random unaligned offset
postingIn.decode(bpv, values);
bh.consume(values);
}
@Benchmark
public void decodeAndPrefixSum(Blackhole bh) throws IOException {
in.seek(3); // random unaligned offset
postingIn.decodeAndPrefixSum(bpv, 100, values);
bh.consume(values);
}
}

View File

@ -17,11 +17,10 @@
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
# collector.class can be:
#  Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDoc
# collector.manager.class can be:
#  Fully Qualified Class Name of a CollectorManager with an empty constructor
# topScoreDoc - Creates a TopScoreDocCollectorManager
collector.manager.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory

View File

@ -17,11 +17,10 @@
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
# collector.class can be:
#  Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDoc
# collector.manager.class can be:
#  Fully Qualified Class Name of a CollectorManager with an empty constructor
# topScoreDoc - Creates a TopScoreDocCollectorManager
collector.manager.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory
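Editor's note: as the comments above say, collector.manager.class accepts the fully qualified name of a CollectorManager with an empty constructor, loaded reflectively by SearchWithCollectorTask later in this change. A minimal sketch of such a class (hypothetical name, not part of this commit; it would need to be on the benchmark classpath):

import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.TotalHitCountCollector;

public class HitCountingCollectorManager
    implements CollectorManager<TotalHitCountCollector, Integer> {

  public HitCountingCollectorManager() {}   // empty constructor, required for reflective loading

  @Override
  public TotalHitCountCollector newCollector() {
    return new TotalHitCountCollector();
  }

  @Override
  public Integer reduce(Collection<TotalHitCountCollector> collectors) throws IOException {
    int total = 0;
    for (TotalHitCountCollector c : collectors) {
      total += c.getTotalHits();
    }
    return total;
  }
}

It would then be selected with collector.manager.class=your.package.HitCountingCollectorManager.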

View File

@ -24,7 +24,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@ -119,9 +119,7 @@ public abstract class ReadTask extends PerfTask {
hits = searcher.search(q, numHits);
}
} else {
Collector collector = createCollector();
searcher.search(q, collector);
searcher.search(q, createCollectorManager());
// hits = collector.topDocs();
}
@ -184,9 +182,8 @@ public abstract class ReadTask extends PerfTask {
return res;
}
protected Collector createCollector() throws Exception {
return new TopScoreDocCollectorManager(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1)
.newCollector();
protected CollectorManager<?, ?> createCollectorManager() throws Exception {
return new TopScoreDocCollectorManager(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
}
protected Document retrieveDoc(StoredFields storedFields, int id) throws IOException {

View File

@ -19,7 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.TopScoreDocCollectorManager;
/** Does search w/ a custom collector */
@ -37,7 +37,11 @@ public class SearchWithCollectorTask extends SearchTask {
// check to make sure either the doc is being stored
PerfRunData runData = getRunData();
Config config = runData.getConfig();
clnName = config.get("collector.class", "");
if (config.get("collector.class", null) != null) {
throw new IllegalArgumentException(
"collector.class is no longer supported as a config parameter, use collector.manager.class instead to provide a CollectorManager class name");
}
clnName = config.get("collector.manager.class", "");
}
@Override
@ -46,18 +50,17 @@ public class SearchWithCollectorTask extends SearchTask {
}
@Override
protected Collector createCollector() throws Exception {
Collector collector = null;
protected CollectorManager<?, ?> createCollectorManager() throws Exception {
CollectorManager<?, ?> collectorManager;
if (clnName.equalsIgnoreCase("topScoreDoc") == true) {
collector =
new TopScoreDocCollectorManager(numHits(), null, Integer.MAX_VALUE, false).newCollector();
collectorManager = new TopScoreDocCollectorManager(numHits(), Integer.MAX_VALUE);
} else if (clnName.length() > 0) {
collector = Class.forName(clnName).asSubclass(Collector.class).getConstructor().newInstance();
collectorManager =
Class.forName(clnName).asSubclass(CollectorManager.class).getConstructor().newInstance();
} else {
collector = super.createCollector();
collectorManager = super.createCollectorManager();
}
return collector;
return collectorManager;
}
@Override

View File

@ -91,7 +91,7 @@ public class TestDocMaker extends BenchmarkTestCase {
IndexReader reader = DirectoryReader.open(runData.getDirectory());
IndexSearcher searcher = newSearcher(reader);
TopDocs td = searcher.search(new TermQuery(new Term("key", "value")), 10);
assertEquals(numExpectedResults, td.totalHits.value);
assertEquals(numExpectedResults, td.totalHits.value());
reader.close();
}

View File

@ -160,7 +160,7 @@ public class TestLineDocSource extends BenchmarkTestCase {
reader = DirectoryReader.open(runData.getDirectory());
searcher = newSearcher(reader);
TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10);
assertEquals(numAdds, td.totalHits.value);
assertEquals(numAdds, td.totalHits.value());
assertNotNull(td.scoreDocs[0]);
if (storedField == null) {

View File

@ -151,13 +151,13 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
if (!assignedClasses.isEmpty()) {
Collections.sort(assignedClasses);
// this is a negative number closest to 0 = a
double smax = assignedClasses.get(0).getScore();
double smax = assignedClasses.get(0).score();
double sumLog = 0;
// log(sum(exp(x_n-a)))
for (ClassificationResult<BytesRef> cr : assignedClasses) {
// getScore-smax <=0 (both negative, smax is the smallest abs()
sumLog += Math.exp(cr.getScore() - smax);
sumLog += Math.exp(cr.score() - smax);
}
// loga=a+log(sum(exp(x_n-a))) = log(sum(exp(x_n)))
double loga = smax;
@ -165,8 +165,8 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
// 1/sum*x = exp(log(x))*1/sum = exp(log(x)-log(sum))
for (ClassificationResult<BytesRef> cr : assignedClasses) {
double scoreDiff = cr.getScore() - loga;
returnList.add(new ClassificationResult<>(cr.getAssignedClass(), Math.exp(scoreDiff)));
double scoreDiff = cr.score() - loga;
returnList.add(new ClassificationResult<>(cr.assignedClass(), Math.exp(scoreDiff)));
}
}
return returnList;
@ -216,7 +216,7 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
builder.add(query, BooleanClause.Occur.MUST);
}
TopDocs search = indexSearcher.search(builder.build(), 1);
return search.totalHits.value > 0 ? search.scoreDocs[0].score : 1;
return search.totalHits.value() > 0 ? search.scoreDocs[0].score : 1;
}
private double calculateLogPrior(Term term) throws IOException {
@ -227,6 +227,6 @@ public class BM25NBClassifier implements Classifier<BytesRef> {
bq.add(query, BooleanClause.Occur.MUST);
}
TopDocs topDocs = indexSearcher.search(bq.build(), 1);
return topDocs.totalHits.value > 0 ? Math.log(topDocs.scoreDocs[0].score) : 0;
return topDocs.totalHits.value() > 0 ? Math.log(topDocs.scoreDocs[0].score) : 0;
}
}
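Editor's note: the normalization loop earlier in this file is a log-sum-exp: the per-class scores are negative log-likelihoods, and each is turned into a probability by subtracting log(sum(exp(x_n))), computed relative to the largest score to avoid underflow. A small stand-alone sketch with made-up numbers:

public class LogSumExpSketch {
  public static void main(String[] args) {
    double[] logScores = {-1.1, -2.3, -4.0};   // hypothetical per-class log scores, best first
    double smax = logScores[0];                // largest score (closest to zero)
    double sumExp = 0;
    for (double x : logScores) {
      sumExp += Math.exp(x - smax);
    }
    double loga = smax + Math.log(sumExp);     // log(sum(exp(x_n)))
    for (double x : logScores) {
      System.out.println(Math.exp(x - loga));  // normalized probabilities that sum to 1
    }
  }
}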

View File

@ -148,7 +148,7 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
if (textField != null && classField != null) {
// assign class to the doc
ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
Boolean assignedClass = classificationResult.getAssignedClass();
Boolean assignedClass = classificationResult.assignedClass();
Boolean correctClass = Boolean.valueOf(classField.stringValue());
double modifier = Math.signum(correctClass.compareTo(assignedClass));

View File

@ -126,7 +126,7 @@ public class CachingNaiveBayesClassifier extends SimpleNaiveBayesClassifier {
int removeIdx = -1;
int i = 0;
for (ClassificationResult<BytesRef> cr : ret) {
if (cr.getAssignedClass().equals(cclass)) {
if (cr.assignedClass().equals(cclass)) {
removeIdx = i;
break;
}
@ -137,7 +137,7 @@ public class CachingNaiveBayesClassifier extends SimpleNaiveBayesClassifier {
ClassificationResult<BytesRef> toRemove = ret.get(removeIdx);
ret.add(
new ClassificationResult<>(
toRemove.getAssignedClass(), toRemove.getScore() + Math.log(wordProbability)));
toRemove.assignedClass(), toRemove.score() + Math.log(wordProbability)));
ret.remove(removeIdx);
}
}

View File

@ -20,44 +20,15 @@ package org.apache.lucene.classification;
* The result of a call to {@link Classifier#assignClass(String)} holding an assigned class of type
* <code>T</code> and a score.
*
* @param assignedClass the class <code>T</code> assigned by a {@link Classifier}
* @param score the score for the assignedClass as a <code>double</code>
* @lucene.experimental
*/
public class ClassificationResult<T> implements Comparable<ClassificationResult<T>> {
private final T assignedClass;
private final double score;
/**
* Constructor
*
* @param assignedClass the class <code>T</code> assigned by a {@link Classifier}
* @param score the score for the assignedClass as a <code>double</code>
*/
public ClassificationResult(T assignedClass, double score) {
this.assignedClass = assignedClass;
this.score = score;
}
/**
* retrieve the result class
*
* @return a <code>T</code> representing an assigned class
*/
public T getAssignedClass() {
return assignedClass;
}
/**
* retrieve the result score
*
* @return a <code>double</code> representing a result score
*/
public double getScore() {
return score;
}
public record ClassificationResult<T>(T assignedClass, double score)
implements Comparable<ClassificationResult<T>> {
@Override
public int compareTo(ClassificationResult<T> o) {
return Double.compare(o.getScore(), this.getScore());
return Double.compare(o.score(), this.score());
}
}
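Editor's note: with ClassificationResult now a record, callers read assignedClass() and score(), and the Comparable implementation still orders by descending score, so sorting puts the best class first. A minimal sketch (hypothetical class labels and scores):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.classification.ClassificationResult;

public class ClassificationResultSketch {
  public static void main(String[] args) {
    List<ClassificationResult<String>> results = new ArrayList<>();
    results.add(new ClassificationResult<>("sports", 0.2));
    results.add(new ClassificationResult<>("technology", 0.7));
    Collections.sort(results);                             // descending by score
    System.out.println(results.get(0).assignedClass());    // technology
    System.out.println(results.get(0).score());            // 0.7
  }
}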

View File

@ -108,9 +108,9 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
ClassificationResult<BytesRef> assignedClass = null;
double maxscore = -Double.MAX_VALUE;
for (ClassificationResult<BytesRef> cl : assignedClasses) {
if (cl.getScore() > maxscore) {
if (cl.score() > maxscore) {
assignedClass = cl;
maxscore = cl.getScore();
maxscore = cl.score();
}
}
return assignedClass;
@ -159,7 +159,7 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
Map<BytesRef, Integer> classCounts = new HashMap<>();
Map<BytesRef, Double> classBoosts =
new HashMap<>(); // this is a boost based on class ranking positions in topDocs
float maxScore = topDocs.totalHits.value == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
float maxScore = topDocs.totalHits.value() == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
StoredFields storedFields = indexSearcher.storedFields();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
IndexableField storableField = storedFields.document(scoreDoc.doc).getField(classFieldName);
@ -193,7 +193,7 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
if (sumdoc < k) {
for (ClassificationResult<BytesRef> cr : temporaryList) {
returnList.add(
new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc));
new ClassificationResult<>(cr.assignedClass(), cr.score() * k / (double) sumdoc));
}
} else {
returnList = temporaryList;

View File

@ -129,9 +129,9 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
ClassificationResult<BytesRef> assignedClass = null;
double maxscore = -Double.MAX_VALUE;
for (ClassificationResult<BytesRef> cl : assignedClasses) {
if (cl.getScore() > maxscore) {
if (cl.score() > maxscore) {
assignedClass = cl;
maxscore = cl.getScore();
maxscore = cl.score();
}
}
return assignedClass;
@ -192,7 +192,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
Map<BytesRef, Integer> classCounts = new HashMap<>();
Map<BytesRef, Double> classBoosts =
new HashMap<>(); // this is a boost based on class ranking positions in topDocs
float maxScore = topDocs.totalHits.value == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
float maxScore = topDocs.totalHits.value() == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
StoredFields storedFields = indexSearcher.storedFields();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
IndexableField[] storableFields =
@ -229,7 +229,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
if (sumdoc < k) {
for (ClassificationResult<BytesRef> cr : temporaryList) {
returnList.add(
new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc));
new ClassificationResult<>(cr.assignedClass(), cr.score() * k / (double) sumdoc));
}
} else {
returnList = temporaryList;

View File

@ -105,9 +105,9 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
ClassificationResult<BytesRef> assignedClass = null;
double maxscore = -Double.MAX_VALUE;
for (ClassificationResult<BytesRef> c : assignedClasses) {
if (c.getScore() > maxscore) {
if (c.score() > maxscore) {
assignedClass = c;
maxscore = c.getScore();
maxscore = c.score();
}
}
return assignedClass;
@ -297,13 +297,13 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
if (!assignedClasses.isEmpty()) {
Collections.sort(assignedClasses);
// this is a negative number closest to 0 = a
double smax = assignedClasses.get(0).getScore();
double smax = assignedClasses.get(0).score();
double sumLog = 0;
// log(sum(exp(x_n-a)))
for (ClassificationResult<BytesRef> cr : assignedClasses) {
// getScore-smax <=0 (both negative, smax is the smallest abs()
sumLog += Math.exp(cr.getScore() - smax);
sumLog += Math.exp(cr.score() - smax);
}
// loga=a+log(sum(exp(x_n-a))) = log(sum(exp(x_n)))
double loga = smax;
@ -311,8 +311,8 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
// 1/sum*x = exp(log(x))*1/sum = exp(log(x)-log(sum))
for (ClassificationResult<BytesRef> cr : assignedClasses) {
double scoreDiff = cr.getScore() - loga;
returnList.add(new ClassificationResult<>(cr.getAssignedClass(), Math.exp(scoreDiff)));
double scoreDiff = cr.score() - loga;
returnList.add(new ClassificationResult<>(cr.assignedClass(), Math.exp(scoreDiff)));
}
}
return returnList;

View File

@ -80,9 +80,9 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi
ClassificationResult<BytesRef> assignedClass = null;
double maxscore = -Double.MAX_VALUE;
for (ClassificationResult<BytesRef> c : assignedClasses) {
if (c.getScore() > maxscore) {
if (c.score() > maxscore) {
assignedClass = c;
maxscore = c.getScore();
maxscore = c.score();
}
}
return assignedClass;

View File

@ -107,7 +107,7 @@ public class ConfusionMatrixGenerator {
time += end - start;
if (result != null) {
T assignedClass = result.getAssignedClass();
T assignedClass = result.assignedClass();
if (assignedClass != null) {
counter++;
String classified =

View File

@ -138,13 +138,13 @@ public class DatasetSplitter {
// iterate over existing documents
StoredFields storedFields = originalIndex.storedFields();
for (GroupDocs<Object> group : topGroups.groups) {
assert group.totalHits.relation == TotalHits.Relation.EQUAL_TO;
long totalHits = group.totalHits.value;
assert group.totalHits().relation() == TotalHits.Relation.EQUAL_TO;
long totalHits = group.totalHits().value();
double testSize = totalHits * testRatio;
int tc = 0;
double cvSize = totalHits * crossValidationRatio;
int cvc = 0;
for (ScoreDoc scoreDoc : group.scoreDocs) {
for (ScoreDoc scoreDoc : group.scoreDocs()) {
// create a new document for indexing
Document doc = createNewDoc(storedFields, ft, scoreDoc, fieldNames);

View File

@ -91,7 +91,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
Classifier<T> classifier, String inputDoc, T expectedResult) throws Exception {
ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
assertNotNull(classificationResult);
T assignedClass = classificationResult.getAssignedClass();
T assignedClass = classificationResult.assignedClass();
assertNotNull(assignedClass);
assertEquals(
"got an assigned class of " + assignedClass,
@ -101,7 +101,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
assignedClass instanceof BytesRef
? ((BytesRef) assignedClass).utf8ToString()
: assignedClass);
double score = classificationResult.getScore();
double score = classificationResult.score();
assertTrue("score should be between 0 and 1, got:" + score, score <= 1 && score >= 0);
return classificationResult;
}
@ -130,18 +130,17 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
getSampleIndex(analyzer);
ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
assertNotNull(classificationResult.getAssignedClass());
assertNotNull(classificationResult.assignedClass());
assertEquals(
"got an assigned class of " + classificationResult.getAssignedClass(),
"got an assigned class of " + classificationResult.assignedClass(),
expectedResult,
classificationResult.getAssignedClass());
double score = classificationResult.getScore();
classificationResult.assignedClass());
double score = classificationResult.score();
assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
updateSampleIndex();
ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
assertEquals(
classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
assertEquals(classificationResult.assignedClass(), secondClassificationResult.assignedClass());
assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.score()));
}
protected LeafReader getSampleIndex(Analyzer analyzer) throws IOException {

View File

@ -88,7 +88,7 @@ public class TestKNearestNeighborClassifier extends ClassificationTestBase<Bytes
textFieldName),
TECHNOLOGY_INPUT,
TECHNOLOGY_RESULT);
assertTrue(resultDS.getScore() != resultLMS.getScore());
assertTrue(resultDS.score() != resultLMS.score());
} finally {
IOUtils.close(leafReader);
}
@ -113,7 +113,7 @@ public class TestKNearestNeighborClassifier extends ClassificationTestBase<Bytes
leafReader, null, analyzer, null, 6, 1, 1, categoryFieldName, textFieldName);
List<ClassificationResult<BytesRef>> classes =
knnClassifier.getClasses(STRONG_TECHNOLOGY_INPUT);
assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
assertTrue(classes.get(0).score() > classes.get(1).score());
checkCorrectClassification(knnClassifier, STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
} finally {
IOUtils.close(leafReader);
@ -139,7 +139,7 @@ public class TestKNearestNeighborClassifier extends ClassificationTestBase<Bytes
leafReader, null, analyzer, null, 3, 1, 1, categoryFieldName, textFieldName);
List<ClassificationResult<BytesRef>> classes =
knnClassifier.getClasses(SUPER_STRONG_TECHNOLOGY_INPUT);
assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
assertTrue(classes.get(0).score() > classes.get(1).score());
checkCorrectClassification(knnClassifier, SUPER_STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
} finally {
IOUtils.close(leafReader);

View File

@ -58,12 +58,12 @@ public abstract class DocumentClassificationTestBase<T> extends ClassificationTe
protected double checkCorrectDocumentClassification(
DocumentClassifier<T> classifier, Document inputDoc, T expectedResult) throws Exception {
ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
assertNotNull(classificationResult.getAssignedClass());
assertNotNull(classificationResult.assignedClass());
assertEquals(
"got an assigned class of " + classificationResult.getAssignedClass(),
"got an assigned class of " + classificationResult.assignedClass(),
expectedResult,
classificationResult.getAssignedClass());
double score = classificationResult.getScore();
classificationResult.assignedClass());
double score = classificationResult.score();
assertTrue("score should be between 0 and 1, got:" + score, score <= 1 && score >= 0);
return score;
}

View File

@ -69,28 +69,15 @@ public class BlockTermsWriter extends FieldsConsumer {
private final TermsIndexWriterBase termsIndexWriter;
private final int maxDoc;
private static class FieldMetaData {
public final FieldInfo fieldInfo;
public final long numTerms;
public final long termsStartPointer;
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public FieldMetaData(
private record FieldMetaData(
FieldInfo fieldInfo,
long numTerms,
long termsStartPointer,
long sumTotalTermFreq,
long sumDocFreq,
int docCount) {
private FieldMetaData {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
}
}
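Editor's note: this is one of several class-to-record conversions in this change; the compact constructor keeps the old invariant check while the record generates the fields, accessors, equals and hashCode. A stand-alone sketch of the same pattern (illustrative names only):

public class RecordSketch {
  record FieldStats(String field, long numTerms) {
    FieldStats {               // compact constructor: validate before the fields are assigned
      assert numTerms > 0;
    }
  }

  public static void main(String[] args) {
    FieldStats stats = new FieldStats("title", 42);
    System.out.println(stats.field() + " has " + stats.numTerms() + " terms");
  }
}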

View File

@ -127,7 +127,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
@Override
public boolean isIndexTerm(BytesRef term, TermStats stats) {
if (stats.docFreq >= docFreqThresh || count >= interval) {
if (stats.docFreq() >= docFreqThresh || count >= interval) {
count = 1;
return true;
} else {

View File

@ -34,19 +34,14 @@ final class FSTOrdsOutputs extends Outputs<FSTOrdsOutputs.Output> {
private static final BytesRef NO_BYTES = new BytesRef();
public static final class Output {
public final BytesRef bytes;
// Inclusive:
public final long startOrd;
// Inclusive:
public final long endOrd;
public Output(BytesRef bytes, long startOrd, long endOrd) {
/**
* @param startOrd the start ordinal (inclusive)
* @param endOrd the end ordinal (inclusive)
*/
public record Output(BytesRef bytes, long startOrd, long endOrd) {
public Output {
assert startOrd >= 0 : "startOrd=" + startOrd;
assert endOrd >= 0 : "endOrd=" + endOrd;
this.bytes = bytes;
this.startOrd = startOrd;
this.endOrd = endOrd;
}
@Override
@ -60,24 +55,6 @@ final class FSTOrdsOutputs extends Outputs<FSTOrdsOutputs.Output> {
}
return startOrd + " to " + x;
}
@Override
public int hashCode() {
int hash = bytes.hashCode();
hash = (int) (hash ^ startOrd);
hash = (int) (hash ^ endOrd);
return hash;
}
@Override
public boolean equals(Object _other) {
if (_other instanceof Output) {
Output other = (Output) _other;
return bytes.equals(other.bytes) && startOrd == other.startOrd && endOrd == other.endOrd;
} else {
return false;
}
}
}
@Override

View File

@ -139,18 +139,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
private static class FieldMetaData {
public final FieldInfo fieldInfo;
public final Output rootCode;
public final long numTerms;
public final long indexStartFP;
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public final BytesRef minTerm;
public final BytesRef maxTerm;
public FieldMetaData(
private record FieldMetaData(
FieldInfo fieldInfo,
Output rootCode,
long numTerms,
@ -160,17 +149,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
int docCount,
BytesRef minTerm,
BytesRef maxTerm) {
private FieldMetaData {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
assert rootCode != null : "field=" + fieldInfo.name + " numTerms=" + numTerms;
this.rootCode = rootCode;
this.indexStartFP = indexStartFP;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
}
}
@ -293,15 +274,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
}
}
private static final class SubIndex {
public final FST<Output> index;
public final long termOrdStart;
public SubIndex(FST<Output> index, long termOrdStart) {
this.index = index;
this.termOrdStart = termOrdStart;
}
}
private record SubIndex(FST<Output> index, long termOrdStart) {}
private static final class PendingBlock extends PendingEntry {
public final BytesRef prefix;
@ -438,7 +411,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
// long blockTermCount = output.endOrd - output.startOrd + 1;
Output newOutput =
FST_OUTPUTS.newOutput(
output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
output.bytes(), termOrdOffset + output.startOrd(), output.endOrd() - termOrdOffset);
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
// " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
// + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
@ -969,9 +942,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(field.fieldInfo.number);
assert field.numTerms > 0;
out.writeVLong(field.numTerms);
out.writeVInt(field.rootCode.bytes.length);
out.writeVInt(field.rootCode.bytes().length);
out.writeBytes(
field.rootCode.bytes.bytes, field.rootCode.bytes.offset, field.rootCode.bytes.length);
field.rootCode.bytes().bytes,
field.rootCode.bytes().offset,
field.rootCode.bytes().length);
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
out.writeVLong(field.sumTotalTermFreq);
}

View File

@ -79,7 +79,8 @@ final class OrdsFieldReader extends Terms {
// }
rootBlockFP =
(new ByteArrayDataInput(rootCode.bytes.bytes, rootCode.bytes.offset, rootCode.bytes.length))
(new ByteArrayDataInput(
rootCode.bytes().bytes, rootCode.bytes().offset, rootCode.bytes().length))
.readVLong()
>>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

View File

@ -142,8 +142,8 @@ final class OrdsIntersectTermsEnumFrame {
// + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state="
// + state));
if (output != null && output.bytes != null && transitionCount != 0) {
BytesRef frameIndexData = output.bytes;
if (output != null && output.bytes() != null && transitionCount != 0) {
BytesRef frameIndexData = output.bytes();
// Floor frame
if (floorData.length < frameIndexData.length) {

View File

@ -149,7 +149,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// Pushes a frame we seek'd to
OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, Output frameData, int length)
throws IOException {
scratchReader.reset(frameData.bytes.bytes, frameData.bytes.offset, frameData.bytes.length);
scratchReader.reset(
frameData.bytes().bytes, frameData.bytes().offset, frameData.bytes().length);
final long code = scratchReader.readVLong();
final long fpSeek = code >>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
// System.out.println(" fpSeek=" + fpSeek);
@ -160,11 +161,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// Must setFloorData before pushFrame in case pushFrame tries to rewind:
if (f.isFloor) {
f.termOrdOrig = frameData.startOrd;
f.setFloorData(scratchReader, frameData.bytes);
f.termOrdOrig = frameData.startOrd();
f.setFloorData(scratchReader, frameData.bytes());
}
pushFrame(arc, fpSeek, length, frameData.startOrd);
pushFrame(arc, fpSeek, length, frameData.startOrd());
return f;
}
@ -891,7 +892,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
} else if (isSeekFrame && !f.isFloor) {
final ByteArrayDataInput reader =
new ByteArrayDataInput(
output.bytes.bytes, output.bytes.offset, output.bytes.length);
output.bytes().bytes, output.bytes().offset, output.bytes().length);
final long codeOrig = reader.readVLong();
final long code =
(f.fp << OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
@ -1210,7 +1211,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput());
// System.out.println(" isFinal: " + finalOutput.startOrd + "-" +
// (Long.MAX_VALUE-finalOutput.endOrd));
if (targetOrd >= finalOutput.startOrd && targetOrd <= Long.MAX_VALUE - finalOutput.endOrd) {
if (targetOrd >= finalOutput.startOrd()
&& targetOrd <= Long.MAX_VALUE - finalOutput.endOrd()) {
// Only one range should match across all arc leaving this node
// assert bestOutput == null;
bestOutput = finalOutput;
@ -1247,9 +1249,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
}
// System.out.println(" cycle mid=" + mid + " targetOrd=" + targetOrd + " output=" +
// minArcOutput.startOrd + "-" + (Long.MAX_VALUE-minArcOutput.endOrd));
if (targetOrd > Long.MAX_VALUE - minArcOutput.endOrd) {
if (targetOrd > Long.MAX_VALUE - minArcOutput.endOrd()) {
low = mid + 1;
} else if (targetOrd < minArcOutput.startOrd) {
} else if (targetOrd < minArcOutput.startOrd()) {
high = mid - 1;
} else {
// System.out.println(" found!!");
@ -1282,10 +1284,10 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// this arc:
final Output minArcOutput =
OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
long endOrd = Long.MAX_VALUE - minArcOutput.endOrd;
long endOrd = Long.MAX_VALUE - minArcOutput.endOrd();
// System.out.println(" endOrd=" + endOrd + " targetOrd=" + targetOrd);
if (targetOrd >= minArcOutput.startOrd && targetOrd <= endOrd) {
if (targetOrd >= minArcOutput.startOrd() && targetOrd <= endOrd) {
// Recurse on this arc:
output = minArcOutput;
result.setIntAt(upto++, arc.label());

View File

@ -71,8 +71,8 @@ final class SimpleTextBKDReader extends PointValues {
this.pointCount = pointCount;
this.docCount = docCount;
this.version = SimpleTextBKDWriter.VERSION_CURRENT;
assert minPackedValue.length == config.packedIndexBytesLength;
assert maxPackedValue.length == config.packedIndexBytesLength;
assert minPackedValue.length == config.packedIndexBytesLength();
assert maxPackedValue.length == config.packedIndexBytesLength();
}
@Override
@ -99,8 +99,8 @@ final class SimpleTextBKDReader extends PointValues {
private SimpleTextPointTree(
IndexInput in, int nodeID, int level, byte[] minPackedValue, byte[] maxPackedValue) {
this.in = in;
this.scratchDocIDs = new int[config.maxPointsInLeafNode];
this.scratchPackedValue = new byte[config.packedBytesLength];
this.scratchDocIDs = new int[config.maxPointsInLeafNode()];
this.scratchPackedValue = new byte[config.packedBytesLength()];
this.nodeID = nodeID;
this.rootNode = nodeID;
this.level = level;
@ -145,38 +145,39 @@ final class SimpleTextBKDReader extends PointValues {
private void pushLeft() {
int address = nodeID * bytesPerIndexEntry;
// final int splitDimPos;
if (config.numIndexDims == 1) {
if (config.numIndexDims() == 1) {
splitDims[level] = 0;
} else {
splitDims[level] = (splitPackedValues[address++] & 0xff);
}
final int splitDimPos = splitDims[level] * config.bytesPerDim;
final int splitDimPos = splitDims[level] * config.bytesPerDim();
if (splitDimValueStack[level] == null) {
splitDimValueStack[level] = new byte[config.bytesPerDim];
splitDimValueStack[level] = new byte[config.bytesPerDim()];
}
// save the dimension we are going to change
System.arraycopy(
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
assert Arrays.compareUnsigned(
maxPackedValue,
splitDimPos,
splitDimPos + config.bytesPerDim,
splitDimPos + config.bytesPerDim(),
splitPackedValues,
address,
address + config.bytesPerDim)
address + config.bytesPerDim())
>= 0
: "config.bytesPerDim="
+ config.bytesPerDim
: "config.bytesPerDim()="
+ config.bytesPerDim()
+ " splitDim="
+ splitDims[level]
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numIndexDims()="
+ config.numIndexDims()
+ " config.numDims="
+ config.numDims;
+ config.numDims();
nodeID *= 2;
level++;
// add the split dim value:
System.arraycopy(splitPackedValues, address, maxPackedValue, splitDimPos, config.bytesPerDim);
System.arraycopy(
splitPackedValues, address, maxPackedValue, splitDimPos, config.bytesPerDim());
}
@Override
@ -191,37 +192,38 @@ final class SimpleTextBKDReader extends PointValues {
private void pushRight() {
int address = nodeID * bytesPerIndexEntry;
if (config.numIndexDims == 1) {
if (config.numIndexDims() == 1) {
splitDims[level] = 0;
} else {
splitDims[level] = (splitPackedValues[address++] & 0xff);
}
final int splitDimPos = splitDims[level] * config.bytesPerDim;
final int splitDimPos = splitDims[level] * config.bytesPerDim();
// we should have already visited the left node
assert splitDimValueStack[level] != null;
// save the dimension we are going to change
System.arraycopy(
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
assert Arrays.compareUnsigned(
minPackedValue,
splitDimPos,
splitDimPos + config.bytesPerDim,
splitDimPos + config.bytesPerDim(),
splitPackedValues,
address,
address + config.bytesPerDim)
address + config.bytesPerDim())
<= 0
: "config.bytesPerDim="
+ config.bytesPerDim
: "config.bytesPerDim()="
+ config.bytesPerDim()
+ " splitDim="
+ splitDims[level]
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numIndexDims()="
+ config.numIndexDims()
+ " config.numDims="
+ config.numDims;
+ config.numDims();
nodeID = 2 * nodeID + 1;
level++;
// add the split dim value:
System.arraycopy(splitPackedValues, address, minPackedValue, splitDimPos, config.bytesPerDim);
System.arraycopy(
splitPackedValues, address, minPackedValue, splitDimPos, config.bytesPerDim());
}
@Override
@ -242,16 +244,16 @@ final class SimpleTextBKDReader extends PointValues {
splitDimValueStack[level],
0,
maxPackedValue,
splitDims[level] * config.bytesPerDim,
config.bytesPerDim);
splitDims[level] * config.bytesPerDim(),
config.bytesPerDim());
} else {
System.arraycopy(
splitDimValueStack[level],
0,
minPackedValue,
splitDims[level] * config.bytesPerDim,
config.bytesPerDim);
splitDims[level] * config.bytesPerDim(),
config.bytesPerDim());
}
}
@ -290,7 +292,7 @@ final class SimpleTextBKDReader extends PointValues {
private long sizeFromBalancedTree(int leftMostLeafNode, int rightMostLeafNode) {
// number of points that need to be distributed between leaves, one per leaf
final int extraPoints =
Math.toIntExact(((long) config.maxPointsInLeafNode * leafNodeOffset) - pointCount);
Math.toIntExact(((long) config.maxPointsInLeafNode() * leafNodeOffset) - pointCount);
assert extraPoints < leafNodeOffset : "point excess should be lower than leafNodeOffset";
// offset where we stop adding one point to the leaves
final int nodeOffset = leafNodeOffset - extraPoints;
@ -298,9 +300,9 @@ final class SimpleTextBKDReader extends PointValues {
for (int node = leftMostLeafNode; node <= rightMostLeafNode; node++) {
// offsetPosition provides which extra point will be added to this node
if (balanceTreeNodePosition(0, leafNodeOffset, node - leafNodeOffset, 0, 0) < nodeOffset) {
count += config.maxPointsInLeafNode;
count += config.maxPointsInLeafNode();
} else {
count += config.maxPointsInLeafNode - 1;
count += config.maxPointsInLeafNode() - 1;
}
}
return count;
@ -376,14 +378,14 @@ final class SimpleTextBKDReader extends PointValues {
// Again, this time reading values and checking with the visitor
visitor.grow(count);
// NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
assert scratchPackedValue.length == config.packedBytesLength;
assert scratchPackedValue.length == config.packedBytesLength();
BytesRefBuilder scratch = new BytesRefBuilder();
for (int i = 0; i < count; i++) {
readLine(in, scratch);
assert startsWith(scratch, BLOCK_VALUE);
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(scratch, BLOCK_VALUE));
assert br.length == config.packedBytesLength;
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, config.packedBytesLength);
assert br.length == config.packedBytesLength();
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, config.packedBytesLength());
visitor.visit(scratchDocIDs[i], scratchPackedValue);
}
} else {
@ -443,17 +445,17 @@ final class SimpleTextBKDReader extends PointValues {
@Override
public int getNumDimensions() throws IOException {
return config.numDims;
return config.numDims();
}
@Override
public int getNumIndexDimensions() throws IOException {
return config.numIndexDims;
return config.numIndexDims();
}
@Override
public int getBytesPerDimension() throws IOException {
return config.bytesPerDim;
return config.bytesPerDim();
}
@Override

View File

@ -144,28 +144,28 @@ final class SimpleTextBKDWriter implements Closeable {
this.maxDoc = maxDoc;
docsSeen = new FixedBitSet(maxDoc);
scratchDiff = new byte[config.bytesPerDim];
scratch1 = new byte[config.packedBytesLength];
scratch2 = new byte[config.packedBytesLength];
commonPrefixLengths = new int[config.numDims];
scratchDiff = new byte[config.bytesPerDim()];
scratch1 = new byte[config.packedBytesLength()];
scratch2 = new byte[config.packedBytesLength()];
commonPrefixLengths = new int[config.numDims()];
minPackedValue = new byte[config.packedIndexBytesLength];
maxPackedValue = new byte[config.packedIndexBytesLength];
minPackedValue = new byte[config.packedIndexBytesLength()];
maxPackedValue = new byte[config.packedIndexBytesLength()];
// Maximum number of points we hold in memory at any time
maxPointsSortInHeap =
(int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc * config.numDims));
(int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc() * config.numDims()));
// Finally, we must be able to hold at least the leaf node in heap during build:
if (maxPointsSortInHeap < config.maxPointsInLeafNode) {
if (maxPointsSortInHeap < config.maxPointsInLeafNode()) {
throw new IllegalArgumentException(
"maxMBSortInHeap="
+ maxMBSortInHeap
+ " only allows for maxPointsSortInHeap="
+ maxPointsSortInHeap
+ ", but this is less than config.maxPointsInLeafNode="
+ config.maxPointsInLeafNode
+ "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode");
+ ", but this is less than config.maxPointsInLeafNode()="
+ config.maxPointsInLeafNode()
+ "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode()");
}
this.maxMBSortInHeap = maxMBSortInHeap;
@ -183,10 +183,10 @@ final class SimpleTextBKDWriter implements Closeable {
}
public void add(byte[] packedValue, int docID) throws IOException {
if (packedValue.length != config.packedBytesLength) {
if (packedValue.length != config.packedBytesLength()) {
throw new IllegalArgumentException(
"packedValue should be length="
+ config.packedBytesLength
+ config.packedBytesLength()
+ " (got: "
+ packedValue.length
+ ")");
@ -209,30 +209,30 @@ final class SimpleTextBKDWriter implements Closeable {
} else {
pointWriter = new HeapPointWriter(config, Math.toIntExact(totalPointCount));
}
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength());
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength());
} else {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int offset = dim * config.bytesPerDim;
for (int dim = 0; dim < config.numIndexDims(); dim++) {
int offset = dim * config.bytesPerDim();
if (Arrays.compareUnsigned(
packedValue,
offset,
offset + config.bytesPerDim,
offset + config.bytesPerDim(),
minPackedValue,
offset,
offset + config.bytesPerDim)
offset + config.bytesPerDim())
< 0) {
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim);
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim());
}
if (Arrays.compareUnsigned(
packedValue,
offset,
offset + config.bytesPerDim,
offset + config.bytesPerDim(),
maxPackedValue,
offset,
offset + config.bytesPerDim)
offset + config.bytesPerDim())
> 0) {
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim);
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim());
}
}
}
@ -254,7 +254,7 @@ final class SimpleTextBKDWriter implements Closeable {
*/
public long writeField(IndexOutput out, String fieldName, MutablePointTree reader)
throws IOException {
if (config.numIndexDims == 1) {
if (config.numIndexDims() == 1) {
return writeField1Dim(out, fieldName, reader);
} else {
return writeFieldNDims(out, fieldName, reader);
@ -280,7 +280,7 @@ final class SimpleTextBKDWriter implements Closeable {
long countPerLeaf = pointCount = values.size();
long innerNodeCount = 1;
while (countPerLeaf > config.maxPointsInLeafNode) {
while (countPerLeaf > config.maxPointsInLeafNode()) {
countPerLeaf = (countPerLeaf + 1) / 2;
innerNodeCount *= 2;
}
@ -289,7 +289,7 @@ final class SimpleTextBKDWriter implements Closeable {
checkMaxLeafNodeCount(numLeaves);
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim + 1)];
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim() + 1)];
final long[] leafBlockFPs = new long[numLeaves];
// compute the min/max for this slice
@ -297,37 +297,37 @@ final class SimpleTextBKDWriter implements Closeable {
Arrays.fill(maxPackedValue, (byte) 0);
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
values.getValue(i, scratchBytesRef1);
for (int dim = 0; dim < config.numIndexDims; dim++) {
int offset = dim * config.bytesPerDim;
for (int dim = 0; dim < config.numIndexDims(); dim++) {
int offset = dim * config.bytesPerDim();
if (Arrays.compareUnsigned(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + offset,
scratchBytesRef1.offset + offset + config.bytesPerDim,
scratchBytesRef1.offset + offset + config.bytesPerDim(),
minPackedValue,
offset,
offset + config.bytesPerDim)
offset + config.bytesPerDim())
< 0) {
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + offset,
minPackedValue,
offset,
config.bytesPerDim);
config.bytesPerDim());
}
if (Arrays.compareUnsigned(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + offset,
scratchBytesRef1.offset + offset + config.bytesPerDim,
scratchBytesRef1.offset + offset + config.bytesPerDim(),
maxPackedValue,
offset,
offset + config.bytesPerDim)
offset + config.bytesPerDim())
> 0) {
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + offset,
maxPackedValue,
offset,
config.bytesPerDim);
config.bytesPerDim());
}
}
@ -345,7 +345,7 @@ final class SimpleTextBKDWriter implements Closeable {
maxPackedValue,
splitPackedValues,
leafBlockFPs,
new int[config.maxPointsInLeafNode]);
new int[config.maxPointsInLeafNode()]);
long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues, Math.toIntExact(countPerLeaf));
@ -387,15 +387,15 @@ final class SimpleTextBKDWriter implements Closeable {
final IndexOutput out;
final List<Long> leafBlockFPs = new ArrayList<>();
final List<byte[]> leafBlockStartValues = new ArrayList<>();
final byte[] leafValues = new byte[config.maxPointsInLeafNode * config.packedBytesLength];
final int[] leafDocs = new int[config.maxPointsInLeafNode];
final byte[] leafValues = new byte[config.maxPointsInLeafNode() * config.packedBytesLength()];
final int[] leafDocs = new int[config.maxPointsInLeafNode()];
long valueCount;
int leafCount;
OneDimensionBKDWriter(IndexOutput out) {
if (config.numIndexDims != 1) {
if (config.numIndexDims() != 1) {
throw new UnsupportedOperationException(
"config.numIndexDims must be 1 but got " + config.numIndexDims);
"config.numIndexDims() must be 1 but got " + config.numIndexDims());
}
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
@ -411,7 +411,7 @@ final class SimpleTextBKDWriter implements Closeable {
this.out = out;
lastPackedValue = new byte[config.packedBytesLength];
lastPackedValue = new byte[config.packedBytesLength()];
}
// for asserts
@ -426,8 +426,8 @@ final class SimpleTextBKDWriter implements Closeable {
packedValue,
0,
leafValues,
leafCount * config.packedBytesLength,
config.packedBytesLength);
leafCount * config.packedBytesLength(),
config.packedBytesLength());
leafDocs[leafCount] = docID;
docsSeen.set(docID);
leafCount++;
@ -441,7 +441,7 @@ final class SimpleTextBKDWriter implements Closeable {
+ " values");
}
if (leafCount == config.maxPointsInLeafNode) {
if (leafCount == config.maxPointsInLeafNode()) {
// We write a block once we hit exactly the max count ... this is different from
// when we flush a new segment, where we write between max/2 and max per leaf block,
// so merged segments will behave differently from newly flushed segments:
@ -471,43 +471,44 @@ final class SimpleTextBKDWriter implements Closeable {
// System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts="
// + leafBlockStartValues.size());
byte[] index = new byte[(1 + numInnerNodes) * (1 + config.bytesPerDim)];
byte[] index = new byte[(1 + numInnerNodes) * (1 + config.bytesPerDim())];
rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
long[] arr = new long[leafBlockFPs.size()];
for (int i = 0; i < leafBlockFPs.size(); i++) {
arr[i] = leafBlockFPs.get(i);
}
writeIndex(out, arr, index, config.maxPointsInLeafNode);
writeIndex(out, arr, index, config.maxPointsInLeafNode());
return indexFP;
}
private void writeLeafBlock() throws IOException {
assert leafCount != 0;
if (valueCount == 0) {
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength());
}
System.arraycopy(
leafValues,
(leafCount - 1) * config.packedBytesLength,
(leafCount - 1) * config.packedBytesLength(),
maxPackedValue,
0,
config.packedIndexBytesLength);
config.packedIndexBytesLength());
valueCount += leafCount;
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split
// value index in the end:
leafBlockStartValues.add(ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength));
leafBlockStartValues.add(
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength()));
}
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
// Find per-dim common prefix:
for (int dim = 0; dim < config.numDims; dim++) {
int offset1 = dim * config.bytesPerDim;
int offset2 = (leafCount - 1) * config.packedBytesLength + offset1;
for (int dim = 0; dim < config.numDims(); dim++) {
int offset1 = dim * config.bytesPerDim();
int offset2 = (leafCount - 1) * config.packedBytesLength() + offset1;
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
if (leafValues[offset1 + j] != leafValues[offset2 + j]) {
commonPrefixLengths[dim] = j;
@ -523,24 +524,24 @@ final class SimpleTextBKDWriter implements Closeable {
final BytesRef scratch = new BytesRef();
{
scratch.length = config.packedBytesLength;
scratch.length = config.packedBytesLength();
scratch.bytes = leafValues;
}
@Override
public BytesRef apply(int i) {
scratch.offset = config.packedBytesLength * i;
scratch.offset = config.packedBytesLength() * i;
return scratch;
}
};
assert valuesInOrderAndBounds(
leafCount,
0,
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength),
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength()),
ArrayUtil.copyOfSubArray(
leafValues,
(leafCount - 1) * config.packedBytesLength,
leafCount * config.packedBytesLength),
(leafCount - 1) * config.packedBytesLength(),
leafCount * config.packedBytesLength()),
packedValues,
leafDocs,
0);
@ -552,7 +553,7 @@ final class SimpleTextBKDWriter implements Closeable {
private void rotateToTree(
int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
// System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + "
// bpd=" + config.bytesPerDim + " index.length=" + index.length);
// bpd=" + config.bytesPerDim() + " index.length=" + index.length);
if (count == 1) {
// Leaf index node
// System.out.println(" leaf index node");
@ -561,8 +562,8 @@ final class SimpleTextBKDWriter implements Closeable {
leafBlockStartValues.get(offset),
0,
index,
nodeID * (1 + config.bytesPerDim) + 1,
config.bytesPerDim);
nodeID * (1 + config.bytesPerDim()) + 1,
config.bytesPerDim());
} else if (count > 1) {
// Internal index node: binary partition of count
int countAtLevel = 1;
@ -587,8 +588,8 @@ final class SimpleTextBKDWriter implements Closeable {
leafBlockStartValues.get(rootOffset),
0,
index,
nodeID * (1 + config.bytesPerDim) + 1,
config.bytesPerDim);
nodeID * (1 + config.bytesPerDim()) + 1,
config.bytesPerDim());
// System.out.println(" index[" + nodeID + "] = blockStartValues[" + rootOffset + "]");
// TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
@ -611,10 +612,10 @@ final class SimpleTextBKDWriter implements Closeable {
}
private void checkMaxLeafNodeCount(int numLeaves) {
if ((1 + config.bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
if ((1 + config.bytesPerDim()) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalStateException(
"too many nodes; increase config.maxPointsInLeafNode (currently "
+ config.maxPointsInLeafNode
"too many nodes; increase config.maxPointsInLeafNode() (currently "
+ config.maxPointsInLeafNode()
+ ") and reindex");
}
}
@ -652,7 +653,7 @@ final class SimpleTextBKDWriter implements Closeable {
long countPerLeaf = pointCount;
long innerNodeCount = 1;
while (countPerLeaf > config.maxPointsInLeafNode) {
while (countPerLeaf > config.maxPointsInLeafNode()) {
countPerLeaf = (countPerLeaf + 1) / 2;
innerNodeCount *= 2;
}
@ -667,20 +668,20 @@ final class SimpleTextBKDWriter implements Closeable {
// Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each
// recursion says which dim we split on.
byte[] splitPackedValues = new byte[Math.multiplyExact(numLeaves, 1 + config.bytesPerDim)];
byte[] splitPackedValues = new byte[Math.multiplyExact(numLeaves, 1 + config.bytesPerDim())];
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g.
// 7)
long[] leafBlockFPs = new long[numLeaves];
// Make sure the math above "worked":
assert pointCount / numLeaves <= config.maxPointsInLeafNode
assert pointCount / numLeaves <= config.maxPointsInLeafNode()
: "pointCount="
+ pointCount
+ " numLeaves="
+ numLeaves
+ " config.maxPointsInLeafNode="
+ config.maxPointsInLeafNode;
+ " config.maxPointsInLeafNode()="
+ config.maxPointsInLeafNode();
// We re-use the selector so we do not need to create an object every time.
BKDRadixSelector radixSelector =
@ -699,7 +700,7 @@ final class SimpleTextBKDWriter implements Closeable {
maxPackedValue,
splitPackedValues,
leafBlockFPs,
new int[config.maxPointsInLeafNode]);
new int[config.maxPointsInLeafNode()]);
// If no exception, we should have cleaned everything up:
assert tempDir.getCreatedFiles().isEmpty();
@ -724,15 +725,15 @@ final class SimpleTextBKDWriter implements Closeable {
IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode)
throws IOException {
write(out, NUM_DATA_DIMS);
writeInt(out, config.numDims);
writeInt(out, config.numDims());
newline(out);
write(out, NUM_INDEX_DIMS);
writeInt(out, config.numIndexDims);
writeInt(out, config.numIndexDims());
newline(out);
write(out, BYTES_PER_DIM);
writeInt(out, config.bytesPerDim);
writeInt(out, config.bytesPerDim());
newline(out);
write(out, MAX_LEAF_POINTS);
@ -767,8 +768,8 @@ final class SimpleTextBKDWriter implements Closeable {
newline(out);
}
assert (splitPackedValues.length % (1 + config.bytesPerDim)) == 0;
int count = splitPackedValues.length / (1 + config.bytesPerDim);
assert (splitPackedValues.length % (1 + config.bytesPerDim())) == 0;
int count = splitPackedValues.length / (1 + config.bytesPerDim());
assert count == leafBlockFPs.length;
write(out, SPLIT_COUNT);
@ -777,10 +778,12 @@ final class SimpleTextBKDWriter implements Closeable {
for (int i = 0; i < count; i++) {
write(out, SPLIT_DIM);
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim)] & 0xff);
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim())] & 0xff);
newline(out);
write(out, SPLIT_VALUE);
br = new BytesRef(splitPackedValues, 1 + (i * (1 + config.bytesPerDim)), config.bytesPerDim);
br =
new BytesRef(
splitPackedValues, 1 + (i * (1 + config.bytesPerDim())), config.bytesPerDim());
write(out, br.toString());
newline(out);
}
@ -852,25 +855,25 @@ final class SimpleTextBKDWriter implements Closeable {
/** Called only in assert */
private boolean valueInBounds(
BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int offset = config.bytesPerDim * dim;
for (int dim = 0; dim < config.numIndexDims(); dim++) {
int offset = config.bytesPerDim() * dim;
if (Arrays.compareUnsigned(
packedValue.bytes,
packedValue.offset + offset,
packedValue.offset + offset + config.bytesPerDim,
packedValue.offset + offset + config.bytesPerDim(),
minPackedValue,
offset,
offset + config.bytesPerDim)
offset + config.bytesPerDim())
< 0) {
return false;
}
if (Arrays.compareUnsigned(
packedValue.bytes,
packedValue.offset + offset,
packedValue.offset + offset + config.bytesPerDim,
packedValue.offset + offset + config.bytesPerDim(),
maxPackedValue,
offset,
offset + config.bytesPerDim)
offset + config.bytesPerDim())
> 0) {
return false;
}
@ -882,13 +885,13 @@ final class SimpleTextBKDWriter implements Closeable {
protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
// Find which dim has the largest span so we can split on it:
int splitDim = -1;
for (int dim = 0; dim < config.numIndexDims; dim++) {
NumericUtils.subtract(config.bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
for (int dim = 0; dim < config.numIndexDims(); dim++) {
NumericUtils.subtract(config.bytesPerDim(), dim, maxPackedValue, minPackedValue, scratchDiff);
if (splitDim == -1
|| Arrays.compareUnsigned(
scratchDiff, 0, config.bytesPerDim, scratch1, 0, config.bytesPerDim)
scratchDiff, 0, config.bytesPerDim(), scratch1, 0, config.bytesPerDim())
> 0) {
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim);
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim());
splitDim = dim;
}
}
@ -931,15 +934,15 @@ final class SimpleTextBKDWriter implements Closeable {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= config.maxPointsInLeafNode;
assert count <= config.maxPointsInLeafNode();
// Compute common prefixes
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim = 0; dim < config.numDims; dim++) {
final int offset = dim * config.bytesPerDim;
for (int dim = 0; dim < config.numDims(); dim++) {
final int offset = dim * config.bytesPerDim();
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j]
!= scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
@ -951,23 +954,23 @@ final class SimpleTextBKDWriter implements Closeable {
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
for (int dim = 0; dim < config.numDims; ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim) {
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
for (int dim = 0; dim < config.numDims(); ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
usedBytes[dim] = new FixedBitSet(256);
}
}
for (int i = from + 1; i < to; ++i) {
for (int dim = 0; dim < config.numDims; dim++) {
for (int dim = 0; dim < config.numDims(); dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * config.bytesPerDim + commonPrefixLengths[dim]);
byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < config.numDims; ++dim) {
for (int dim = 0; dim < config.numDims(); ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
@ -1001,7 +1004,7 @@ final class SimpleTextBKDWriter implements Closeable {
// Write the common prefixes:
reader.getValue(from, scratchBytesRef1);
System.arraycopy(
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength);
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength());
// Write the full values:
IntFunction<BytesRef> packedValues =
@ -1023,10 +1026,10 @@ final class SimpleTextBKDWriter implements Closeable {
final int splitDim = split(minPackedValue, maxPackedValue);
final int mid = (from + to + 1) >>> 1;
int commonPrefixLen = config.bytesPerDim;
for (int i = 0; i < config.bytesPerDim; ++i) {
if (minPackedValue[splitDim * config.bytesPerDim + i]
!= maxPackedValue[splitDim * config.bytesPerDim + i]) {
int commonPrefixLen = config.bytesPerDim();
for (int i = 0; i < config.bytesPerDim(); ++i) {
if (minPackedValue[splitDim * config.bytesPerDim() + i]
!= maxPackedValue[splitDim * config.bytesPerDim() + i]) {
commonPrefixLen = i;
break;
}
@ -1044,32 +1047,32 @@ final class SimpleTextBKDWriter implements Closeable {
scratchBytesRef2);
// set the split value
final int address = nodeID * (1 + config.bytesPerDim);
final int address = nodeID * (1 + config.bytesPerDim());
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
splitPackedValues,
address + 1,
config.bytesPerDim);
config.bytesPerDim());
byte[] minSplitPackedValue =
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength);
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
byte[] maxSplitPackedValue =
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength);
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
minSplitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
splitDim * config.bytesPerDim(),
config.bytesPerDim());
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
maxSplitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
splitDim * config.bytesPerDim(),
config.bytesPerDim());
// recurse
build(
@ -1121,33 +1124,33 @@ final class SimpleTextBKDWriter implements Closeable {
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more
// efficient
HeapPointWriter heapSource;
if (points.writer instanceof HeapPointWriter == false) {
if (points.writer() instanceof HeapPointWriter == false) {
// Adversarial cases can cause this, e.g. merging big segments with most of the points
// deleted
heapSource = switchToHeap(points.writer);
heapSource = switchToHeap(points.writer());
} else {
heapSource = (HeapPointWriter) points.writer;
heapSource = (HeapPointWriter) points.writer();
}
int from = Math.toIntExact(points.start);
int to = Math.toIntExact(points.start + points.count);
int from = Math.toIntExact(points.start());
int to = Math.toIntExact(points.start() + points.count());
// we store common prefix on scratch1
computeCommonPrefixLength(heapSource, scratch1);
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
for (int dim = 0; dim < config.numDims; ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim) {
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
for (int dim = 0; dim < config.numDims(); ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
usedBytes[dim] = new FixedBitSet(256);
}
}
// Find the dimension to compress
for (int dim = 0; dim < config.numDims; dim++) {
for (int dim = 0; dim < config.numDims(); dim++) {
int prefix = commonPrefixLengths[dim];
if (prefix < config.bytesPerDim) {
int offset = dim * config.bytesPerDim;
if (prefix < config.bytesPerDim()) {
int offset = dim * config.bytesPerDim();
for (int i = 0; i < heapSource.count(); ++i) {
PointValue value = heapSource.getPackedValueSlice(i);
BytesRef packedValue = value.packedValue();
@ -1190,7 +1193,7 @@ final class SimpleTextBKDWriter implements Closeable {
final BytesRef scratch = new BytesRef();
{
scratch.length = config.packedBytesLength;
scratch.length = config.packedBytesLength();
}
@Override
@ -1207,7 +1210,7 @@ final class SimpleTextBKDWriter implements Closeable {
// Inner node: partition/recurse
int splitDim;
if (config.numIndexDims > 1) {
if (config.numIndexDims() > 1) {
splitDim = split(minPackedValue, maxPackedValue);
} else {
splitDim = 0;
@ -1217,19 +1220,19 @@ final class SimpleTextBKDWriter implements Closeable {
: "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
// How many points will be in the left tree:
long rightCount = points.count / 2;
long leftCount = points.count - rightCount;
long rightCount = points.count() / 2;
long leftCount = points.count() - rightCount;
int commonPrefixLen =
Arrays.mismatch(
minPackedValue,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDim * config.bytesPerDim(),
splitDim * config.bytesPerDim() + config.bytesPerDim(),
maxPackedValue,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim);
splitDim * config.bytesPerDim(),
splitDim * config.bytesPerDim() + config.bytesPerDim());
if (commonPrefixLen == -1) {
commonPrefixLen = config.bytesPerDim;
commonPrefixLen = config.bytesPerDim();
}
BKDRadixSelector.PathSlice[] pathSlices = new BKDRadixSelector.PathSlice[2];
@ -1238,26 +1241,34 @@ final class SimpleTextBKDWriter implements Closeable {
radixSelector.select(
points,
pathSlices,
points.start,
points.start + points.count,
points.start + leftCount,
points.start(),
points.start() + points.count(),
points.start() + leftCount,
splitDim,
commonPrefixLen);
int address = nodeID * (1 + config.bytesPerDim);
int address = nodeID * (1 + config.bytesPerDim());
splitPackedValues[address] = (byte) splitDim;
System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim);
System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim());
byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength];
System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength);
byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength()];
System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength());
byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength];
System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength);
byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength()];
System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength());
System.arraycopy(
splitValue, 0, minSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
splitValue,
0,
minSplitPackedValue,
splitDim * config.bytesPerDim(),
config.bytesPerDim());
System.arraycopy(
splitValue, 0, maxSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
splitValue,
0,
maxSplitPackedValue,
splitDim * config.bytesPerDim(),
config.bytesPerDim());
// Recurse on left tree:
build(
@ -1289,30 +1300,30 @@ final class SimpleTextBKDWriter implements Closeable {
}
private void computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix) {
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
PointValue value = heapPointWriter.getPackedValueSlice(0);
BytesRef packedValue = value.packedValue();
for (int dim = 0; dim < config.numDims; dim++) {
for (int dim = 0; dim < config.numDims(); dim++) {
System.arraycopy(
packedValue.bytes,
packedValue.offset + dim * config.bytesPerDim,
packedValue.offset + dim * config.bytesPerDim(),
commonPrefix,
dim * config.bytesPerDim,
config.bytesPerDim);
dim * config.bytesPerDim(),
config.bytesPerDim());
}
for (int i = 1; i < heapPointWriter.count(); i++) {
value = heapPointWriter.getPackedValueSlice(i);
packedValue = value.packedValue();
for (int dim = 0; dim < config.numDims; dim++) {
for (int dim = 0; dim < config.numDims(); dim++) {
if (commonPrefixLengths[dim] != 0) {
int j =
Arrays.mismatch(
commonPrefix,
dim * config.bytesPerDim,
dim * config.bytesPerDim + commonPrefixLengths[dim],
dim * config.bytesPerDim(),
dim * config.bytesPerDim() + commonPrefixLengths[dim],
packedValue.bytes,
packedValue.offset + dim * config.bytesPerDim,
packedValue.offset + dim * config.bytesPerDim + commonPrefixLengths[dim]);
packedValue.offset + dim * config.bytesPerDim(),
packedValue.offset + dim * config.bytesPerDim() + commonPrefixLengths[dim]);
if (j != -1) {
commonPrefixLengths[dim] = j;
}
@ -1331,11 +1342,11 @@ final class SimpleTextBKDWriter implements Closeable {
int[] docs,
int docsOffset)
throws IOException {
byte[] lastPackedValue = new byte[config.packedBytesLength];
byte[] lastPackedValue = new byte[config.packedBytesLength()];
int lastDoc = -1;
for (int i = 0; i < count; i++) {
BytesRef packedValue = values.apply(i);
assert packedValue.length == config.packedBytesLength;
assert packedValue.length == config.packedBytesLength();
assert valueInOrder(
i,
sortedDim,
@ -1361,43 +1372,43 @@ final class SimpleTextBKDWriter implements Closeable {
int packedValueOffset,
int doc,
int lastDoc) {
int dimOffset = sortedDim * config.bytesPerDim;
int dimOffset = sortedDim * config.bytesPerDim();
if (ord > 0) {
int cmp =
Arrays.compareUnsigned(
lastPackedValue,
dimOffset,
dimOffset + config.bytesPerDim,
dimOffset + config.bytesPerDim(),
packedValue,
packedValueOffset + dimOffset,
packedValueOffset + dimOffset + config.bytesPerDim);
packedValueOffset + dimOffset + config.bytesPerDim());
if (cmp > 0) {
throw new AssertionError(
"values out of order: last value="
+ new BytesRef(lastPackedValue)
+ " current value="
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength)
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength())
+ " ord="
+ ord
+ " sortedDim="
+ sortedDim);
}
if (cmp == 0 && config.numDims > config.numIndexDims) {
int dataOffset = config.numIndexDims * config.bytesPerDim;
if (cmp == 0 && config.numDims() > config.numIndexDims()) {
int dataOffset = config.numIndexDims() * config.bytesPerDim();
cmp =
Arrays.compareUnsigned(
lastPackedValue,
dataOffset,
config.packedBytesLength,
config.packedBytesLength(),
packedValue,
packedValueOffset + dataOffset,
packedValueOffset + config.packedBytesLength);
packedValueOffset + config.packedBytesLength());
if (cmp > 0) {
throw new AssertionError(
"data values out of order: last value="
+ new BytesRef(lastPackedValue)
+ " current value="
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength)
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength())
+ " ord="
+ ord);
}
@ -1414,7 +1425,8 @@ final class SimpleTextBKDWriter implements Closeable {
+ sortedDim);
}
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength);
System.arraycopy(
packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength());
return true;
}
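Most of the SimpleTextBKDReader and SimpleTextBKDWriter changes above are the same mechanical migration: the BKD configuration's public fields become record-style accessors. A minimal sketch of such a config record, with hypothetical derived-size methods (not the actual BKDConfig source):

```java
// Illustrative sketch only, not Lucene's actual BKDConfig: a record whose components
// replace the old public final fields, with derived sizes exposed as ordinary methods.
// This is why every config.xxx read above becomes a config.xxx() call.
record PointsConfig(int numDims, int numIndexDims, int bytesPerDim, int maxPointsInLeafNode) {
  // Hypothetical derived sizes, mirroring the names used in the hunks above.
  int packedBytesLength() {
    return numDims * bytesPerDim;
  }

  int packedIndexBytesLength() {
    return numIndexDims * bytesPerDim;
  }

  int bytesPerDoc() {
    return packedBytesLength() + Integer.BYTES;
  }
}
```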

View File

@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -125,8 +126,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
boolean docValueSkipper =
Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
DocValuesSkipIndexType docValueSkipper =
docValuesSkipIndexType(readString(DOCVALUES_SKIP_INDEX.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
@ -221,6 +222,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
return DocValuesType.valueOf(dvType);
}
public DocValuesSkipIndexType docValuesSkipIndexType(String dvSkipIndexType) {
return DocValuesSkipIndexType.valueOf(dvSkipIndexType);
}
public VectorEncoding vectorEncoding(String vectorEncoding) {
return VectorEncoding.valueOf(vectorEncoding);
}
@ -268,7 +273,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, STORETV);
SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
SimpleTextUtil.write(out, Boolean.toString(fi.hasTermVectors()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, PAYLOADS);
@ -284,7 +289,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
SimpleTextUtil.write(out, getDocValuesSkipIndexType(fi.docValuesSkipIndexType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN);
@ -355,4 +360,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
private static String getDocValuesType(DocValuesType type) {
return type.toString();
}
private static String getDocValuesSkipIndexType(DocValuesSkipIndexType type) {
return type.toString();
}
}
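The field-infos change above replaces a boolean "has skip index" flag with an enum that round-trips through its string form. A small sketch of that write/read pattern with a placeholder enum (the real type is org.apache.lucene.index.DocValuesSkipIndexType; its constants are assumed here):

```java
// Placeholder enum for the sketch; the actual constants may differ.
enum SkipIndexType { NONE, RANGE }

class SkipIndexDemo {
  // Written with toString() ...
  static String encode(SkipIndexType type) {
    return type.toString();
  }

  // ... and read back with Enum.valueOf, mirroring docValuesSkipIndexType(String) above.
  static SkipIndexType decode(String value) {
    return SkipIndexType.valueOf(value);
  }
}
```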

View File

@ -144,14 +144,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
}
// read-only
static class SimpleTextBits implements Bits {
final BitSet bits;
final int size;
SimpleTextBits(BitSet bits, int size) {
this.bits = bits;
this.size = size;
}
record SimpleTextBits(BitSet bits, int size) implements Bits {
@Override
public boolean get(int index) {

View File

@ -25,13 +25,4 @@ import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
*
* @lucene.experimental
*/
public class FieldMetadataTermState {
public final FieldMetadata fieldMetadata;
public final BlockTermState state;
public FieldMetadataTermState(FieldMetadata fieldMetadata, BlockTermState state) {
this.fieldMetadata = fieldMetadata;
this.state = state;
}
}
public record FieldMetadataTermState(FieldMetadata fieldMetadata, BlockTermState state) {}
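The SimpleTextBits and FieldMetadataTermState changes above both collapse small holder classes into records. A self-contained sketch of the pattern, using a stand-in interface with the same two methods as Lucene's Bits:

```java
import java.util.BitSet;

// Stand-in interface to keep the sketch self-contained; Lucene's Bits exposes the
// same two methods.
interface BitsLike {
  boolean get(int index);
  int length();
}

// A record can implement an interface: the components replace the final fields and
// the hand-written constructor of the old holder class.
record SimpleBits(BitSet bits, int size) implements BitsLike {
  @Override
  public boolean get(int index) {
    return bits.get(index);
  }

  @Override
  public int length() {
    return size;
  }
}
```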

View File

@ -57,7 +57,7 @@ public class STBlockLine extends BlockLine {
*/
public void collectFields(Collection<FieldMetadata> collector) {
for (FieldMetadataTermState fieldTermState : termStates) {
collector.add(fieldTermState.fieldMetadata);
collector.add(fieldTermState.fieldMetadata());
}
}
@ -82,13 +82,13 @@ public class STBlockLine extends BlockLine {
assert size > 0 : "not valid block line with :" + size + " lines.";
if (size == 1) {
// When there is only 1 field, write its id as negative, followed by the field TermState.
int fieldID = line.termStates.get(0).fieldMetadata.getFieldInfo().number;
int fieldID = line.termStates.get(0).fieldMetadata().getFieldInfo().number;
termStatesOutput.writeZInt(-fieldID);
fieldMetadataTermState = line.termStates.get(0);
encoder.writeTermState(
termStatesOutput,
fieldMetadataTermState.fieldMetadata.getFieldInfo(),
fieldMetadataTermState.state);
fieldMetadataTermState.fieldMetadata().getFieldInfo(),
fieldMetadataTermState.state());
return;
}
@ -96,15 +96,15 @@ public class STBlockLine extends BlockLine {
// First iteration writes the fields ids.
for (int i = 0; i < size; i++) {
fieldMetadataTermState = line.termStates.get(i);
termStatesOutput.writeVInt(fieldMetadataTermState.fieldMetadata.getFieldInfo().number);
termStatesOutput.writeVInt(fieldMetadataTermState.fieldMetadata().getFieldInfo().number);
}
// Second iteration writes the corresponding field TermStates.
for (int i = 0; i < size; i++) {
fieldMetadataTermState = line.termStates.get(i);
encoder.writeTermState(
termStatesOutput,
fieldMetadataTermState.fieldMetadata.getFieldInfo(),
fieldMetadataTermState.state);
fieldMetadataTermState.fieldMetadata().getFieldInfo(),
fieldMetadataTermState.state());
}
}

Some files were not shown because too many files have changed in this diff.