Merge branch 'main' into java_21

This commit is contained in:
ChrisHegarty 2024-02-19 11:43:46 +00:00
commit 07f4b5b19f
105 changed files with 1317 additions and 1423 deletions

View File

@ -1,15 +0,0 @@
name: "Set up caches"
description: "Set up cached resources"
runs:
using: "composite"
steps:
- name: Cache/Restore cached gradle files
uses: actions/cache@v2
with:
path: |
~/.gradle/caches
~/.gradle/jdks
key: ${{ runner.os }}-gradle-caches-${{ hashFiles('versions.lock', '**/gradle-wrapper.properties') }}
restore-keys: |
${{ runner.os }}-gradle-caches-

View File

@ -0,0 +1,29 @@
# This composite action is included in other workflows to have a shared setup
# for java, gradle, caches, etc.
name: Prepare Lucene build
inputs:
java-version:
required: false
default: 17
description: "The default JDK version to set up."
java-distribution:
required: false
default: "temurin"
description: "The default JDK distribution type"
runs:
using: "composite"
steps:
- name: "Set up Java (${{ inputs.java-distribution }}, ${{ inputs.java-version }})"
uses: actions/setup-java@v4
with:
distribution: ${{ inputs.java-distribution }}
java-version: ${{ inputs.java-version }}
java-package: jdk
# This includes "smart" caching of the wrapper and dependencies.
- name: Set up Gradle
uses: gradle/actions/setup-gradle@v3

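For reference, a minimal sketch of how a caller workflow would consume this composite action (the override value is hypothetical; actions/checkout must run first so the local action path exists on disk):
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
with:
java-version: 21 # override the default of 17 declared above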
View File

@ -1,44 +0,0 @@
name: Distribution tests
on:
# Allow manual triggers for testing the action.
workflow_dispatch:
pull_request:
branches:
- 'main'
push:
branches:
- 'main'
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
test:
name: Run distribution tests
timeout-minutes: 15
runs-on: ${{ matrix.os }}
strategy:
matrix:
# we want to run the distribution tests on all major OSs, but it's occasionally too slow (or hangs, or the forked process is not started at all; the cause is unclear) on Windows.
#os: [ubuntu-latest, macos-latest, windows-latest]
os: [ubuntu-latest, macos-latest]
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: 21
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run all distribution tests including GUI tests (${{ matrix.os }})
run: ./gradlew -p lucene/distribution.tests test

View File

@ -1,84 +0,0 @@
name: Gradle Precommit Checks
on:
pull_request:
branches:
- '*'
push:
branches:
- main
- branch_9x
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
# This runs all validation checks without tests.
checks:
name: gradle check -x test (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 15
runs-on: ${{ matrix.os }}
strategy:
matrix:
# Operating systems to run on.
os: [ubuntu-latest]
# Test JVMs.
java: [ '21' ]
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run gradle check (without tests)
run: ./gradlew check -x test -Ptask.times=true --max-workers 2
# This runs all tests without any other validation checks.
tests:
name: gradle test (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 30
runs-on: ${{ matrix.os }}
strategy:
matrix:
# Operating systems to run on.
# windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
# macos-latest: a tad slower than ubuntu and pretty much the same (?) so leaving out.
os: [ubuntu-latest]
# Test JVMs.
java: [ '21' ]
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run gradle tests
run: ./gradlew test "-Ptask.times=true" --max-workers 2
- name: Echo settings
run: cat gradle.properties

View File

@ -1,35 +0,0 @@
name: Hunspell regression tests
on:
pull_request:
branches:
- 'main'
paths:
- '.github/workflows/hunspell.yml'
- 'lucene/analysis/common/**'
jobs:
test:
name: Run Hunspell regression tests
timeout-minutes: 15
runs-on: ubuntu-latest
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: 21
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run regular and regression tests
run: ./gradlew -p lucene/analysis/common check testRegressions

View File

@ -12,29 +12,29 @@ on:
jobs:
stale:
runs-on: ubuntu-latest
permissions:
pull-requests: write
steps:
- uses: actions/stale@v5
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
days-before-pr-stale: 14
days-before-issue-stale: -1 # don't mark issues as stale
exempt-draft-pr: true # don't mark draft PRs as stale
days-before-close: -1 # don't close stale PRs/issues
stale-pr-message: >
This PR has not had activity in the past 2 weeks, labeling it as stale.
If the PR is waiting for review, notify the dev@lucene.apache.org list.
Thank you for your contribution!
debug-only: false # turn on to run the action without applying changes
operations-per-run: 500 # operations budget
# The table shows the cost in operations of all combinations of stale / not-stale for a PR.
# Processing a non-PR issue takes 0 operations, since we don't perform any action on it.

.github/workflows/run-checks-all.yml
View File

@ -0,0 +1,67 @@
name: "Run checks: all modules"
on:
workflow_dispatch:
pull_request:
branches:
- '*'
push:
branches:
- 'main'
- 'branch_9x'
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
# We split the workflow into two parallel jobs for efficiency:
# one is running all validation checks without tests,
# the other runs all tests without other validation checks.
jobs:
# This runs all validation checks without tests.
checks:
name: checks without tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 15
strategy:
matrix:
os: [ ubuntu-latest ]
java: [ '17' ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run gradle check (without tests)
run: ./gradlew check -x test -Ptask.times=true --max-workers 2
# This runs all tests without any other validation checks.
tests:
name: tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 30
strategy:
matrix:
# Operating systems to run on.
# windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
# macos-latest: a tad slower than ubuntu and pretty much the same (?) so leaving out.
os: [ ubuntu-latest ]
java: [ '17' ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run gradle tests
run: ./gradlew test "-Ptask.times=true" --max-workers 2
- name: List automatically-initialized gradle.properties
run: cat gradle.properties

View File

@ -0,0 +1,37 @@
name: "Run checks: module lucene/analysis/common"
on:
workflow_dispatch:
pull_request:
branches:
- 'main'
- 'branch_9x'
paths:
- '.github/workflows/run-checks-mod-analysis-common.yml'
- 'lucene/analysis/common/**'
push:
branches:
- 'main'
- 'branch_9x'
paths:
- '.github/workflows/run-checks-mod-analysis-common.yml'
- 'lucene/analysis/common/**'
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
jobs:
test:
name: Extra regression tests
timeout-minutes: 15
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run 'gradlew -p lucene/analysis/common check testRegressions'
run: ./gradlew -p lucene/analysis/common check testRegressions

View File

@ -0,0 +1,36 @@
name: "Run checks: module lucene/distribution.tests"
on:
workflow_dispatch:
pull_request:
branches:
- 'main'
- 'branch_9x'
push:
branches:
- 'main'
- 'branch_9x'
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
jobs:
test:
timeout-minutes: 15
strategy:
matrix:
# ubuntu-latest is checked as part of run-checks-everything.yml
# windows-latest is slow and sometimes flaky.
os: [ macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run 'gradlew -p lucene/distribution.tests test' (on ${{ matrix.os }})
run: ./gradlew -p lucene/distribution.tests test

View File

@ -23,23 +23,23 @@
xmlns:asfext="http://projects.apache.org/ns/asfext#"
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<!--
-This file's canonical URL is: http://lucene.apache.org/core/doap.rdf
+This file's canonical URL is: https://lucene.apache.org/core/doap.rdf
Note that the canonical URL may redirect to other non-canonical locations.
-->
-<Project rdf:about="http://lucene.apache.org/core/">
+<Project rdf:about="https://lucene.apache.org/core/">
<created>2001-09-01</created>
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
<name>Apache Lucene Core</name>
-<homepage rdf:resource="http://lucene.apache.org/core/" />
+<homepage rdf:resource="https://lucene.apache.org/core/" />
-<asfext:pmc rdf:resource="http://lucene.apache.org" />
+<asfext:pmc rdf:resource="https://lucene.apache.org" />
<shortdesc>Apache Lucene is a high-performance, full-featured text search engine library</shortdesc>
<description>Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
</description>
<bug-database rdf:resource="https://github.com/apache/lucene/issues" />
-<mailing-list rdf:resource="http://lucene.apache.org/core/discussion.html" />
+<mailing-list rdf:resource="https://lucene.apache.org/core/discussion.html" />
-<download-page rdf:resource="http://lucene.apache.org/core/downloads.html" />
+<download-page rdf:resource="https://lucene.apache.org/core/downloads.html" />
<programming-language>Java</programming-language>
<!--

View File

@ -96,16 +96,15 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
scriptutil.run('rm -rf %s' % bc_index_dir)
print('done')
-def update_backcompat_tests(types, index_version, current_version):
+def update_backcompat_tests(index_version, current_version):
-print(' adding new indexes %s to backcompat tests...' % types, end='', flush=True)
+print(' adding new indexes to backcompat tests...', end='', flush=True)
module = 'lucene/backward-codecs'
-filename = '%s/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java' % module
+filename = None
if not current_version.is_back_compat_with(index_version):
-matcher = re.compile(r'final String\[\] unsupportedNames = {|};')
+filename = '%s/src/test/org/apache/lucene/backward_index/unsupported_versions.txt' % module
-elif 'sorted' in types:
-matcher = re.compile(r'static final String\[\] oldSortedNames = {|};')
else:
-matcher = re.compile(r'static final String\[\] oldNames = {|};')
+filename = '%s/src/test/org/apache/lucene/backward_index/versions.txt' % module
strip_dash_suffix_re = re.compile(r'-.*')
@ -114,53 +113,25 @@ def update_backcompat_tests(types, index_version, current_version):
x = re.sub(strip_dash_suffix_re, '', x) # remove the -suffix if any
return scriptutil.Version.parse(x)
-class Edit(object):
-start = None
-def __call__(self, buffer, match, line):
-if self.start:
-# find where this version should exist
-i = len(buffer) - 1
-previous_version_exists = not ('};' in line and buffer[-1].strip().endswith("{"))
-if previous_version_exists: # Only look if there is a version here
-v = find_version(buffer[i])
-while i >= self.start and v.on_or_after(index_version):
-i -= 1
-v = find_version(buffer[i])
-i += 1 # readjust since we skipped past by 1
-# unfortunately python doesn't have a range remove from list...
-# here we want to remove any previous references to the version we are adding
-while i < len(buffer) and index_version.on_or_after(find_version(buffer[i])):
-buffer.pop(i)
-if i == len(buffer) and previous_version_exists and not buffer[-1].strip().endswith(","):
-# add comma
-buffer[-1] = buffer[-1].rstrip() + ",\n"
-if previous_version_exists:
-last = buffer[-1]
-spaces = ' ' * (len(last) - len(last.lstrip()))
-else:
-spaces = ' '
-for (j, t) in enumerate(types):
-if t == 'sorted':
-newline = spaces + ('"sorted.%s"') % index_version
-else:
-newline = spaces + ('"%s-%s"' % (index_version, t))
-if j < len(types) - 1 or i < len(buffer):
-newline += ','
-buffer.insert(i, newline + '\n')
-i += 1
-buffer.append(line)
-return True
-if 'Names = {' in line:
-self.start = len(buffer) # location of first index name
-buffer.append(line)
-return False
+def edit(buffer, match, line):
+v = find_version(line)
+changed = False
+if v.on_or_after(index_version):
+if not index_version.on_or_after(v):
+buffer.append(('%s\n') % index_version)
+changed = True
+buffer.append(line)
+return changed
+def append(buffer, changed):
+if changed:
+return changed
+if not buffer[len(buffer)-1].endswith('\n'):
+buffer.append('\n')
+buffer.append(('%s\n') % index_version)
+return True
-changed = scriptutil.update_file(filename, matcher, Edit())
+changed = scriptutil.update_file(filename, re.compile(r'.*'), edit, append)
print('done' if changed else 'uptodate')
def check_backcompat_tests():
@ -251,9 +222,8 @@ def main():
print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing')
print('\nAdding backwards compatibility tests')
-update_backcompat_tests(['cfs', 'nocfs'], c.version, current_version)
+update_backcompat_tests(c.version, current_version)
-if should_make_sorted:
-update_backcompat_tests(['sorted'], c.version, current_version)
print('\nTesting changes')
check_backcompat_tests()

View File

@ -88,7 +88,7 @@ def run(cmd, cwd=None):
raise e
return output.decode('utf-8')
-def update_file(filename, line_re, edit):
+def update_file(filename, line_re, edit, append=None):
infile = open(filename, 'r')
buffer = []
@ -102,6 +102,8 @@ def update_file(filename, line_re, edit):
return False
continue
buffer.append(line)
+if append:
+changed = append(buffer, changed) # in case edit made no change but an append hook was supplied
if not changed:
raise Exception('Could not find %s in %s' % (line_re, filename))
with open(filename, 'w') as f:

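A minimal sketch of how the new append hook composes with update_file (the file name and version value are hypothetical; the signature and callback semantics follow the change shown above):

import re
import scriptutil

index_version = '9.11.0'  # hypothetical version being added

def edit(buffer, match, line):
    buffer.append(line)  # keep every existing line
    return False         # edit itself never reports a change

def append(buffer, changed):
    if changed:
        return changed
    buffer.append('%s\n' % index_version)  # fallback: add the version at EOF
    return True

# append runs once after the line loop, so update_file no longer raises
# when edit made no change but the version still needs to be added.
scriptutil.update_file('versions.txt', re.compile(r'.*'), edit, append)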
View File

@ -20,7 +20,7 @@ def resources = scriptResources(buildscript)
configure(rootProject) {
ext {
// also change this in extractor tool: ExtractForeignAPI
-vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21 ] as Set
+vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21, JavaVersion.VERSION_22 ] as Set
}
}

View File

@ -28,7 +28,6 @@ configure(project(":lucene").subprojects) { prj ->
spotless {
java {
-toggleOffOn() // obviously, only to be used sparingly.
// TODO: Work out how to support multiple different header files (we have
// classes in the codebase that have original headers). We currently use
// Apache RAT to enforce headers so this is of lesser priority.

View File

@ -114,6 +114,8 @@ Improvements
* GITHUB#12873: Expressions module now uses JEP 371 "Hidden Classes" with JEP 309
"Dynamic Class-File Constants" to implement Javascript expressions. (Uwe Schindler)
+* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
Optimizations
---------------------
@ -176,6 +178,36 @@ Other
* GITHUB#13001: Put Thread#sleep() on the list of forbidden APIs. (Shubham Chaudhary)
+======================== Lucene 9.11.0 =======================
+API Changes
+---------------------
+(No changes)
+New Features
+---------------------
+(No changes)
+Improvements
+---------------------
+* GITHUB#13092: `static final Map` constants have been made immutable (Dmitry Cherniachenko)
+* GITHUB#13041: TokenizedPhraseQueryNode code cleanup (Dmitry Cherniachenko)
+Optimizations
+---------------------
+(No changes)
+Bug Fixes
+---------------------
+(No changes)
+Other
+---------------------
+* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
======================== Lucene 9.10.0 =======================
API Changes
@ -200,6 +232,17 @@ New Features
* GITHUB#12336: Index additional data per facet label in the taxonomy. (Shai Erera, Egor Potemkin, Mike McCandless,
Stefan Vodita)
+* GITHUB#12706: Add support for the final release of Java foreign memory API in Java 22 (and later).
+Lucene's MMapDirectory will now mmap Lucene indexes in chunks of 16 GiB (instead of 1 GiB) starting
+from Java 19. Indexes closed while queries are running can no longer crash the JVM.
+Support for vectorized implementations of VectorUtil based on jdk.incubator.vector APIs was added
+for exactly Java 22. Therefore, applications started with command line parameter
+"java --add-modules jdk.incubator.vector" will automatically use the new vectorized implementations
+if running on a supported platform (Java 20/21/22 on x86 CPUs with AVX2 or later or ARM NEON CPUs).
+This is an opt-in feature and requires explicit Java command line flag! When enabled, Lucene logs
+a notice using java.util.logging. Please test thoroughly and report bugs/slowness to Lucene's mailing
+list. (Uwe Schindler, Chris Hegarty)
Improvements
---------------------
@ -219,8 +262,6 @@ Improvements
Tests are running with random byte order to ensure that the order does not affect correctness
of code. Native order was enabled for LZ4 compression. (Uwe Schindler)
-* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
Optimizations
---------------------

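A minimal sketch of the opt-in behavior described in the GITHUB#12706 entry above (the class name and index path are hypothetical; the incubator module is enabled on the launcher command line, not in code):

// launch with: java --add-modules jdk.incubator.vector Demo
// On Java 20/21/22 Lucene then switches to the vectorized VectorUtil
// implementations and logs a notice through java.util.logging.
import java.nio.file.Paths;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;

public class Demo {
  public static void main(String[] args) throws Exception {
    // On Java 19+ MMapDirectory maps index files with the foreign memory
    // API in 16 GiB chunks; closing an index while queries are running
    // can no longer crash the JVM.
    try (Directory dir = new MMapDirectory(Paths.get("demo-index"))) {
      System.out.println("opened " + dir);
    }
  }
}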
View File

@ -19,6 +19,10 @@
## Migration from Lucene 9.x to Lucene 10.0
+### OpenNLP dependency upgrade
+[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using and be running on Java 17.
### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829)
For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
@ -147,12 +151,6 @@ may throw `IOException` on index problems, bubbling up unexpectedly to the calle
`(Reverse)PathHierarchyTokenizer` now produces sequential (instead of overlapping) tokens with accurate
offsets, making positional queries and highlighters possible for fields tokenized with this tokenizer.
-## Migration from Lucene 9.9 to Lucene 9.10
-### OpenNLP dependency upgrade
-[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using and be running on Java 17.
## Migration from Lucene 9.0 to Lucene 9.1
### Test framework package migration and module (LUCENE-10301)

View File

@ -59,11 +59,11 @@ public class MinHashFilter extends TokenFilter {
private final List<List<FixedSizeTreeSet<LongPair>>> minHashSets;
-private int hashSetSize = DEFAULT_HASH_SET_SIZE;
+private final int hashSetSize;
-private int bucketCount = DEFAULT_BUCKET_COUNT;
+private final int bucketCount;
-private int hashCount = DEFAULT_HASH_COUNT;
+private final int hashCount;
private boolean requiresInitialisation = true;

View File

@ -32,13 +32,13 @@ public class MinHashFilterFactory extends TokenFilterFactory {
/** SPI name */
public static final String NAME = "minHash";
-private int hashCount = MinHashFilter.DEFAULT_HASH_COUNT;
+private final int hashCount;
-private int bucketCount = MinHashFilter.DEFAULT_BUCKET_COUNT;
+private final int bucketCount;
-private int hashSetSize = MinHashFilter.DEFAULT_HASH_SET_SIZE;
+private final int hashSetSize;
-private boolean withRotation;
+private final boolean withRotation;
/** Create a {@link MinHashFilterFactory}. */
public MinHashFilterFactory(Map<String, String> args) {

View File

@ -67,7 +67,7 @@ public class WordDelimiterGraphFilterFactory extends TokenFilterFactory
private final int flags;
byte[] typeTable = null;
private CharArraySet protectedWords = null;
-private boolean adjustOffsets = false;
+private final boolean adjustOffsets;
/** Creates a new WordDelimiterGraphFilterFactory */
public WordDelimiterGraphFilterFactory(Map<String, String> args) {

View File

@ -89,7 +89,7 @@ public final class DutchAnalyzer extends Analyzer {
private final CharArraySet stoptable;
/** Contains words that should be indexed but not stemmed. */
-private CharArraySet excltable = CharArraySet.EMPTY_SET;
+private final CharArraySet excltable;
private final StemmerOverrideMap stemdict;

View File

@ -41,8 +41,8 @@ public class PatternCaptureGroupFilterFactory extends TokenFilterFactory {
/** SPI name */
public static final String NAME = "patternCaptureGroup";
-private Pattern pattern;
+private final Pattern pattern;
-private boolean preserveOriginal = true;
+private final boolean preserveOriginal;
public PatternCaptureGroupFilterFactory(Map<String, String> args) {
super(args);

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.shingle;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
+import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -175,7 +176,7 @@ public final class ShingleFilter extends TokenFilter {
* @param tokenType token tokenType
*/
public void setTokenType(String tokenType) {
-this.tokenType = tokenType;
+this.tokenType = Objects.requireNonNull(tokenType, "tokenType");
}
/**

View File

@ -114,7 +114,7 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
* /箱根山-箱根/成田空港-成田/ requests "箱根" and "成田" to be in the result in NBEST output.
*/
private final String nbestExamples;
-private int nbestCost = -1;
+private int nbestCost;
/** Creates a new JapaneseTokenizerFactory */
public JapaneseTokenizerFactory(Map<String, String> args) {

View File

@ -17,103 +17,107 @@
package org.apache.lucene.analysis.ja.dict;
import java.io.IOException;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.Map;
/** Utility class for english translations of morphological data, used only for debugging. */
public class ToStringUtil {
// a translation map for parts of speech, only used for reflectWith
-private static final HashMap<String, String> posTranslations = new HashMap<>();
+private static final Map<String, String> posTranslations;
static {
+Map<String, String> translations = new HashMap<>();
translations.put("名詞", "noun");
translations.put("名詞-一般", "noun-common");
translations.put("名詞-固有名詞", "noun-proper");
translations.put("名詞-固有名詞-一般", "noun-proper-misc");
translations.put("名詞-固有名詞-人名", "noun-proper-person");
translations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
translations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
translations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
translations.put("名詞-固有名詞-組織", "noun-proper-organization");
translations.put("名詞-固有名詞-地域", "noun-proper-place");
translations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
translations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
translations.put("名詞-代名詞", "noun-pronoun");
translations.put("名詞-代名詞-一般", "noun-pronoun-misc");
translations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
translations.put("名詞-副詞可能", "noun-adverbial");
translations.put("名詞-サ変接続", "noun-verbal");
translations.put("名詞-形容動詞語幹", "noun-adjective-base");
translations.put("名詞-数", "noun-numeric");
translations.put("名詞-非自立", "noun-affix");
translations.put("名詞-非自立-一般", "noun-affix-misc");
translations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
translations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
translations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
translations.put("名詞-特殊", "noun-special");
translations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
translations.put("名詞-接尾", "noun-suffix");
translations.put("名詞-接尾-一般", "noun-suffix-misc");
translations.put("名詞-接尾-人名", "noun-suffix-person");
translations.put("名詞-接尾-地域", "noun-suffix-place");
translations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
translations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
translations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
translations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
translations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
translations.put("名詞-接尾-特殊", "noun-suffix-special");
translations.put("名詞-接続詞的", "noun-suffix-conjunctive");
translations.put("名詞-動詞非自立的", "noun-verbal_aux");
translations.put("名詞-引用文字列", "noun-quotation");
translations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
translations.put("接頭詞", "prefix");
translations.put("接頭詞-名詞接続", "prefix-nominal");
translations.put("接頭詞-動詞接続", "prefix-verbal");
translations.put("接頭詞-形容詞接続", "prefix-adjectival");
translations.put("接頭詞-数接続", "prefix-numerical");
translations.put("動詞", "verb");
translations.put("動詞-自立", "verb-main");
translations.put("動詞-非自立", "verb-auxiliary");
translations.put("動詞-接尾", "verb-suffix");
translations.put("形容詞", "adjective");
translations.put("形容詞-自立", "adjective-main");
translations.put("形容詞-非自立", "adjective-auxiliary");
translations.put("形容詞-接尾", "adjective-suffix");
translations.put("副詞", "adverb");
translations.put("副詞-一般", "adverb-misc");
translations.put("副詞-助詞類接続", "adverb-particle_conjunction");
translations.put("連体詞", "adnominal");
translations.put("接続詞", "conjunction");
translations.put("助詞", "particle");
translations.put("助詞-格助詞", "particle-case");
translations.put("助詞-格助詞-一般", "particle-case-misc");
translations.put("助詞-格助詞-引用", "particle-case-quote");
translations.put("助詞-格助詞-連語", "particle-case-compound");
translations.put("助詞-接続助詞", "particle-conjunctive");
translations.put("助詞-係助詞", "particle-dependency");
translations.put("助詞-副助詞", "particle-adverbial");
translations.put("助詞-間投助詞", "particle-interjective");
translations.put("助詞-並立助詞", "particle-coordinate");
translations.put("助詞-終助詞", "particle-final");
translations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
translations.put("助詞-連体化", "particle-adnominalizer");
translations.put("助詞-副詞化", "particle-adnominalizer");
translations.put("助詞-特殊", "particle-special");
translations.put("助動詞", "auxiliary-verb");
translations.put("感動詞", "interjection");
translations.put("記号", "symbol");
translations.put("記号-一般", "symbol-misc");
translations.put("記号-句点", "symbol-period");
translations.put("記号-読点", "symbol-comma");
translations.put("記号-空白", "symbol-space");
translations.put("記号-括弧開", "symbol-open_bracket");
translations.put("記号-括弧閉", "symbol-close_bracket");
translations.put("記号-アルファベット", "symbol-alphabetic");
translations.put("その他", "other");
translations.put("その他-間投", "other-interjection");
translations.put("フィラー", "filler");
translations.put("非言語音", "non-verbal");
translations.put("語断片", "fragment");
translations.put("未知語", "unknown");
+posTranslations = Collections.unmodifiableMap(translations);
}
/** Get the english form of a POS tag */
@ -122,67 +126,69 @@ public class ToStringUtil {
}
// a translation map for inflection types, only used for reflectWith
-private static final HashMap<String, String> inflTypeTranslations = new HashMap<>();
+private static final Map<String, String> inflTypeTranslations;
static {
+Map<String, String> translations = new HashMap<>();
translations.put("*", "*");
translations.put("形容詞・アウオ段", "adj-group-a-o-u");
translations.put("形容詞・イ段", "adj-group-i");
translations.put("形容詞・イイ", "adj-group-ii");
translations.put("不変化型", "non-inflectional");
translations.put("特殊・タ", "special-da");
translations.put("特殊・ダ", "special-ta");
translations.put("文語・ゴトシ", "classical-gotoshi");
translations.put("特殊・ジャ", "special-ja");
translations.put("特殊・ナイ", "special-nai");
translations.put("五段・ラ行特殊", "5-row-cons-r-special");
translations.put("特殊・ヌ", "special-nu");
translations.put("文語・キ", "classical-ki");
translations.put("特殊・タイ", "special-tai");
translations.put("文語・ベシ", "classical-beshi");
translations.put("特殊・ヤ", "special-ya");
translations.put("文語・マジ", "classical-maji");
translations.put("下二・タ行", "2-row-lower-cons-t");
translations.put("特殊・デス", "special-desu");
translations.put("特殊・マス", "special-masu");
translations.put("五段・ラ行アル", "5-row-aru");
translations.put("文語・ナリ", "classical-nari");
translations.put("文語・リ", "classical-ri");
translations.put("文語・ケリ", "classical-keri");
translations.put("文語・ル", "classical-ru");
translations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
translations.put("五段・サ行", "5-row-cons-s");
translations.put("一段", "1-row");
translations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
translations.put("五段・マ行", "5-row-cons-m");
translations.put("五段・タ行", "5-row-cons-t");
translations.put("五段・ラ行", "5-row-cons-r");
translations.put("サ変・−スル", "irregular-suffix-suru");
translations.put("五段・ガ行", "5-row-cons-g");
translations.put("サ変・−ズル", "irregular-suffix-zuru");
translations.put("五段・バ行", "5-row-cons-b");
translations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
translations.put("下二・ダ行", "2-row-lower-cons-d");
translations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
translations.put("上二・ダ行", "2-row-upper-cons-d");
translations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
translations.put("一段・得ル", "1-row-eru");
translations.put("四段・タ行", "4-row-cons-t");
translations.put("五段・ナ行", "5-row-cons-n");
translations.put("下二・ハ行", "2-row-lower-cons-h");
translations.put("四段・ハ行", "4-row-cons-h");
translations.put("四段・バ行", "4-row-cons-b");
translations.put("サ変・スル", "irregular-suru");
translations.put("上二・ハ行", "2-row-upper-cons-h");
translations.put("下二・マ行", "2-row-lower-cons-m");
translations.put("四段・サ行", "4-row-cons-s");
translations.put("下二・ガ行", "2-row-lower-cons-g");
translations.put("カ変・来ル", "kuru-kanji");
translations.put("一段・クレル", "1-row-kureru");
translations.put("下二・得", "2-row-lower-u");
translations.put("カ変・クル", "kuru-kana");
translations.put("ラ変", "irregular-cons-r");
translations.put("下二・カ行", "2-row-lower-cons-k");
+inflTypeTranslations = Collections.unmodifiableMap(translations);
}
/** Get the english form of inflection type */
@ -191,37 +197,39 @@ public class ToStringUtil {
}
// a translation map for inflection forms, only used for reflectWith
-private static final HashMap<String, String> inflFormTranslations = new HashMap<>();
+private static final Map<String, String> inflFormTranslations;
static {
+Map<String, String> translations = new HashMap<>();
translations.put("*", "*");
translations.put("基本形", "base");
translations.put("文語基本形", "classical-base");
translations.put("未然ヌ接続", "imperfective-nu-connection");
translations.put("未然ウ接続", "imperfective-u-connection");
translations.put("連用タ接続", "conjunctive-ta-connection");
translations.put("連用テ接続", "conjunctive-te-connection");
translations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
translations.put("体言接続", "uninflected-connection");
translations.put("仮定形", "subjunctive");
translations.put("命令e", "imperative-e");
translations.put("仮定縮約1", "conditional-contracted-1");
translations.put("仮定縮約2", "conditional-contracted-2");
translations.put("ガル接続", "garu-connection");
translations.put("未然形", "imperfective");
translations.put("連用形", "conjunctive");
translations.put("音便基本形", "onbin-base");
translations.put("連用デ接続", "conjunctive-de-connection");
translations.put("未然特殊", "imperfective-special");
translations.put("命令i", "imperative-i");
translations.put("連用ニ接続", "conjunctive-ni-connection");
translations.put("命令yo", "imperative-yo");
translations.put("体言接続特殊", "adnominal-special");
translations.put("命令ro", "imperative-ro");
translations.put("体言接続特殊2", "uninflected-special-connection-2");
translations.put("未然レル接続", "imperfective-reru-connection");
translations.put("現代基本形", "modern-base");
translations.put("基本形-促音便", "base-onbin"); // not sure about this
+inflFormTranslations = Collections.unmodifiableMap(translations);
}
/** Get the english form of inflected form */

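The refactoring above applies the same build-then-wrap idiom three times; a minimal self-contained sketch of that idiom (the class name and entries are hypothetical):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class PosTags {
  // the published map is immutable; the mutable HashMap never escapes
  private static final Map<String, String> TRANSLATIONS;

  static {
    Map<String, String> translations = new HashMap<>();
    translations.put("名詞", "noun");
    translations.put("動詞", "verb");
    TRANSLATIONS = Collections.unmodifiableMap(translations);
  }

  public static String translate(String pos) {
    return TRANSLATIONS.getOrDefault(pos, "unknown");
  }
}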
View File

@ -44,7 +44,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
private int sentenceStart = 0;
private int sentenceIndex = -1;
-private NLPTokenizerOp tokenizerOp = null;
+private final NLPTokenizerOp tokenizerOp;
public OpenNLPTokenizer(
AttributeFactory factory, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp)

View File

@ -23,7 +23,7 @@ import opennlp.tools.chunker.ChunkerModel;
/** Supply OpenNLP Chunking tool Requires binary models from OpenNLP project on SourceForge. */
public class NLPChunkerOp {
-private ChunkerME chunker = null;
+private final ChunkerME chunker;
public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
chunker = new ChunkerME(chunkerModel);

View File

@ -27,7 +27,7 @@ import opennlp.tools.postag.POSTaggerME;
* SourceForge.
*/
public class NLPPOSTaggerOp {
-private POSTagger tagger = null;
+private final POSTagger tagger;
public NLPPOSTaggerOp(POSModel model) throws IOException {
tagger = new POSTaggerME(model);

View File

@ -32,10 +32,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
*/
public final class DaitchMokotoffSoundexFilter extends TokenFilter {
/** true if encoded tokens should be added as synonyms */
-protected boolean inject = true;
+private final boolean inject;
/** phonetic encoder */
-protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
+private final DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
// output is a string such as ab|ac|...
private static final Pattern pattern = Pattern.compile("([^|]+)");

View File

@ -32,13 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
*/
public final class PhoneticFilter extends TokenFilter {
/** true if encoded tokens should be added as synonyms */
-protected boolean inject = true;
+private final boolean inject;
/** phonetic encoder */
-protected Encoder encoder = null;
+private final Encoder encoder;
/** captured state, non-null when <code>inject=true</code> and a token is buffered */
-protected State save = null;
+private State save = null;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);

View File

@ -73,7 +73,7 @@ public class Trie {
List<CharSequence> cmds = new ArrayList<>();
int root;
-boolean forward = false;
+boolean forward;
/**
* Constructor for the Trie object.

View File

@ -191,7 +191,7 @@ public final class FieldReader extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
-// BlockTreeTermsWriter.brToString(startTerm));
+// ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?

View File

@ -543,19 +543,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
}
-// for debugging
-@SuppressWarnings("unused")
-static String brToString(BytesRef b) {
-try {
-return b.utf8ToString() + " " + b;
-} catch (Throwable t) {
-// If BytesRef isn't actually UTF8, or it's eg a
-// prefix of UTF8 that ends mid-unicode-char, we
-// fallback to hex:
-return b.toString();
-}
-}
private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) {

View File

@ -354,24 +354,6 @@ public final class Lucene40BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size();
}
-// for debugging
-String brToString(BytesRef b) {
-if (b == null) {
-return "null";
-} else {
-try {
-return b.utf8ToString() + " " + b;
-} catch (
-@SuppressWarnings("unused")
-Throwable t) {
-// If BytesRef isn't actually UTF8, or it's eg a
-// prefix of UTF8 that ends mid-unicode-char, we
-// fallback to hex:
-return b.toString();
-}
-}
-}
@Override
public void checkIntegrity() throws IOException {
// terms index

View File

@ -256,8 +256,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) {
@ -279,7 +279,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -299,27 +299,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
}
-/*
-// for debugging
-@SuppressWarnings("unused")
-static String brToString(BytesRef b) {
-try {
-return b.utf8ToString() + " " + b;
-} catch (Throwable t) {
-// If BytesRef isn't actually UTF8, or it's eg a
-// prefix of UTF8 that ends mid-unicode-char, we
-// fallback to hex:
-return b.toString();
-}
-}
-// for debugging
-@SuppressWarnings("unused")
-static String brToString(BytesRefBuilder b) {
-return brToString(b.get());
-}
-*/
@Override
public boolean seekExact(BytesRef target) throws IOException {
@ -337,8 +316,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
-// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
-// + termExists + ") validIndexPrefix=" + validIndexPrefix);
+// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
+// ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -496,8 +476,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -528,7 +508,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
-// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -544,7 +524,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" + // System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -587,7 +567,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false; termExists = false;
term.setLength(targetUpto); term.setLength(targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -623,7 +603,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) +
// " current=" + ToStringUtils.bytesRefToString(term)
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out); // printSeekState(System.out);
// } // }
@ -667,9 +648,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// " output=" + output); // + " output=" + output);
// } // }
if (cmp != 0) { if (cmp != 0) {
break; break;
@ -781,8 +762,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength); // targetBeforeCurrentLength);
// } // }
@ -818,7 +799,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) { if (next() != null) {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term)); // System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return SeekStatus.NOT_FOUND; return SeekStatus.NOT_FOUND;
} else { } else {
@ -829,7 +811,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term)); // System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return result; return result;
} }
@ -1029,9 +1012,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof; assert !eof;
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out); // printSeekState(System.out);
// } // }
@ -1095,8 +1079,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame: // try to scan to the right floor frame:
currentFrame.loadBlock(); currentFrame.loadBlock();
} else { } else {
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" // if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
// + currentFrame.ord); // " currentFrame.ord=" + currentFrame.ord);
return term.get(); return term.get();
} }
} }

View File

@ -317,8 +317,8 @@ final class SegmentTermsEnumFrame {
} }
public void nextLeaf() { public void nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// entCount=" + entCount); // " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++; nextEnt++;
@ -410,8 +410,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1); newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0; hasTerms = (code & 1) != 0;
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// } // }
isLastInFloor = numFollowFloorBlocks == 1; isLastInFloor = numFollowFloorBlocks == 1;
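
The decode above unpacks a floor-block entry in which the file-pointer delta and the hasTerms flag share a single long code: the flag lives in bit 0 and the delta in the remaining bits. A minimal sketch of the packing scheme (names are illustrative, not Lucene API):

    // Pack/unpack of (fpDelta, hasTerms) into one long, bit 0 = hasTerms.
    final class FloorCode {
      static long encode(long fpDelta, boolean hasTerms) {
        return (fpDelta << 1) | (hasTerms ? 1L : 0L);
      }

      static long fpDelta(long code) {
        return code >>> 1; // unsigned shift drops the flag bit
      }

      static boolean hasTerms(long code) {
        return (code & 1) != 0;
      }

      public static void main(String[] args) {
        long code = encode(42, true);
        System.out.println(fpDelta(code) + " " + hasTerms(code)); // 42 true
      }
    }
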
@ -566,28 +566,14 @@ final class SegmentTermsEnumFrame {
private long subCode; private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we // Target's prefix matches this block's prefix; we
  // scan the entries to check if the suffix matches.                             // scan the entries to check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// brToString(term)); // ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1; assert nextEnt != -1;
@ -617,7 +603,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix; // suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef)); // + ToStringUtils.bytesRefToString(suffixBytesRef));
// } // }
startBytePos = suffixesReader.getPosition(); startBytePos = suffixesReader.getPosition();
@ -682,8 +668,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException { public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// brToString(target)); // ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1; assert nextEnt != -1;
@ -711,7 +698,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix; // suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); // (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// } // }
final int termLen = prefix + suffix; final int termLen = prefix + suffix;
@ -743,8 +731,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND: // return NOT_FOUND:
fillTerm(); fillTerm();
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
// ste.termExists=" + ste.termExists); // " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) { if (!exactOnly && !ste.termExists) {
// System.out.println(" now pushFrame"); // System.out.println(" now pushFrame");

View File

@ -46,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression; import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.ByteSequenceOutputs;
@ -349,7 +350,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
} }
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms); termsWriter.write(term, termsEnum, norms);
} }
@ -388,33 +389,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
@Override @Override
public String toString() { public String toString() {
return "TERM: " + brToString(termBytes); return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
} }
} }
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class PendingBlock extends PendingEntry { private static final class PendingBlock extends PendingEntry {
public final BytesRef prefix; public final BytesRef prefix;
public final long fp; public final long fp;
@ -442,7 +420,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
@Override @Override
public String toString() { public String toString() {
return "BLOCK: prefix=" + brToString(prefix); return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
} }
public void compileIndex( public void compileIndex(
@ -600,8 +578,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) { // if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes()); // BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength; // br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" // System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// + count); // ToStringUtils.bytesRefToString(br) + " count=" + count);
// } // }
// Root block better write all remaining pending entries: // Root block better write all remaining pending entries:
@ -754,9 +732,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength; prefix.length = prefixLength;
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks); // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);
// Write block header: // Write block header:
int numEntries = end - start; int numEntries = end - start;
@ -769,7 +748,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
/* /*
if (DEBUG) { if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
} }
*/ */
@ -804,7 +785,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix); // BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix; // suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes)); // System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// } // }
// For leaf block we write suffix straight // For leaf block we write suffix straight
@ -837,7 +819,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix); // BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix; // suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes)); // System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// } // }
// For non-leaf block we borrow 1 bit to record // For non-leaf block we borrow 1 bit to record
@ -879,8 +862,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix); // BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix; // suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " // System.out.println(" write sub-block suffix=" +
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); // ToStringUtils.bytesRefToString(suffixBytes) +
// " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// } // }
assert floorLeadLabel == -1 assert floorLeadLabel == -1
@ -998,7 +982,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) { if (DEBUG) {
int[] tmp = new int[lastTerm.length]; int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size()); System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
} }
*/ */
@ -1051,8 +1036,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// we are closing: // we are closing:
int prefixTopSize = pending.size() - prefixStarts[i]; int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) { if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// minItemsInBlock=" + minItemsInBlock); // " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize); writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1; prefixStarts[i] -= prefixTopSize - 1;
} }
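
For context on the pushTerm logic above: a block is flushed whenever the entries sharing a prefix reach minItemsInBlock, and the prefixStarts bookkeeping hinges on the byte-wise common prefix between the incoming term and the previous one. A stand-alone sketch of just that comparison (illustrative, not the writer's internals):

    import java.nio.charset.StandardCharsets;

    final class CommonPrefix {
      // length of the longest shared byte prefix of a and b
      static int commonPrefixLength(byte[] a, byte[] b) {
        int limit = Math.min(a.length, b.length);
        int i = 0;
        while (i < limit && a[i] == b[i]) {
          i++;
        }
        return i;
      }

      public static void main(String[] args) {
        byte[] t1 = "apple".getBytes(StandardCharsets.UTF_8);
        byte[] t2 = "apply".getBytes(StandardCharsets.UTF_8);
        System.out.println(commonPrefixLength(t1, t2)); // 4
      }
    }
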

View File

@ -19,19 +19,25 @@ package org.apache.lucene.backward_index;
import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.Name;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.LineNumberReader;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Set; import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@ -47,26 +53,31 @@ import org.junit.Before;
public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase { public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
protected final Version version; static final Set<String> OLD_VERSIONS;
private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
protected final String indexPattern;
protected static final Set<Version> BINARY_SUPPORTED_VERSIONS; protected static final Set<Version> BINARY_SUPPORTED_VERSIONS;
static { private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
String[] oldVersions =
new String[] {
"8.0.0", "8.0.0", "8.1.0", "8.1.0", "8.1.1", "8.1.1", "8.2.0", "8.2.0", "8.3.0", "8.3.0",
"8.3.1", "8.3.1", "8.4.0", "8.4.0", "8.4.1", "8.4.1", "8.5.0", "8.5.0", "8.5.1", "8.5.1",
"8.5.2", "8.5.2", "8.6.0", "8.6.0", "8.6.1", "8.6.1", "8.6.2", "8.6.2", "8.6.3", "8.6.3",
"8.7.0", "8.7.0", "8.8.0", "8.8.0", "8.8.1", "8.8.1", "8.8.2", "8.8.2", "8.9.0", "8.9.0",
"8.10.0", "8.10.0", "8.10.1", "8.10.1", "8.11.0", "8.11.0", "8.11.1", "8.11.1", "8.11.2",
"8.11.2", "8.11.3", "8.11.3", "9.0.0", "9.1.0", "9.2.0", "9.3.0", "9.4.0", "9.4.1",
"9.4.2", "9.5.0", "9.6.0", "9.7.0", "9.8.0", "9.9.0", "9.9.1", "9.9.2", "9.10.0",
"10.0.0",
};
protected final Version version;
protected final String indexPattern;
static {
String name = "versions.txt";
try (LineNumberReader in =
new LineNumberReader(
IOUtils.getDecodingReader(
IOUtils.requireResourceNonNull(
BackwardsCompatibilityTestBase.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8))) {
OLD_VERSIONS =
in.lines()
.filter(Predicate.not(String::isBlank))
.collect(Collectors.toCollection(LinkedHashSet::new));
} catch (IOException exception) {
throw new RuntimeException("failed to load resource", exception);
}
Set<Version> binaryVersions = new HashSet<>(); Set<Version> binaryVersions = new HashSet<>();
for (String version : oldVersions) { for (String version : OLD_VERSIONS) {
try { try {
Version v = Version.parse(version); Version v = Version.parse(version);
assertTrue("Unsupported binary version: " + v, v.major >= Version.MIN_SUPPORTED_MAJOR - 1); assertTrue("Unsupported binary version: " + v, v.major >= Version.MIN_SUPPORTED_MAJOR - 1);
@ -75,8 +86,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
throw new RuntimeException(ex); throw new RuntimeException(ex);
} }
} }
List<Version> allCurrentVersions = getAllCurrentVersions();
for (Version version : allCurrentVersions) { for (Version version : getAllCurrentReleasedVersions()) {
// make sure we never miss a version. // make sure we never miss a version.
assertTrue("Version: " + version + " missing", binaryVersions.remove(version)); assertTrue("Version: " + version + " missing", binaryVersions.remove(version));
} }
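
The static initializer above swaps the hard-coded version array for a versions.txt classpath resource read into an insertion-ordered set. A JDK-only sketch of the same pattern (Lucene's IOUtils adds the decoding reader and the non-null resource check used in the real code; the resource name here is illustrative):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import java.util.LinkedHashSet;
    import java.util.Set;
    import java.util.function.Predicate;
    import java.util.stream.Collectors;

    final class VersionsResource {
      static Set<String> load(Class<?> owner, String name) throws IOException {
        InputStream stream = owner.getResourceAsStream(name);
        if (stream == null) {
          throw new IOException("missing resource: " + name);
        }
        try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
          // keep file order, skip blank lines
          return reader.lines()
              .filter(Predicate.not(String::isBlank))
              .collect(Collectors.toCollection(LinkedHashSet::new));
        }
      }

      public static void main(String[] args) throws IOException {
        // assumes a versions.txt next to this class on the classpath
        System.out.println(load(VersionsResource.class, "versions.txt"));
      }
    }

Keeping the versions in a text file means a release only has to append a line instead of editing Java source.
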
@ -181,19 +192,51 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
return versions; return versions;
} }
private static List<Version> getAllCurrentReleasedVersions() {
List<Version> currentReleasedVersions = getAllCurrentVersions();
// The latest version from the current major is always under development.
assertTrue(currentReleasedVersions.remove(Version.LATEST));
// The latest minor from the previous major is also under development.
assertTrue(currentReleasedVersions.remove(LATEST_PREVIOUS_MAJOR));
// In addition to those, we may need to remove one more version in case a release is in
// progress, and the version constant has been added but backward-compatibility indexes have not
// been checked in yet.
List<Version> missingVersions = new ArrayList<>();
for (Iterator<Version> it = currentReleasedVersions.iterator(); it.hasNext(); ) {
Version version = it.next();
String indexName = String.format(Locale.ROOT, "index.%s-cfs.zip", version);
if (TestAncientIndicesCompatibility.class.getResource(indexName) == null) {
missingVersions.add(version);
it.remove();
}
}
if (missingVersions.size() > 1) {
throw new AssertionError(
"More than one version is missing backward-compatibility data: " + missingVersions);
}
return currentReleasedVersions;
}
/** Get all versions that are released, plus the latest version which is unreleased. */
public static List<Version> getAllCurrentReleasedVersionsAndCurrent() {
List<Version> versions = new ArrayList<>(getAllCurrentReleasedVersions());
versions.add(Version.LATEST);
return versions;
}
public static Iterable<Object[]> allVersion(String name, String... suffixes) { public static Iterable<Object[]> allVersion(String name, String... suffixes) {
List<Object> patterns = new ArrayList<>(); List<Object> patterns = new ArrayList<>();
for (String suffix : suffixes) { for (String suffix : suffixes) {
patterns.add(createPattern(name, suffix)); patterns.add(createPattern(name, suffix));
} }
List<Object[]> versionAndPatterns = new ArrayList<>(); List<Object[]> versionAndPatterns = new ArrayList<>();
List<Version> versionList = getAllCurrentVersions(); List<Version> versionList = getAllCurrentReleasedVersionsAndCurrent();
for (Version v : versionList) { for (Version v : versionList) {
if (v.equals(LATEST_PREVIOUS_MAJOR) for (Object p : patterns) {
== false) { // the latest prev-major has not yet been released versionAndPatterns.add(new Object[] {v, p});
for (Object p : patterns) {
versionAndPatterns.add(new Object[] {v, p});
}
} }
} }
return versionAndPatterns; return versionAndPatterns;

View File

@ -21,8 +21,16 @@ import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.PrintStream; import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.IndexFormatTooOldException;
@ -36,274 +44,57 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.tests.store.BaseDirectoryWrapper;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.IOUtils;
@SuppressWarnings("deprecation")
public class TestAncientIndicesCompatibility extends LuceneTestCase { public class TestAncientIndicesCompatibility extends LuceneTestCase {
static final Set<String> UNSUPPORTED_INDEXES;
static final String[] unsupportedNames = { static {
"1.9.0-cfs", String name = "unsupported_versions.txt";
"1.9.0-nocfs", Set<String> indices;
"2.0.0-cfs", try (LineNumberReader in =
"2.0.0-nocfs", new LineNumberReader(
"2.1.0-cfs", IOUtils.getDecodingReader(
"2.1.0-nocfs", IOUtils.requireResourceNonNull(
"2.2.0-cfs", TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
"2.2.0-nocfs", StandardCharsets.UTF_8))) {
"2.3.0-cfs", indices =
"2.3.0-nocfs", in.lines()
"2.4.0-cfs", .filter(Predicate.not(String::isBlank))
"2.4.0-nocfs", .flatMap(version -> Stream.of(version + "-cfs", version + "-nocfs"))
"2.4.1-cfs", .collect(Collectors.toCollection(LinkedHashSet::new));
"2.4.1-nocfs", } catch (IOException exception) {
"2.9.0-cfs", throw new RuntimeException("failed to load resource", exception);
"2.9.0-nocfs", }
"2.9.1-cfs",
"2.9.1-nocfs", name = "unsupported_indices.txt";
"2.9.2-cfs", try (LineNumberReader in =
"2.9.2-nocfs", new LineNumberReader(
"2.9.3-cfs", IOUtils.getDecodingReader(
"2.9.3-nocfs", IOUtils.requireResourceNonNull(
"2.9.4-cfs", TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
"2.9.4-nocfs", StandardCharsets.UTF_8))) {
"3.0.0-cfs", indices.addAll(
"3.0.0-nocfs", in.lines()
"3.0.1-cfs", .filter(Predicate.not(String::isBlank))
"3.0.1-nocfs", .collect(Collectors.toCollection(LinkedHashSet::new)));
"3.0.2-cfs", } catch (IOException exception) {
"3.0.2-nocfs", throw new RuntimeException("failed to load resource", exception);
"3.0.3-cfs", }
"3.0.3-nocfs", UNSUPPORTED_INDEXES = Collections.unmodifiableSet(indices);
"3.1.0-cfs", }
"3.1.0-nocfs",
"3.2.0-cfs",
"3.2.0-nocfs",
"3.3.0-cfs",
"3.3.0-nocfs",
"3.4.0-cfs",
"3.4.0-nocfs",
"3.5.0-cfs",
"3.5.0-nocfs",
"3.6.0-cfs",
"3.6.0-nocfs",
"3.6.1-cfs",
"3.6.1-nocfs",
"3.6.2-cfs",
"3.6.2-nocfs",
"4.0.0-cfs",
"4.0.0-cfs",
"4.0.0-nocfs",
"4.0.0.1-cfs",
"4.0.0.1-nocfs",
"4.0.0.2-cfs",
"4.0.0.2-nocfs",
"4.1.0-cfs",
"4.1.0-nocfs",
"4.2.0-cfs",
"4.2.0-nocfs",
"4.2.1-cfs",
"4.2.1-nocfs",
"4.3.0-cfs",
"4.3.0-nocfs",
"4.3.1-cfs",
"4.3.1-nocfs",
"4.4.0-cfs",
"4.4.0-nocfs",
"4.5.0-cfs",
"4.5.0-nocfs",
"4.5.1-cfs",
"4.5.1-nocfs",
"4.6.0-cfs",
"4.6.0-nocfs",
"4.6.1-cfs",
"4.6.1-nocfs",
"4.7.0-cfs",
"4.7.0-nocfs",
"4.7.1-cfs",
"4.7.1-nocfs",
"4.7.2-cfs",
"4.7.2-nocfs",
"4.8.0-cfs",
"4.8.0-nocfs",
"4.8.1-cfs",
"4.8.1-nocfs",
"4.9.0-cfs",
"4.9.0-nocfs",
"4.9.1-cfs",
"4.9.1-nocfs",
"4.10.0-cfs",
"4.10.0-nocfs",
"4.10.1-cfs",
"4.10.1-nocfs",
"4.10.2-cfs",
"4.10.2-nocfs",
"4.10.3-cfs",
"4.10.3-nocfs",
"4.10.4-cfs",
"4.10.4-nocfs",
"5x-with-4x-segments-cfs",
"5x-with-4x-segments-nocfs",
"5.0.0.singlesegment-cfs",
"5.0.0.singlesegment-nocfs",
"5.0.0-cfs",
"5.0.0-nocfs",
"5.1.0-cfs",
"5.1.0-nocfs",
"5.2.0-cfs",
"5.2.0-nocfs",
"5.2.1-cfs",
"5.2.1-nocfs",
"5.3.0-cfs",
"5.3.0-nocfs",
"5.3.1-cfs",
"5.3.1-nocfs",
"5.3.2-cfs",
"5.3.2-nocfs",
"5.4.0-cfs",
"5.4.0-nocfs",
"5.4.1-cfs",
"5.4.1-nocfs",
"5.5.0-cfs",
"5.5.0-nocfs",
"5.5.1-cfs",
"5.5.1-nocfs",
"5.5.2-cfs",
"5.5.2-nocfs",
"5.5.3-cfs",
"5.5.3-nocfs",
"5.5.4-cfs",
"5.5.4-nocfs",
"5.5.5-cfs",
"5.5.5-nocfs",
"6.0.0-cfs",
"6.0.0-nocfs",
"6.0.1-cfs",
"6.0.1-nocfs",
"6.1.0-cfs",
"6.1.0-nocfs",
"6.2.0-cfs",
"6.2.0-nocfs",
"6.2.1-cfs",
"6.2.1-nocfs",
"6.3.0-cfs",
"6.3.0-nocfs",
"6.4.0-cfs",
"6.4.0-nocfs",
"6.4.1-cfs",
"6.4.1-nocfs",
"6.4.2-cfs",
"6.4.2-nocfs",
"6.5.0-cfs",
"6.5.0-nocfs",
"6.5.1-cfs",
"6.5.1-nocfs",
"6.6.0-cfs",
"6.6.0-nocfs",
"6.6.1-cfs",
"6.6.1-nocfs",
"6.6.2-cfs",
"6.6.2-nocfs",
"6.6.3-cfs",
"6.6.3-nocfs",
"6.6.4-cfs",
"6.6.4-nocfs",
"6.6.5-cfs",
"6.6.5-nocfs",
"6.6.6-cfs",
"6.6.6-nocfs",
"7.0.0-cfs",
"7.0.0-nocfs",
"7.0.1-cfs",
"7.0.1-nocfs",
"7.1.0-cfs",
"7.1.0-nocfs",
"7.2.0-cfs",
"7.2.0-nocfs",
"7.2.1-cfs",
"7.2.1-nocfs",
"7.3.0-cfs",
"7.3.0-nocfs",
"7.3.1-cfs",
"7.3.1-nocfs",
"7.4.0-cfs",
"7.4.0-nocfs",
"7.5.0-cfs",
"7.5.0-nocfs",
"7.6.0-cfs",
"7.6.0-nocfs",
"7.7.0-cfs",
"7.7.0-nocfs",
"7.7.1-cfs",
"7.7.1-nocfs",
"7.7.2-cfs",
"7.7.2-nocfs",
"7.7.3-cfs",
"7.7.3-nocfs",
"8.0.0-cfs",
"8.0.0-nocfs",
"8.1.0-cfs",
"8.1.0-nocfs",
"8.1.1-cfs",
"8.1.1-nocfs",
"8.2.0-cfs",
"8.2.0-nocfs",
"8.3.0-cfs",
"8.3.0-nocfs",
"8.3.1-cfs",
"8.3.1-nocfs",
"8.4.0-cfs",
"8.4.0-nocfs",
"8.4.1-cfs",
"8.4.1-nocfs",
"8.5.0-cfs",
"8.5.0-nocfs",
"8.5.1-cfs",
"8.5.1-nocfs",
"8.5.2-cfs",
"8.5.2-nocfs",
"8.6.0-cfs",
"8.6.0-nocfs",
"8.6.1-cfs",
"8.6.1-nocfs",
"8.6.2-cfs",
"8.6.2-nocfs",
"8.6.3-cfs",
"8.6.3-nocfs",
"8.7.0-cfs",
"8.7.0-nocfs",
"8.8.0-cfs",
"8.8.0-nocfs",
"8.8.1-cfs",
"8.8.1-nocfs",
"8.8.2-cfs",
"8.8.2-nocfs",
"8.9.0-cfs",
"8.9.0-nocfs",
"8.10.0-cfs",
"8.10.0-nocfs",
"8.10.1-cfs",
"8.10.1-nocfs",
"8.11.0-cfs",
"8.11.0-nocfs",
"8.11.1-cfs",
"8.11.1-nocfs",
"8.11.2-cfs",
"8.11.2-nocfs",
"8.11.3-cfs",
"8.11.3-nocfs"
};
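
The replacement above derives the unsupported index names from two resource files instead of maintaining this array: each version in unsupported_versions.txt expands into a -cfs and a -nocfs entry, and unsupported_indices.txt contributes the few non-version names. A tiny sketch of the suffix expansion (values are illustrative):

    import java.util.List;
    import java.util.stream.Stream;

    final class SuffixExpansion {
      public static void main(String[] args) {
        List<String> names =
            Stream.of("8.0.0", "8.1.0")
                .flatMap(v -> Stream.of(v + "-cfs", v + "-nocfs"))
                .toList();
        // [8.0.0-cfs, 8.0.0-nocfs, 8.1.0-cfs, 8.1.0-nocfs]
        System.out.println(names);
      }
    }
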
/** /**
* This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate * This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate
* on indexes that are too old! * on indexes that are too old!
*/ */
public void testUnsupportedOldIndexes() throws Exception { public void testUnsupportedOldIndexes() throws Exception {
for (int i = 0; i < unsupportedNames.length; i++) { for (String version : UNSUPPORTED_INDEXES) {
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: index " + unsupportedNames[i]); System.out.println("TEST: index " + version);
} }
Path oldIndexDir = createTempDir(unsupportedNames[i]); Path oldIndexDir = createTempDir(version);
TestUtil.unzip( TestUtil.unzip(getDataInputStream("unsupported." + version + ".zip"), oldIndexDir);
getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir);
BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir); BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir);
// don't checkindex, these are intentionally not supported // don't checkindex, these are intentionally not supported
dir.setCheckIndexOnClose(false); dir.setCheckIndexOnClose(false);
@ -312,7 +103,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
IndexWriter writer = null; IndexWriter writer = null;
try { try {
reader = DirectoryReader.open(dir); reader = DirectoryReader.open(dir);
fail("DirectoryReader.open should not pass for " + unsupportedNames[i]); fail("DirectoryReader.open should not pass for " + version);
} catch (IndexFormatTooOldException e) { } catch (IndexFormatTooOldException e) {
if (e.getReason() != null) { if (e.getReason() != null) {
assertNull(e.getVersion()); assertNull(e.getVersion());
@ -353,7 +144,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
writer = writer =
new IndexWriter( new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false)); dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false));
fail("IndexWriter creation should not pass for " + unsupportedNames[i]); fail("IndexWriter creation should not pass for " + version);
} catch (IndexFormatTooOldException e) { } catch (IndexFormatTooOldException e) {
if (e.getReason() != null) { if (e.getReason() != null) {
assertNull(e.getVersion()); assertNull(e.getVersion());
@ -406,7 +197,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
CheckIndex checker = new CheckIndex(dir); CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, UTF_8)); checker.setInfoStream(new PrintStream(bos, false, UTF_8));
CheckIndex.Status indexStatus = checker.checkIndex(); CheckIndex.Status indexStatus = checker.checkIndex();
if (unsupportedNames[i].startsWith("8.")) { if (version.startsWith("8.")) {
assertTrue(indexStatus.clean); assertTrue(indexStatus.clean);
} else { } else {
assertFalse(indexStatus.clean); assertFalse(indexStatus.clean);

View File

@ -101,8 +101,6 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE); KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f}; private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1;
/** /**
* A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See * A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See
* {@link #testVersionsFactory()} for details on the values provided to the framework. * {@link #testVersionsFactory()} for details on the values provided to the framework.

View File

@ -62,7 +62,6 @@ public class TestBinaryBackwardsCompatibility extends BackwardsCompatibilityTest
@Nightly @Nightly
public void testReadNMinusTwoCommit() throws IOException { public void testReadNMinusTwoCommit() throws IOException {
try (BaseDirectoryWrapper dir = newDirectory(directory)) { try (BaseDirectoryWrapper dir = newDirectory(directory)) {
IndexCommit commit = DirectoryReader.listCommits(dir).get(0); IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close(); StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();

View File

@ -55,6 +55,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
static final String INDEX_NAME = "sorted"; static final String INDEX_NAME = "sorted";
static final String SUFFIX = ""; static final String SUFFIX = "";
private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
private static final String PARENT_FIELD_NAME = "___parent";
public TestIndexSortBackwardsCompatibility(Version version, String pattern) { public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
super(version, pattern); super(version, pattern);
@ -79,8 +81,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
.setOpenMode(IndexWriterConfig.OpenMode.APPEND) .setOpenMode(IndexWriterConfig.OpenMode.APPEND)
.setIndexSort(sort) .setIndexSort(sort)
.setMergePolicy(newLogMergePolicy()); .setMergePolicy(newLogMergePolicy());
if (this.version.onOrAfter(Version.LUCENE_10_0_0)) { if (this.version.onOrAfter(FIRST_PARENT_DOC_VERSION)) {
indexWriterConfig.setParentField("___parent"); indexWriterConfig.setParentField(PARENT_FIELD_NAME);
} }
// open writer // open writer
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) { try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
@ -89,7 +91,10 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
Document child = new Document(); Document child = new Document();
child.add(new StringField("relation", "child", Field.Store.NO)); child.add(new StringField("relation", "child", Field.Store.NO));
child.add(new StringField("bid", "" + i, Field.Store.NO)); child.add(new StringField("bid", "" + i, Field.Store.NO));
child.add(new NumericDocValuesField("dateDV", i)); if (version.onOrAfter(FIRST_PARENT_DOC_VERSION)
== false) { // only add this to earlier versions
child.add(new NumericDocValuesField("dateDV", i));
}
Document parent = new Document(); Document parent = new Document();
parent.add(new StringField("relation", "parent", Field.Store.NO)); parent.add(new StringField("relation", "parent", Field.Store.NO));
parent.add(new StringField("bid", "" + i, Field.Store.NO)); parent.add(new StringField("bid", "" + i, Field.Store.NO));
@ -158,6 +163,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
conf.setUseCompoundFile(false); conf.setUseCompoundFile(false);
conf.setCodec(TestUtil.getDefaultCodec()); conf.setCodec(TestUtil.getDefaultCodec());
conf.setParentField("___parent"); conf.setParentField("___parent");
conf.setParentField(PARENT_FIELD_NAME);
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true))); conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
IndexWriter writer = new IndexWriter(directory, conf); IndexWriter writer = new IndexWriter(directory, conf);
LineFileDocs docs = new LineFileDocs(new Random(0)); LineFileDocs docs = new LineFileDocs(new Random(0));

View File

@ -0,0 +1,4 @@
5x-with-4x-segments-cfs
5x-with-4x-segments-nocfs
5.0.0.singlesegment-cfs
5.0.0.singlesegment-nocfs

View File

@ -0,0 +1,122 @@
1.9.0
2.0.0
2.1.0
2.2.0
2.3.0
2.4.0
2.4.1
2.9.0
2.9.1
2.9.2
2.9.3
2.9.4
3.0.0
3.0.1
3.0.2
3.0.3
3.1.0
3.2.0
3.3.0
3.4.0
3.5.0
3.6.0
3.6.1
3.6.2
4.0.0
4.0.0.1
4.0.0.2
4.1.0
4.2.0
4.2.1
4.3.0
4.3.1
4.4.0
4.5.0
4.5.1
4.6.0
4.6.1
4.7.0
4.7.1
4.7.2
4.8.0
4.8.1
4.9.0
4.9.1
4.10.0
4.10.1
4.10.2
4.10.3
4.10.4
5.0.0
5.1.0
5.2.0
5.2.1
5.3.0
5.3.1
5.3.2
5.4.0
5.4.1
5.5.0
5.5.1
5.5.2
5.5.3
5.5.4
5.5.5
6.0.0
6.0.1
6.1.0
6.2.0
6.2.1
6.3.0
6.4.0
6.4.1
6.4.2
6.5.0
6.5.1
6.6.0
6.6.1
6.6.2
6.6.3
6.6.4
6.6.5
6.6.6
7.0.0
7.0.1
7.1.0
7.2.0
7.2.1
7.3.0
7.3.1
7.4.0
7.5.0
7.6.0
7.7.0
7.7.1
7.7.2
7.7.3
8.0.0
8.1.0
8.1.1
8.2.0
8.3.0
8.3.1
8.4.0
8.4.1
8.5.0
8.5.1
8.5.2
8.6.0
8.6.1
8.6.2
8.6.3
8.7.0
8.8.0
8.8.1
8.8.2
8.9.0
8.10.0
8.10.1
8.11.0
8.11.1
8.11.2
8.11.3

View File

@ -0,0 +1,40 @@
8.0.0
8.1.0
8.1.1
8.2.0
8.3.0
8.3.1
8.4.0
8.4.1
8.5.0
8.5.1
8.5.2
8.6.0
8.6.1
8.6.2
8.6.3
8.7.0
8.8.0
8.8.1
8.8.2
8.9.0
8.10.0
8.10.1
8.11.0
8.11.1
8.11.2
8.11.3
9.0.0
9.1.0
9.2.0
9.3.0
9.4.0
9.4.1
9.4.2
9.5.0
9.6.0
9.7.0
9.8.0
9.9.0
9.9.1
9.9.2

View File

@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType; import java.lang.invoke.MethodType;
import java.text.ParseException; import java.text.ParseException;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
@ -67,7 +68,7 @@ public class ExpressionsBenchmark {
lookup.findStatic( lookup.findStatic(
lookup.lookupClass(), "ident", MethodType.methodType(double.class, double.class))); lookup.lookupClass(), "ident", MethodType.methodType(double.class, double.class)));
m.put("mh_identity", MethodHandles.identity(double.class)); m.put("mh_identity", MethodHandles.identity(double.class));
return m; return Collections.unmodifiableMap(m);
} catch (ReflectiveOperationException e) { } catch (ReflectiveOperationException e) {
throw new AssertionError(e); throw new AssertionError(e);
} }
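
Returning Collections.unmodifiableMap(m) instead of m above means callers of the benchmark's function map cannot mutate it after construction. A minimal sketch of the pattern (placeholder names):

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    final class ImmutableView {
      static Map<String, Integer> build() {
        Map<String, Integer> m = new HashMap<>();
        m.put("identity", 1);
        return Collections.unmodifiableMap(m); // expose a read-only view
      }

      public static void main(String[] args) {
        try {
          build().put("x", 2);
        } catch (UnsupportedOperationException expected) {
          System.out.println("mutation rejected, as intended");
        }
      }
    }

Map.copyOf is an alternative when no null values are involved; the wrapper keeps the original map and tolerates whatever it holds.
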

View File

@ -21,7 +21,6 @@ import java.io.InputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import javax.xml.XMLConstants; import javax.xml.XMLConstants;
@ -68,7 +67,7 @@ public class EnwikiContentSource extends ContentSource {
private boolean stopped = false; private boolean stopped = false;
private String[] tuple; private String[] tuple;
private NoMoreDataException nmde; private NoMoreDataException nmde;
private StringBuilder contents = new StringBuilder(); private final StringBuilder contents = new StringBuilder();
private String title; private String title;
private String body; private String body;
private String time; private String time;
@ -262,7 +261,6 @@ public class EnwikiContentSource extends ContentSource {
} }
} }
private static final Map<String, Integer> ELEMENTS = new HashMap<>();
private static final int TITLE = 0; private static final int TITLE = 0;
private static final int DATE = TITLE + 1; private static final int DATE = TITLE + 1;
private static final int BODY = DATE + 1; private static final int BODY = DATE + 1;
@ -272,24 +270,24 @@ public class EnwikiContentSource extends ContentSource {
// should not be part of the tuple; we should define them after LENGTH. // should not be part of the tuple; we should define them after LENGTH.
private static final int PAGE = LENGTH + 1; private static final int PAGE = LENGTH + 1;
private static final Map<String, Integer> ELEMENTS =
Map.of(
"page", PAGE,
"text", BODY,
"timestamp", DATE,
"title", TITLE,
"id", ID);
private static final String[] months = { private static final String[] months = {
"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC" "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
}; };
static {
ELEMENTS.put("page", Integer.valueOf(PAGE));
ELEMENTS.put("text", Integer.valueOf(BODY));
ELEMENTS.put("timestamp", Integer.valueOf(DATE));
ELEMENTS.put("title", Integer.valueOf(TITLE));
ELEMENTS.put("id", Integer.valueOf(ID));
}
/** /**
* Returns the type of the element if defined, otherwise returns -1. This method is useful in * Returns the type of the element if defined, otherwise returns -1. This method is useful in
* startElement and endElement, since it avoids comparing the element's qualified name over and * startElement and endElement, since it avoids comparing the element's qualified name over and
* over. * over.
*/ */
private static final int getElementType(String elem) { private static int getElementType(String elem) {
Integer val = ELEMENTS.get(elem); Integer val = ELEMENTS.get(elem);
return val == null ? -1 : val.intValue(); return val == null ? -1 : val.intValue();
} }
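
With ELEMENTS now built via Map.of, the lookup above could be tightened further with getOrDefault; a small sketch (constant values are placeholders, and this simplification is not part of the commit):

    import java.util.Map;

    final class ElementLookup {
      // placeholder values standing in for TITLE/DATE/BODY/ID/PAGE
      private static final int TITLE = 0, DATE = 1, BODY = 2, ID = 3, PAGE = 5;

      private static final Map<String, Integer> ELEMENTS =
          Map.of("page", PAGE, "text", BODY, "timestamp", DATE, "title", TITLE, "id", ID);

      static int getElementType(String elem) {
        return ELEMENTS.getOrDefault(elem, -1); // -1 for unknown elements
      }

      public static void main(String[] args) {
        System.out.println(getElementType("title")); // 0
        System.out.println(getElementType("nope")); // -1
      }
    }
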
@ -297,7 +295,7 @@ public class EnwikiContentSource extends ContentSource {
private Path file; private Path file;
private boolean keepImages = true; private boolean keepImages = true;
private InputStream is; private InputStream is;
private Parser parser = new Parser(); private final Parser parser = new Parser();
@Override @Override
public void close() throws IOException { public void close() throws IOException {

View File

@ -18,6 +18,8 @@ package org.apache.lucene.benchmark.byTask.feeds;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
@ -40,22 +42,28 @@ public abstract class TrecDocParser {
/** trec parser type used for unknown extensions */ /** trec parser type used for unknown extensions */
public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2; public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;
static final Map<ParsePathType, TrecDocParser> pathType2parser = new HashMap<>(); static final Map<ParsePathType, TrecDocParser> pathType2Parser;
static { static {
pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser()); pathType2Parser =
pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser()); Collections.unmodifiableMap(
pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser()); new EnumMap<>(
pathType2parser.put(ParsePathType.FT, new TrecFTParser()); Map.of(
pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser()); ParsePathType.GOV2, new TrecGov2Parser(),
ParsePathType.FBIS, new TrecFBISParser(),
ParsePathType.FR94, new TrecFR94Parser(),
ParsePathType.FT, new TrecFTParser(),
ParsePathType.LATIMES, new TrecLATimesParser())));
} }
static final Map<String, ParsePathType> pathName2Type = new HashMap<>(); static final Map<String, ParsePathType> pathName2Type;
static { static {
Map<String, ParsePathType> name2Type = new HashMap<>();
for (ParsePathType ppt : ParsePathType.values()) { for (ParsePathType ppt : ParsePathType.values()) {
pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt); name2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
} }
pathName2Type = Collections.unmodifiableMap(name2Type);
} }
/** max length of walk up from file to its ancestors when looking for a known path type */ /** max length of walk up from file to its ancestors when looking for a known path type */
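
The rewrite above builds the parser table from Map.of for readability, copies it into an EnumMap for compact array-backed lookups, and wraps it unmodifiable. A self-contained sketch of the same idiom (enum and values are placeholders):

    import java.util.Collections;
    import java.util.EnumMap;
    import java.util.Map;

    final class EnumMapIdiom {
      enum PathType { GOV2, FBIS }

      static final Map<PathType, String> PARSERS =
          Collections.unmodifiableMap(
              new EnumMap<>(Map.of(
                  PathType.GOV2, "gov2 parser",
                  PathType.FBIS, "fbis parser")));

      public static void main(String[] args) {
        System.out.println(PARSERS.get(PathType.GOV2)); // gov2 parser
      }
    }
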

View File

@ -32,6 +32,6 @@ public class TrecParserByPath extends TrecDocParser {
StringBuilder docBuf, StringBuilder docBuf,
ParsePathType pathType) ParsePathType pathType)
throws IOException { throws IOException {
return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType); return pathType2Parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
} }
} }

View File

@ -43,7 +43,7 @@ public class TaskSequence extends PerfTask {
private boolean resetExhausted = false; private boolean resetExhausted = false;
private PerfTask[] tasksArray; private PerfTask[] tasksArray;
private boolean anyExhaustibleTasks; private boolean anyExhaustibleTasks;
private boolean collapsable = false; // to not collapse external sequence named in alg. private final boolean collapsable; // to not collapse external sequence named in alg.
private boolean fixedTime; // true if we run for fixed time private boolean fixedTime; // true if we run for fixed time
private double runTimeSec; // how long to run for private double runTimeSec; // how long to run for

View File

@ -23,7 +23,6 @@ import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorException;
@ -70,15 +69,9 @@ public class StreamUtils {
} }
} }
private static final Map<String, Type> extensionToType = new HashMap<>(); // these are in lower case, we will lower case at the test as well
private static final Map<String, Type> extensionToType =
static { Map.of(".bz2", Type.BZIP2, ".bzip", Type.BZIP2, ".gz", Type.GZIP, ".gzip", Type.GZIP);
// these in are lower case, we will lower case at the test as well
extensionToType.put(".bz2", Type.BZIP2);
extensionToType.put(".bzip", Type.BZIP2);
extensionToType.put(".gz", Type.GZIP);
extensionToType.put(".gzip", Type.GZIP);
}
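
The Map.of above replaces the mutable HashMap plus static initializer; since the keys are stored lower-case, lookups should lower-case the probe too. A sketch of how the extension check could use the table (names and Type values are placeholders, not this class's real API):

    import java.util.Locale;
    import java.util.Map;

    final class ExtensionLookup {
      enum Type { BZIP2, GZIP, PLAIN }

      private static final Map<String, Type> EXTENSION_TO_TYPE =
          Map.of(".bz2", Type.BZIP2, ".bzip", Type.BZIP2, ".gz", Type.GZIP, ".gzip", Type.GZIP);

      static Type typeOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        if (dot < 0) {
          return Type.PLAIN;
        }
        String ext = fileName.substring(dot).toLowerCase(Locale.ROOT);
        return EXTENSION_TO_TYPE.getOrDefault(ext, Type.PLAIN);
      }

      public static void main(String[] args) {
        System.out.println(typeOf("docs.GZ")); // GZIP
        System.out.println(typeOf("docs.txt")); // PLAIN
      }
    }
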
/** /**
* Returns an {@link InputStream} over the requested file. This method attempts to identify the * Returns an {@link InputStream} over the requested file. This method attempts to identify the

View File

@ -36,7 +36,7 @@ public class TestTrecContentSource extends LuceneTestCase {
/** A TrecDocMaker which works on a String and not files. */ /** A TrecDocMaker which works on a String and not files. */
private static class StringableTrecSource extends TrecContentSource { private static class StringableTrecSource extends TrecContentSource {
private String docs = null; private final String docs;
public StringableTrecSource(String docs, boolean forever) { public StringableTrecSource(String docs, boolean forever) {
this.docs = docs; this.docs = docs;

View File

@ -230,24 +230,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
return fields.size(); return fields.size();
} }
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override @Override
public void checkIntegrity() throws IOException { public void checkIntegrity() throws IOException {
// term dictionary // term dictionary

View File

@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;
@ -288,29 +289,10 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
@Override @Override
public String toString() { public String toString() {
return brToString(termBytes); return ToStringUtils.bytesRefToString(termBytes);
} }
} }
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class SubIndex { private static final class SubIndex {
public final FST<Output> index; public final FST<Output> index;
public final long termOrdStart; public final long termOrdStart;
@ -353,7 +335,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
@Override @Override
public String toString() { public String toString() {
return "BLOCK: " + brToString(prefix); return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
} }
public void compileIndex( public void compileIndex(
@ -457,9 +439,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
Output newOutput = Output newOutput =
FST_OUTPUTS.newOutput( FST_OUTPUTS.newOutput(
output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset); output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + " // System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
// termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" + // " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
// newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd)); // + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput); fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
} }
} }
@ -642,8 +624,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
long startFP = out.getFilePointer(); long startFP = out.getFilePointer();
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + " // if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + // " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// hasTerms + " hasSubBlocks=" + hasSubBlocks); // hasTerms + " hasSubBlocks=" + hasSubBlocks);
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1; boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
@ -662,11 +644,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(code); out.writeVInt(code);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " // System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + // " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? (" // ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + // (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
// isLastInFloor); // " isLastInFloor=" + isLastInFloor);
// } // }
final List<SubIndex> subIndices; final List<SubIndex> subIndices;
@ -784,7 +766,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
BytesRef suffixBytes = new BytesRef(suffix); BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix; suffixBytes.length = suffix;
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
} }
*/ */
@ -842,7 +825,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) { if (DEBUG) {
int[] tmp = new int[lastTerm.length]; int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size()); System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" +
Arrays.toString(tmp) + " pending.size()=" + pending.size());
} }
*/ */
@ -885,8 +869,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
// we are closing: // we are closing:
int prefixTopSize = pending.size() - prefixStarts[i]; int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) { if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// minItemsInBlock=" + minItemsInBlock); // " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize); writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1; prefixStarts[i] -= prefixTopSize - 1;
} }
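The recurring change in this file (and the files below) replaces each class's private brToString debug helper with the shared ToStringUtils.bytesRefToString. Judging from the helper bodies deleted later in this diff, the shared method plausibly behaves like the following sketch; the exact ToStringUtils implementation may differ in details such as the null placeholder.

// Plausible consolidation of the removed per-class brToString helpers;
// behavior inferred from the deleted copies shown later in this diff.
public static String bytesRefToString(BytesRef b) {
  if (b == null) {
    return "(null)";
  }
  try {
    // Well-formed UTF-8: decoded string followed by the hex form,
    // e.g. "lucene [6c 75 63 65 6e 65]".
    return b.utf8ToString() + " " + b;
  } catch (Throwable t) {
    // Not valid UTF-8 (e.g. a prefix ending mid-codepoint): hex form only.
    return b.toString();
  }
}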

View File

@ -61,7 +61,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
throws IOException { throws IOException {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + // System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" +
// brToString(compiled.commonSuffixRef)); // ToStringUtils.bytesRefToString(compiled.commonSuffixRef));
// } // }
this.fr = fr; this.fr = fr;
this.byteRunnable = compiled.getByteRunnable(); this.byteRunnable = compiled.getByteRunnable();
@ -283,13 +283,15 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame.loadNextFloorBlock(); currentFrame.loadNextFloorBlock();
continue; continue;
} else { } else {
// if (DEBUG) System.out.println(" return term=" + brToString(term)); // if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return; return;
} }
} }
continue; continue;
} else if (cmp == 0) { } else if (cmp == 0) {
// if (DEBUG) System.out.println(" return term=" + brToString(term)); // if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return; return;
} else { } else {
// Fallback to prior entry: the semantics of // Fallback to prior entry: the semantics of
@ -327,10 +329,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nintEnum.next seg=" + segment); // System.out.println("\nintEnum.next seg=" + segment);
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new // System.out.println(" frame ord=" + currentFrame.ord + " prefix=" +
// BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) +
// lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// (currentFrame.transitions.length == 0 ? "n/a" : // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix); // currentFrame.outputPrefix);
// } // }
@ -343,9 +345,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) System.out.println(" next-floor-block"); // if (DEBUG) System.out.println(" next-floor-block");
currentFrame.loadNextFloorBlock(); currentFrame.loadNextFloorBlock();
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + // if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + // currentFrame.prefix)) +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix); // currentFrame.outputPrefix);
} else { } else {
@ -357,9 +360,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame = stack[currentFrame.ord - 1]; currentFrame = stack[currentFrame.ord - 1];
assert currentFrame.lastSubFP == lastFP; assert currentFrame.lastSubFP == lastFP;
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + // if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + // currentFrame.prefix)) +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix); // currentFrame.outputPrefix);
} }
@ -373,7 +377,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// suffixRef.length = currentFrame.suffix; // suffixRef.length = currentFrame.suffix;
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + // System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " +
// currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + // currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" +
// brToString(suffixRef)); // ToStringUtils.bytesRefToString(suffixRef));
// } // }
if (currentFrame.suffix != 0) { if (currentFrame.suffix != 0) {
@ -480,15 +484,16 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
copyTerm(); copyTerm();
currentFrame = pushFrame(state); currentFrame = pushFrame(state);
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + // if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + // currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix); // currentFrame.outputPrefix);
} else if (byteRunnable.isAccept(state)) { } else if (byteRunnable.isAccept(state)) {
copyTerm(); copyTerm();
// if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" + // if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0 assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString(); : "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
return term; return term;

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
@ -174,11 +175,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
throws IOException { throws IOException {
final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc; f.arc = arc;
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + " // System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp +
// nextEnt=" + f.nextEnt); // " nextEnt=" + f.nextEnt);
if (f.fpOrig == fp && f.nextEnt != -1) { if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix); // term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) { if (f.prefix > targetBeforeCurrentLength) {
@ -204,7 +205,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length; // final int sav = term.length;
// term.length = length; // term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav; // term.length = sav;
// } // }
} }
@ -224,19 +225,6 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
return true; return true;
} }
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
@Override @Override
public boolean seekExact(final BytesRef target) throws IOException { public boolean seekExact(final BytesRef target) throws IOException {
@ -250,7 +238,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
/* /*
if (DEBUG) { if (DEBUG) {
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix); System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" +
ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
") validIndexPrefix=" + validIndexPrefix);
printSeekState(System.out); printSeekState(System.out);
} }
*/ */
@ -411,8 +401,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true; positioned = true;
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength); // targetBeforeCurrentLength);
// } // }
@ -443,7 +433,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel); term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto); term.setLength(1 + targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -459,7 +449,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" + // System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -502,7 +492,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
termExists = false; termExists = false;
term.setLength(targetUpto); term.setLength(targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -537,8 +527,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// termExists + ") validIndexPrefix= " + validIndexPrefix); // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(); // printSeekState();
// } // }
@ -581,9 +571,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// " output=" + output); // + " output=" + output);
// } // }
if (cmp != 0) { if (cmp != 0) {
break; break;
@ -697,8 +687,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true; positioned = true;
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength); // targetBeforeCurrentLength);
// } // }
@ -733,7 +723,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) { if (next() != null) {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term); // System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return SeekStatus.NOT_FOUND; return SeekStatus.NOT_FOUND;
} else { } else {
@ -744,7 +735,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
} }
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term); // System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return result; return result;
} }
@ -829,7 +821,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen=" + " prefixLen="
+ f.prefix + f.prefix
+ " prefix=" + " prefix="
+ brToString(prefix) + ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms=" + " hasTerms="
+ f.hasTerms + f.hasTerms
@ -859,7 +851,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen=" + " prefixLen="
+ f.prefix + f.prefix
+ " prefix=" + " prefix="
+ brToString(prefix) + ToStringUtils.bytesRefToString(prefix)
+ " nextEnt=" + " nextEnt="
+ f.nextEnt + f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@ -951,8 +943,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
assert !eof; assert !eof;
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " // System.out.println("\nBTTR.next seg=" + segment + " term=" +
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + // ToStringUtils.bytesRefToString(term) +
// " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// printSeekState(); // printSeekState();
// } // }
@ -1019,8 +1012,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true; // currentFrame.hasTerms = true;
currentFrame.loadBlock(); currentFrame.loadBlock();
} else { } else {
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
// currentFrame.ord=" + currentFrame.ord); // " currentFrame.ord=" + currentFrame.ord);
positioned = true; positioned = true;
return term.get(); return term.get();
} }
@ -1235,8 +1228,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
int low = 0; int low = 0;
int high = arc.numArcs() - 1; int high = arc.numArcs() - 1;
int mid = 0; int mid = 0;
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " // System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput +
// output=" + output); // " output=" + output);
boolean found = false; boolean found = false;
while (low <= high) { while (low <= high) {
mid = (low + high) >>> 1; mid = (low + high) >>> 1;

View File

@ -119,8 +119,8 @@ final class OrdsSegmentTermsEnumFrame {
numFollowFloorBlocks = floorDataReader.readVInt(); numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff; nextFloorLabel = floorDataReader.readByte() & 0xff;
nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong(); nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + " // System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd +
// shift=" + (nextFloorTermOrd-termOrdOrig)); // " shift=" + (nextFloorTermOrd-termOrdOrig));
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new // System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new
@ -289,8 +289,8 @@ final class OrdsSegmentTermsEnumFrame {
// Decodes next entry; returns true if it's a sub-block // Decodes next entry; returns true if it's a sub-block
public boolean nextLeaf() { public boolean nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// entCount=" + entCount); // " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd; : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
nextEnt++; nextEnt++;
@ -306,8 +306,8 @@ final class OrdsSegmentTermsEnumFrame {
} }
public boolean nextNonLeaf() { public boolean nextNonLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// entCount=" + entCount); // " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++; nextEnt++;
@ -374,8 +374,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1); newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0; hasTerms = (code & 1) != 0;
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// } // }
isLastInFloor = numFollowFloorBlocks == 1; isLastInFloor = numFollowFloorBlocks == 1;
@ -440,8 +440,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1); newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0; hasTerms = (code & 1) != 0;
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// } // }
isLastInFloor = numFollowFloorBlocks == 1; isLastInFloor = numFollowFloorBlocks == 1;
@ -495,8 +495,8 @@ final class OrdsSegmentTermsEnumFrame {
boolean absolute = metaDataUpto == 0; boolean absolute = metaDataUpto == 0;
assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt; assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt;
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit); // " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
// TODO: better API would be "jump straight to term=N"??? // TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) { while (metaDataUpto < limit) {
@ -593,10 +593,10 @@ final class OrdsSegmentTermsEnumFrame {
// scan the entries check if the suffix matches. // scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" + // ToStringUtils.bytesRefToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term)); // ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1; assert nextEnt != -1;
@ -627,7 +627,7 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix; // suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + OrdsSegmentTermsEnum.brToString(suffixBytesRef)); // + ToStringUtils.bytesRefToString(suffixBytesRef));
// } // }
final int termLen = prefix + suffix; final int termLen = prefix + suffix;
@ -714,8 +714,8 @@ final class OrdsSegmentTermsEnumFrame {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" + // ToStringUtils.bytesRefToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term)); // ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1; assert nextEnt != -1;
@ -743,7 +743,8 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix; // suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); // (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// } // }
ste.termExists = (code & 1) == 0; ste.termExists = (code & 1) == 0;

View File

@ -210,7 +210,7 @@ public final class FieldReader extends Terms {
@Override @Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
// BlockTreeTermsWriter.brToString(startTerm)); // ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton); // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...? // can we optimize knowing that...?

View File

@ -549,19 +549,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
} }
} }
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
private void copyTerm() { private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix; final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) { if (term.bytes.length < len) {

View File

@ -307,24 +307,6 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size(); return fieldMap.size();
} }
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override @Override
public void checkIntegrity() throws IOException { public void checkIntegrity() throws IOException {
// terms index // terms index

View File

@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression; import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.ByteSequenceOutputs;
@ -394,7 +395,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
} }
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms); termsWriter.write(term, termsEnum, norms);
} }
@ -433,33 +434,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override @Override
public String toString() { public String toString() {
return "TERM: " + brToString(termBytes); return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
} }
} }
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
/** /**
* Encodes long value to variable length byte[], in MSB order. Use {@link * Encodes long value to variable length byte[], in MSB order. Use {@link
* FieldReader#readMSBVLong} to decode. * FieldReader#readMSBVLong} to decode.
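The MSB-order encoding this javadoc refers to can be sketched generically: emit 7-bit groups starting from the most significant non-zero group, with the continuation bit set on every byte except the last. The layout below is an assumption for illustration, not necessarily the exact byte format the writer produces; MSB-first order means values sharing high-order bits share leading bytes, which is presumably what makes it index-friendly here.

// Hypothetical MSB-first VLong encoder (layout assumed, see note above).
static byte[] msbVLong(long v) {
  assert v >= 0;
  int groups = Math.max(1, (64 - Long.numberOfLeadingZeros(v) + 6) / 7);
  byte[] out = new byte[groups];
  for (int i = groups - 1; i >= 0; i--) {
    out[i] = (byte) (v & 0x7F); // low 7 bits of what remains
    v >>>= 7;
  }
  for (int i = 0; i < groups - 1; i++) {
    out[i] |= (byte) 0x80; // continuation bit on all but the final byte
  }
  return out; // e.g. 300 encodes as [0x82, 0x2C]
}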
@ -506,7 +484,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override @Override
public String toString() { public String toString() {
return "BLOCK: prefix=" + brToString(prefix); return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
} }
public void compileIndex( public void compileIndex(
@ -689,8 +667,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) { // if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes()); // BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength; // br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" // System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// + count); // ToStringUtils.bytesRefToString(br) + " count=" + count);
// } // }
// Root block better write all remaining pending entries: // Root block better write all remaining pending entries:
@ -843,9 +821,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength; prefix.length = prefixLength;
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks); // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);
// Write block header: // Write block header:
int numEntries = end - start; int numEntries = end - start;
@ -858,7 +837,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
/* /*
if (DEBUG) { if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
} }
*/ */
@ -893,7 +874,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix); // BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix; // suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes)); // System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// } // }
// For leaf block we write suffix straight // For leaf block we write suffix straight
@ -926,7 +908,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix); // BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix; // suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes)); // System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// } // }
// For non-leaf block we borrow 1 bit to record // For non-leaf block we borrow 1 bit to record
@ -968,8 +951,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix); // BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix; // suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " // System.out.println(" write sub-block suffix=" +
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); // ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" +
// (startFP-block.fp) + " floor=" + block.isFloor);
// } // }
assert floorLeadLabel == -1 assert floorLeadLabel == -1
@ -1090,7 +1074,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) { if (DEBUG) {
int[] tmp = new int[lastTerm.length]; int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size()); System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
} }
*/ */
@ -1143,8 +1128,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// we are closing: // we are closing:
int prefixTopSize = pending.size() - prefixStarts[i]; int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) { if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// minItemsInBlock=" + minItemsInBlock); // " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize); writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1; prefixStarts[i] -= prefixTopSize - 1;
} }

View File

@ -263,8 +263,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc; f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) { if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix); // term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) { // if (f.prefix > targetBeforeCurrentLength) {
@ -286,7 +286,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length; // final int sav = term.length;
// term.length = length; // term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav; // term.length = sav;
// } // }
} }
@ -306,27 +306,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true; return true;
} }
/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/
@Override @Override
public boolean seekExact(BytesRef target) throws IOException { public boolean seekExact(BytesRef target) throws IOException {
@ -344,8 +323,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// + termExists + ") validIndexPrefix=" + validIndexPrefix); // ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out); // printSeekState(System.out);
// } // }
@ -499,8 +479,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength); // targetBeforeCurrentLength);
// } // }
@ -531,7 +511,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel); term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto); term.setLength(1 + targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -547,7 +527,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" + // System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -586,7 +566,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false; termExists = false;
term.setLength(targetUpto); term.setLength(targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -622,8 +602,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
// ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out); // printSeekState(System.out);
// } // }
@ -663,9 +644,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// " output=" + output); // + " output=" + output);
// } // }
if (cmp != 0) { if (cmp != 0) {
break; break;
@ -771,8 +752,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength); // targetBeforeCurrentLength);
// } // }
@ -808,7 +789,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) { if (next() != null) {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term)); // System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return SeekStatus.NOT_FOUND; return SeekStatus.NOT_FOUND;
} else { } else {
@ -819,7 +801,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term)); // System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return result; return result;
} }
@ -1015,9 +998,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof; assert !eof;
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out); // printSeekState(System.out);
// } // }
@ -1081,8 +1065,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame: // try to scan to the right floor frame:
currentFrame.loadBlock(); currentFrame.loadBlock();
} else { } else {
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" // if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
// + currentFrame.ord); // " currentFrame.ord=" + currentFrame.ord);
return term.get(); return term.get();
} }
} }

View File

@ -295,8 +295,8 @@ final class SegmentTermsEnumFrame {
} }
public void nextLeaf() { public void nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// entCount=" + entCount); // " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++; nextEnt++;
@ -388,8 +388,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1); newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0; hasTerms = (code & 1) != 0;
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// } // }
isLastInFloor = numFollowFloorBlocks == 1; isLastInFloor = numFollowFloorBlocks == 1;
@ -531,28 +531,14 @@ final class SegmentTermsEnumFrame {
private long subCode; private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we // Target's prefix matches this block's prefix; we
// scan the entries check if the suffix matches. // scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// brToString(term)); // ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1; assert nextEnt != -1;
@ -582,7 +568,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix; // suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef)); // + ToStringUtils.bytesRefToString(suffixBytesRef));
// } // }
startBytePos = suffixesReader.getPosition(); startBytePos = suffixesReader.getPosition();
@ -647,8 +633,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException { public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// brToString(target)); // ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1; assert nextEnt != -1;
@ -676,7 +663,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix; // suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); // (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// } // }
final int termLen = prefix + suffix; final int termLen = prefix + suffix;
@ -708,8 +696,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND: // return NOT_FOUND:
fillTerm(); fillTerm();
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
// ste.termExists=" + ste.termExists); // " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) { if (!exactOnly && !ste.termExists) {
// System.out.println(" now pushFrame"); // System.out.println(" now pushFrame");

View File

@ -166,6 +166,16 @@ public final class FeatureField extends Field {
return stream; return stream;
} }
/**
* This is useful if you have multiple features sharing a name and you want to take action to
* deduplicate them.
*
* @return the feature value of this field.
*/
public float getFeatureValue() {
return featureValue;
}
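The new accessor exposes the feature value so callers can implement the deduplication the javadoc mentions. A hypothetical sketch, keeping the largest value per feature name; it assumes stringValue() returns the feature name, and the keep-the-max policy is illustrative.

// Hypothetical dedup of FeatureFields that share a feature name.
static Collection<FeatureField> dedupByName(List<FeatureField> features) {
  Map<String, FeatureField> best = new HashMap<>();
  for (FeatureField f : features) {
    best.merge(f.stringValue(), f,
        (a, b) -> a.getFeatureValue() >= b.getFeatureValue() ? a : b);
  }
  return best.values();
}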
private static final class FeatureTokenStream extends TokenStream { private static final class FeatureTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class); private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);

View File

@ -21,6 +21,7 @@ import java.io.StreamTokenizer;
import java.io.StringReader; import java.io.StringReader;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
@ -404,21 +405,23 @@ public class SimpleWKTShapeParser {
ENVELOPE("envelope"); // not part of the actual WKB spec ENVELOPE("envelope"); // not part of the actual WKB spec
private final String shapeName; private final String shapeName;
private static final Map<String, ShapeType> shapeTypeMap = new HashMap<>(); private static final Map<String, ShapeType> shapeTypeMap;
private static final String BBOX = "BBOX"; private static final String BBOX = "BBOX";
static { static {
Map<String, ShapeType> shapeTypes = new HashMap<>();
for (ShapeType type : values()) { for (ShapeType type : values()) {
shapeTypeMap.put(type.shapeName, type); shapeTypes.put(type.shapeName, type);
} }
shapeTypeMap.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE); shapeTypes.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
shapeTypeMap = Collections.unmodifiableMap(shapeTypes);
} }
ShapeType(String shapeName) { ShapeType(String shapeName) {
this.shapeName = shapeName; this.shapeName = shapeName;
} }
protected String typename() { String typename() {
return shapeName; return shapeName;
} }
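The map change above is the build-then-freeze idiom: populate a local map inside the static initializer, then publish only an unmodifiable view, so the lookup table cannot be mutated after class initialization. Reduced to its shape:

// Build-then-freeze for a static lookup table.
private static final Map<String, ShapeType> LOOKUP;
static {
  Map<String, ShapeType> m = new HashMap<>();
  for (ShapeType t : values()) {
    m.put(t.shapeName, t);
  }
  LOOKUP = Collections.unmodifiableMap(m); // Map.copyOf(m) also works on Java 10+
}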

View File

@ -32,7 +32,7 @@ public final class FieldInfo {
/** Internal field number */ /** Internal field number */
public final int number; public final int number;
private DocValuesType docValuesType = DocValuesType.NONE; private DocValuesType docValuesType;
// True if any document indexed term vectors // True if any document indexed term vectors
private boolean storeTermVector; private boolean storeTermVector;
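This hunk and several below it (LiveIndexWriterConfig, MergePolicy, WANDScorer, TermOrdValComparator, RoaringDocIdSet) apply the same cleanup: a field initializer is dropped when it merely restates the JVM default, or, presumably, when every constructor overwrites it anyway (as with values like canSkipDocuments = true). A minimal illustration with made-up names:

// Dead-store cleanup: the JVM zero-initializes fields, and initializers
// that every constructor overwrites never take effect.
class Example {
  private long counter;    // was "= 0": redundant, zero is the default
  private boolean canSkip; // was "= true": assigned by the constructor below

  Example(boolean canSkip) {
    this.canSkip = canSkip; // sole writer, so the field initializer was dead
  }
}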

View File

@ -84,7 +84,7 @@ public class LiveIndexWriterConfig {
protected volatile int perThreadHardLimitMB; protected volatile int perThreadHardLimitMB;
/** True if segment flushes should use compound file format */ /** True if segment flushes should use compound file format */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; protected volatile boolean useCompoundFile;
/** True if calls to {@link IndexWriter#close()} should first do a commit. */ /** True if calls to {@link IndexWriter#close()} should first do a commit. */
protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE; protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;

View File

@ -597,12 +597,12 @@ public abstract class MergePolicy {
* If the size of the merge segment exceeds this ratio of the total index size then it will remain * If the size of the merge segment exceeds this ratio of the total index size then it will remain
* in non-compound format * in non-compound format
*/ */
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; protected double noCFSRatio;
/** /**
* If the size of the merged segment exceeds this value then it will not use compound file format. * If the size of the merged segment exceeds this value then it will not use compound file format.
*/ */
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE; protected long maxCFSSegmentSize;
/** Creates a new merge policy instance. */ /** Creates a new merge policy instance. */
protected MergePolicy() { protected MergePolicy() {

View File

@ -103,7 +103,7 @@ public abstract class VectorizationProvider {
// visible for tests // visible for tests
static VectorizationProvider lookup(boolean testMode) { static VectorizationProvider lookup(boolean testMode) {
final int runtimeVersion = Runtime.version().feature(); final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 20 && runtimeVersion <= 21) { if (runtimeVersion >= 20 && runtimeVersion <= 22) {
// is locale sane (only buggy in Java 20) // is locale sane (only buggy in Java 20)
if (isAffectedByJDK8301190()) { if (isAffectedByJDK8301190()) {
LOG.warning( LOG.warning(
@ -169,9 +169,9 @@ public abstract class VectorizationProvider {
} catch (ClassNotFoundException cnfe) { } catch (ClassNotFoundException cnfe) {
throw new LinkageError("PanamaVectorizationProvider is missing in Lucene JAR file", cnfe); throw new LinkageError("PanamaVectorizationProvider is missing in Lucene JAR file", cnfe);
} }
} else if (runtimeVersion >= 22) { } else if (runtimeVersion >= 23) {
LOG.warning( LOG.warning(
"You are running with Java 22 or later. To make full use of the Vector API, please update Apache Lucene."); "You are running with Java 23 or later. To make full use of the Vector API, please update Apache Lucene.");
} else if (lookupVectorModule().isPresent()) { } else if (lookupVectorModule().isPresent()) {
LOG.warning( LOG.warning(
"Java vector incubator module was enabled by command line flags, but your Java version is too old: " "Java vector incubator module was enabled by command line flags, but your Java version is too old: "

View File

@ -120,7 +120,7 @@ final class WANDScorer extends Scorer {
private final int scalingFactor; private final int scalingFactor;
// scaled min competitive score // scaled min competitive score
private long minCompetitiveScore = 0; private long minCompetitiveScore;
private final Scorer[] allScorers; private final Scorer[] allScorers;

View File

@ -89,7 +89,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
private boolean singleSort; private boolean singleSort;
/** Whether this comparator is allowed to skip documents. */ /** Whether this comparator is allowed to skip documents. */
private boolean canSkipDocuments = true; private boolean canSkipDocuments;
/** Whether the collector is done with counting hits so that we can start skipping documents. */ /** Whether the collector is done with counting hits so that we can start skipping documents. */
private boolean hitsThresholdReached = false; private boolean hitsThresholdReached = false;

View File

@ -346,7 +346,7 @@ public class MMapDirectory extends FSDirectory {
} }
final var lookup = MethodHandles.lookup(); final var lookup = MethodHandles.lookup();
final int runtimeVersion = Runtime.version().feature(); final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 19 && runtimeVersion <= 21) { if (runtimeVersion >= 19) {
try { try {
final var cls = lookup.findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider"); final var cls = lookup.findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider");
// we use method handles, so we do not need to deal with setAccessible as we have private // we use method handles, so we do not need to deal with setAccessible as we have private
@ -366,9 +366,6 @@ public class MMapDirectory extends FSDirectory {
throw new LinkageError( throw new LinkageError(
"MemorySegmentIndexInputProvider is missing in Lucene JAR file", cnfe); "MemorySegmentIndexInputProvider is missing in Lucene JAR file", cnfe);
} }
} else if (runtimeVersion >= 22) {
LOG.warning(
"You are running with Java 22 or later. To make full use of MMapDirectory, please update Apache Lucene.");
} }
return new MappedByteBufferIndexInputProvider(); return new MappedByteBufferIndexInputProvider();
} }
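
With the upper version bound removed, MMapDirectory now probes for the MemorySegment provider on any Java 19+ runtime. The probe itself is a MethodHandles class lookup; a minimal sketch of that idiom (the provider class name below is a placeholder, not Lucene's):

import java.lang.invoke.MethodHandles;

public class ProviderLookupDemo {
  public static void main(String[] args) {
    final var lookup = MethodHandles.lookup();
    try {
      // findClass resolves and access-checks the named class via this lookup.
      final Class<?> cls = lookup.findClass("com.example.OptionalProvider"); // hypothetical name
      System.out.println("Found provider: " + cls.getName());
    } catch (ClassNotFoundException e) {
      System.out.println("Provider not present; falling back to the default implementation.");
    } catch (IllegalAccessException e) {
      System.out.println("Provider present but inaccessible: " + e.getMessage());
    }
  }
}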

View File

@ -130,17 +130,20 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
return false; return false;
} }
/** Interprets stored bytes as UTF8 bytes, returning the resulting string */ /**
* Interprets stored bytes as UTF-8 bytes, returning the resulting string. May throw an {@link
* AssertionError} or a {@link RuntimeException} if the data is not well-formed UTF-8.
*/
public String utf8ToString() { public String utf8ToString() {
final char[] ref = new char[length]; final char[] ref = new char[length];
final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref); final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return new String(ref, 0, len); return new String(ref, 0, len);
} }
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */ /** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder(2 + 3 * length);
sb.append('['); sb.append('[');
final int end = offset + length; final int end = offset + length;
for (int i = offset; i < end; i++) { for (int i = offset; i < end; i++) {
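
The presized StringBuilder above reserves 2 + 3 * length chars: two brackets plus, per byte, two hex digits and a separator. A standalone sketch that renders the same "[6c 75 63 65 6e 65]" style (illustrative, not Lucene code):

import java.nio.charset.StandardCharsets;

public class HexDemo {
  static String toHexString(byte[] bytes) {
    StringBuilder sb = new StringBuilder(2 + 3 * bytes.length); // never reallocates
    sb.append('[');
    for (int i = 0; i < bytes.length; i++) {
      if (i > 0) sb.append(' ');
      sb.append(Integer.toHexString(bytes[i] & 0xff));
    }
    return sb.append(']').toString();
  }

  public static void main(String[] args) {
    System.out.println(toHexString("lucene".getBytes(StandardCharsets.UTF_8)));
    // prints: [6c 75 63 65 6e 65]
  }
}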

View File

@ -253,7 +253,7 @@ public class RoaringDocIdSet extends DocIdSet {
private class Iterator extends DocIdSetIterator { private class Iterator extends DocIdSetIterator {
int block; int block;
DocIdSetIterator sub = null; DocIdSetIterator sub;
int doc; int doc;
Iterator() throws IOException { Iterator() throws IOException {

View File

@ -32,6 +32,10 @@ public final class ToStringUtils {
private static final char[] HEX = "0123456789abcdef".toCharArray(); private static final char[] HEX = "0123456789abcdef".toCharArray();
/**
* Unlike {@link Long#toHexString(long)}, returns a String with a "0x" prefix and all the leading
* zeros.
*/
public static String longHex(long x) { public static String longHex(long x) {
char[] asHex = new char[16]; char[] asHex = new char[16];
for (int i = 16; --i >= 0; x >>>= 4) { for (int i = 16; --i >= 0; x >>>= 4) {
@ -39,4 +43,31 @@ public final class ToStringUtils {
} }
return "0x" + new String(asHex); return "0x" + new String(asHex);
} }
/**
* Builds a String with both the textual representation of the {@link BytesRef} data and the
* bytes' hex values. For example: {@code "hello [68 65 6c 6c 6f]"}. If the content is not a
* valid UTF-8 sequence, only the hex values are returned, as per {@link BytesRef#toString()}.
*/
@SuppressWarnings("unused")
public static String bytesRefToString(BytesRef b) {
if (b == null) {
return "null";
}
try {
return b.utf8ToString() + " " + b;
} catch (AssertionError | RuntimeException t) {
// If BytesRef isn't actually UTF-8, or it's e.g. a prefix of UTF-8
// that ends mid-unicode-char, we fall back to hex:
return b.toString();
}
}
public static String bytesRefToString(BytesRefBuilder b) {
return bytesRefToString(b.get());
}
public static String bytesRefToString(byte[] b) {
return bytesRefToString(new BytesRef(b));
}
} }
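
A quick usage sketch of the two helpers above (assuming the post-patch org.apache.lucene.util.ToStringUtils and BytesRef are on the classpath):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

public class ToStringUtilsDemo {
  public static void main(String[] args) {
    System.out.println(Long.toHexString(1234L));      // 4d2 (no prefix, no padding)
    System.out.println(ToStringUtils.longHex(1234L)); // 0x00000000000004d2
    // Well-formed UTF-8: text followed by the hex dump.
    System.out.println(ToStringUtils.bytesRefToString(new BytesRef("hello")));
    // prints: hello [68 65 6c 6c 6f]
    // Malformed UTF-8 is expected to fall back to hex only (here "[ff]"):
    System.out.println(ToStringUtils.bytesRefToString(new byte[] {(byte) 0xff}));
  }
}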

View File

@ -133,10 +133,17 @@ public final class Version {
/** /**
* Match settings and bugs in Lucene's 9.10.0 release. * Match settings and bugs in Lucene's 9.10.0 release.
* *
* @deprecated Use latest * @deprecated (9.11.0) Use latest
*/ */
@Deprecated public static final Version LUCENE_9_10_0 = new Version(9, 10, 0); @Deprecated public static final Version LUCENE_9_10_0 = new Version(9, 10, 0);
/**
* Match settings and bugs in Lucene's 9.11.0 release.
*
* @deprecated Use latest
*/
@Deprecated public static final Version LUCENE_9_11_0 = new Version(9, 11, 0);
/** /**
* Match settings and bugs in Lucene's 10.0.0 release. * Match settings and bugs in Lucene's 10.0.0 release.
* *

View File

@ -31,7 +31,7 @@ import org.apache.lucene.util.IntsRef;
*/ */
public class LimitedFiniteStringsIterator extends FiniteStringsIterator { public class LimitedFiniteStringsIterator extends FiniteStringsIterator {
/** Maximum number of finite strings to create. */ /** Maximum number of finite strings to create. */
private int limit = Integer.MAX_VALUE; private final int limit;
/** Number of generated finite strings. */ /** Number of generated finite strings. */
private int count = 0; private int count = 0;

View File

@ -108,10 +108,16 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces
if (this.curSegment == null) { if (this.curSegment == null) {
return new AlreadyClosedException("Already closed: " + this); return new AlreadyClosedException("Already closed: " + this);
} }
// ISE can be thrown by MemorySegment and contains "closed" in message: // in Java 22 or later we can check the isAlive status of all segments
// (see https://bugs.openjdk.org/browse/JDK-8310644):
if (Arrays.stream(segments).allMatch(s -> s.scope().isAlive()) == false) {
return new AlreadyClosedException("Already closed: " + this);
}
// fallback for Java 21: ISE can be thrown by MemorySegment and contains "closed" in message:
if (e instanceof IllegalStateException if (e instanceof IllegalStateException
&& e.getMessage() != null && e.getMessage() != null
&& e.getMessage().contains("closed")) { && e.getMessage().contains("closed")) {
// the check is on message only, so preserve original cause for debugging:
return new AlreadyClosedException("Already closed: " + this, e); return new AlreadyClosedException("Already closed: " + this, e);
} }
// otherwise rethrow unmodified NPE/ISE (as it is possibly a bug with passing a null parameter to // otherwise rethrow unmodified NPE/ISE (as it is possibly a bug with passing a null parameter to
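
The new guard relies on MemorySegment.Scope#isAlive, which, per the comment above, can be consulted for all segments on Java 22 or later (JDK-8310644). A minimal sketch of that liveness check on a plain shared arena (requires a recent JDK with the final java.lang.foreign API):

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;

public class ScopeDemo {
  public static void main(String[] args) {
    Arena arena = Arena.ofShared();
    MemorySegment segment = arena.allocate(16);
    System.out.println(segment.scope().isAlive()); // true
    arena.close();
    System.out.println(segment.scope().isAlive()); // false: any access now throws
  }
}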

View File

@ -33,7 +33,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
public MemorySegmentIndexInputProvider() { public MemorySegmentIndexInputProvider() {
var log = Logger.getLogger(getClass().getName()); var log = Logger.getLogger(getClass().getName());
log.info( log.info(
"Using MemorySegmentIndexInput with Java 21; to disable start with -D" "Using MemorySegmentIndexInput with Java 21 or later; to disable start with -D"
+ MMapDirectory.ENABLE_MEMORY_SEGMENTS_SYSPROP + MMapDirectory.ENABLE_MEMORY_SEGMENTS_SYSPROP
+ "=false"); + "=false");
} }

View File

@ -154,6 +154,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
IndexWriter writer = IndexWriter writer =
new IndexWriter( new IndexWriter(
directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp)); directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp));
TestUtil.reduceOpenFiles(writer);
Document doc = new Document(); Document doc = new Document();
Field idField = newStringField("id", "", Field.Store.YES); Field idField = newStringField("id", "", Field.Store.YES);
@ -779,6 +780,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMergePolicy(NoMergePolicy.INSTANCE);
iwc.setMaxBufferedDocs(2); iwc.setMaxBufferedDocs(2);
iwc.setUseCompoundFile(true); // reduce open files
IndexWriter w = new IndexWriter(dir, iwc); IndexWriter w = new IndexWriter(dir, iwc);
int numDocs = TEST_NIGHTLY ? 1000 : 100; int numDocs = TEST_NIGHTLY ? 1000 : 100;
for (int i = 0; i < numDocs; i++) { for (int i = 0; i < numDocs; i++) {

View File

@ -67,7 +67,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
assertAllBetween(last2, j, bd2, ids); assertAllBetween(last2, j, bd2, ids);
last2 = j + 1; last2 = j + 1;
} }
assertEquals(j + 1, queue.numGlobalTermDeletes()); assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
} }
assertEquals(uniqueValues, bd1.deleteTerms.keySet()); assertEquals(uniqueValues, bd1.deleteTerms.keySet());
assertEquals(uniqueValues, bd2.deleteTerms.keySet()); assertEquals(uniqueValues, bd2.deleteTerms.keySet());

View File

@ -258,6 +258,7 @@ public class TestIndexWriterThreadsToSegments extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig(r, new MockAnalyzer(r)); IndexWriterConfig iwc = newIndexWriterConfig(r, new MockAnalyzer(r));
iwc.setCommitOnClose(false); iwc.setCommitOnClose(false);
final RandomIndexWriter w = new RandomIndexWriter(r, dir, iwc); final RandomIndexWriter w = new RandomIndexWriter(r, dir, iwc);
TestUtil.reduceOpenFiles(w.w);
w.setDoRandomForceMerge(false); w.setDoRandomForceMerge(false);
Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 30)]; Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 30)];
final CountDownLatch startingGun = new CountDownLatch(1); final CountDownLatch startingGun = new CountDownLatch(1);

View File

@ -48,9 +48,9 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
public void testCorrectImplementation() { public void testCorrectImplementation() {
final int runtimeVersion = Runtime.version().feature(); final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 19 && runtimeVersion <= 21) { if (runtimeVersion >= 19) {
assertTrue( assertTrue(
"on Java 19, 20, and 21 we should use MemorySegmentIndexInputProvider to create mmap IndexInputs", "on Java 19 or later we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
isMemorySegmentImpl()); isMemorySegmentImpl());
} else { } else {
assertSame(MappedByteBufferIndexInputProvider.class, MMapDirectory.PROVIDER.getClass()); assertSame(MappedByteBufferIndexInputProvider.class, MMapDirectory.PROVIDER.getClass());

View File

@ -820,7 +820,7 @@ public final class JavascriptCompiler {
*/ */
public static final Map<String, MethodHandle> DEFAULT_FUNCTIONS = loadDefaultFunctions(); public static final Map<String, MethodHandle> DEFAULT_FUNCTIONS = loadDefaultFunctions();
private static final Map<String, MethodHandle> loadDefaultFunctions() { private static Map<String, MethodHandle> loadDefaultFunctions() {
final Map<String, MethodHandle> map = new HashMap<>(); final Map<String, MethodHandle> map = new HashMap<>();
final Lookup publicLookup = MethodHandles.publicLookup(); final Lookup publicLookup = MethodHandles.publicLookup();
try { try {
@ -852,7 +852,7 @@ public final class JavascriptCompiler {
} catch (ReflectiveOperationException | IOException e) { } catch (ReflectiveOperationException | IOException e) {
throw new Error("Cannot resolve function", e); throw new Error("Cannot resolve function", e);
} }
return Map.copyOf(map); return Collections.unmodifiableMap(map);
} }
/** Check Method signature for compatibility. */ /** Check Method signature for compatibility. */
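
The change from Map.copyOf to Collections.unmodifiableMap swaps one immutability idiom for another; the diff does not state the motivation, but since loadDefaultFunctions() builds the map locally and never leaks it, both prevent mutation of DEFAULT_FUNCTIONS, and unmodifiableMap avoids the extra copy. A small demonstration of the difference (pure JDK):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class UnmodifiableDemo {
  public static void main(String[] args) {
    Map<String, Integer> backing = new HashMap<>();
    backing.put("sin", 1);

    Map<String, Integer> snapshot = Map.copyOf(backing);              // detached copy
    Map<String, Integer> view = Collections.unmodifiableMap(backing); // live read-only view

    backing.put("cos", 2);
    System.out.println(snapshot.size()); // 1: the copy ignores later changes
    System.out.println(view.size());     // 2: the view reflects the backing map
    // view.put("tan", 3);               // would throw UnsupportedOperationException
  }
}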

View File

@ -123,7 +123,7 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
private final PostingsEnum postingsEnum; // with offsets private final PostingsEnum postingsEnum; // with offsets
private final int freq; private final int freq;
private int posCounter = -1; private int posCounter;
public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException { public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException {
this.term = Objects.requireNonNull(term); this.term = Objects.requireNonNull(term);

View File

@ -23,6 +23,9 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
@ -208,21 +211,23 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = new IndexSearcher(reader);
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader()); BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter); Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter);
assertScorerResults(searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"}); assertScorerResults(
searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"}, 2);
query = getParentJoinKnnQuery("field", new float[] {6, 6}, null, 3, parentFilter); query = getParentJoinKnnQuery("field", new float[] {6, 6}, null, 3, parentFilter);
assertScorerResults( assertScorerResults(
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}); searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);
query = query =
getParentJoinKnnQuery( getParentJoinKnnQuery(
"field", new float[] {6, 6}, new MatchAllDocsQuery(), 20, parentFilter); "field", new float[] {6, 6}, new MatchAllDocsQuery(), 20, parentFilter);
assertScorerResults( assertScorerResults(
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}); searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);
query = query =
getParentJoinKnnQuery( getParentJoinKnnQuery(
"field", new float[] {6, 6}, new MatchAllDocsQuery(), 1, parentFilter); "field", new float[] {6, 6}, new MatchAllDocsQuery(), 1, parentFilter);
assertScorerResults(searcher, query, new float[] {1f / 3f}, new String[] {"5"}); assertScorerResults(
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 1);
} }
} }
} }
@ -324,7 +329,8 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
assertEquals(expectedId, actualId); assertEquals(expectedId, actualId);
} }
void assertScorerResults(IndexSearcher searcher, Query query, float[] scores, String[] ids) void assertScorerResults(
IndexSearcher searcher, Query query, float[] possibleScores, String[] possibleIds, int count)
throws IOException { throws IOException {
IndexReader reader = searcher.getIndexReader(); IndexReader reader = searcher.getIndexReader();
Query rewritten = query.rewrite(searcher); Query rewritten = query.rewrite(searcher);
@ -334,11 +340,16 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
assertEquals(-1, scorer.docID()); assertEquals(-1, scorer.docID());
expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score); expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
DocIdSetIterator it = scorer.iterator(); DocIdSetIterator it = scorer.iterator();
for (int i = 0; i < scores.length; i++) { Map<String, Float> idToScore =
IntStream.range(0, possibleIds.length)
.boxed()
.collect(Collectors.toMap(i -> possibleIds[i], i -> possibleScores[i]));
for (int i = 0; i < count; i++) {
int docId = it.nextDoc(); int docId = it.nextDoc();
assertNotEquals(NO_MORE_DOCS, docId); assertNotEquals(NO_MORE_DOCS, docId);
assertEquals(scores[i], scorer.score(), 0.0001); String actualId = reader.storedFields().document(docId).get("id");
assertIdMatches(reader, ids[i], docId); assertTrue(idToScore.containsKey(actualId));
assertEquals(idToScore.get(actualId), scorer.score(), 0.0001);
} }
} }
} }
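
The reworked assertion above decouples result order from expectations by folding the parallel possibleIds/possibleScores arrays into a map. That idiom in isolation (pure JDK, illustrative values):

import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class ParallelArraysDemo {
  public static void main(String[] args) {
    String[] ids = {"5", "7"};
    float[] scores = {1f / 3f, 1f / 3f};
    // An index stream plus toMap pairs up same-index elements; keys must be unique.
    Map<String, Float> idToScore =
        IntStream.range(0, ids.length)
            .boxed()
            .collect(Collectors.toMap(i -> ids[i], i -> scores[i]));
    System.out.println(idToScore.get("5")); // 0.33333334
  }
}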

View File

@ -81,7 +81,8 @@ public class TestParentBlockJoinFloatKnnVectorQuery extends ParentBlockJoinKnnVe
float score1 = float score1 =
(float) ((1 + (2 * 2 + 3 * 4) / Math.sqrt((2 * 2 + 3 * 3) * (2 * 2 + 4 * 4))) / 2); (float) ((1 + (2 * 2 + 3 * 4) / Math.sqrt((2 * 2 + 3 * 3) * (2 * 2 + 4 * 4))) / 2);
assertScorerResults(searcher, query, new float[] {score0, score1}, new String[] {"1", "2"}); assertScorerResults(
searcher, query, new float[] {score0, score1}, new String[] {"1", "2"}, 2);
} }
} }
} }

View File

@ -239,7 +239,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
} }
// Test data - format is artist, song, weeks at top of charts // Test data - format is artist, song, weeks at top of charts
private static String[] hitsOfThe60s = { private static final String[] hitsOfThe60s = {
"1966\tSPENCER DAVIS GROUP\tKEEP ON RUNNING\t1", "1966\tSPENCER DAVIS GROUP\tKEEP ON RUNNING\t1",
"1966\tOVERLANDERS\tMICHELLE\t3", "1966\tOVERLANDERS\tMICHELLE\t3",
"1966\tNANCY SINATRA\tTHESE BOOTS ARE MADE FOR WALKIN'\t4", "1966\tNANCY SINATRA\tTHESE BOOTS ARE MADE FOR WALKIN'\t4",
@ -317,7 +317,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
"1969\tARCHIES\tSUGAR, SUGAR\t4" "1969\tARCHIES\tSUGAR, SUGAR\t4"
}; };
private static final Map<String, Record> parsedRecords = new HashMap<String, Record>(); private static final Map<String, Record> parsedRecords = new HashMap<>();
private Directory dir; private Directory dir;
private IndexReader reader; private IndexReader reader;
private IndexSearcher searcher; private IndexSearcher searcher;
@ -452,7 +452,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
private int getMaxNumRecordsPerArtist(ScoreDoc[] sd) throws IOException { private int getMaxNumRecordsPerArtist(ScoreDoc[] sd) throws IOException {
int result = 0; int result = 0;
HashMap<String, Integer> artistCounts = new HashMap<String, Integer>(); HashMap<String, Integer> artistCounts = new HashMap<>();
for (int i = 0; i < sd.length; i++) { for (int i = 0; i < sd.length; i++) {
Document doc = reader.storedFields().document(sd[i].doc); Document doc = reader.storedFields().document(sd[i].doc);
Record record = parsedRecords.get(doc.get("id")); Record record = parsedRecords.get(doc.get("id"));

View File

@ -17,7 +17,9 @@
package org.apache.lucene.queries.payloads; package org.apache.lucene.queries.payloads;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.EnumMap; import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation; import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType; import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
@ -30,32 +32,45 @@ import org.apache.lucene.util.BytesRef;
*/ */
public class PayloadMatcherFactory { public class PayloadMatcherFactory {
private static final EnumMap<PayloadType, EnumMap<MatchOperation, PayloadMatcher>> private static final Map<PayloadType, Map<MatchOperation, PayloadMatcher>>
payloadCheckerOpTypeMap; payloadCheckerOpTypeMap;
static { static {
payloadCheckerOpTypeMap = new EnumMap<>(PayloadType.class);
// ints // ints
EnumMap<MatchOperation, PayloadMatcher> intCheckers = new EnumMap<>(MatchOperation.class); Map<MatchOperation, PayloadMatcher> intCheckers =
intCheckers.put(MatchOperation.LT, new LTIntPayloadMatcher()); Collections.unmodifiableMap(
intCheckers.put(MatchOperation.LTE, new LTEIntPayloadMatcher()); new EnumMap<>(
intCheckers.put(MatchOperation.GT, new GTIntPayloadMatcher()); Map.of(
intCheckers.put(MatchOperation.GTE, new GTEIntPayloadMatcher()); MatchOperation.LT, new LTIntPayloadMatcher(),
EnumMap<MatchOperation, PayloadMatcher> floatCheckers = new EnumMap<>(MatchOperation.class); MatchOperation.LTE, new LTEIntPayloadMatcher(),
floatCheckers.put(MatchOperation.LT, new LTFloatPayloadMatcher()); MatchOperation.GT, new GTIntPayloadMatcher(),
floatCheckers.put(MatchOperation.LTE, new LTEFloatPayloadMatcher()); MatchOperation.GTE, new GTEIntPayloadMatcher())));
floatCheckers.put(MatchOperation.GT, new GTFloatPayloadMatcher()); // floats
floatCheckers.put(MatchOperation.GTE, new GTEFloatPayloadMatcher()); Map<MatchOperation, PayloadMatcher> floatCheckers =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
MatchOperation.LT, new LTFloatPayloadMatcher(),
MatchOperation.LTE, new LTEFloatPayloadMatcher(),
MatchOperation.GT, new GTFloatPayloadMatcher(),
MatchOperation.GTE, new GTEFloatPayloadMatcher())));
// strings // strings
EnumMap<MatchOperation, PayloadMatcher> stringCheckers = new EnumMap<>(MatchOperation.class); Map<MatchOperation, PayloadMatcher> stringCheckers =
stringCheckers.put(MatchOperation.LT, new LTStringPayloadMatcher()); Collections.unmodifiableMap(
stringCheckers.put(MatchOperation.LTE, new LTEStringPayloadMatcher()); new EnumMap<>(
stringCheckers.put(MatchOperation.GT, new GTStringPayloadMatcher()); Map.of(
stringCheckers.put(MatchOperation.GTE, new GTEStringPayloadMatcher()); MatchOperation.LT, new LTStringPayloadMatcher(),
MatchOperation.LTE, new LTEStringPayloadMatcher(),
MatchOperation.GT, new GTStringPayloadMatcher(),
MatchOperation.GTE, new GTEStringPayloadMatcher())));
// load the matcher maps per payload type // load the matcher maps per payload type
payloadCheckerOpTypeMap.put(PayloadType.INT, intCheckers); payloadCheckerOpTypeMap =
payloadCheckerOpTypeMap.put(PayloadType.FLOAT, floatCheckers); Collections.unmodifiableMap(
payloadCheckerOpTypeMap.put(PayloadType.STRING, stringCheckers); new EnumMap<>(
Map.of(
PayloadType.INT, intCheckers,
PayloadType.FLOAT, floatCheckers,
PayloadType.STRING, stringCheckers)));
} }
/** /**
@ -75,7 +90,7 @@ public class PayloadMatcherFactory {
return new EQPayloadMatcher(); return new EQPayloadMatcher();
} }
// otherwise, we need to pay attention to the payload type and operation // otherwise, we need to pay attention to the payload type and operation
EnumMap<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType); Map<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
if (opMap != null) { if (opMap != null) {
return opMap.get(op); return opMap.get(op);
} else { } else {
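
The rewritten factory seeds each EnumMap from Map.of for readability and then wraps it unmodifiably, keeping EnumMap's ordinal-indexed lookups. The idiom in isolation (the enum and string values below are placeholders):

import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

public class EnumMapDemo {
  enum Op { LT, LTE, GT, GTE }

  public static void main(String[] args) {
    Map<Op, String> checkers =
        Collections.unmodifiableMap(
            new EnumMap<>(
                Map.of(
                    Op.LT, "less-than",
                    Op.GTE, "greater-or-equal")));
    System.out.println(checkers.get(Op.LT)); // less-than
    // Caveat: new EnumMap<>(m) throws IllegalArgumentException when m is empty
    // and not itself an EnumMap, so the Map.of seed must be non-empty.
  }
}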

View File

@ -269,10 +269,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
MatchOperation.GT); MatchOperation.GT);
checkHits( checkHits(
stringGT2, stringGT2,
new int[] { // spotless:off alignedIntArray(
155, 255, 355, 455, 555, 655, 755, 855, 955, """
1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955 155, 255, 355, 455, 555, 655, 755, 855, 955,
}); // spotless:on 1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
"""));
SpanQuery stringGTE2 = SpanQuery stringGTE2 =
new SpanPayloadCheckQuery( new SpanPayloadCheckQuery(
new SpanNearQuery(new SpanQuery[] {termFifty, termFive}, 0, true), new SpanNearQuery(new SpanQuery[] {termFifty, termFive}, 0, true),
@ -281,10 +282,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
MatchOperation.GTE); MatchOperation.GTE);
checkHits( checkHits(
stringGTE2, stringGTE2,
new int[] { // spotless:off alignedIntArray(
55, 155, 255, 355, 455, 555, 655, 755, 855, 955, """
1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955 55, 155, 255, 355, 455, 555, 655, 755, 855, 955,
}); // spotless:on 1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
"""));
SpanQuery stringLT2 = SpanQuery stringLT2 =
new SpanPayloadCheckQuery( new SpanPayloadCheckQuery(
@ -306,6 +308,23 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
// sets "upto" back to zero between SpanOrQuery subclauses. // sets "upto" back to zero between SpanOrQuery subclauses.
} }
/**
* Parses a comma-separated array of integers, ignoring white space around them. This allows for
* arbitrary alignment of integers in the source string to convey additional information about
* their mutual relations. For example:
*
* <pre>{@code
* var ints =
* """
* 1, 2, 3,
* 11, 12, 13
* """
* }</pre>
*/
private static int[] alignedIntArray(String ints) {
return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
}
public void testUnorderedPayloadChecks() throws Exception { public void testUnorderedPayloadChecks() throws Exception {
SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five")); SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));
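
The new alignedIntArray helper lets the expected-hits tables above keep their column alignment inside a text block. A standalone copy of the helper applied to a small input (not the test's data):

import java.util.Arrays;

public class AlignedIntsDemo {
  // Same parsing logic as the helper above: split on commas, trim, parse.
  private static int[] alignedIntArray(String ints) {
    return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
  }

  public static void main(String[] args) {
    int[] parsed =
        alignedIntArray(
            """
             1,  2,  3,
            11, 12, 13
            """);
    System.out.println(Arrays.toString(parsed)); // [1, 2, 3, 11, 12, 13]
  }
}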

View File

@ -30,7 +30,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
*/ */
public class BoostQueryNode extends QueryNodeImpl { public class BoostQueryNode extends QueryNodeImpl {
private float value = 0; private float value;
/** /**
* Constructs a boost node * Constructs a boost node

View File

@ -84,7 +84,7 @@ public class ModifierQueryNode extends QueryNodeImpl {
} }
} }
private Modifier modifier = Modifier.MOD_NONE; private Modifier modifier;
/** /**
* Used to store the modifier value on the original query string * Used to store the modifier value on the original query string

View File

@ -25,9 +25,9 @@ import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
*/ */
public class OpaqueQueryNode extends QueryNodeImpl { public class OpaqueQueryNode extends QueryNodeImpl {
private CharSequence schema = null; private CharSequence schema;
private CharSequence value = null; private CharSequence value;
/** /**
* @param schema - schema identifier * @param schema - schema identifier

View File

@ -41,7 +41,7 @@ public class PathQueryNode extends QueryNodeImpl {
/** Term text with a beginning and end position */ /** Term text with a beginning and end position */
public static class QueryText implements Cloneable { public static class QueryText implements Cloneable {
CharSequence value = null; CharSequence value;
/** != null The term's begin position. */ /** != null The term's begin position. */
int begin; int begin;
@ -97,7 +97,7 @@ public class PathQueryNode extends QueryNodeImpl {
} }
} }
private List<QueryText> values = null; private List<QueryText> values;
/** /**
* @param pathElements - List of QueryText objects * @param pathElements - List of QueryText objects

View File

@ -25,7 +25,7 @@ import org.apache.lucene.search.PhraseQuery; // javadocs
/** Query node for {@link PhraseQuery}'s slop factor. */ /** Query node for {@link PhraseQuery}'s slop factor. */
public class PhraseSlopQueryNode extends QueryNodeImpl implements FieldableNode { public class PhraseSlopQueryNode extends QueryNodeImpl implements FieldableNode {
private int value = 0; private int value;
/** /**
* @exception QueryNodeError throw in overridden method to disallow * @exception QueryNodeError throw in overridden method to disallow

View File

@ -57,9 +57,9 @@ public class ProximityQueryNode extends BooleanQueryNode {
/** utility class containing the distance condition and number */ /** utility class containing the distance condition and number */
public static class ProximityType { public static class ProximityType {
int pDistance = 0; int pDistance;
Type pType = null; Type pType;
public ProximityType(Type type) { public ProximityType(Type type) {
this(type, 0); this(type, 0);
@ -71,10 +71,10 @@ public class ProximityQueryNode extends BooleanQueryNode {
} }
} }
private Type proximityType = Type.SENTENCE; private Type proximityType;
private int distance = -1; private int distance = -1;
private boolean inorder = false; private final boolean inorder;
private CharSequence field = null; private CharSequence field;
/** /**
* @param clauses - QueryNode children * @param clauses - QueryNode children

View File

@ -32,7 +32,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
*/ */
public class SlopQueryNode extends QueryNodeImpl implements FieldableNode { public class SlopQueryNode extends QueryNodeImpl implements FieldableNode {
private int value = 0; private int value;
/** /**
* @param query - QueryNode Tree with the phrase * @param query - QueryNode Tree with the phrase

View File

@ -32,10 +32,11 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
@Override @Override
public String toString() { public String toString() {
if (getChildren() == null || getChildren().size() == 0) return "<tokenizedphrase/>"; List<QueryNode> children = getChildren();
if (children == null || children.isEmpty()) return "<tokenizedphrase/>";
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("<tokenizedtphrase>"); sb.append("<tokenizedphrase>");
for (QueryNode child : getChildren()) { for (QueryNode child : children) {
sb.append("\n"); sb.append("\n");
sb.append(child.toString()); sb.append(child.toString());
} }
@ -46,16 +47,15 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
// This text representation is not re-parseable // This text representation is not re-parseable
@Override @Override
public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) { public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
if (getChildren() == null || getChildren().size() == 0) return ""; List<QueryNode> children = getChildren();
if (children == null || children.isEmpty()) return "";
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
String filler = ""; String filler = "";
for (QueryNode child : getChildren()) { for (QueryNode child : children) {
sb.append(filler).append(child.toQueryString(escapeSyntaxParser)); sb.append(filler).append(child.toQueryString(escapeSyntaxParser));
filler = ","; filler = ",";
} }
return "[TP[" + sb + "]]";
return "[TP[" + sb.toString() + "]]";
} }
@Override @Override
@ -70,27 +70,25 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
@Override @Override
public CharSequence getField() { public CharSequence getField() {
List<QueryNode> children = getChildren(); List<QueryNode> children = getChildren();
if (children != null) {
if (children == null || children.size() == 0) { for (QueryNode child : children) {
return null; if (child instanceof FieldableNode) {
return ((FieldableNode) child).getField();
} else { }
return ((FieldableNode) children.get(0)).getField(); }
} }
return null;
} }
@Override @Override
public void setField(CharSequence fieldName) { public void setField(CharSequence fieldName) {
List<QueryNode> children = getChildren(); List<QueryNode> children = getChildren();
if (children != null) { if (children != null) {
for (QueryNode child : children) {
for (QueryNode child : getChildren()) {
if (child instanceof FieldableNode) { if (child instanceof FieldableNode) {
((FieldableNode) child).setField(fieldName); ((FieldableNode) child).setField(fieldName);
} }
} }
} }
} }
} // end class MultitermQueryNode }

View File

@ -34,7 +34,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
*/ */
public class FieldBoostMapFCListener implements FieldConfigListener { public class FieldBoostMapFCListener implements FieldConfigListener {
private QueryConfigHandler config = null; private final QueryConfigHandler config;
public FieldBoostMapFCListener(QueryConfigHandler config) { public FieldBoostMapFCListener(QueryConfigHandler config) {
this.config = config; this.config = config;

View File

@ -36,7 +36,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
*/ */
public class FieldDateResolutionFCListener implements FieldConfigListener { public class FieldDateResolutionFCListener implements FieldConfigListener {
private QueryConfigHandler config = null; private final QueryConfigHandler config;
public FieldDateResolutionFCListener(QueryConfigHandler config) { public FieldDateResolutionFCListener(QueryConfigHandler config) {
this.config = config; this.config = config;

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair; import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
@ -175,8 +176,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc; f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) { if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix); // term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) { if (f.prefix > targetBeforeCurrentLength) {
@ -197,7 +198,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length; // final int sav = term.length;
// term.length = length; // term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav; // term.length = sav;
// } // }
} }
@ -222,19 +223,6 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
return seekExact(target, 0); return seekExact(target, 0);
} }
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
/** Get the version of the currently seek'd term; only valid if we are positioned. */ /** Get the version of the currently seek'd term; only valid if we are positioned. */
public long getVersion() { public long getVersion() {
return ((IDVersionTermState) currentFrame.state).idVersion; return ((IDVersionTermState) currentFrame.state).idVersion;
@ -258,8 +246,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
// + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix); // minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out); // printSeekState(System.out);
// } // }
@ -460,8 +449,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
} }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength + " termExists=" + termExists); // targetBeforeCurrentLength + " termExists=" + termExists);
// } // }
@ -492,7 +481,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel); term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto); term.setLength(1 + targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -520,10 +509,11 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// termExists = false; // termExists = false;
// } // }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " // System.out.println(" FAST version NOT_FOUND term=" +
// targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + // ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
// " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + // " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
// currentFrame.fp + " termExists=" + termExists); // validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
// " termExists=" + termExists);
// } // }
return false; return false;
} }
@ -553,7 +543,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" + // System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term)); // ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -604,7 +594,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
termExists = false; termExists = false;
term.setLength(targetUpto); term.setLength(targetUpto);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// } // }
return false; return false;
} }
@ -656,8 +646,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// termExists + ") validIndexPrefix= " + validIndexPrefix); // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(); // printSeekState();
// } // }
@ -700,9 +690,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// " output=" + output); // + " output=" + output);
// } // }
if (cmp != 0) { if (cmp != 0) {
break; break;
@ -814,8 +804,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
} }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength); // targetBeforeCurrentLength);
// } // }
@ -850,7 +840,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) { if (next() != null) {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term); // System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return SeekStatus.NOT_FOUND; return SeekStatus.NOT_FOUND;
} else { } else {
@ -861,7 +852,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
} }
} else { } else {
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term); // System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// } // }
return result; return result;
} }
@ -946,7 +938,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen=" + " prefixLen="
+ f.prefix + f.prefix
+ " prefix=" + " prefix="
+ brToString(prefix) + ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms=" + " hasTerms="
+ f.hasTerms + f.hasTerms
@ -974,7 +966,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen=" + " prefixLen="
+ f.prefix + f.prefix
+ " prefix=" + " prefix="
+ brToString(prefix) + ToStringUtils.bytesRefToString(prefix)
+ " nextEnt=" + " nextEnt="
+ f.nextEnt + f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@ -1063,9 +1055,10 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
assert !eof; assert !eof;
// if (DEBUG) { // if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " // System.out.println("\nBTTR.next seg=" + segment + " term=" +
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); // " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(); // printSeekState();
// } // }
@ -1129,8 +1122,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true; // currentFrame.hasTerms = true;
currentFrame.loadBlock(); currentFrame.loadBlock();
} else { } else {
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
// currentFrame.ord=" + currentFrame.ord); // " currentFrame.ord=" + currentFrame.ord);
return term.get(); return term.get();
} }
} }

Some files were not shown because too many files have changed in this diff