mirror of https://github.com/apache/lucene.git
Merge branch 'main' into java_21
commit 07f4b5b19f
@@ -1,15 +0,0 @@
-name: "Set up caches"
-description: "Set up cached resources"
-
-runs:
-  using: "composite"
-  steps:
-    - name: Cache/Restore cached gradle files
-      uses: actions/cache@v2
-      with:
-        path: |
-          ~/.gradle/caches
-          ~/.gradle/jdks
-        key: ${{ runner.os }}-gradle-caches-${{ hashFiles('versions.lock', '**/gradle-wrapper.properties') }}
-        restore-keys: |
-          ${{ runner.os }}-gradle-caches-
@@ -0,0 +1,29 @@
+# This composite action is included in other workflows to have a shared setup
+# for java, gradle, caches, etc.
+
+name: Prepare Lucene build
+
+inputs:
+  java-version:
+    required: false
+    default: 17
+    description: "The default JDK version to set up."
+
+  java-distribution:
+    required: false
+    default: "temurin"
+    description: "The default JDK distribution type"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Java (${{ inputs.java-distribution }}, ${{ inputs.java-version }})"
+      uses: actions/setup-java@v4
+      with:
+        distribution: ${{ inputs.java-distribution }}
+        java-version: ${{ inputs.java-version }}
+        java-package: jdk
+
+    # This includes "smart" caching of the wrapper and dependencies.
+    - name: Set up Gradle
+      uses: gradle/actions/setup-gradle@v3
@@ -1,44 +0,0 @@
-name: Distribution tests
-
-on:
-  # Allow manual triggers for testing the action.
-  workflow_dispatch:
-
-  pull_request:
-    branches:
-      - 'main'
-
-  push:
-    branches:
-      - 'main'
-
-permissions:
-  contents: read # to fetch code (actions/checkout)
-
-jobs:
-  test:
-    name: Run distribution tests
-    timeout-minutes: 15
-
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        # we want to run the distribution tests on all major OSs, but it's occasionally too slow (or hangs or the forked process is not started at all..., not sure the cause) on windows.
-        #os: [ubuntu-latest, macos-latest, windows-latest]
-        os: [ubuntu-latest, macos-latest]
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: 21
-          java-package: jdk
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run all distribution tests including GUI tests (${{ matrix.os }})
-        run: ./gradlew -p lucene/distribution.tests test
@@ -1,84 +0,0 @@
-name: Gradle Precommit Checks
-
-on:
-  pull_request:
-    branches:
-      - '*'
-
-  push:
-    branches:
-      - main
-      - branch_9x
-
-permissions:
-  contents: read # to fetch code (actions/checkout)
-
-jobs:
-  # This runs all validation checks without tests.
-  checks:
-    name: gradle check -x test (JDK ${{ matrix.java }} on ${{ matrix.os }})
-    timeout-minutes: 15
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      matrix:
-        # Operating systems to run on.
-        os: [ubuntu-latest]
-        # Test JVMs.
-        java: [ '21' ]
-
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: ${{ matrix.java }}
-          java-package: jdk
-
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run gradle check (without tests)
-        run: ./gradlew check -x test -Ptask.times=true --max-workers 2
-
-  # This runs all tests without any other validation checks.
-  tests:
-    name: gradle test (JDK ${{ matrix.java }} on ${{ matrix.os }})
-    timeout-minutes: 30
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      matrix:
-        # Operating systems to run on.
-        # windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
-        # macos-latest: a tad slower than ubuntu and pretty much the same (?) so leaving out.
-        os: [ubuntu-latest]
-        # Test JVMs.
-        java: [ '21' ]
-
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: ${{ matrix.java }}
-          java-package: jdk
-
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run gradle tests
-        run: ./gradlew test "-Ptask.times=true" --max-workers 2
-
-      - name: Echo settings
-        run: cat gradle.properties
@@ -1,35 +0,0 @@
-name: Hunspell regression tests
-
-on:
-  pull_request:
-    branches:
-      - 'main'
-    paths:
-      - '.github/workflows/hunspell.yml'
-      - 'lucene/analysis/common/**'
-
-jobs:
-  test:
-    name: Run Hunspell regression tests
-    timeout-minutes: 15
-
-    runs-on: ubuntu-latest
-
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: 21
-          java-package: jdk
-
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run regular and regression tests
-        run: ./gradlew -p lucene/analysis/common check testRegressions
@@ -12,29 +12,29 @@ on:
 jobs:
   stale:

     runs-on: ubuntu-latest

     permissions:
       pull-requests: write

     steps:
-      - uses: actions/stale@v5
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
+      - uses: actions/stale@v5
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}

-          days-before-pr-stale: 14
-          days-before-issue-stale: -1 # don't mark issues as stale
-          exempt-draft-pr: true # don't mark draft PRs as stale
+          days-before-pr-stale: 14
+          days-before-issue-stale: -1 # don't mark issues as stale
+          exempt-draft-pr: true # don't mark draft PRs as stale

-          days-before-close: -1 # don't close stale PRs/issues
+          days-before-close: -1 # don't close stale PRs/issues

-          stale-pr-message: >
-            This PR has not had activity in the past 2 weeks, labeling it as stale.
-            If the PR is waiting for review, notify the dev@lucene.apache.org list.
-            Thank you for your contribution!
+          stale-pr-message: >
+            This PR has not had activity in the past 2 weeks, labeling it as stale.
+            If the PR is waiting for review, notify the dev@lucene.apache.org list.
+            Thank you for your contribution!

-          debug-only: false # turn on to run the action without applying changes
-          operations-per-run: 500 # operations budget
+          debug-only: false # turn on to run the action without applying changes
+          operations-per-run: 500 # operations budget

 # The table shows the cost in operations of all combinations of stale / not-stale for a PR.
 # Processing a non-PR issue takes 0 operations, since we don't perform any action on it.
@@ -0,0 +1,67 @@
+name: "Run checks: all modules"
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+      - '*'
+
+  push:
+    branches:
+      - 'main'
+      - 'branch_9x'
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+
+# We split the workflow into two parallel jobs for efficiency:
+# one is running all validation checks without tests,
+# the other runs all tests without other validation checks.
+
+jobs:
+
+  # This runs all validation checks without tests.
+  checks:
+    name: checks without tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
+    timeout-minutes: 15
+
+    strategy:
+      matrix:
+        os: [ ubuntu-latest ]
+        java: [ '17' ]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run gradle check (without tests)
+        run: ./gradlew check -x test -Ptask.times=true --max-workers 2
+
+
+  # This runs all tests without any other validation checks.
+  tests:
+    name: tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
+    timeout-minutes: 30
+
+    strategy:
+      matrix:
+        # Operating systems to run on.
+        # windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
+        # macos-latest: a tad slower than ubuntu and pretty much the same (?) so leaving out.
+        os: [ ubuntu-latest ]
+        java: [ '17' ]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run gradle tests
+        run: ./gradlew test "-Ptask.times=true" --max-workers 2
+
+      - name: List automatically-initialized gradle.properties
+        run: cat gradle.properties
@@ -0,0 +1,37 @@
+name: "Run checks: module lucene/analysis/common"
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+      - 'main'
+      - 'branch_9x'
+    paths:
+      - '.github/workflows/run-checks-mod-analysis-common.yml'
+      - 'lucene/analysis/common/**'
+
+  push:
+    branches:
+      - 'main'
+      - 'branch_9x'
+    paths:
+      - '.github/workflows/run-checks-mod-analysis-common.yml'
+      - 'lucene/analysis/common/**'
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+
+jobs:
+  test:
+    name: Extra regression tests
+    timeout-minutes: 15
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run 'gradlew lucene/analysis/common check testRegressions'
+        run: ./gradlew -p lucene/analysis/common check testRegressions
@@ -0,0 +1,36 @@
+name: "Run checks: module lucene/distribution.tests"
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+      - 'main'
+      - 'branch_9x'
+
+  push:
+    branches:
+      - 'main'
+      - 'branch_9x'
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+
+jobs:
+  test:
+    timeout-minutes: 15
+
+    strategy:
+      matrix:
+        # ubuntu-latest is checked as part of run-checks-everything.yml
+        # windows-latest is slow and sometimes flaky.
+        os: [ macos-latest ]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run 'gradlew lucene/distribution.tests test' (on ${{ matrix.os }})
+        run: ./gradlew -p lucene/distribution.tests test
@@ -23,23 +23,23 @@
     xmlns:asfext="http://projects.apache.org/ns/asfext#"
     xmlns:foaf="http://xmlns.com/foaf/0.1/">
   <!--
-    This file's canonical URL is: http://lucene.apache.org/core/doap.rdf
+    This file's canonical URL is: https://lucene.apache.org/core/doap.rdf

     Note that the canonical URL may redirect to other non-canonical locations.
   -->
-  <Project rdf:about="http://lucene.apache.org/core/">
+  <Project rdf:about="https://lucene.apache.org/core/">
     <created>2001-09-01</created>
     <license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
     <name>Apache Lucene Core</name>
-    <homepage rdf:resource="http://lucene.apache.org/core/" />
-    <asfext:pmc rdf:resource="http://lucene.apache.org" />
+    <homepage rdf:resource="https://lucene.apache.org/core/" />
+    <asfext:pmc rdf:resource="https://lucene.apache.org" />

     <shortdesc>Apache Lucene is a high-performance, full-featured text search engine library</shortdesc>
     <description>Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
     </description>
     <bug-database rdf:resource="https://github.com/apache/lucene/issues" />
-    <mailing-list rdf:resource="http://lucene.apache.org/core/discussion.html" />
-    <download-page rdf:resource="http://lucene.apache.org/core/downloads.html" />
+    <mailing-list rdf:resource="https://lucene.apache.org/core/discussion.html" />
+    <download-page rdf:resource="https://lucene.apache.org/core/downloads.html" />
     <programming-language>Java</programming-language>

   <!--
@@ -96,16 +96,15 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
   scriptutil.run('rm -rf %s' % bc_index_dir)
   print('done')

-def update_backcompat_tests(types, index_version, current_version):
-  print(' adding new indexes %s to backcompat tests...' % types, end='', flush=True)
+def update_backcompat_tests(index_version, current_version):
+  print(' adding new indexes to backcompat tests...', end='', flush=True)
   module = 'lucene/backward-codecs'
-  filename = '%s/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java' % module
+
+  filename = None
   if not current_version.is_back_compat_with(index_version):
-    matcher = re.compile(r'final String\[\] unsupportedNames = {|};')
-  elif 'sorted' in types:
-    matcher = re.compile(r'static final String\[\] oldSortedNames = {|};')
+    filename = '%s/src/test/org/apache/lucene/backward_index/unsupported_versions.txt' % module
   else:
-    matcher = re.compile(r'static final String\[\] oldNames = {|};')
+    filename = '%s/src/test/org/apache/lucene/backward_index/versions.txt' % module

   strip_dash_suffix_re = re.compile(r'-.*')
@@ -114,53 +113,25 @@ def update_backcompat_tests(types, index_version, current_version):
     x = re.sub(strip_dash_suffix_re, '', x) # remove the -suffix if any
     return scriptutil.Version.parse(x)

-  class Edit(object):
-    start = None
-    def __call__(self, buffer, match, line):
-      if self.start:
-        # find where this version should exist
-        i = len(buffer) - 1
-        previous_version_exists = not ('};' in line and buffer[-1].strip().endswith("{"))
-        if previous_version_exists: # Only look if there is a version here
-          v = find_version(buffer[i])
-          while i >= self.start and v.on_or_after(index_version):
-            i -= 1
-            v = find_version(buffer[i])
-          i += 1 # readjust since we skipped past by 1
+  def edit(buffer, match, line):
+    v = find_version(line)
+    changed = False
+    if v.on_or_after(index_version):
+      if not index_version.on_or_after(v):
+        buffer.append(('%s\n') % index_version)
+        changed = True
+    buffer.append(line)
+    return changed

-        # unfortunately python doesn't have a range remove from list...
-        # here we want to remove any previous references to the version we are adding
-        while i < len(buffer) and index_version.on_or_after(find_version(buffer[i])):
-          buffer.pop(i)
-
-        if i == len(buffer) and previous_version_exists and not buffer[-1].strip().endswith(","):
-          # add comma
-          buffer[-1] = buffer[-1].rstrip() + ",\n"
-
-        if previous_version_exists:
-          last = buffer[-1]
-          spaces = ' ' * (len(last) - len(last.lstrip()))
-        else:
-          spaces = ' '
-        for (j, t) in enumerate(types):
-          if t == 'sorted':
-            newline = spaces + ('"sorted.%s"') % index_version
-          else:
-            newline = spaces + ('"%s-%s"' % (index_version, t))
-          if j < len(types) - 1 or i < len(buffer):
-            newline += ','
-          buffer.insert(i, newline + '\n')
-          i += 1
-
-      buffer.append(line)
-      return True
-
-    if 'Names = {' in line:
-      self.start = len(buffer) # location of first index name
-    buffer.append(line)
-    return False
+  def append(buffer, changed):
+    if changed:
+      return changed
+    if not buffer[len(buffer)-1].endswith('\n'):
+      buffer.append('\n')
+    buffer.append(('%s\n') % index_version)
+    return True

-  changed = scriptutil.update_file(filename, matcher, Edit())
+  changed = scriptutil.update_file(filename, re.compile(r'.*'), edit, append)
   print('done' if changed else 'uptodate')

 def check_backcompat_tests():
@@ -251,9 +222,8 @@ def main():
   print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing')

   print('\nAdding backwards compatibility tests')
-  update_backcompat_tests(['cfs', 'nocfs'], c.version, current_version)
-  if should_make_sorted:
-    update_backcompat_tests(['sorted'], c.version, current_version)
+  update_backcompat_tests(c.version, current_version)

   print('\nTesting changes')
   check_backcompat_tests()
@@ -88,7 +88,7 @@ def run(cmd, cwd=None):
     raise e
   return output.decode('utf-8')

-def update_file(filename, line_re, edit):
+def update_file(filename, line_re, edit, append=None):
   infile = open(filename, 'r')
   buffer = []

@@ -102,6 +102,8 @@ def update_file(filename, line_re, edit):
         return False
       continue
     buffer.append(line)
+  if append:
+    changed = append(buffer, changed) # in the case did not change in edit but have an append function
   if not changed:
     raise Exception('Could not find %s in %s' % (line_re, filename))
   with open(filename, 'w') as f:
@@ -20,7 +20,7 @@ def resources = scriptResources(buildscript)
 configure(rootProject) {
   ext {
     // also change this in extractor tool: ExtractForeignAPI
-    vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21 ] as Set
+    vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21, JavaVersion.VERSION_22 ] as Set
   }
 }
@@ -28,7 +28,6 @@ configure(project(":lucene").subprojects) { prj ->

   spotless {
     java {
-      toggleOffOn() // obviously, only to be used sparingly.
       // TODO: Work out how to support multiple different header files (we have
       // classes in the codebase that have original headers). We currently use
       // Apache RAT to enforce headers so this is of lesser priority.
@@ -114,6 +114,8 @@ Improvements
 * GITHUB#12873: Expressions module now uses JEP 371 "Hidden Classes" with JEP 309
   "Dynamic Class-File Constants" to implement Javascript expressions. (Uwe Schindler)

+* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
+
 Optimizations
 ---------------------
@@ -176,6 +178,36 @@ Other

 * GITHUB#13001: Put Thread#sleep() on the list of forbidden APIs. (Shubham Chaudhary)

+======================== Lucene 9.11.0 =======================
+
+API Changes
+---------------------
+(No changes)
+
+New Features
+---------------------
+(No changes)
+
+Improvements
+---------------------
+
+* GITHUB#13092: `static final Map` constants have been made immutable (Dmitry Cherniachenko)
+
+* GITHUB#13041: TokenizedPhraseQueryNode code cleanup (Dmitry Cherniachenko)
+
+Optimizations
+---------------------
+(No changes)
+
+Bug Fixes
+---------------------
+(No changes)
+
+Other
+---------------------
+
+* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
+
 ======================== Lucene 9.10.0 =======================

 API Changes
@@ -200,6 +232,17 @@ New Features
 * GITHUB#12336: Index additional data per facet label in the taxonomy. (Shai Erera, Egor Potemkin, Mike McCandless,
   Stefan Vodita)

+* GITHUB#12706: Add support for the final release of Java foreign memory API in Java 22 (and later).
+  Lucene's MMapDirectory will now mmap Lucene indexes in chunks of 16 GiB (instead of 1 GiB) starting
+  from Java 19. Indexes closed while queries are running can no longer crash the JVM.
+  Support for vectorized implementations of VectorUtil based on jdk.incubator.vector APIs was added
+  for exactly Java 22. Therefore, applications started with command line parameter
+  "java --add-modules jdk.incubator.vector" will automatically use the new vectorized implementations
+  if running on a supported platform (Java 20/21/22 on x86 CPUs with AVX2 or later or ARM NEON CPUs).
+  This is an opt-in feature and requires explicit Java command line flag! When enabled, Lucene logs
+  a notice using java.util.logging. Please test thoroughly and report bugs/slowness to Lucene's mailing
+  list. (Uwe Schindler, Chris Hegarty)
+
 Improvements
 ---------------------
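As a hedged aside (not code from this commit; the demo class is hypothetical): the vectorization described in GITHUB#12706 is transparent to callers — the same VectorUtil call is used whether or not the JVM was started with the incubator module; only the implementation behind it changes.

```java
import org.apache.lucene.util.VectorUtil;

public class DotProductDemo {
  public static void main(String[] args) {
    float[] a = {0.1f, 0.2f, 0.3f, 0.4f};
    float[] b = {0.4f, 0.3f, 0.2f, 0.1f};
    // Run as:  java --add-modules jdk.incubator.vector DotProductDemo
    // to opt in to the vectorized implementation; the call site is identical.
    System.out.println("dot = " + VectorUtil.dotProduct(a, b));
  }
}
```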
@@ -219,8 +262,6 @@ Improvements
   Tests are running with random byte order to ensure that the order does not affect correctness
   of code. Native order was enabled for LZ4 compression. (Uwe Schindler)

-* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
-
 Optimizations
 ---------------------
@@ -19,6 +19,10 @@

 ## Migration from Lucene 9.x to Lucene 10.0

+### OpenNLP dependency upgrade
+
+[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using and be running on Java 17.
+
 ### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829)

 For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
@@ -147,12 +151,6 @@ may throw `IOException` on index problems, bubbling up unexpectedly to the calle
 `(Reverse)PathHierarchyTokenizer` now produces sequential (instead of overlapping) tokens with accurate
 offsets, making positional queries and highlighters possible for fields tokenized with this tokenizer.

-## Migration from Lucene 9.9 to Lucene 9.10
-
-### OpenNLP dependency upgrade
-
-[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using and be running on Java 17.
-
 ## Migration from Lucene 9.0 to Lucene 9.1

 ### Test framework package migration and module (LUCENE-10301)
@@ -59,11 +59,11 @@ public class MinHashFilter extends TokenFilter {

   private final List<List<FixedSizeTreeSet<LongPair>>> minHashSets;

-  private int hashSetSize = DEFAULT_HASH_SET_SIZE;
+  private final int hashSetSize;

-  private int bucketCount = DEFAULT_BUCKET_COUNT;
+  private final int bucketCount;

-  private int hashCount = DEFAULT_HASH_COUNT;
+  private final int hashCount;

   private boolean requiresInitialisation = true;
@@ -32,13 +32,13 @@ public class MinHashFilterFactory extends TokenFilterFactory {
   /** SPI name */
   public static final String NAME = "minHash";

-  private int hashCount = MinHashFilter.DEFAULT_HASH_COUNT;
+  private final int hashCount;

-  private int bucketCount = MinHashFilter.DEFAULT_BUCKET_COUNT;
+  private final int bucketCount;

-  private int hashSetSize = MinHashFilter.DEFAULT_HASH_SET_SIZE;
+  private final int hashSetSize;

-  private boolean withRotation;
+  private final boolean withRotation;

   /** Create a {@link MinHashFilterFactory}. */
   public MinHashFilterFactory(Map<String, String> args) {
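For orientation, a sketch of the idiom the change above enables — every configuration field assigned exactly once in the constructor. The factory class below is hypothetical (not the Lucene source); the `getInt`/`getBoolean` helpers and the `MinHashFilter` constructor shape are assumptions based on the names visible in the diff.

```java
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.minhash.MinHashFilter;

public class MyMinHashFilterFactory extends TokenFilterFactory {
  // final: the configuration is frozen once the constructor returns.
  private final int hashCount;
  private final int bucketCount;
  private final int hashSetSize;
  private final boolean withRotation;

  public MyMinHashFilterFactory(Map<String, String> args) {
    super(args);
    hashCount = getInt(args, "hashCount", MinHashFilter.DEFAULT_HASH_COUNT);
    bucketCount = getInt(args, "bucketCount", MinHashFilter.DEFAULT_BUCKET_COUNT);
    hashSetSize = getInt(args, "hashSetSize", MinHashFilter.DEFAULT_HASH_SET_SIZE);
    withRotation = getBoolean(args, "withRotation", bucketCount > 1);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  @Override
  public TokenStream create(TokenStream input) {
    return new MinHashFilter(input, hashCount, bucketCount, hashSetSize, withRotation);
  }

  public static void main(String[] unused) {
    Map<String, String> args = new HashMap<>();
    args.put("hashCount", "1");
    new MyMinHashFilterFactory(args); // parses, validates, and freezes the config
  }
}
```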
@@ -67,7 +67,7 @@ public class WordDelimiterGraphFilterFactory extends TokenFilterFactory
   private final int flags;
   byte[] typeTable = null;
   private CharArraySet protectedWords = null;
-  private boolean adjustOffsets = false;
+  private final boolean adjustOffsets;

   /** Creates a new WordDelimiterGraphFilterFactory */
   public WordDelimiterGraphFilterFactory(Map<String, String> args) {
@@ -89,7 +89,7 @@ public final class DutchAnalyzer extends Analyzer {
   private final CharArraySet stoptable;

   /** Contains words that should be indexed but not stemmed. */
-  private CharArraySet excltable = CharArraySet.EMPTY_SET;
+  private final CharArraySet excltable;

   private final StemmerOverrideMap stemdict;
@@ -41,8 +41,8 @@ public class PatternCaptureGroupFilterFactory extends TokenFilterFactory {
   /** SPI name */
   public static final String NAME = "patternCaptureGroup";

-  private Pattern pattern;
-  private boolean preserveOriginal = true;
+  private final Pattern pattern;
+  private final boolean preserveOriginal;

   public PatternCaptureGroupFilterFactory(Map<String, String> args) {
     super(args);
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.shingle;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.Objects;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -175,7 +176,7 @@ public final class ShingleFilter extends TokenFilter {
    * @param tokenType token tokenType
    */
   public void setTokenType(String tokenType) {
-    this.tokenType = tokenType;
+    this.tokenType = Objects.requireNonNull(tokenType, "tokenType");
   }

   /**
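A small hedged illustration of what the `Objects.requireNonNull` change buys (hypothetical class): a null argument now fails immediately at the call site with the message "tokenType", instead of surfacing later as an anonymous NullPointerException while tokens are being emitted.

```java
import java.util.Objects;

class TokenTypeHolder {
  private String tokenType = "shingle";

  void setTokenType(String tokenType) {
    // Fail fast, with the parameter name in the exception message.
    this.tokenType = Objects.requireNonNull(tokenType, "tokenType");
  }

  public static void main(String[] args) {
    TokenTypeHolder h = new TokenTypeHolder();
    h.setTokenType("gram"); // fine
    h.setTokenType(null);   // throws NullPointerException: tokenType
  }
}
```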
@@ -114,7 +114,7 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
    * /箱根山-箱根/成田空港-成田/ requests "箱根" and "成田" to be in the result in NBEST output.
    */
   private final String nbestExamples;
-  private int nbestCost = -1;
+  private int nbestCost;

   /** Creates a new JapaneseTokenizerFactory */
   public JapaneseTokenizerFactory(Map<String, String> args) {
@@ -17,103 +17,107 @@
 package org.apache.lucene.analysis.ja.dict;

 import java.io.IOException;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.Map;

 /** Utility class for english translations of morphological data, used only for debugging. */
 public class ToStringUtil {
   // a translation map for parts of speech, only used for reflectWith
-  private static final HashMap<String, String> posTranslations = new HashMap<>();
+  private static final Map<String, String> posTranslations;

   static {
-    posTranslations.put("名詞", "noun");
-    posTranslations.put("名詞-一般", "noun-common");
-    posTranslations.put("名詞-固有名詞", "noun-proper");
-    posTranslations.put("名詞-固有名詞-一般", "noun-proper-misc");
-    posTranslations.put("名詞-固有名詞-人名", "noun-proper-person");
-    posTranslations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
-    posTranslations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
-    posTranslations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
-    posTranslations.put("名詞-固有名詞-組織", "noun-proper-organization");
-    posTranslations.put("名詞-固有名詞-地域", "noun-proper-place");
-    posTranslations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
-    posTranslations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
-    posTranslations.put("名詞-代名詞", "noun-pronoun");
-    posTranslations.put("名詞-代名詞-一般", "noun-pronoun-misc");
-    posTranslations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
-    posTranslations.put("名詞-副詞可能", "noun-adverbial");
-    posTranslations.put("名詞-サ変接続", "noun-verbal");
-    posTranslations.put("名詞-形容動詞語幹", "noun-adjective-base");
-    posTranslations.put("名詞-数", "noun-numeric");
-    posTranslations.put("名詞-非自立", "noun-affix");
-    posTranslations.put("名詞-非自立-一般", "noun-affix-misc");
-    posTranslations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
-    posTranslations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
-    posTranslations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
-    posTranslations.put("名詞-特殊", "noun-special");
-    posTranslations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
-    posTranslations.put("名詞-接尾", "noun-suffix");
-    posTranslations.put("名詞-接尾-一般", "noun-suffix-misc");
-    posTranslations.put("名詞-接尾-人名", "noun-suffix-person");
-    posTranslations.put("名詞-接尾-地域", "noun-suffix-place");
-    posTranslations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
-    posTranslations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
-    posTranslations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
-    posTranslations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
-    posTranslations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
-    posTranslations.put("名詞-接尾-特殊", "noun-suffix-special");
-    posTranslations.put("名詞-接続詞的", "noun-suffix-conjunctive");
-    posTranslations.put("名詞-動詞非自立的", "noun-verbal_aux");
-    posTranslations.put("名詞-引用文字列", "noun-quotation");
-    posTranslations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
-    posTranslations.put("接頭詞", "prefix");
-    posTranslations.put("接頭詞-名詞接続", "prefix-nominal");
-    posTranslations.put("接頭詞-動詞接続", "prefix-verbal");
-    posTranslations.put("接頭詞-形容詞接続", "prefix-adjectival");
-    posTranslations.put("接頭詞-数接続", "prefix-numerical");
-    posTranslations.put("動詞", "verb");
-    posTranslations.put("動詞-自立", "verb-main");
-    posTranslations.put("動詞-非自立", "verb-auxiliary");
-    posTranslations.put("動詞-接尾", "verb-suffix");
-    posTranslations.put("形容詞", "adjective");
-    posTranslations.put("形容詞-自立", "adjective-main");
-    posTranslations.put("形容詞-非自立", "adjective-auxiliary");
-    posTranslations.put("形容詞-接尾", "adjective-suffix");
-    posTranslations.put("副詞", "adverb");
-    posTranslations.put("副詞-一般", "adverb-misc");
-    posTranslations.put("副詞-助詞類接続", "adverb-particle_conjunction");
-    posTranslations.put("連体詞", "adnominal");
-    posTranslations.put("接続詞", "conjunction");
-    posTranslations.put("助詞", "particle");
-    posTranslations.put("助詞-格助詞", "particle-case");
-    posTranslations.put("助詞-格助詞-一般", "particle-case-misc");
-    posTranslations.put("助詞-格助詞-引用", "particle-case-quote");
-    posTranslations.put("助詞-格助詞-連語", "particle-case-compound");
-    posTranslations.put("助詞-接続助詞", "particle-conjunctive");
-    posTranslations.put("助詞-係助詞", "particle-dependency");
-    posTranslations.put("助詞-副助詞", "particle-adverbial");
-    posTranslations.put("助詞-間投助詞", "particle-interjective");
-    posTranslations.put("助詞-並立助詞", "particle-coordinate");
-    posTranslations.put("助詞-終助詞", "particle-final");
-    posTranslations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
-    posTranslations.put("助詞-連体化", "particle-adnominalizer");
-    posTranslations.put("助詞-副詞化", "particle-adnominalizer");
-    posTranslations.put("助詞-特殊", "particle-special");
-    posTranslations.put("助動詞", "auxiliary-verb");
-    posTranslations.put("感動詞", "interjection");
-    posTranslations.put("記号", "symbol");
-    posTranslations.put("記号-一般", "symbol-misc");
-    posTranslations.put("記号-句点", "symbol-period");
-    posTranslations.put("記号-読点", "symbol-comma");
-    posTranslations.put("記号-空白", "symbol-space");
-    posTranslations.put("記号-括弧開", "symbol-open_bracket");
-    posTranslations.put("記号-括弧閉", "symbol-close_bracket");
-    posTranslations.put("記号-アルファベット", "symbol-alphabetic");
-    posTranslations.put("その他", "other");
-    posTranslations.put("その他-間投", "other-interjection");
-    posTranslations.put("フィラー", "filler");
-    posTranslations.put("非言語音", "non-verbal");
-    posTranslations.put("語断片", "fragment");
-    posTranslations.put("未知語", "unknown");
+    Map<String, String> translations = new HashMap<>();
+    translations.put("名詞", "noun");
+    translations.put("名詞-一般", "noun-common");
+    translations.put("名詞-固有名詞", "noun-proper");
+    translations.put("名詞-固有名詞-一般", "noun-proper-misc");
+    translations.put("名詞-固有名詞-人名", "noun-proper-person");
+    translations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
+    translations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
+    translations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
+    translations.put("名詞-固有名詞-組織", "noun-proper-organization");
+    translations.put("名詞-固有名詞-地域", "noun-proper-place");
+    translations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
+    translations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
+    translations.put("名詞-代名詞", "noun-pronoun");
+    translations.put("名詞-代名詞-一般", "noun-pronoun-misc");
+    translations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
+    translations.put("名詞-副詞可能", "noun-adverbial");
+    translations.put("名詞-サ変接続", "noun-verbal");
+    translations.put("名詞-形容動詞語幹", "noun-adjective-base");
+    translations.put("名詞-数", "noun-numeric");
+    translations.put("名詞-非自立", "noun-affix");
+    translations.put("名詞-非自立-一般", "noun-affix-misc");
+    translations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
+    translations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
+    translations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
+    translations.put("名詞-特殊", "noun-special");
+    translations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
+    translations.put("名詞-接尾", "noun-suffix");
+    translations.put("名詞-接尾-一般", "noun-suffix-misc");
+    translations.put("名詞-接尾-人名", "noun-suffix-person");
+    translations.put("名詞-接尾-地域", "noun-suffix-place");
+    translations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
+    translations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
+    translations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
+    translations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
+    translations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
+    translations.put("名詞-接尾-特殊", "noun-suffix-special");
+    translations.put("名詞-接続詞的", "noun-suffix-conjunctive");
+    translations.put("名詞-動詞非自立的", "noun-verbal_aux");
+    translations.put("名詞-引用文字列", "noun-quotation");
+    translations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
+    translations.put("接頭詞", "prefix");
+    translations.put("接頭詞-名詞接続", "prefix-nominal");
+    translations.put("接頭詞-動詞接続", "prefix-verbal");
+    translations.put("接頭詞-形容詞接続", "prefix-adjectival");
+    translations.put("接頭詞-数接続", "prefix-numerical");
+    translations.put("動詞", "verb");
+    translations.put("動詞-自立", "verb-main");
+    translations.put("動詞-非自立", "verb-auxiliary");
+    translations.put("動詞-接尾", "verb-suffix");
+    translations.put("形容詞", "adjective");
+    translations.put("形容詞-自立", "adjective-main");
+    translations.put("形容詞-非自立", "adjective-auxiliary");
+    translations.put("形容詞-接尾", "adjective-suffix");
+    translations.put("副詞", "adverb");
+    translations.put("副詞-一般", "adverb-misc");
+    translations.put("副詞-助詞類接続", "adverb-particle_conjunction");
+    translations.put("連体詞", "adnominal");
+    translations.put("接続詞", "conjunction");
+    translations.put("助詞", "particle");
+    translations.put("助詞-格助詞", "particle-case");
+    translations.put("助詞-格助詞-一般", "particle-case-misc");
+    translations.put("助詞-格助詞-引用", "particle-case-quote");
+    translations.put("助詞-格助詞-連語", "particle-case-compound");
+    translations.put("助詞-接続助詞", "particle-conjunctive");
+    translations.put("助詞-係助詞", "particle-dependency");
+    translations.put("助詞-副助詞", "particle-adverbial");
+    translations.put("助詞-間投助詞", "particle-interjective");
+    translations.put("助詞-並立助詞", "particle-coordinate");
+    translations.put("助詞-終助詞", "particle-final");
+    translations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
+    translations.put("助詞-連体化", "particle-adnominalizer");
+    translations.put("助詞-副詞化", "particle-adnominalizer");
+    translations.put("助詞-特殊", "particle-special");
+    translations.put("助動詞", "auxiliary-verb");
+    translations.put("感動詞", "interjection");
+    translations.put("記号", "symbol");
+    translations.put("記号-一般", "symbol-misc");
+    translations.put("記号-句点", "symbol-period");
+    translations.put("記号-読点", "symbol-comma");
+    translations.put("記号-空白", "symbol-space");
+    translations.put("記号-括弧開", "symbol-open_bracket");
+    translations.put("記号-括弧閉", "symbol-close_bracket");
+    translations.put("記号-アルファベット", "symbol-alphabetic");
+    translations.put("その他", "other");
+    translations.put("その他-間投", "other-interjection");
+    translations.put("フィラー", "filler");
+    translations.put("非言語音", "non-verbal");
+    translations.put("語断片", "fragment");
+    translations.put("未知語", "unknown");
+    posTranslations = Collections.unmodifiableMap(translations);
   }

   /** Get the english form of a POS tag */
@@ -122,67 +126,69 @@ public class ToStringUtil {
   }

   // a translation map for inflection types, only used for reflectWith
-  private static final HashMap<String, String> inflTypeTranslations = new HashMap<>();
+  private static final Map<String, String> inflTypeTranslations;

   static {
-    inflTypeTranslations.put("*", "*");
-    inflTypeTranslations.put("形容詞・アウオ段", "adj-group-a-o-u");
-    inflTypeTranslations.put("形容詞・イ段", "adj-group-i");
-    inflTypeTranslations.put("形容詞・イイ", "adj-group-ii");
-    inflTypeTranslations.put("不変化型", "non-inflectional");
-    inflTypeTranslations.put("特殊・タ", "special-da");
-    inflTypeTranslations.put("特殊・ダ", "special-ta");
-    inflTypeTranslations.put("文語・ゴトシ", "classical-gotoshi");
-    inflTypeTranslations.put("特殊・ジャ", "special-ja");
-    inflTypeTranslations.put("特殊・ナイ", "special-nai");
-    inflTypeTranslations.put("五段・ラ行特殊", "5-row-cons-r-special");
-    inflTypeTranslations.put("特殊・ヌ", "special-nu");
-    inflTypeTranslations.put("文語・キ", "classical-ki");
-    inflTypeTranslations.put("特殊・タイ", "special-tai");
-    inflTypeTranslations.put("文語・ベシ", "classical-beshi");
-    inflTypeTranslations.put("特殊・ヤ", "special-ya");
-    inflTypeTranslations.put("文語・マジ", "classical-maji");
-    inflTypeTranslations.put("下二・タ行", "2-row-lower-cons-t");
-    inflTypeTranslations.put("特殊・デス", "special-desu");
-    inflTypeTranslations.put("特殊・マス", "special-masu");
-    inflTypeTranslations.put("五段・ラ行アル", "5-row-aru");
-    inflTypeTranslations.put("文語・ナリ", "classical-nari");
-    inflTypeTranslations.put("文語・リ", "classical-ri");
-    inflTypeTranslations.put("文語・ケリ", "classical-keri");
-    inflTypeTranslations.put("文語・ル", "classical-ru");
-    inflTypeTranslations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
-    inflTypeTranslations.put("五段・サ行", "5-row-cons-s");
-    inflTypeTranslations.put("一段", "1-row");
-    inflTypeTranslations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
-    inflTypeTranslations.put("五段・マ行", "5-row-cons-m");
-    inflTypeTranslations.put("五段・タ行", "5-row-cons-t");
-    inflTypeTranslations.put("五段・ラ行", "5-row-cons-r");
-    inflTypeTranslations.put("サ変・−スル", "irregular-suffix-suru");
-    inflTypeTranslations.put("五段・ガ行", "5-row-cons-g");
-    inflTypeTranslations.put("サ変・−ズル", "irregular-suffix-zuru");
-    inflTypeTranslations.put("五段・バ行", "5-row-cons-b");
-    inflTypeTranslations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
-    inflTypeTranslations.put("下二・ダ行", "2-row-lower-cons-d");
-    inflTypeTranslations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
-    inflTypeTranslations.put("上二・ダ行", "2-row-upper-cons-d");
-    inflTypeTranslations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
-    inflTypeTranslations.put("一段・得ル", "1-row-eru");
-    inflTypeTranslations.put("四段・タ行", "4-row-cons-t");
-    inflTypeTranslations.put("五段・ナ行", "5-row-cons-n");
-    inflTypeTranslations.put("下二・ハ行", "2-row-lower-cons-h");
-    inflTypeTranslations.put("四段・ハ行", "4-row-cons-h");
-    inflTypeTranslations.put("四段・バ行", "4-row-cons-b");
-    inflTypeTranslations.put("サ変・スル", "irregular-suru");
-    inflTypeTranslations.put("上二・ハ行", "2-row-upper-cons-h");
-    inflTypeTranslations.put("下二・マ行", "2-row-lower-cons-m");
-    inflTypeTranslations.put("四段・サ行", "4-row-cons-s");
-    inflTypeTranslations.put("下二・ガ行", "2-row-lower-cons-g");
-    inflTypeTranslations.put("カ変・来ル", "kuru-kanji");
-    inflTypeTranslations.put("一段・クレル", "1-row-kureru");
-    inflTypeTranslations.put("下二・得", "2-row-lower-u");
-    inflTypeTranslations.put("カ変・クル", "kuru-kana");
-    inflTypeTranslations.put("ラ変", "irregular-cons-r");
-    inflTypeTranslations.put("下二・カ行", "2-row-lower-cons-k");
+    Map<String, String> translations = new HashMap<>();
+    translations.put("*", "*");
+    translations.put("形容詞・アウオ段", "adj-group-a-o-u");
+    translations.put("形容詞・イ段", "adj-group-i");
+    translations.put("形容詞・イイ", "adj-group-ii");
+    translations.put("不変化型", "non-inflectional");
+    translations.put("特殊・タ", "special-da");
+    translations.put("特殊・ダ", "special-ta");
+    translations.put("文語・ゴトシ", "classical-gotoshi");
+    translations.put("特殊・ジャ", "special-ja");
+    translations.put("特殊・ナイ", "special-nai");
+    translations.put("五段・ラ行特殊", "5-row-cons-r-special");
+    translations.put("特殊・ヌ", "special-nu");
+    translations.put("文語・キ", "classical-ki");
+    translations.put("特殊・タイ", "special-tai");
+    translations.put("文語・ベシ", "classical-beshi");
+    translations.put("特殊・ヤ", "special-ya");
+    translations.put("文語・マジ", "classical-maji");
+    translations.put("下二・タ行", "2-row-lower-cons-t");
+    translations.put("特殊・デス", "special-desu");
+    translations.put("特殊・マス", "special-masu");
+    translations.put("五段・ラ行アル", "5-row-aru");
+    translations.put("文語・ナリ", "classical-nari");
+    translations.put("文語・リ", "classical-ri");
+    translations.put("文語・ケリ", "classical-keri");
+    translations.put("文語・ル", "classical-ru");
+    translations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
+    translations.put("五段・サ行", "5-row-cons-s");
+    translations.put("一段", "1-row");
+    translations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
+    translations.put("五段・マ行", "5-row-cons-m");
+    translations.put("五段・タ行", "5-row-cons-t");
+    translations.put("五段・ラ行", "5-row-cons-r");
+    translations.put("サ変・−スル", "irregular-suffix-suru");
+    translations.put("五段・ガ行", "5-row-cons-g");
+    translations.put("サ変・−ズル", "irregular-suffix-zuru");
+    translations.put("五段・バ行", "5-row-cons-b");
+    translations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
+    translations.put("下二・ダ行", "2-row-lower-cons-d");
+    translations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
+    translations.put("上二・ダ行", "2-row-upper-cons-d");
+    translations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
+    translations.put("一段・得ル", "1-row-eru");
+    translations.put("四段・タ行", "4-row-cons-t");
+    translations.put("五段・ナ行", "5-row-cons-n");
+    translations.put("下二・ハ行", "2-row-lower-cons-h");
+    translations.put("四段・ハ行", "4-row-cons-h");
+    translations.put("四段・バ行", "4-row-cons-b");
+    translations.put("サ変・スル", "irregular-suru");
+    translations.put("上二・ハ行", "2-row-upper-cons-h");
+    translations.put("下二・マ行", "2-row-lower-cons-m");
+    translations.put("四段・サ行", "4-row-cons-s");
+    translations.put("下二・ガ行", "2-row-lower-cons-g");
+    translations.put("カ変・来ル", "kuru-kanji");
+    translations.put("一段・クレル", "1-row-kureru");
+    translations.put("下二・得", "2-row-lower-u");
+    translations.put("カ変・クル", "kuru-kana");
+    translations.put("ラ変", "irregular-cons-r");
+    translations.put("下二・カ行", "2-row-lower-cons-k");
+    inflTypeTranslations = Collections.unmodifiableMap(translations);
   }

   /** Get the english form of inflection type */
@@ -191,37 +197,39 @@ public class ToStringUtil {
   }

   // a translation map for inflection forms, only used for reflectWith
-  private static final HashMap<String, String> inflFormTranslations = new HashMap<>();
+  private static final Map<String, String> inflFormTranslations;

   static {
-    inflFormTranslations.put("*", "*");
-    inflFormTranslations.put("基本形", "base");
-    inflFormTranslations.put("文語基本形", "classical-base");
-    inflFormTranslations.put("未然ヌ接続", "imperfective-nu-connection");
-    inflFormTranslations.put("未然ウ接続", "imperfective-u-connection");
-    inflFormTranslations.put("連用タ接続", "conjunctive-ta-connection");
-    inflFormTranslations.put("連用テ接続", "conjunctive-te-connection");
-    inflFormTranslations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
-    inflFormTranslations.put("体言接続", "uninflected-connection");
-    inflFormTranslations.put("仮定形", "subjunctive");
-    inflFormTranslations.put("命令e", "imperative-e");
-    inflFormTranslations.put("仮定縮約1", "conditional-contracted-1");
-    inflFormTranslations.put("仮定縮約2", "conditional-contracted-2");
-    inflFormTranslations.put("ガル接続", "garu-connection");
-    inflFormTranslations.put("未然形", "imperfective");
-    inflFormTranslations.put("連用形", "conjunctive");
-    inflFormTranslations.put("音便基本形", "onbin-base");
-    inflFormTranslations.put("連用デ接続", "conjunctive-de-connection");
-    inflFormTranslations.put("未然特殊", "imperfective-special");
-    inflFormTranslations.put("命令i", "imperative-i");
-    inflFormTranslations.put("連用ニ接続", "conjunctive-ni-connection");
-    inflFormTranslations.put("命令yo", "imperative-yo");
-    inflFormTranslations.put("体言接続特殊", "adnominal-special");
-    inflFormTranslations.put("命令ro", "imperative-ro");
-    inflFormTranslations.put("体言接続特殊2", "uninflected-special-connection-2");
-    inflFormTranslations.put("未然レル接続", "imperfective-reru-connection");
-    inflFormTranslations.put("現代基本形", "modern-base");
-    inflFormTranslations.put("基本形-促音便", "base-onbin"); // not sure about this
+    Map<String, String> translations = new HashMap<>();
+    translations.put("*", "*");
+    translations.put("基本形", "base");
+    translations.put("文語基本形", "classical-base");
+    translations.put("未然ヌ接続", "imperfective-nu-connection");
+    translations.put("未然ウ接続", "imperfective-u-connection");
+    translations.put("連用タ接続", "conjunctive-ta-connection");
+    translations.put("連用テ接続", "conjunctive-te-connection");
+    translations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
+    translations.put("体言接続", "uninflected-connection");
+    translations.put("仮定形", "subjunctive");
+    translations.put("命令e", "imperative-e");
+    translations.put("仮定縮約1", "conditional-contracted-1");
+    translations.put("仮定縮約2", "conditional-contracted-2");
+    translations.put("ガル接続", "garu-connection");
+    translations.put("未然形", "imperfective");
+    translations.put("連用形", "conjunctive");
+    translations.put("音便基本形", "onbin-base");
+    translations.put("連用デ接続", "conjunctive-de-connection");
+    translations.put("未然特殊", "imperfective-special");
+    translations.put("命令i", "imperative-i");
+    translations.put("連用ニ接続", "conjunctive-ni-connection");
+    translations.put("命令yo", "imperative-yo");
+    translations.put("体言接続特殊", "adnominal-special");
+    translations.put("命令ro", "imperative-ro");
+    translations.put("体言接続特殊2", "uninflected-special-connection-2");
+    translations.put("未然レル接続", "imperfective-reru-connection");
+    translations.put("現代基本形", "modern-base");
+    translations.put("基本形-促音便", "base-onbin"); // not sure about this
+    inflFormTranslations = Collections.unmodifiableMap(translations);
   }

   /** Get the english form of inflected form */
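The refactor above applies one pattern three times: populate a local HashMap inside the static initializer, then publish only an unmodifiable view through the `static final` field. In miniature (a sketch, not the Lucene source; on Java 10+ `Map.copyOf(m)` achieves the same):

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class ImmutableMapPattern {
  private static final Map<String, String> TRANSLATIONS;

  static {
    Map<String, String> m = new HashMap<>(); // mutable only inside this block
    m.put("名詞", "noun");
    m.put("動詞", "verb");
    TRANSLATIONS = Collections.unmodifiableMap(m);
  }

  public static void main(String[] args) {
    System.out.println(TRANSLATIONS.get("名詞")); // noun
    // TRANSLATIONS.put("x", "y") would throw UnsupportedOperationException.
  }
}
```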
@@ -44,7 +44,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
   private int sentenceStart = 0;
   private int sentenceIndex = -1;

-  private NLPTokenizerOp tokenizerOp = null;
+  private final NLPTokenizerOp tokenizerOp;

   public OpenNLPTokenizer(
       AttributeFactory factory, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp)
@@ -23,7 +23,7 @@ import opennlp.tools.chunker.ChunkerModel;

 /** Supply OpenNLP Chunking tool Requires binary models from OpenNLP project on SourceForge. */
 public class NLPChunkerOp {
-  private ChunkerME chunker = null;
+  private final ChunkerME chunker;

   public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
     chunker = new ChunkerME(chunkerModel);
@@ -27,7 +27,7 @@ import opennlp.tools.postag.POSTaggerME;
  * SourceForge.
  */
 public class NLPPOSTaggerOp {
-  private POSTagger tagger = null;
+  private final POSTagger tagger;

   public NLPPOSTaggerOp(POSModel model) throws IOException {
     tagger = new POSTaggerME(model);
@@ -32,10 +32,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  */
 public final class DaitchMokotoffSoundexFilter extends TokenFilter {
   /** true if encoded tokens should be added as synonyms */
-  protected boolean inject = true;
+  private final boolean inject;

   /** phonetic encoder */
-  protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
+  private final DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();

   // output is a string such as ab|ac|...
   private static final Pattern pattern = Pattern.compile("([^|]+)");
@@ -32,13 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  */
 public final class PhoneticFilter extends TokenFilter {
   /** true if encoded tokens should be added as synonyms */
-  protected boolean inject = true;
+  private final boolean inject;

   /** phonetic encoder */
-  protected Encoder encoder = null;
+  private final Encoder encoder;

   /** captured state, non-null when <code>inject=true</code> and a token is buffered */
-  protected State save = null;
+  private State save = null;

   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
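For orientation, a hedged usage sketch. The constructor shape `PhoneticFilter(TokenStream, Encoder, boolean inject)` is an assumption inferred from the fields above, and the demo class is hypothetical; `Metaphone` comes from Apache commons-codec. With `inject=true` the filter emits the phonetic code as a synonym of each token, which is why `save` (the buffered state) stays mutable while the configuration fields become final.

```java
import java.io.StringReader;
import org.apache.commons.codec.language.Metaphone;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PhoneticDemo {
  public static void main(String[] args) throws Exception {
    WhitespaceTokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("smith smyth"));
    TokenStream ts = new PhoneticFilter(tok, new Metaphone(), true);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term); // original tokens interleaved with phonetic codes
    }
    ts.end();
    ts.close();
  }
}
```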
@@ -73,7 +73,7 @@ public class Trie {
   List<CharSequence> cmds = new ArrayList<>();
   int root;

-  boolean forward = false;
+  boolean forward;

   /**
    * Constructor for the Trie object.
@@ -191,7 +191,7 @@ public final class FieldReader extends Terms {
   @Override
   public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
     // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
-    // BlockTreeTermsWriter.brToString(startTerm));
+    // ToStringUtils.bytesRefToString(startTerm));
     // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
     // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
     // can we optimize knowing that...?
@@ -543,19 +543,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
     }
   }

-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
   private void copyTerm() {
     final int len = currentFrame.prefix + currentFrame.suffix;
     if (term.bytes.length < len) {
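Every deleted `brToString` copy in this commit shares the shape above; per the GITHUB#13068 changelog entry they are replaced by a single `ToStringUtils` method. A minimal sketch reconstructed from the deleted code — the actual consolidated method may differ in detail:

```java
import org.apache.lucene.util.BytesRef;

final class BytesRefDebug {
  // Prefer the human-readable UTF-8 form; if the bytes are not valid UTF-8
  // (e.g. a prefix that ends mid-codepoint), fall back to BytesRef#toString(),
  // which prints hex.
  static String bytesRefToString(BytesRef b) {
    if (b == null) {
      return "null";
    }
    try {
      return b.utf8ToString() + " " + b;
    } catch (Throwable t) {
      return b.toString();
    }
  }
}
```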
@@ -354,24 +354,6 @@ public final class Lucene40BlockTreeTermsReader extends FieldsProducer {
     return fieldMap.size();
   }

-  // for debugging
-  String brToString(BytesRef b) {
-    if (b == null) {
-      return "null";
-    } else {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (
-          @SuppressWarnings("unused")
-          Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-  }
-
   @Override
   public void checkIntegrity() throws IOException {
     // terms index
@@ -256,8 +256,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
     f.arc = arc;
     if (f.fpOrig == fp && f.nextEnt != -1) {
-      // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
-      // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+      // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
+      // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
       // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
       // term.length + " vs prefix=" + f.prefix);
       // if (f.prefix > targetBeforeCurrentLength) {
@@ -279,7 +279,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       // final int sav = term.length;
       // term.length = length;
       // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-      // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+      // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
       // term.length = sav;
       // }
     }
@@ -299,27 +299,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     return true;
   }

-  /*
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRefBuilder b) {
-    return brToString(b.get());
-  }
-  */
-
   @Override
   public boolean seekExact(BytesRef target) throws IOException {

@@ -337,8 +316,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {

     // if (DEBUG) {
     // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
-    // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
-    // + termExists + ") validIndexPrefix=" + validIndexPrefix);
+    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+    // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
+    // ") validIndexPrefix=" + validIndexPrefix);
     // printSeekState(System.out);
     // }

@ -496,8 +476,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -528,7 +508,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
term.setLength(1 + targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -544,7 +524,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -587,7 +567,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
termExists = false;
|
||||
term.setLength(targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -623,7 +603,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
|
||||
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
|
||||
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) +
|
||||
// " current=" + ToStringUtils.bytesRefToString(term)
|
||||
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
@ -667,9 +648,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
|
||||
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
|
||||
// " output=" + output);
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
|
||||
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
|
||||
// + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -781,8 +762,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -818,7 +799,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (next() != null) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" return NOT_FOUND term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -829,7 +811,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return " + result + " term=" + brToString(term));
|
||||
// System.out.println(" return " + result + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return result;
|
||||
}
|
||||
|
@ -1029,9 +1012,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
assert !eof;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
|
||||
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
|
||||
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
|
||||
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
|
||||
// " validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -1095,8 +1079,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// try to scan to the right floor frame:
|
||||
currentFrame.loadBlock();
|
||||
} else {
|
||||
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
|
||||
// + currentFrame.ord);
|
||||
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
|
||||
// " currentFrame.ord=" + currentFrame.ord);
|
||||
return term.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -317,8 +317,8 @@ final class SegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
public void nextLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -410,8 +410,8 @@ final class SegmentTermsEnumFrame {
|
|||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
|
||||
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
|
||||
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
|
@ -566,28 +566,14 @@ final class SegmentTermsEnumFrame {
|
|||
private long subCode;
|
||||
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
|
||||
|
||||
// for debugging
|
||||
/*
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Target's prefix matches this block's prefix; we
|
||||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
|
||||
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
|
||||
// brToString(term));
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -617,7 +603,7 @@ final class SegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
|
||||
// + brToString(suffixBytesRef));
|
||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
|
@ -682,8 +668,9 @@ final class SegmentTermsEnumFrame {
|
|||
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
|
||||
// brToString(target));
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -711,7 +698,8 @@ final class SegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
|
@ -743,8 +731,8 @@ final class SegmentTermsEnumFrame {
|
|||
// return NOT_FOUND:
|
||||
fillTerm();
|
||||
|
||||
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
|
||||
// ste.termExists=" + ste.termExists);
|
||||
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
|
||||
// " ste.termExists=" + ste.termExists);
|
||||
|
||||
if (!exactOnly && !ste.termExists) {
|
||||
// System.out.println(" now pushFrame");
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.compress.LZ4;
|
||||
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
|
@ -349,7 +350,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
|
||||
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
termsWriter.write(term, termsEnum, norms);
|
||||
}
|
||||
|
||||
|
@ -388,33 +389,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TERM: " + brToString(termBytes);
|
||||
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "(null)";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(byte[] b) {
|
||||
return brToString(new BytesRef(b));
|
||||
}
|
||||
|
||||
private static final class PendingBlock extends PendingEntry {
|
||||
public final BytesRef prefix;
|
||||
public final long fp;
|
||||
|
@ -442,7 +420,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK: prefix=" + brToString(prefix);
|
||||
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
|
||||
}
|
||||
|
||||
public void compileIndex(
|
||||
|
@ -600,8 +578,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// if (DEBUG2) {
|
||||
// BytesRef br = new BytesRef(lastTerm.bytes());
|
||||
// br.length = prefixLength;
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
|
||||
// + count);
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
|
||||
// ToStringUtils.bytesRefToString(br) + " count=" + count);
|
||||
// }
|
||||
|
||||
// Root block better write all remaining pending entries:
|
||||
|
@ -754,9 +732,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
prefix.length = prefixLength;
|
||||
|
||||
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
|
||||
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
|
||||
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
|
||||
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
|
||||
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
|
||||
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
|
||||
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
|
||||
// hasSubBlocks);
|
||||
|
||||
// Write block header:
|
||||
int numEntries = end - start;
|
||||
|
@ -769,7 +748,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
|
||||
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
|
||||
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -804,7 +785,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// System.out.println(" write term suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||
// }
|
||||
|
||||
// For leaf block we write suffix straight
|
||||
|
@ -837,7 +819,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// System.out.println(" write term suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||
// }
|
||||
|
||||
// For non-leaf block we borrow 1 bit to record
|
||||
|
@ -879,8 +862,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
|
||||
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// System.out.println(" write sub-block suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes) +
|
||||
// " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// }
|
||||
|
||||
assert floorLeadLabel == -1
|
||||
|
@ -998,7 +982,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
if (DEBUG) {
|
||||
int[] tmp = new int[lastTerm.length];
|
||||
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
|
||||
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
|
||||
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
|
||||
" pending.size()=" + pending.size());
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -1051,8 +1036,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// we are closing:
|
||||
int prefixTopSize = pending.size() - prefixStarts[i];
|
||||
if (prefixTopSize >= minItemsInBlock) {
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
|
||||
// minItemsInBlock=" + minItemsInBlock);
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
|
||||
// " minItemsInBlock=" + minItemsInBlock);
|
||||
writeBlocks(i + 1, prefixTopSize);
|
||||
prefixStarts[i] -= prefixTopSize - 1;
|
||||
}
|
||||
|
|
|
@ -19,19 +19,25 @@ package org.apache.lucene.backward_index;
|
|||
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.LineNumberReader;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -47,26 +53,31 @@ import org.junit.Before;
|
|||
|
||||
public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
|
||||
|
||||
protected final Version version;
|
||||
private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
|
||||
protected final String indexPattern;
|
||||
static final Set<String> OLD_VERSIONS;
|
||||
protected static final Set<Version> BINARY_SUPPORTED_VERSIONS;
|
||||
|
||||
static {
|
||||
String[] oldVersions =
|
||||
new String[] {
|
||||
"8.0.0", "8.0.0", "8.1.0", "8.1.0", "8.1.1", "8.1.1", "8.2.0", "8.2.0", "8.3.0", "8.3.0",
|
||||
"8.3.1", "8.3.1", "8.4.0", "8.4.0", "8.4.1", "8.4.1", "8.5.0", "8.5.0", "8.5.1", "8.5.1",
|
||||
"8.5.2", "8.5.2", "8.6.0", "8.6.0", "8.6.1", "8.6.1", "8.6.2", "8.6.2", "8.6.3", "8.6.3",
|
||||
"8.7.0", "8.7.0", "8.8.0", "8.8.0", "8.8.1", "8.8.1", "8.8.2", "8.8.2", "8.9.0", "8.9.0",
|
||||
"8.10.0", "8.10.0", "8.10.1", "8.10.1", "8.11.0", "8.11.0", "8.11.1", "8.11.1", "8.11.2",
|
||||
"8.11.2", "8.11.3", "8.11.3", "9.0.0", "9.1.0", "9.2.0", "9.3.0", "9.4.0", "9.4.1",
|
||||
"9.4.2", "9.5.0", "9.6.0", "9.7.0", "9.8.0", "9.9.0", "9.9.1", "9.9.2", "9.10.0",
|
||||
"10.0.0",
|
||||
};
|
||||
private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
|
||||
|
||||
protected final Version version;
|
||||
protected final String indexPattern;
|
||||
|
||||
static {
|
||||
String name = "versions.txt";
|
||||
try (LineNumberReader in =
|
||||
new LineNumberReader(
|
||||
IOUtils.getDecodingReader(
|
||||
IOUtils.requireResourceNonNull(
|
||||
BackwardsCompatibilityTestBase.class.getResourceAsStream(name), name),
|
||||
StandardCharsets.UTF_8))) {
|
||||
OLD_VERSIONS =
|
||||
in.lines()
|
||||
.filter(Predicate.not(String::isBlank))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
} catch (IOException exception) {
|
||||
throw new RuntimeException("failed to load resource", exception);
|
||||
}
|
||||
Set<Version> binaryVersions = new HashSet<>();
|
||||
for (String version : oldVersions) {
|
||||
for (String version : OLD_VERSIONS) {
|
||||
try {
|
||||
Version v = Version.parse(version);
|
||||
assertTrue("Unsupported binary version: " + v, v.major >= Version.MIN_SUPPORTED_MAJOR - 1);
|
||||
|
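
// For context: the static initializer above swaps the hard-coded oldVersions array for a
// versions.txt classpath resource. A standalone sketch of the same pattern using only JDK
// types (the diff itself goes through Lucene's IOUtils helpers); the class and method names
// below are illustrative:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

final class VersionsResource {
  static Set<String> load(Class<?> owner, String name) throws IOException {
    InputStream is = owner.getResourceAsStream(name);
    if (is == null) {
      // Fail fast when the resource is missing, like IOUtils.requireResourceNonNull.
      throw new IllegalStateException("missing resource: " + name);
    }
    try (BufferedReader in =
        new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
      // LinkedHashSet keeps the file's order while dropping duplicates and blank lines.
      return in.lines()
          .filter(Predicate.not(String::isBlank))
          .collect(Collectors.toCollection(LinkedHashSet::new));
    }
  }
}
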
@@ -75,8 +86,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
throw new RuntimeException(ex);
}
}
List<Version> allCurrentVersions = getAllCurrentVersions();
for (Version version : allCurrentVersions) {

for (Version version : getAllCurrentReleasedVersions()) {
// make sure we never miss a version.
assertTrue("Version: " + version + " missing", binaryVersions.remove(version));
}

@@ -181,19 +192,51 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
return versions;
}

private static List<Version> getAllCurrentReleasedVersions() {
List<Version> currentReleasedVersions = getAllCurrentVersions();

// The latest version from the current major is always under development.
assertTrue(currentReleasedVersions.remove(Version.LATEST));
// The latest minor from the previous major is also under development.
assertTrue(currentReleasedVersions.remove(LATEST_PREVIOUS_MAJOR));

// In addition to those, we may need to remove one more version in case a release is in
// progress, and the version constant has been added but backward-compatibility indexes have not
// been checked in yet.
List<Version> missingVersions = new ArrayList<>();
for (Iterator<Version> it = currentReleasedVersions.iterator(); it.hasNext(); ) {
Version version = it.next();
String indexName = String.format(Locale.ROOT, "index.%s-cfs.zip", version);
if (TestAncientIndicesCompatibility.class.getResource(indexName) == null) {
missingVersions.add(version);
it.remove();
}
}

if (missingVersions.size() > 1) {
throw new AssertionError(
"More than one version is missing backward-compatibility data: " + missingVersions);
}
return currentReleasedVersions;
}

/** Get all versions that are released, plus the latest version which is unreleased. */
public static List<Version> getAllCurrentReleasedVersionsAndCurrent() {
List<Version> versions = new ArrayList<>(getAllCurrentReleasedVersions());
versions.add(Version.LATEST);
return versions;
}

public static Iterable<Object[]> allVersion(String name, String... suffixes) {
List<Object> patterns = new ArrayList<>();
for (String suffix : suffixes) {
patterns.add(createPattern(name, suffix));
}
List<Object[]> versionAndPatterns = new ArrayList<>();
List<Version> versionList = getAllCurrentVersions();
List<Version> versionList = getAllCurrentReleasedVersionsAndCurrent();
for (Version v : versionList) {
if (v.equals(LATEST_PREVIOUS_MAJOR)
== false) { // the latest prev-major has not yet been released
for (Object p : patterns) {
versionAndPatterns.add(new Object[] {v, p});
}
for (Object p : patterns) {
versionAndPatterns.add(new Object[] {v, p});
}
}
return versionAndPatterns;
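
// Illustrative only: one way a concrete subclass could feed allVersion(...) into
// RandomizedRunner as a parameters factory. The subclass and index names below are
// hypothetical, not part of this change; the constructor shape matches the
// TestIndexSortBackwardsCompatibility hunk further down:

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.util.Version;

public class MySortedIndexBWCTest extends BackwardsCompatibilityTestBase {
  public MySortedIndexBWCTest(Version version, String pattern) {
    super(version, pattern);
  }

  @ParametersFactory(argumentFormatting = "version=%1$s, pattern=%2$s")
  public static Iterable<Object[]> testVersionsFactory() {
    // Each {version, pattern} pair becomes one parameterized constructor call.
    return allVersion("sorted", "-cfs", "-nocfs");
  }
}
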
@@ -21,8 +21,16 @@ import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexFormatTooOldException;

@@ -36,274 +44,57 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.IOUtils;

@SuppressWarnings("deprecation")
public class TestAncientIndicesCompatibility extends LuceneTestCase {
static final Set<String> UNSUPPORTED_INDEXES;

static final String[] unsupportedNames = {
"1.9.0-cfs",
"1.9.0-nocfs",
"2.0.0-cfs",
"2.0.0-nocfs",
"2.1.0-cfs",
"2.1.0-nocfs",
"2.2.0-cfs",
"2.2.0-nocfs",
"2.3.0-cfs",
"2.3.0-nocfs",
"2.4.0-cfs",
"2.4.0-nocfs",
"2.4.1-cfs",
"2.4.1-nocfs",
"2.9.0-cfs",
"2.9.0-nocfs",
"2.9.1-cfs",
"2.9.1-nocfs",
"2.9.2-cfs",
"2.9.2-nocfs",
"2.9.3-cfs",
"2.9.3-nocfs",
"2.9.4-cfs",
"2.9.4-nocfs",
"3.0.0-cfs",
"3.0.0-nocfs",
"3.0.1-cfs",
"3.0.1-nocfs",
"3.0.2-cfs",
"3.0.2-nocfs",
"3.0.3-cfs",
"3.0.3-nocfs",
"3.1.0-cfs",
"3.1.0-nocfs",
"3.2.0-cfs",
"3.2.0-nocfs",
"3.3.0-cfs",
"3.3.0-nocfs",
"3.4.0-cfs",
"3.4.0-nocfs",
"3.5.0-cfs",
"3.5.0-nocfs",
"3.6.0-cfs",
"3.6.0-nocfs",
"3.6.1-cfs",
"3.6.1-nocfs",
"3.6.2-cfs",
"3.6.2-nocfs",
"4.0.0-cfs",
"4.0.0-cfs",
"4.0.0-nocfs",
"4.0.0.1-cfs",
"4.0.0.1-nocfs",
"4.0.0.2-cfs",
"4.0.0.2-nocfs",
"4.1.0-cfs",
"4.1.0-nocfs",
"4.2.0-cfs",
"4.2.0-nocfs",
"4.2.1-cfs",
"4.2.1-nocfs",
"4.3.0-cfs",
"4.3.0-nocfs",
"4.3.1-cfs",
"4.3.1-nocfs",
"4.4.0-cfs",
"4.4.0-nocfs",
"4.5.0-cfs",
"4.5.0-nocfs",
"4.5.1-cfs",
"4.5.1-nocfs",
"4.6.0-cfs",
"4.6.0-nocfs",
"4.6.1-cfs",
"4.6.1-nocfs",
"4.7.0-cfs",
"4.7.0-nocfs",
"4.7.1-cfs",
"4.7.1-nocfs",
"4.7.2-cfs",
"4.7.2-nocfs",
"4.8.0-cfs",
"4.8.0-nocfs",
"4.8.1-cfs",
"4.8.1-nocfs",
"4.9.0-cfs",
"4.9.0-nocfs",
"4.9.1-cfs",
"4.9.1-nocfs",
"4.10.0-cfs",
"4.10.0-nocfs",
"4.10.1-cfs",
"4.10.1-nocfs",
"4.10.2-cfs",
"4.10.2-nocfs",
"4.10.3-cfs",
"4.10.3-nocfs",
"4.10.4-cfs",
"4.10.4-nocfs",
"5x-with-4x-segments-cfs",
"5x-with-4x-segments-nocfs",
"5.0.0.singlesegment-cfs",
"5.0.0.singlesegment-nocfs",
"5.0.0-cfs",
"5.0.0-nocfs",
"5.1.0-cfs",
"5.1.0-nocfs",
"5.2.0-cfs",
"5.2.0-nocfs",
"5.2.1-cfs",
"5.2.1-nocfs",
"5.3.0-cfs",
"5.3.0-nocfs",
"5.3.1-cfs",
"5.3.1-nocfs",
"5.3.2-cfs",
"5.3.2-nocfs",
"5.4.0-cfs",
"5.4.0-nocfs",
"5.4.1-cfs",
"5.4.1-nocfs",
"5.5.0-cfs",
"5.5.0-nocfs",
"5.5.1-cfs",
"5.5.1-nocfs",
"5.5.2-cfs",
"5.5.2-nocfs",
"5.5.3-cfs",
"5.5.3-nocfs",
"5.5.4-cfs",
"5.5.4-nocfs",
"5.5.5-cfs",
"5.5.5-nocfs",
"6.0.0-cfs",
"6.0.0-nocfs",
"6.0.1-cfs",
"6.0.1-nocfs",
"6.1.0-cfs",
"6.1.0-nocfs",
"6.2.0-cfs",
"6.2.0-nocfs",
"6.2.1-cfs",
"6.2.1-nocfs",
"6.3.0-cfs",
"6.3.0-nocfs",
"6.4.0-cfs",
"6.4.0-nocfs",
"6.4.1-cfs",
"6.4.1-nocfs",
"6.4.2-cfs",
"6.4.2-nocfs",
"6.5.0-cfs",
"6.5.0-nocfs",
"6.5.1-cfs",
"6.5.1-nocfs",
"6.6.0-cfs",
"6.6.0-nocfs",
"6.6.1-cfs",
"6.6.1-nocfs",
"6.6.2-cfs",
"6.6.2-nocfs",
"6.6.3-cfs",
"6.6.3-nocfs",
"6.6.4-cfs",
"6.6.4-nocfs",
"6.6.5-cfs",
"6.6.5-nocfs",
"6.6.6-cfs",
"6.6.6-nocfs",
"7.0.0-cfs",
"7.0.0-nocfs",
"7.0.1-cfs",
"7.0.1-nocfs",
"7.1.0-cfs",
"7.1.0-nocfs",
"7.2.0-cfs",
"7.2.0-nocfs",
"7.2.1-cfs",
"7.2.1-nocfs",
"7.3.0-cfs",
"7.3.0-nocfs",
"7.3.1-cfs",
"7.3.1-nocfs",
"7.4.0-cfs",
"7.4.0-nocfs",
"7.5.0-cfs",
"7.5.0-nocfs",
"7.6.0-cfs",
"7.6.0-nocfs",
"7.7.0-cfs",
"7.7.0-nocfs",
"7.7.1-cfs",
"7.7.1-nocfs",
"7.7.2-cfs",
"7.7.2-nocfs",
"7.7.3-cfs",
"7.7.3-nocfs",
"8.0.0-cfs",
"8.0.0-nocfs",
"8.1.0-cfs",
"8.1.0-nocfs",
"8.1.1-cfs",
"8.1.1-nocfs",
"8.2.0-cfs",
"8.2.0-nocfs",
"8.3.0-cfs",
"8.3.0-nocfs",
"8.3.1-cfs",
"8.3.1-nocfs",
"8.4.0-cfs",
"8.4.0-nocfs",
"8.4.1-cfs",
"8.4.1-nocfs",
"8.5.0-cfs",
"8.5.0-nocfs",
"8.5.1-cfs",
"8.5.1-nocfs",
"8.5.2-cfs",
"8.5.2-nocfs",
"8.6.0-cfs",
"8.6.0-nocfs",
"8.6.1-cfs",
"8.6.1-nocfs",
"8.6.2-cfs",
"8.6.2-nocfs",
"8.6.3-cfs",
"8.6.3-nocfs",
"8.7.0-cfs",
"8.7.0-nocfs",
"8.8.0-cfs",
"8.8.0-nocfs",
"8.8.1-cfs",
"8.8.1-nocfs",
"8.8.2-cfs",
"8.8.2-nocfs",
"8.9.0-cfs",
"8.9.0-nocfs",
"8.10.0-cfs",
"8.10.0-nocfs",
"8.10.1-cfs",
"8.10.1-nocfs",
"8.11.0-cfs",
"8.11.0-nocfs",
"8.11.1-cfs",
"8.11.1-nocfs",
"8.11.2-cfs",
"8.11.2-nocfs",
"8.11.3-cfs",
"8.11.3-nocfs"
};
static {
String name = "unsupported_versions.txt";
Set<String> indices;
try (LineNumberReader in =
new LineNumberReader(
IOUtils.getDecodingReader(
IOUtils.requireResourceNonNull(
TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8))) {
indices =
in.lines()
.filter(Predicate.not(String::isBlank))
.flatMap(version -> Stream.of(version + "-cfs", version + "-nocfs"))
.collect(Collectors.toCollection(LinkedHashSet::new));
} catch (IOException exception) {
throw new RuntimeException("failed to load resource", exception);
}

name = "unsupported_indices.txt";
try (LineNumberReader in =
new LineNumberReader(
IOUtils.getDecodingReader(
IOUtils.requireResourceNonNull(
TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8))) {
indices.addAll(
in.lines()
.filter(Predicate.not(String::isBlank))
.collect(Collectors.toCollection(LinkedHashSet::new)));
} catch (IOException exception) {
throw new RuntimeException("failed to load resource", exception);
}
UNSUPPORTED_INDEXES = Collections.unmodifiableSet(indices);
}

/**
* This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate
* on too old indexes!
*/
public void testUnsupportedOldIndexes() throws Exception {
for (int i = 0; i < unsupportedNames.length; i++) {
for (String version : UNSUPPORTED_INDEXES) {
if (VERBOSE) {
System.out.println("TEST: index " + unsupportedNames[i]);
System.out.println("TEST: index " + version);
}
Path oldIndexDir = createTempDir(unsupportedNames[i]);
TestUtil.unzip(
getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir);
Path oldIndexDir = createTempDir(version);
TestUtil.unzip(getDataInputStream("unsupported." + version + ".zip"), oldIndexDir);
BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir);
// don't checkindex, these are intentionally not supported
dir.setCheckIndexOnClose(false);

@@ -312,7 +103,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
IndexWriter writer = null;
try {
reader = DirectoryReader.open(dir);
fail("DirectoryReader.open should not pass for " + unsupportedNames[i]);
fail("DirectoryReader.open should not pass for " + version);
} catch (IndexFormatTooOldException e) {
if (e.getReason() != null) {
assertNull(e.getVersion());

@@ -353,7 +144,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
writer =
new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false));
fail("IndexWriter creation should not pass for " + unsupportedNames[i]);
fail("IndexWriter creation should not pass for " + version);
} catch (IndexFormatTooOldException e) {
if (e.getReason() != null) {
assertNull(e.getVersion());

@@ -406,7 +197,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, UTF_8));
CheckIndex.Status indexStatus = checker.checkIndex();
if (unsupportedNames[i].startsWith("8.")) {
if (version.startsWith("8.")) {
assertTrue(indexStatus.clean);
} else {
assertFalse(indexStatus.clean);
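
// The try/fail/catch blocks in this test predate expectThrows; an equivalent, more compact
// form (illustrative only, not part of this change) would be:

IndexFormatTooOldException e =
    expectThrows(IndexFormatTooOldException.class, () -> DirectoryReader.open(dir));
// Mirrors the catch blocks above: when a reason is reported, no version is attached.
if (e.getReason() != null) {
  assertNull(e.getVersion());
}
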
@@ -101,8 +101,6 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};

static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1;

/**
* A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See
* {@link #testVersionsFactory()} for details on the values provided to the framework.

@@ -62,7 +62,6 @@ public class TestBinaryBackwardsCompatibility extends BackwardsCompatibilityTest

@Nightly
public void testReadNMinusTwoCommit() throws IOException {

try (BaseDirectoryWrapper dir = newDirectory(directory)) {
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();

@@ -55,6 +55,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT

static final String INDEX_NAME = "sorted";
static final String SUFFIX = "";
private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
private static final String PARENT_FIELD_NAME = "___parent";

public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
super(version, pattern);

@@ -79,8 +81,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
.setIndexSort(sort)
.setMergePolicy(newLogMergePolicy());
if (this.version.onOrAfter(Version.LUCENE_10_0_0)) {
indexWriterConfig.setParentField("___parent");
if (this.version.onOrAfter(FIRST_PARENT_DOC_VERSION)) {
indexWriterConfig.setParentField(PARENT_FIELD_NAME);
}
// open writer
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {

@@ -89,7 +91,10 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
Document child = new Document();
child.add(new StringField("relation", "child", Field.Store.NO));
child.add(new StringField("bid", "" + i, Field.Store.NO));
child.add(new NumericDocValuesField("dateDV", i));
if (version.onOrAfter(FIRST_PARENT_DOC_VERSION)
== false) { // only add this to earlier versions
child.add(new NumericDocValuesField("dateDV", i));
}
Document parent = new Document();
parent.add(new StringField("relation", "parent", Field.Store.NO));
parent.add(new StringField("bid", "" + i, Field.Store.NO));

@@ -158,6 +163,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
conf.setUseCompoundFile(false);
conf.setCodec(TestUtil.getDefaultCodec());
conf.setParentField("___parent");
conf.setParentField(PARENT_FIELD_NAME);
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
IndexWriter writer = new IndexWriter(directory, conf);
LineFileDocs docs = new LineFileDocs(new Random(0));

@@ -0,0 +1,4 @@
5x-with-4x-segments-cfs
5x-with-4x-segments-nocfs
5.0.0.singlesegment-cfs
5.0.0.singlesegment-nocfs

@@ -0,0 +1,122 @@
1.9.0
2.0.0
2.1.0
2.2.0
2.3.0
2.4.0
2.4.1
2.9.0
2.9.1
2.9.2
2.9.3
2.9.4
3.0.0
3.0.1
3.0.2
3.0.3
3.1.0
3.2.0
3.3.0
3.4.0
3.5.0
3.6.0
3.6.1
3.6.2
4.0.0
4.0.0.1
4.0.0.2
4.1.0
4.2.0
4.2.1
4.3.0
4.3.1
4.4.0
4.5.0
4.5.1
4.6.0
4.6.1
4.7.0
4.7.1
4.7.2
4.8.0
4.8.1
4.9.0
4.9.1
4.10.0
4.10.1
4.10.2
4.10.3
4.10.4
5.0.0
5.1.0
5.2.0
5.2.1
5.3.0
5.3.1
5.3.2
5.4.0
5.4.1
5.5.0
5.5.1
5.5.2
5.5.3
5.5.4
5.5.5
6.0.0
6.0.1
6.1.0
6.2.0
6.2.1
6.3.0
6.4.0
6.4.1
6.4.2
6.5.0
6.5.1
6.6.0
6.6.1
6.6.2
6.6.3
6.6.4
6.6.5
6.6.6
7.0.0
7.0.1
7.1.0
7.2.0
7.2.1
7.3.0
7.3.1
7.4.0
7.5.0
7.6.0
7.7.0
7.7.1
7.7.2
7.7.3
8.0.0
8.1.0
8.1.1
8.2.0
8.3.0
8.3.1
8.4.0
8.4.1
8.5.0
8.5.1
8.5.2
8.6.0
8.6.1
8.6.2
8.6.3
8.7.0
8.8.0
8.8.1
8.8.2
8.9.0
8.10.0
8.10.1
8.11.0
8.11.1
8.11.2
8.11.3

@@ -0,0 +1,40 @@
8.0.0
8.1.0
8.1.1
8.2.0
8.3.0
8.3.1
8.4.0
8.4.1
8.5.0
8.5.1
8.5.2
8.6.0
8.6.1
8.6.2
8.6.3
8.7.0
8.8.0
8.8.1
8.8.2
8.9.0
8.10.0
8.10.1
8.11.0
8.11.1
8.11.2
8.11.3
9.0.0
9.1.0
9.2.0
9.3.0
9.4.0
9.4.1
9.4.2
9.5.0
9.6.0
9.7.0
9.8.0
9.9.0
9.9.1
9.9.2

@@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.text.ParseException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

@@ -67,7 +68,7 @@ public class ExpressionsBenchmark {
lookup.findStatic(
lookup.lookupClass(), "ident", MethodType.methodType(double.class, double.class)));
m.put("mh_identity", MethodHandles.identity(double.class));
return m;
return Collections.unmodifiableMap(m);
} catch (ReflectiveOperationException e) {
throw new AssertionError(e);
}

@@ -21,7 +21,6 @@ import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.xml.XMLConstants;

@@ -68,7 +67,7 @@ public class EnwikiContentSource extends ContentSource {
private boolean stopped = false;
private String[] tuple;
private NoMoreDataException nmde;
private StringBuilder contents = new StringBuilder();
private final StringBuilder contents = new StringBuilder();
private String title;
private String body;
private String time;

@@ -262,7 +261,6 @@ public class EnwikiContentSource extends ContentSource {
}
}

private static final Map<String, Integer> ELEMENTS = new HashMap<>();
private static final int TITLE = 0;
private static final int DATE = TITLE + 1;
private static final int BODY = DATE + 1;

@@ -272,24 +270,24 @@ public class EnwikiContentSource extends ContentSource {
// should not be part of the tuple, we should define them after LENGTH.
private static final int PAGE = LENGTH + 1;

private static final Map<String, Integer> ELEMENTS =
Map.of(
"page", PAGE,
"text", BODY,
"timestamp", DATE,
"title", TITLE,
"id", ID);

private static final String[] months = {
"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
};

static {
ELEMENTS.put("page", Integer.valueOf(PAGE));
ELEMENTS.put("text", Integer.valueOf(BODY));
ELEMENTS.put("timestamp", Integer.valueOf(DATE));
ELEMENTS.put("title", Integer.valueOf(TITLE));
ELEMENTS.put("id", Integer.valueOf(ID));
}

/**
* Returns the type of the element if defined, otherwise returns -1. This method is useful in
* startElement and endElement, by not needing to compare the element qualified name over and
* over.
*/
private static final int getElementType(String elem) {
private static int getElementType(String elem) {
Integer val = ELEMENTS.get(elem);
return val == null ? -1 : val.intValue();
}

@@ -297,7 +295,7 @@ public class EnwikiContentSource extends ContentSource {
private Path file;
private boolean keepImages = true;
private InputStream is;
private Parser parser = new Parser();
private final Parser parser = new Parser();

@Override
public void close() throws IOException {

@@ -18,6 +18,8 @@ package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

@@ -40,22 +42,28 @@ public abstract class TrecDocParser {
/** trec parser type used for unknown extensions */
public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;

static final Map<ParsePathType, TrecDocParser> pathType2parser = new HashMap<>();
static final Map<ParsePathType, TrecDocParser> pathType2Parser;

static {
pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
pathType2parser.put(ParsePathType.FT, new TrecFTParser());
pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
pathType2Parser =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
ParsePathType.GOV2, new TrecGov2Parser(),
ParsePathType.FBIS, new TrecFBISParser(),
ParsePathType.FR94, new TrecFR94Parser(),
ParsePathType.FT, new TrecFTParser(),
ParsePathType.LATIMES, new TrecLATimesParser())));
}

static final Map<String, ParsePathType> pathName2Type = new HashMap<>();
static final Map<String, ParsePathType> pathName2Type;

static {
Map<String, ParsePathType> name2Type = new HashMap<>();
for (ParsePathType ppt : ParsePathType.values()) {
pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
name2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
}
pathName2Type = Collections.unmodifiableMap(name2Type);
}

/** max length of walk up from file to its ancestors when looking for a known path type */

@@ -32,6 +32,6 @@ public class TrecParserByPath extends TrecDocParser {
StringBuilder docBuf,
ParsePathType pathType)
throws IOException {
return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
return pathType2Parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
}
}

@@ -43,7 +43,7 @@ public class TaskSequence extends PerfTask {
private boolean resetExhausted = false;
private PerfTask[] tasksArray;
private boolean anyExhaustibleTasks;
private boolean collapsable = false; // to not collapse external sequence named in alg.
private final boolean collapsable; // to not collapse external sequence named in alg.

private boolean fixedTime; // true if we run for fixed time
private double runTimeSec; // how long to run for

@@ -23,7 +23,6 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.compress.compressors.CompressorException;

@@ -70,15 +69,9 @@ public class StreamUtils {
}
}

private static final Map<String, Type> extensionToType = new HashMap<>();

static {
// these in are lower case, we will lower case at the test as well
extensionToType.put(".bz2", Type.BZIP2);
extensionToType.put(".bzip", Type.BZIP2);
extensionToType.put(".gz", Type.GZIP);
extensionToType.put(".gzip", Type.GZIP);
}
// these are in lower case, we will lower case at the test as well
private static final Map<String, Type> extensionToType =
Map.of(".bz2", Type.BZIP2, ".bzip", Type.BZIP2, ".gz", Type.GZIP, ".gzip", Type.GZIP);

/**
* Returns an {@link InputStream} over the requested file. This method attempts to identify the
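
// Several hunks above replace mutable static-initializer maps with immutable ones. A small
// self-contained sketch of the two idioms they use (the type names here are placeholders):

import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

class ImmutableMapIdioms {
  enum Kind { A, B }

  // Map.of builds a compact immutable map; note it rejects null keys/values and does not
  // guarantee iteration order.
  static final Map<String, Integer> BY_NAME = Map.of("a", 1, "b", 2);

  // For enum keys, wrapping an EnumMap keeps the fast array-backed lookups while
  // Collections.unmodifiableMap removes mutability, matching the TrecDocParser change.
  static final Map<Kind, String> BY_KIND =
      Collections.unmodifiableMap(new EnumMap<>(Map.of(Kind.A, "first", Kind.B, "second")));
}
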
@@ -36,7 +36,7 @@ public class TestTrecContentSource extends LuceneTestCase {
/** A TrecDocMaker which works on a String and not files. */
private static class StringableTrecSource extends TrecContentSource {

private String docs = null;
private final String docs;

public StringableTrecSource(String docs, boolean forever) {
this.docs = docs;

@@ -230,24 +230,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
return fields.size();
}

// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}

@Override
public void checkIntegrity() throws IOException {
// term dictionary

@@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;

@@ -288,29 +289,10 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

@Override
public String toString() {
return brToString(termBytes);
return ToStringUtils.bytesRefToString(termBytes);
}
}

// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}

// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}

private static final class SubIndex {
public final FST<Output> index;
public final long termOrdStart;

@@ -353,7 +335,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

@Override
public String toString() {
return "BLOCK: " + brToString(prefix);
return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
}

public void compileIndex(

@@ -457,9 +439,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
Output newOutput =
FST_OUTPUTS.newOutput(
output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + "
// termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" +
// newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
// " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
// + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
}
}

@@ -642,8 +624,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

long startFP = out.getFilePointer();

// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + "
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
// " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// hasTerms + " hasSubBlocks=" + hasSubBlocks);

boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;

@@ -662,11 +644,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(code);

// if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
// isLastInFloor);
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
// " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
// (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
// " isLastInFloor=" + isLastInFloor);
// }

final List<SubIndex> subIndices;

@@ -784,7 +766,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
}
*/

@@ -842,7 +825,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" +
Arrays.toString(tmp) + " pending.size()=" + pending.size());
}
*/

@@ -885,8 +869,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}

@@ -61,7 +61,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
throws IOException {
// if (DEBUG) {
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" +
// brToString(compiled.commonSuffixRef));
// ToStringUtils.bytesRefToString(compiled.commonSuffixRef));
// }
this.fr = fr;
this.byteRunnable = compiled.getByteRunnable();

@@ -283,13 +283,15 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame.loadNextFloorBlock();
continue;
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term));
// if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return;
}
}
continue;
} else if (cmp == 0) {
// if (DEBUG) System.out.println(" return term=" + brToString(term));
// if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return;
} else {
// Fallback to prior entry: the semantics of

@@ -327,10 +329,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {

// if (DEBUG) {
// System.out.println("\nintEnum.next seg=" + segment);
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new
// BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + "
// lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" +
// (currentFrame.transitions.length == 0 ? "n/a" :
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" +
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
// }

@@ -343,9 +345,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) System.out.println(" next-floor-block");
currentFrame.loadNextFloorBlock();
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
} else {

@@ -357,9 +360,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame = stack[currentFrame.ord - 1];
assert currentFrame.lastSubFP == lastFP;
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
}

@@ -373,7 +377,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// suffixRef.length = currentFrame.suffix;
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " +
// currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" +
// brToString(suffixRef));
// ToStringUtils.bytesRefToString(suffixRef));
// }

if (currentFrame.suffix != 0) {

@@ -480,15 +484,16 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
copyTerm();
currentFrame = pushFrame(state);
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
} else if (byteRunnable.isAccept(state)) {
copyTerm();
// if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
|
||||
: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
|
||||
return term;
|
||||
|
|
|
@@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
@@ -174,11 +175,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
throws IOException {
final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + "
// nextEnt=" + f.nextEnt);
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp +
// " nextEnt=" + f.nextEnt);
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
@@ -204,7 +205,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@@ -224,19 +225,6 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
return true;
}

// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}

@Override
public boolean seekExact(final BytesRef target) throws IOException {
@@ -250,7 +238,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
/*
if (DEBUG) {
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" +
ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
") validIndexPrefix=" + validIndexPrefix);
printSeekState(System.out);
}
*/
@@ -411,8 +401,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true;

// if (DEBUG) {
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@@ -443,7 +433,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println("  FAST NOT_FOUND term=" + brToString(term));
// System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@@ -459,7 +449,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println("  got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@@ -502,7 +492,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println("  FAST NOT_FOUND term=" + brToString(term));
// System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@@ -537,8 +527,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix= " + validIndexPrefix);
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState();
// }
@@ -581,9 +571,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@@ -697,8 +687,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true;

// if (DEBUG) {
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@@ -733,7 +723,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println("  return NOT_FOUND term=" + brToString(term) + " " + term);
// System.out.println("  return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@@ -744,7 +735,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println("  return " + result + " term=" + brToString(term) + " " + term);
// System.out.println("  return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@@ -829,7 +821,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms="
+ f.hasTerms
@@ -859,7 +851,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ " nextEnt="
+ f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@@ -951,8 +943,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
// ToStringUtils.bytesRefToString(term) +
// " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
@@ -1019,8 +1012,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term + "
// currentFrame.ord=" + currentFrame.ord);
// if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term +
// " currentFrame.ord=" + currentFrame.ord);
positioned = true;
return term.get();
}
@@ -1235,8 +1228,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
int low = 0;
int high = arc.numArcs() - 1;
int mid = 0;
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
// output=" + output);
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput +
// " output=" + output);
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
@@ -119,8 +119,8 @@ final class OrdsSegmentTermsEnumFrame {
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
// System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + "
// shift=" + (nextFloorTermOrd-termOrdOrig));
// System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd +
// " shift=" + (nextFloorTermOrd-termOrdOrig));

// if (DEBUG) {
// System.out.println("  setFloorData fpOrig=" + fpOrig + " bytes=" + new
@@ -289,8 +289,8 @@ final class OrdsSegmentTermsEnumFrame {
// Decodes next entry; returns true if it's a sub-block
public boolean nextLeaf() {
// if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
nextEnt++;
@@ -306,8 +306,8 @@ final class OrdsSegmentTermsEnumFrame {
}

public boolean nextNonLeaf() {
// if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@@ -374,8 +374,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }

isLastInFloor = numFollowFloorBlocks == 1;
@@ -440,8 +440,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }

isLastInFloor = numFollowFloorBlocks == 1;
@@ -495,8 +495,8 @@ final class OrdsSegmentTermsEnumFrame {
boolean absolute = metaDataUpto == 0;
assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt;

// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
// " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);

// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@@ -593,10 +593,10 @@ final class OrdsSegmentTermsEnumFrame {
// scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {

// if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term));
// if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));

assert nextEnt != -1;
@@ -627,7 +627,7 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println("      cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + OrdsSegmentTermsEnum.brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }

final int termLen = prefix + suffix;
@@ -714,8 +714,8 @@ final class OrdsSegmentTermsEnumFrame {

// if (DEBUG) System.out.println("    scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term));
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));

assert nextEnt != -1;
@@ -743,7 +743,8 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println("      cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }

ste.termExists = (code & 1) == 0;

@@ -210,7 +210,7 @@ public final class FieldReader extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println("  FieldReader.intersect startTerm=" +
// BlockTreeTermsWriter.brToString(startTerm));
// ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?

@@ -549,19 +549,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
}

// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}

private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) {
@@ -307,24 +307,6 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size();
}

// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}

@Override
public void checkIntegrity() throws IOException {
// terms index

@@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
@@ -394,7 +395,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
}

// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms);
}

@@ -433,33 +434,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "TERM: " + brToString(termBytes);
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
}
}

// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}

// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}

/**
* Encodes long value to variable length byte[], in MSB order. Use {@link
* FieldReader#readMSBVLong} to decode.
@@ -506,7 +484,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: prefix=" + brToString(prefix);
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
}

public void compileIndex(
@@ -689,8 +667,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
// + count);
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// ToStringUtils.bytesRefToString(br) + " count=" + count);
// }

// Root block better write all remaining pending entries:
@@ -843,9 +821,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;

// if (DEBUG2) System.out.println("  writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);

// Write block header:
int numEntries = end - start;
@@ -858,7 +837,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
/*
if (DEBUG) {
System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
}
*/

@@ -893,7 +874,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println("    write term suffix=" + brToString(suffixBytes));
// System.out.println("    write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }

// For leaf block we write suffix straight
@@ -926,7 +908,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println("    write term suffix=" + brToString(suffixBytes));
// System.out.println("    write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }

// For non-leaf block we borrow 1 bit to record
@@ -968,8 +951,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println("      write sub-block suffix=" + brToString(suffixBytes) + "
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// System.out.println("      write sub-block suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" +
// (startFP-block.fp) + " floor=" + block.isFloor);
// }

assert floorLeadLabel == -1
@@ -1090,7 +1074,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
}
*/

@@ -1143,8 +1128,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}
@@ -263,8 +263,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) {
@@ -286,7 +286,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@@ -306,27 +306,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
}

/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}

// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/

@Override
public boolean seekExact(BytesRef target) throws IOException {
@@ -344,8 +323,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@@ -499,8 +479,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}

// if (DEBUG) {
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@@ -531,7 +511,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println("  FAST NOT_FOUND term=" + brToString(term));
// System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@@ -547,7 +527,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println("  got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@@ -586,7 +566,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println("  FAST NOT_FOUND term=" + brToString(term));
// System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@@ -622,8 +602,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
// ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@@ -663,9 +644,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@@ -771,8 +752,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}

// if (DEBUG) {
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println("   start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@@ -808,7 +789,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println("  return NOT_FOUND term=" + brToString(term));
// System.out.println("  return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@@ -819,7 +801,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println("  return " + result + " term=" + brToString(term));
// System.out.println("  return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@@ -1015,9 +998,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@@ -1081,8 +1065,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame:
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println("  return term=" + brToString(term) + " currentFrame.ord="
// + currentFrame.ord);
// if (DEBUG) System.out.println("  return term=" + ToStringUtils.bytesRefToString(term) +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}

@@ -295,8 +295,8 @@ final class SegmentTermsEnumFrame {
}

public void nextLeaf() {
// if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@@ -388,8 +388,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println("    label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println("    label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }

isLastInFloor = numFollowFloorBlocks == 1;
@@ -531,28 +531,14 @@ final class SegmentTermsEnumFrame {
private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;

// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/

// Target's prefix matches this block's prefix; we
// scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {

// if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(term));
// if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));

assert nextEnt != -1;
@@ -582,7 +568,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println("      cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }

startBytePos = suffixesReader.getPosition();
@@ -647,8 +633,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {

// if (DEBUG) System.out.println("    scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(target));
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));

assert nextEnt != -1;
@@ -676,7 +663,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println("      cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }

final int termLen = prefix + suffix;
@@ -708,8 +696,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();

// if (DEBUG) System.out.println("      maybe done exactOnly=" + exactOnly + "
// ste.termExists=" + ste.termExists);
// if (DEBUG) System.out.println("      maybe done exactOnly=" + exactOnly +
// " ste.termExists=" + ste.termExists);

if (!exactOnly && !ste.termExists) {
// System.out.println("  now pushFrame");

@@ -166,6 +166,16 @@ public final class FeatureField extends Field {
return stream;
}

/**
* This is useful if you have multiple features sharing a name and you want to take action to
* deduplicate them.
*
* @return the feature value of this field.
*/
public float getFeatureValue() {
return featureValue;
}

private static final class FeatureTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
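The new accessor simply exposes the value that was passed to the FeatureField constructor. A minimal usage sketch (the field and feature names here are hypothetical), deduplicating features that share a name before indexing:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FeatureField;

// A minimal sketch, assuming hypothetical feature names and values:
// keep only the highest-valued feature per name before adding to the doc.
FeatureField first = new FeatureField("features", "pagerank", 0.25f);
FeatureField second = new FeatureField("features", "pagerank", 0.75f);
FeatureField kept = second.getFeatureValue() > first.getFeatureValue() ? second : first;
Document doc = new Document();
doc.add(kept); // the lower-valued duplicate is dropped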
@@ -21,6 +21,7 @@ import java.io.StreamTokenizer;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -404,21 +405,23 @@ public class SimpleWKTShapeParser {
ENVELOPE("envelope"); // not part of the actual WKB spec

private final String shapeName;
private static final Map<String, ShapeType> shapeTypeMap = new HashMap<>();
private static final Map<String, ShapeType> shapeTypeMap;
private static final String BBOX = "BBOX";

static {
Map<String, ShapeType> shapeTypes = new HashMap<>();
for (ShapeType type : values()) {
shapeTypeMap.put(type.shapeName, type);
shapeTypes.put(type.shapeName, type);
}
shapeTypeMap.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
shapeTypes.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
shapeTypeMap = Collections.unmodifiableMap(shapeTypes);
}

ShapeType(String shapeName) {
this.shapeName = shapeName;
}

protected String typename() {
String typename() {
return shapeName;
}
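The old code populated shapeTypeMap through its static initializer while the field was also initialized inline; the new code fills a local HashMap and publishes it exactly once as an unmodifiable map. A minimal sketch of the same pattern with a hypothetical enum:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

enum Color {
  RED("red"),
  BLUE("blue");

  private final String label;
  // assigned exactly once, in the static initializer below
  private static final Map<String, Color> BY_LABEL;

  static {
    Map<String, Color> m = new HashMap<>();
    for (Color c : values()) {
      m.put(c.label, c);
    }
    BY_LABEL = Collections.unmodifiableMap(m); // published immutable
  }

  Color(String label) {
    this.label = label;
  }
}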
@@ -32,7 +32,7 @@ public final class FieldInfo {
/** Internal field number */
public final int number;

private DocValuesType docValuesType = DocValuesType.NONE;
private DocValuesType docValuesType;

// True if any document indexed term vectors
private boolean storeTermVector;

@@ -84,7 +84,7 @@ public class LiveIndexWriterConfig {
protected volatile int perThreadHardLimitMB;

/** True if segment flushes should use compound file format */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
protected volatile boolean useCompoundFile;

/** True if calls to {@link IndexWriter#close()} should first do a commit. */
protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;

@@ -597,12 +597,12 @@ public abstract class MergePolicy {
* If the size of the merge segment exceeds this ratio of the total index size then it will remain
* in non-compound format
*/
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
protected double noCFSRatio;

/**
* If the size of the merged segment exceeds this value then it will not use compound file format.
*/
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
protected long maxCFSSegmentSize;

/** Creates a new merge policy instance. */
protected MergePolicy() {
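The FieldInfo, LiveIndexWriterConfig, and MergePolicy hunks (and several similar ones below) drop inline field initializers, leaving a single authoritative write in the constructor. A minimal sketch of the pattern, with hypothetical names, assuming every constructor path really does assign the field:

// Hypothetical sketch: the inline "= DEFAULT_RATIO" would be a redundant
// double write, because every constructor path assigns the field anyway.
abstract class Policy {
  static final double DEFAULT_RATIO = 0.1;

  protected double ratio; // no inline initializer needed

  protected Policy() {
    this(DEFAULT_RATIO);
  }

  protected Policy(double ratio) {
    this.ratio = ratio; // the single authoritative assignment
  }
}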
@@ -103,7 +103,7 @@ public abstract class VectorizationProvider {
// visible for tests
static VectorizationProvider lookup(boolean testMode) {
final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 20 && runtimeVersion <= 21) {
if (runtimeVersion >= 20 && runtimeVersion <= 22) {
// is locale sane (only buggy in Java 20)
if (isAffectedByJDK8301190()) {
LOG.warning(
@@ -169,9 +169,9 @@ public abstract class VectorizationProvider {
} catch (ClassNotFoundException cnfe) {
throw new LinkageError("PanamaVectorizationProvider is missing in Lucene JAR file", cnfe);
}
} else if (runtimeVersion >= 22) {
} else if (runtimeVersion >= 23) {
LOG.warning(
"You are running with Java 22 or later. To make full use of the Vector API, please update Apache Lucene.");
"You are running with Java 23 or later. To make full use of the Vector API, please update Apache Lucene.");
} else if (lookupVectorModule().isPresent()) {
LOG.warning(
"Java vector incubator module was enabled by command line flags, but your Java version is too old: "
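The supported window for the Panama provider widens to JDK 22, and the "too new" warning moves to JDK 23 accordingly; the incubating Vector API is only binary-compatible with the exact JDK releases a provider was compiled against. A self-contained sketch of this gating style (the provider classes are hypothetical stand-ins for the real ones):

public class ProviderLookup {
  interface Provider {}

  static final class PanamaProvider implements Provider {}

  static final class ScalarProvider implements Provider {}

  static Provider lookup() {
    final int runtimeVersion = Runtime.version().feature();
    if (runtimeVersion >= 20 && runtimeVersion <= 22) {
      // only the JDK range this build was compiled and tested against
      return new PanamaProvider();
    } else if (runtimeVersion >= 23) {
      System.err.println("JDK is newer than this build supports; using the scalar fallback.");
    }
    return new ScalarProvider(); // always-safe fallback
  }
}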
@@ -120,7 +120,7 @@ final class WANDScorer extends Scorer {
private final int scalingFactor;
// scaled min competitive score
private long minCompetitiveScore = 0;
private long minCompetitiveScore;

private final Scorer[] allScorers;

@@ -89,7 +89,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
private boolean singleSort;

/** Whether this comparator is allowed to skip documents. */
private boolean canSkipDocuments = true;
private boolean canSkipDocuments;

/** Whether the collector is done with counting hits so that we can start skipping documents. */
private boolean hitsThresholdReached = false;

@@ -346,7 +346,7 @@ public class MMapDirectory extends FSDirectory {
}
final var lookup = MethodHandles.lookup();
final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 19 && runtimeVersion <= 21) {
if (runtimeVersion >= 19) {
try {
final var cls = lookup.findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider");
// we use method handles, so we do not need to deal with setAccessible as we have private
@@ -366,9 +366,6 @@ public class MMapDirectory extends FSDirectory {
throw new LinkageError(
"MemorySegmentIndexInputProvider is missing in Lucene JAR file", cnfe);
}
} else if (runtimeVersion >= 22) {
LOG.warning(
"You are running with Java 22 or later. To make full use of MMapDirectory, please update Apache Lucene.");
}
return new MappedByteBufferIndexInputProvider();
}
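Since every JDK from 19 on can now use the MemorySegment implementation, the upper bound and the matching "Java 22 or later" warning become dead code and are removed. The provider class is still resolved reflectively so that older runtimes never load it; a sketch of such a probe (the helper itself is hypothetical, the class name is the real one from the hunk above):

import java.lang.invoke.MethodHandles;

// Hypothetical helper: findClass fails cleanly if the provider class is
// absent from the JAR or cannot link against this runtime.
static boolean hasMemorySegmentProvider() {
  try {
    MethodHandles.lookup()
        .findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider");
    return true;
  } catch (ClassNotFoundException | IllegalAccessException e) {
    return false;
  }
}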
@ -130,17 +130,20 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Interprets stored bytes as UTF8 bytes, returning the resulting string */
|
||||
/**
|
||||
* Interprets stored bytes as UTF-8 bytes, returning the resulting string. May throw an {@link
|
||||
* AssertionError} or a {@link RuntimeException} if the data is not well-formed UTF-8.
|
||||
*/
|
||||
public String utf8ToString() {
|
||||
final char[] ref = new char[length];
|
||||
final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||
return new String(ref, 0, len);
|
||||
}
|
||||
|
||||
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
|
||||
/** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
StringBuilder sb = new StringBuilder(2 + 3 * length);
|
||||
sb.append('[');
|
||||
final int end = offset + length;
|
||||
for (int i = offset; i < end; i++) {
|
||||
|
|
|
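The sharpened javadoc is easy to pin down with an example; note that the new hex format drops the "0x" prefixes, matching what bytesRefToString (introduced below) falls back to. A quick illustration, with the expected output in comments:

import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.BytesRef;

BytesRef br = new BytesRef("lucene");
System.out.println(br.utf8ToString()); // lucene
System.out.println(br); // [6c 75 63 65 6e 65]

// Truncating mid code point leaves malformed UTF-8; utf8ToString may then
// throw, which is exactly what the updated javadoc warns about.
BytesRef smiley = new BytesRef("\uD83D\uDE00".getBytes(StandardCharsets.UTF_8));
smiley.length = 2; // cut the 4-byte code point in half
System.out.println(smiley); // [f0 9f] -- toString never throws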
@@ -253,7 +253,7 @@ public class RoaringDocIdSet extends DocIdSet {
private class Iterator extends DocIdSetIterator {

int block;
DocIdSetIterator sub = null;
DocIdSetIterator sub;
int doc;

Iterator() throws IOException {

@@ -32,6 +32,10 @@ public final class ToStringUtils {

private static final char[] HEX = "0123456789abcdef".toCharArray();

/**
* Unlike {@link Long#toHexString(long)} returns a String with a "0x" prefix and all the leading
* zeros.
*/
public static String longHex(long x) {
char[] asHex = new char[16];
for (int i = 16; --i >= 0; x >>>= 4) {
@@ -39,4 +43,31 @@ public final class ToStringUtils {
}
return "0x" + new String(asHex);
}

/**
* Builds a String with both textual representation of the {@link BytesRef} data and the bytes hex
* values. For example: {@code "hello [68 65 6c 6c 6f]"}. If the content is not a valid UTF-8
* sequence, only the bytes hex values are returned, as per {@link BytesRef#toString()}.
*/
@SuppressWarnings("unused")
public static String bytesRefToString(BytesRef b) {
if (b == null) {
return "null";
}
try {
return b.utf8ToString() + " " + b;
} catch (AssertionError | RuntimeException t) {
// If BytesRef isn't actually UTF-8, or it's e.g. a prefix of UTF-8
// that ends mid-unicode-char, we fall back to hex:
return b.toString();
}
}

public static String bytesRefToString(BytesRefBuilder b) {
return bytesRefToString(b.get());
}

public static String bytesRefToString(byte[] b) {
return bytesRefToString(new BytesRef(b));
}
}
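This new public utility consolidates the many private brToString copies deleted throughout the hunks above, and its AssertionError | RuntimeException catch is deliberately narrower than the old catch (Throwable t). A short usage sketch, with the expected output per the javadoc shown in comments:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

System.out.println(ToStringUtils.bytesRefToString(new BytesRef("hello")));
// hello [68 65 6c 6c 6f]

// Malformed UTF-8 falls back to BytesRef#toString(), i.e. hex only:
byte[] broken = {(byte) 0xf0, (byte) 0x9f};
System.out.println(ToStringUtils.bytesRefToString(broken)); // [f0 9f]

System.out.println(ToStringUtils.bytesRefToString((BytesRef) null)); // null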
@@ -133,10 +133,17 @@ public final class Version {
/**
* Match settings and bugs in Lucene's 9.10.0 release.
*
* @deprecated Use latest
* @deprecated (9.11.0) Use latest
*/
@Deprecated public static final Version LUCENE_9_10_0 = new Version(9, 10, 0);

/**
* Match settings and bugs in Lucene's 9.11.0 release.
*
* @deprecated Use latest
*/
@Deprecated public static final Version LUCENE_9_11_0 = new Version(9, 11, 0);

/**
* Match settings and bugs in Lucene's 10.0.0 release.
*

@@ -31,7 +31,7 @@ import org.apache.lucene.util.IntsRef;
*/
public class LimitedFiniteStringsIterator extends FiniteStringsIterator {
/** Maximum number of finite strings to create. */
private int limit = Integer.MAX_VALUE;
private final int limit;

/** Number of generated finite strings. */
private int count = 0;

@@ -108,10 +108,16 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces
if (this.curSegment == null) {
return new AlreadyClosedException("Already closed: " + this);
}
// ISE can be thrown by MemorySegment and contains "closed" in message:
// in Java 22 or later we can check the isAlive status of all segments
// (see https://bugs.openjdk.org/browse/JDK-8310644):
if (Arrays.stream(segments).allMatch(s -> s.scope().isAlive()) == false) {
return new AlreadyClosedException("Already closed: " + this);
}
// fallback for Java 21: ISE can be thrown by MemorySegment and contains "closed" in message:
if (e instanceof IllegalStateException
&& e.getMessage() != null
&& e.getMessage().contains("closed")) {
// the check is on message only, so preserve original cause for debugging:
return new AlreadyClosedException("Already closed: " + this, e);
}
// otherwise rethrow unmodified NPE/ISE (as it possibly a bug with passing a null parameter to
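The preferred detection path now asks each segment whether its scope is still alive, and the fragile message-sniffing branch remains only as the Java 21 fallback. A minimal sketch of why the liveness check works as a closed-detector (requires Java 22's final java.lang.foreign API; names as in the JDK):

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;

Arena arena = Arena.ofShared();
MemorySegment segment = arena.allocate(16);
System.out.println(segment.scope().isAlive()); // true

arena.close();
// every segment allocated from the closed arena now reports a dead scope:
System.out.println(segment.scope().isAlive()); // false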
@ -33,7 +33,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
|
|||
public MemorySegmentIndexInputProvider() {
|
||||
var log = Logger.getLogger(getClass().getName());
|
||||
log.info(
|
||||
"Using MemorySegmentIndexInput with Java 21; to disable start with -D"
|
||||
"Using MemorySegmentIndexInput with Java 21 or later; to disable start with -D"
|
||||
+ MMapDirectory.ENABLE_MEMORY_SEGMENTS_SYSPROP
|
||||
+ "=false");
|
||||
}
|
||||
|
|
|
@ -154,6 +154,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
|
|||
IndexWriter writer =
|
||||
new IndexWriter(
|
||||
directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp));
|
||||
TestUtil.reduceOpenFiles(writer);
|
||||
|
||||
Document doc = new Document();
|
||||
Field idField = newStringField("id", "", Field.Store.YES);
|
||||
|
@ -779,6 +780,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
|
|||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
iwc.setMaxBufferedDocs(2);
|
||||
iwc.setUseCompoundFile(true); // reduce open files
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
int numDocs = TEST_NIGHTLY ? 1000 : 100;
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
|
|
|
@ -67,7 +67,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
|
|||
assertAllBetween(last2, j, bd2, ids);
|
||||
last2 = j + 1;
|
||||
}
|
||||
assertEquals(j + 1, queue.numGlobalTermDeletes());
|
||||
assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
|
||||
}
|
||||
assertEquals(uniqueValues, bd1.deleteTerms.keySet());
|
||||
assertEquals(uniqueValues, bd2.deleteTerms.keySet());
|
||||
|
|
|
@ -258,6 +258,7 @@ public class TestIndexWriterThreadsToSegments extends LuceneTestCase {
|
|||
IndexWriterConfig iwc = newIndexWriterConfig(r, new MockAnalyzer(r));
|
||||
iwc.setCommitOnClose(false);
|
||||
final RandomIndexWriter w = new RandomIndexWriter(r, dir, iwc);
|
||||
TestUtil.reduceOpenFiles(w.w);
|
||||
w.setDoRandomForceMerge(false);
|
||||
Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 30)];
|
||||
final CountDownLatch startingGun = new CountDownLatch(1);
|
||||
|
|
|
@ -48,9 +48,9 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
|
|||
|
||||
public void testCorrectImplementation() {
|
||||
final int runtimeVersion = Runtime.version().feature();
|
||||
if (runtimeVersion >= 19 && runtimeVersion <= 21) {
|
||||
if (runtimeVersion >= 19) {
|
||||
assertTrue(
|
||||
"on Java 19, 20, and 21 we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
|
||||
"on Java 19 or later we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
|
||||
isMemorySegmentImpl());
|
||||
} else {
|
||||
assertSame(MappedByteBufferIndexInputProvider.class, MMapDirectory.PROVIDER.getClass());
|
||||
|
|
|
@ -820,7 +820,7 @@ public final class JavascriptCompiler {
   */
  public static final Map<String, MethodHandle> DEFAULT_FUNCTIONS = loadDefaultFunctions();

- private static final Map<String, MethodHandle> loadDefaultFunctions() {
+ private static Map<String, MethodHandle> loadDefaultFunctions() {
    final Map<String, MethodHandle> map = new HashMap<>();
    final Lookup publicLookup = MethodHandles.publicLookup();
    try {

@ -852,7 +852,7 @@ public final class JavascriptCompiler {
    } catch (ReflectiveOperationException | IOException e) {
      throw new Error("Cannot resolve function", e);
    }
-   return Map.copyOf(map);
+   return Collections.unmodifiableMap(map);
  }

  /** Check Method signature for compatibility. */
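The second hunk swaps Map.copyOf for Collections.unmodifiableMap. The observable difference: Map.copyOf builds a detached immutable snapshot (and rejects null keys and values), while unmodifiableMap is a read-only view over the original map. A small standalone sketch of that distinction:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    public class ImmutableVsView {
      public static void main(String[] args) {
        Map<String, Integer> source = new HashMap<>();
        source.put("a", 1);

        Map<String, Integer> copy = Map.copyOf(source);                  // detached snapshot
        Map<String, Integer> view = Collections.unmodifiableMap(source); // live read-only view

        source.put("b", 2);
        System.out.println(copy.containsKey("b")); // false: snapshots never see later changes
        System.out.println(view.containsKey("b")); // true: the view reflects the backing map
      }
    }

In loadDefaultFunctions the backing HashMap never escapes the method, so the view is effectively immutable while skipping the extra copy Map.copyOf would make; presumably that is the motivation.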
@ -123,7 +123,7 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
    private final PostingsEnum postingsEnum; // with offsets
    private final int freq;

-   private int posCounter = -1;
+   private int posCounter;

    public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException {
      this.term = Objects.requireNonNull(term);
@ -23,6 +23,9 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;

@ -208,21 +211,23 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
      IndexSearcher searcher = new IndexSearcher(reader);
      BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
      Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter);
-     assertScorerResults(searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"});
+     assertScorerResults(
+         searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"}, 2);

      query = getParentJoinKnnQuery("field", new float[] {6, 6}, null, 3, parentFilter);
      assertScorerResults(
-         searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"});
+         searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);
      query =
          getParentJoinKnnQuery(
              "field", new float[] {6, 6}, new MatchAllDocsQuery(), 20, parentFilter);
      assertScorerResults(
-         searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"});
+         searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);

      query =
          getParentJoinKnnQuery(
              "field", new float[] {6, 6}, new MatchAllDocsQuery(), 1, parentFilter);
-     assertScorerResults(searcher, query, new float[] {1f / 3f}, new String[] {"5"});
+     assertScorerResults(
+         searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 1);
    }
  }
}

@ -324,7 +329,8 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
      assertEquals(expectedId, actualId);
  }

- void assertScorerResults(IndexSearcher searcher, Query query, float[] scores, String[] ids)
+ void assertScorerResults(
+     IndexSearcher searcher, Query query, float[] possibleScores, String[] possibleIds, int count)
      throws IOException {
    IndexReader reader = searcher.getIndexReader();
    Query rewritten = query.rewrite(searcher);

@ -334,11 +340,16 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
    assertEquals(-1, scorer.docID());
    expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
    DocIdSetIterator it = scorer.iterator();
-   for (int i = 0; i < scores.length; i++) {
+   Map<String, Float> idToScore =
+       IntStream.range(0, possibleIds.length)
+           .boxed()
+           .collect(Collectors.toMap(i -> possibleIds[i], i -> possibleScores[i]));
+   for (int i = 0; i < count; i++) {
      int docId = it.nextDoc();
      assertNotEquals(NO_MORE_DOCS, docId);
-     assertEquals(scores[i], scorer.score(), 0.0001);
-     assertIdMatches(reader, ids[i], docId);
+     String actualId = reader.storedFields().document(docId).get("id");
+     assertTrue(idToScore.containsKey(actualId));
+     assertEquals(idToScore.get(actualId), scorer.score(), 0.0001);
    }
  }
}
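The reworked assertScorerResults no longer assumes a fixed hit order: it zips the expected ids and scores into a lookup and validates each returned document against it. The same trick in isolation, with illustrative data:

    import java.util.Map;
    import java.util.stream.Collectors;
    import java.util.stream.IntStream;

    public class OrderInsensitiveCheck {
      public static void main(String[] args) {
        String[] possibleIds = {"2", "7"};
        float[] possibleScores = {1f, 1f / 51f};
        // Zip two parallel arrays into a lookup keyed by id.
        Map<String, Float> idToScore =
            IntStream.range(0, possibleIds.length)
                .boxed()
                .collect(Collectors.toMap(i -> possibleIds[i], i -> possibleScores[i]));
        System.out.println(idToScore.get("7")); // 0.019607843
      }
    }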
@ -81,7 +81,8 @@ public class TestParentBlockJoinFloatKnnVectorQuery extends ParentBlockJoinKnnVe
      float score1 =
          (float) ((1 + (2 * 2 + 3 * 4) / Math.sqrt((2 * 2 + 3 * 3) * (2 * 2 + 4 * 4))) / 2);

-     assertScorerResults(searcher, query, new float[] {score0, score1}, new String[] {"1", "2"});
+     assertScorerResults(
+         searcher, query, new float[] {score0, score1}, new String[] {"1", "2"}, 2);
    }
  }
}
@ -239,7 +239,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
  }

  // Test data - format is artist, song, weeks at top of charts
- private static String[] hitsOfThe60s = {
+ private static final String[] hitsOfThe60s = {
    "1966\tSPENCER DAVIS GROUP\tKEEP ON RUNNING\t1",
    "1966\tOVERLANDERS\tMICHELLE\t3",
    "1966\tNANCY SINATRA\tTHESE BOOTS ARE MADE FOR WALKIN'\t4",

@ -317,7 +317,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
    "1969\tARCHIES\tSUGAR, SUGAR\t4"
  };

- private static final Map<String, Record> parsedRecords = new HashMap<String, Record>();
+ private static final Map<String, Record> parsedRecords = new HashMap<>();
  private Directory dir;
  private IndexReader reader;
  private IndexSearcher searcher;

@ -452,7 +452,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {

  private int getMaxNumRecordsPerArtist(ScoreDoc[] sd) throws IOException {
    int result = 0;
-   HashMap<String, Integer> artistCounts = new HashMap<String, Integer>();
+   HashMap<String, Integer> artistCounts = new HashMap<>();
    for (int i = 0; i < sd.length; i++) {
      Document doc = reader.storedFields().document(sd[i].doc);
      Record record = parsedRecords.get(doc.get("id"));
@ -17,7 +17,9 @@
package org.apache.lucene.queries.payloads;

import java.nio.charset.StandardCharsets;
+import java.util.Collections;
import java.util.EnumMap;
+import java.util.Map;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
import org.apache.lucene.util.ArrayUtil;

@ -30,32 +32,45 @@ import org.apache.lucene.util.BytesRef;
 */
public class PayloadMatcherFactory {

- private static final EnumMap<PayloadType, EnumMap<MatchOperation, PayloadMatcher>>
+ private static final Map<PayloadType, Map<MatchOperation, PayloadMatcher>>
      payloadCheckerOpTypeMap;

  static {
-   payloadCheckerOpTypeMap = new EnumMap<>(PayloadType.class);
    // ints
-   EnumMap<MatchOperation, PayloadMatcher> intCheckers = new EnumMap<>(MatchOperation.class);
-   intCheckers.put(MatchOperation.LT, new LTIntPayloadMatcher());
-   intCheckers.put(MatchOperation.LTE, new LTEIntPayloadMatcher());
-   intCheckers.put(MatchOperation.GT, new GTIntPayloadMatcher());
-   intCheckers.put(MatchOperation.GTE, new GTEIntPayloadMatcher());
-   EnumMap<MatchOperation, PayloadMatcher> floatCheckers = new EnumMap<>(MatchOperation.class);
-   floatCheckers.put(MatchOperation.LT, new LTFloatPayloadMatcher());
-   floatCheckers.put(MatchOperation.LTE, new LTEFloatPayloadMatcher());
-   floatCheckers.put(MatchOperation.GT, new GTFloatPayloadMatcher());
-   floatCheckers.put(MatchOperation.GTE, new GTEFloatPayloadMatcher());
+   Map<MatchOperation, PayloadMatcher> intCheckers =
+       Collections.unmodifiableMap(
+           new EnumMap<>(
+               Map.of(
+                   MatchOperation.LT, new LTIntPayloadMatcher(),
+                   MatchOperation.LTE, new LTEIntPayloadMatcher(),
+                   MatchOperation.GT, new GTIntPayloadMatcher(),
+                   MatchOperation.GTE, new GTEIntPayloadMatcher())));
+   // floats
+   Map<MatchOperation, PayloadMatcher> floatCheckers =
+       Collections.unmodifiableMap(
+           new EnumMap<>(
+               Map.of(
+                   MatchOperation.LT, new LTFloatPayloadMatcher(),
+                   MatchOperation.LTE, new LTEFloatPayloadMatcher(),
+                   MatchOperation.GT, new GTFloatPayloadMatcher(),
+                   MatchOperation.GTE, new GTEFloatPayloadMatcher())));
    // strings
-   EnumMap<MatchOperation, PayloadMatcher> stringCheckers = new EnumMap<>(MatchOperation.class);
-   stringCheckers.put(MatchOperation.LT, new LTStringPayloadMatcher());
-   stringCheckers.put(MatchOperation.LTE, new LTEStringPayloadMatcher());
-   stringCheckers.put(MatchOperation.GT, new GTStringPayloadMatcher());
-   stringCheckers.put(MatchOperation.GTE, new GTEStringPayloadMatcher());
+   Map<MatchOperation, PayloadMatcher> stringCheckers =
+       Collections.unmodifiableMap(
+           new EnumMap<>(
+               Map.of(
+                   MatchOperation.LT, new LTStringPayloadMatcher(),
+                   MatchOperation.LTE, new LTEStringPayloadMatcher(),
+                   MatchOperation.GT, new GTStringPayloadMatcher(),
+                   MatchOperation.GTE, new GTEStringPayloadMatcher())));
    // load the matcher maps per payload type
-   payloadCheckerOpTypeMap.put(PayloadType.INT, intCheckers);
-   payloadCheckerOpTypeMap.put(PayloadType.FLOAT, floatCheckers);
-   payloadCheckerOpTypeMap.put(PayloadType.STRING, stringCheckers);
+   payloadCheckerOpTypeMap =
+       Collections.unmodifiableMap(
+           new EnumMap<>(
+               Map.of(
+                   PayloadType.INT, intCheckers,
+                   PayloadType.FLOAT, floatCheckers,
+                   PayloadType.STRING, stringCheckers)));
  }

  /**

@ -75,7 +90,7 @@ public class PayloadMatcherFactory {
      return new EQPayloadMatcher();
    }
    // otherwise, we need to pay attention to the payload type and operation
-   EnumMap<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
+   Map<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
    if (opMap != null) {
      return opMap.get(op);
    } else {
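Each lookup is now built as Collections.unmodifiableMap(new EnumMap<>(Map.of(...))): Map.of contributes terse immutable entries, the EnumMap copy restores array-backed enum-keyed access, and the unmodifiable wrapper keeps the result read-only. A self-contained sketch of the pattern; the enum and values are illustrative:

    import java.util.Collections;
    import java.util.EnumMap;
    import java.util.Map;

    public class UnmodifiableEnumMapPattern {
      enum Op { LT, LTE, GT, GTE }

      // Map.of gives terse entries; EnumMap re-keys them into an array-backed table;
      // unmodifiableMap prevents later mutation through this reference.
      static final Map<Op, String> LABELS =
          Collections.unmodifiableMap(
              new EnumMap<>(
                  Map.of(
                      Op.LT, "<",
                      Op.LTE, "<=",
                      Op.GT, ">",
                      Op.GTE, ">=")));

      public static void main(String[] args) {
        System.out.println(LABELS.get(Op.GTE)); // ">="
      }
    }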
@ -269,10 +269,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
            MatchOperation.GT);
    checkHits(
        stringGT2,
-       new int[] { // spotless:off
-         155, 255, 355, 455, 555, 655, 755, 855, 955,
-         1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
-       }); // spotless:on
+       alignedIntArray(
+           """
+            155, 255, 355, 455, 555, 655, 755, 855, 955,
+           1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
+           """));
    SpanQuery stringGTE2 =
        new SpanPayloadCheckQuery(
            new SpanNearQuery(new SpanQuery[] {termFifty, termFive}, 0, true),

@ -281,10 +282,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
            MatchOperation.GTE);
    checkHits(
        stringGTE2,
-       new int[] { // spotless:off
-         55, 155, 255, 355, 455, 555, 655, 755, 855, 955,
-         1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
-       }); // spotless:on
+       alignedIntArray(
+           """
+             55, 155, 255, 355, 455, 555, 655, 755, 855, 955,
+           1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
+           """));

    SpanQuery stringLT2 =
        new SpanPayloadCheckQuery(

@ -306,6 +308,23 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
    // sets "upto" back to zero between SpanOrQuery subclauses.
  }

+ /**
+  * Parses a comma-separated array of integers, ignoring white space around them. This allows for
+  * arbitrary alignment of integers in the source string to convey additional information about
+  * their mutual relations. For example:
+  *
+  * <pre>{@code
+  * var ints =
+  *     """
+  *      1,  2,  3,
+  *     11, 12, 13
+  *     """
+  * }</pre>
+  */
+ private static int[] alignedIntArray(String ints) {
+   return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
+ }

  public void testUnorderedPayloadChecks() throws Exception {

    SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));
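alignedIntArray lets the expected hit lists keep their column alignment without spotless:off/on fences. A quick usage sketch, with the helper copied from the hunk above:

    import java.util.Arrays;

    public class AlignedIntArrayDemo {
      // Copied from the hunk above: split on commas, trim padding, parse.
      private static int[] alignedIntArray(String ints) {
        return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
      }

      public static void main(String[] args) {
        int[] hits =
            alignedIntArray(
                """
                 1,  2,  3,
                11, 12, 13
                """);
        System.out.println(Arrays.toString(hits)); // [1, 2, 3, 11, 12, 13]
      }
    }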
@ -30,7 +30,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
 */
public class BoostQueryNode extends QueryNodeImpl {

- private float value = 0;
+ private float value;

  /**
   * Constructs a boost node
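This and the following query-node hunks drop field initializers. Where the dropped initializer restated Java's defaults (0, false, null), nothing changes: the JVM zero-initializes fields. Where it was a real value, such as Modifier.MOD_NONE in the next hunk, the change relies on the constructor assigning the field; ProximityQueryNode even marks inorder final to enforce that. A two-line illustration of the default-value rule:

    public class Defaults {
      int slop;      // implicitly 0, identical to "int slop = 0;"
      Object schema; // implicitly null, identical to "Object schema = null;"

      public static void main(String[] args) {
        Defaults d = new Defaults();
        System.out.println(d.slop + " " + d.schema); // 0 null
      }
    }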
@ -84,7 +84,7 @@ public class ModifierQueryNode extends QueryNodeImpl {
    }
  }

- private Modifier modifier = Modifier.MOD_NONE;
+ private Modifier modifier;

  /**
   * Used to store the modifier value on the original query string
@ -25,9 +25,9 @@ import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
 */
public class OpaqueQueryNode extends QueryNodeImpl {

- private CharSequence schema = null;
+ private CharSequence schema;

- private CharSequence value = null;
+ private CharSequence value;

  /**
   * @param schema - schema identifier
@ -41,7 +41,7 @@ public class PathQueryNode extends QueryNodeImpl {

  /** Term text with a beginning and end position */
  public static class QueryText implements Cloneable {
-   CharSequence value = null;
+   CharSequence value;

    /** != null The term's begin position. */
    int begin;

@ -97,7 +97,7 @@ public class PathQueryNode extends QueryNodeImpl {
    }
  }

- private List<QueryText> values = null;
+ private List<QueryText> values;

  /**
   * @param pathElements - List of QueryText objects
@ -25,7 +25,7 @@ import org.apache.lucene.search.PhraseQuery; // javadocs
/** Query node for {@link PhraseQuery}'s slop factor. */
public class PhraseSlopQueryNode extends QueryNodeImpl implements FieldableNode {

- private int value = 0;
+ private int value;

  /**
   * @exception QueryNodeError throw in overridden method to disallow
@ -57,9 +57,9 @@ public class ProximityQueryNode extends BooleanQueryNode {

  /** utility class containing the distance condition and number */
  public static class ProximityType {
-   int pDistance = 0;
+   int pDistance;

-   Type pType = null;
+   Type pType;

    public ProximityType(Type type) {
      this(type, 0);

@ -71,10 +71,10 @@ public class ProximityQueryNode extends BooleanQueryNode {
    }
  }

- private Type proximityType = Type.SENTENCE;
+ private Type proximityType;
  private int distance = -1;
- private boolean inorder = false;
- private CharSequence field = null;
+ private final boolean inorder;
+ private CharSequence field;

  /**
   * @param clauses - QueryNode children
@ -32,7 +32,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
 */
public class SlopQueryNode extends QueryNodeImpl implements FieldableNode {

- private int value = 0;
+ private int value;

  /**
   * @param query - QueryNode Tree with the phrase
@ -32,10 +32,11 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable

  @Override
  public String toString() {
-   if (getChildren() == null || getChildren().size() == 0) return "<tokenizedphrase/>";
+   List<QueryNode> children = getChildren();
+   if (children == null || children.isEmpty()) return "<tokenizedphrase/>";
    StringBuilder sb = new StringBuilder();
-   sb.append("<tokenizedtphrase>");
-   for (QueryNode child : getChildren()) {
+   sb.append("<tokenizedphrase>");
+   for (QueryNode child : children) {
      sb.append("\n");
      sb.append(child.toString());
    }

@ -46,16 +47,15 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
  // This text representation is not re-parseable
  @Override
  public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
-   if (getChildren() == null || getChildren().size() == 0) return "";
-
+   List<QueryNode> children = getChildren();
+   if (children == null || children.isEmpty()) return "";
    StringBuilder sb = new StringBuilder();
    String filler = "";
-   for (QueryNode child : getChildren()) {
+   for (QueryNode child : children) {
      sb.append(filler).append(child.toQueryString(escapeSyntaxParser));
      filler = ",";
    }

-   return "[TP[" + sb.toString() + "]]";
+   return "[TP[" + sb + "]]";
  }

  @Override

@ -70,27 +70,25 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
  @Override
  public CharSequence getField() {
    List<QueryNode> children = getChildren();

-   if (children == null || children.size() == 0) {
-     return null;
-
-   } else {
-     return ((FieldableNode) children.get(0)).getField();
+   if (children != null) {
+     for (QueryNode child : children) {
+       if (child instanceof FieldableNode) {
+         return ((FieldableNode) child).getField();
+       }
      }
    }
+   return null;
  }

  @Override
  public void setField(CharSequence fieldName) {
    List<QueryNode> children = getChildren();

    if (children != null) {
-
-     for (QueryNode child : getChildren()) {
-
+     for (QueryNode child : children) {
        if (child instanceof FieldableNode) {
          ((FieldableNode) child).setField(fieldName);
        }
      }
    }
  }
-} // end class MultitermQueryNode
+}
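getField now scans all children for the first FieldableNode instead of casting child 0 unconditionally, removing a latent ClassCastException. With Java 16+ pattern matching the branch could be even tighter; a hedged sketch with stand-in types, not the commit's code (which keeps the classic cast):

    import java.util.List;

    public class FirstFieldable {
      interface Node {} // illustrative stand-ins for QueryNode / FieldableNode
      interface FieldableNode extends Node { CharSequence getField(); }

      static CharSequence firstField(List<Node> children) {
        if (children != null) {
          for (Node child : children) {
            // Pattern-matching instanceof binds and casts in one step (Java 16+).
            if (child instanceof FieldableNode fieldable) {
              return fieldable.getField();
            }
          }
        }
        return null;
      }
    }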
@ -34,7 +34,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
 */
public class FieldBoostMapFCListener implements FieldConfigListener {

- private QueryConfigHandler config = null;
+ private final QueryConfigHandler config;

  public FieldBoostMapFCListener(QueryConfigHandler config) {
    this.config = config;
@ -36,7 +36,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
 */
public class FieldDateResolutionFCListener implements FieldConfigListener {

- private QueryConfigHandler config = null;
+ private final QueryConfigHandler config;

  public FieldDateResolutionFCListener(QueryConfigHandler config) {
    this.config = config;
@ -30,6 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util;

@ -175,8 +176,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
    final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
    f.arc = arc;
    if (f.fpOrig == fp && f.nextEnt != -1) {
-     // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
-     // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+     // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
+     // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
      // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
      // term.length + " vs prefix=" + f.prefix);
      if (f.prefix > targetBeforeCurrentLength) {

@ -197,7 +198,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
      // final int sav = term.length;
      // term.length = length;
      // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-     // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+     // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
      // term.length = sav;
      // }
    }

@ -222,19 +223,6 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
    return seekExact(target, 0);
  }

- // for debugging
- @SuppressWarnings("unused")
- static String brToString(BytesRef b) {
-   try {
-     return b.utf8ToString() + " " + b;
-   } catch (Throwable t) {
-     // If BytesRef isn't actually UTF8, or it's eg a
-     // prefix of UTF8 that ends mid-unicode-char, we
-     // fallback to hex:
-     return b.toString();
-   }
- }
-
  /** Get the version of the currently seek'd term; only valid if we are positioned. */
  public long getVersion() {
    return ((IDVersionTermState) currentFrame.state).idVersion;

@ -258,8 +246,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {

    // if (DEBUG) {
    // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
-   // fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current="
-   // + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+   // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
+   // minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
+   // termExists + ") validIndexPrefix=" + validIndexPrefix);
    // printSeekState(System.out);
    // }

@ -460,8 +449,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
    }

    // if (DEBUG) {
-   // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
-   // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+   // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+   // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
    // targetBeforeCurrentLength + " termExists=" + termExists);
    // }

@ -492,7 +481,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
        term.setByteAt(targetUpto, (byte) targetLabel);
        term.setLength(1 + targetUpto);
        // if (DEBUG) {
-       // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+       // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
        // }
        return false;
      }

@ -520,10 +509,11 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
        // termExists = false;
        // }
        // if (DEBUG) {
-       // System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + "
-       // targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion +
-       // " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " +
-       // currentFrame.fp + " termExists=" + termExists);
+       // System.out.println(" FAST version NOT_FOUND term=" +
+       // ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
+       // " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
+       // validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
+       // " termExists=" + termExists);
        // }
        return false;
      }

@ -553,7 +543,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
      } else {
        // if (DEBUG) {
        // System.out.println(" got " + result + "; return NOT_FOUND term=" +
-       // brToString(term));
+       // ToStringUtils.bytesRefToString(term));
        // }
        return false;
      }

@ -604,7 +594,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
      termExists = false;
      term.setLength(targetUpto);
      // if (DEBUG) {
-     // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+     // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
      // }
      return false;
    }

@ -656,8 +646,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {

    // if (DEBUG) {
    // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
-   // target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
-   // termExists + ") validIndexPrefix= " + validIndexPrefix);
+   // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
+   // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
    // printSeekState();
    // }

@ -700,9 +690,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
      cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
      // if (DEBUG) {
      // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
-     // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
-     // vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
-     // " output=" + output);
+     // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+     // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
+     // + " output=" + output);
      // }
      if (cmp != 0) {
        break;

@ -814,8 +804,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
    }

    // if (DEBUG) {
-   // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
-   // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+   // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+   // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
    // targetBeforeCurrentLength);
    // }

@ -850,7 +840,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {

        if (next() != null) {
          // if (DEBUG) {
-         // System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
+         // System.out.println(" return NOT_FOUND term=" +
+         // ToStringUtils.bytesRefToString(term));
          // }
          return SeekStatus.NOT_FOUND;
        } else {

@ -861,7 +852,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
        }
      } else {
        // if (DEBUG) {
-       // System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
+       // System.out.println(" return " + result + " term=" +
+       // ToStringUtils.bytesRefToString(term));
        // }
        return result;
      }

@ -946,7 +938,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
              + " prefixLen="
              + f.prefix
              + " prefix="
-             + brToString(prefix)
+             + ToStringUtils.bytesRefToString(prefix)
              + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
              + " hasTerms="
              + f.hasTerms

@ -974,7 +966,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
              + " prefixLen="
              + f.prefix
              + " prefix="
-             + brToString(prefix)
+             + ToStringUtils.bytesRefToString(prefix)
              + " nextEnt="
              + f.nextEnt
              + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))

@ -1063,9 +1055,10 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {

    assert !eof;
    // if (DEBUG) {
-   // System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
-   // termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
-   // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+   // System.out.println("\nBTTR.next seg=" + segment + " term=" +
+   // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
+   // " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
+   // " validIndexPrefix=" + validIndexPrefix);
    // printSeekState();
    // }

@ -1129,8 +1122,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
            // currentFrame.hasTerms = true;
            currentFrame.loadBlock();
          } else {
-           // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
-           // currentFrame.ord=" + currentFrame.ord);
+           // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
+           // " currentFrame.ord=" + currentFrame.ord);
            return term.get();
          }
        }
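The thread running through this file is replacing the private brToString helper (deleted in the @ -222,19 hunk) with the shared ToStringUtils.bytesRefToString in every debug comment. A standalone sketch of the same decode-or-fall-back idea, mirroring the deleted helper rather than Lucene's actual utility:

    import org.apache.lucene.util.BytesRef;

    final class BytesRefDebug {
      // Mirrors the removed brToString: show UTF-8 text when possible, else the default form.
      static String toDebugString(BytesRef b) {
        try {
          return b.utf8ToString() + " " + b;
        } catch (Throwable t) {
          // Not valid UTF-8 (e.g. a prefix cut mid code point): fall back to BytesRef.toString().
          return b.toString();
        }
      }
    }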