mirror of https://github.com/apache/lucene.git

commit 07f4b5b19f: Merge branch 'main' into java_21
@@ -1,15 +0,0 @@
-name: "Set up caches"
-description: "Set up cached resources"
-
-runs:
-  using: "composite"
-  steps:
-    - name: Cache/Restore cached gradle files
-      uses: actions/cache@v2
-      with:
-        path: |
-          ~/.gradle/caches
-          ~/.gradle/jdks
-        key: ${{ runner.os }}-gradle-caches-${{ hashFiles('versions.lock', '**/gradle-wrapper.properties') }}
-        restore-keys: |
-          ${{ runner.os }}-gradle-caches-
@@ -0,0 +1,29 @@
+# This composite action is included in other workflows to have a shared setup
+# for java, gradle, caches, etc.
+
+name: Prepare Lucene build
+
+inputs:
+  java-version:
+    required: false
+    default: 17
+    description: "The default JDK version to set up."
+
+  java-distribution:
+    required: false
+    default: "temurin"
+    description: "The default JDK distribution type"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Java (${{ inputs.java-distribution }}, ${{ inputs.java-version }})"
+      uses: actions/setup-java@v4
+      with:
+        distribution: ${{ inputs.java-distribution }}
+        java-version: ${{ inputs.java-version }}
+        java-package: jdk
+
+    # This includes "smart" caching of the wrapper and dependencies.
+    - name: Set up Gradle
+      uses: gradle/actions/setup-gradle@v3
@@ -1,44 +0,0 @@
-name: Distribution tests
-
-on:
-  # Allow manual triggers for testing the action.
-  workflow_dispatch:
-
-  pull_request:
-    branches:
-      - 'main'
-
-  push:
-    branches:
-      - 'main'
-
-permissions:
-  contents: read # to fetch code (actions/checkout)
-
-jobs:
-  test:
-    name: Run distribution tests
-    timeout-minutes: 15
-
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        # we want to run the distribution tests on all major OSs, but it's occasionally too slow (or hangs or the forked process is not started at all..., not sure the cause) on windows.
-        #os: [ubuntu-latest, macos-latest, windows-latest]
-        os: [ubuntu-latest, macos-latest]
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: 21
-          java-package: jdk
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run all distribution tests including GUI tests (${{ matrix.os }})
-        run: ./gradlew -p lucene/distribution.tests test
@@ -1,84 +0,0 @@
-name: Gradle Precommit Checks
-
-on:
-  pull_request:
-    branches:
-      - '*'
-
-  push:
-    branches:
-      - main
-      - branch_9x
-
-permissions:
-  contents: read # to fetch code (actions/checkout)
-
-jobs:
-  # This runs all validation checks without tests.
-  checks:
-    name: gradle check -x test (JDK ${{ matrix.java }} on ${{ matrix.os }})
-    timeout-minutes: 15
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      matrix:
-        # Operating systems to run on.
-        os: [ubuntu-latest]
-        # Test JVMs.
-        java: [ '21' ]
-
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: ${{ matrix.java }}
-          java-package: jdk
-
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run gradle check (without tests)
-        run: ./gradlew check -x test -Ptask.times=true --max-workers 2
-
-  # This runs all tests without any other validation checks.
-  tests:
-    name: gradle test (JDK ${{ matrix.java }} on ${{ matrix.os }})
-    timeout-minutes: 30
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      matrix:
-        # Operating systems to run on.
-        # windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
-        # macos-latest: a tad slower than ubuntu and pretty much the same (?) so leaving out.
-        os: [ubuntu-latest]
-        # Test JVMs.
-        java: [ '21' ]
-
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: ${{ matrix.java }}
-          java-package: jdk
-
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run gradle tests
-        run: ./gradlew test "-Ptask.times=true" --max-workers 2
-
-      - name: Echo settings
-        run: cat gradle.properties
@@ -1,35 +0,0 @@
-name: Hunspell regression tests
-
-on:
-  pull_request:
-    branches:
-      - 'main'
-    paths:
-      - '.github/workflows/hunspell.yml'
-      - 'lucene/analysis/common/**'
-
-jobs:
-  test:
-    name: Run Hunspell regression tests
-    timeout-minutes: 15
-
-    runs-on: ubuntu-latest
-
-    env:
-      GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up JDK
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'temurin'
-          java-version: 21
-          java-package: jdk
-
-      - name: Prepare caches
-        uses: ./.github/actions/gradle-caches
-
-      - name: Run regular and regression tests
-        run: ./gradlew -p lucene/analysis/common check testRegressions
@@ -12,29 +12,29 @@ on:

 jobs:
   stale:
     runs-on: ubuntu-latest

     permissions:
       pull-requests: write

     steps:
       - uses: actions/stale@v5
         with:
           repo-token: ${{ secrets.GITHUB_TOKEN }}

           days-before-pr-stale: 14
           days-before-issue-stale: -1 # don't mark issues as stale
           exempt-draft-pr: true # don't mark draft PRs as stale

           days-before-close: -1 # don't close stale PRs/issues

           stale-pr-message: >
             This PR has not had activity in the past 2 weeks, labeling it as stale.
             If the PR is waiting for review, notify the dev@lucene.apache.org list.
             Thank you for your contribution!

           debug-only: false # turn on to run the action without applying changes
           operations-per-run: 500 # operations budget

           # The table shows the cost in operations of all combinations of stale / not-stale for a PR.
           # Processing a non-PR issue takes 0 operations, since we don't perform any action on it.
@@ -0,0 +1,67 @@
+name: "Run checks: all modules"
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+      - '*'
+
+  push:
+    branches:
+      - 'main'
+      - 'branch_9x'
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+
+# We split the workflow into two parallel jobs for efficiency:
+# one is running all validation checks without tests,
+# the other runs all tests without other validation checks.
+
+jobs:
+
+  # This runs all validation checks without tests.
+  checks:
+    name: checks without tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
+    timeout-minutes: 15
+
+    strategy:
+      matrix:
+        os: [ ubuntu-latest ]
+        java: [ '17' ]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run gradle check (without tests)
+        run: ./gradlew check -x test -Ptask.times=true --max-workers 2
+
+
+  # This runs all tests without any other validation checks.
+  tests:
+    name: tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
+    timeout-minutes: 30
+
+    strategy:
+      matrix:
+        # Operating systems to run on.
+        # windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
+        # macos-latest: a tad slower than ubuntu and pretty much the same (?) so leaving out.
+        os: [ ubuntu-latest ]
+        java: [ '17' ]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run gradle tests
+        run: ./gradlew test "-Ptask.times=true" --max-workers 2
+
+      - name: List automatically-initialized gradle.properties
+        run: cat gradle.properties
@@ -0,0 +1,37 @@
+name: "Run checks: module lucene/analysis/common"
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+      - 'main'
+      - 'branch_9x'
+    paths:
+      - '.github/workflows/run-checks-mod-analysis-common.yml'
+      - 'lucene/analysis/common/**'
+
+  push:
+    branches:
+      - 'main'
+      - 'branch_9x'
+    paths:
+      - '.github/workflows/run-checks-mod-analysis-common.yml'
+      - 'lucene/analysis/common/**'
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+
+jobs:
+  test:
+    name: Extra regression tests
+    timeout-minutes: 15
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run 'gradlew lucene/analysis/common check testRegressions'
+        run: ./gradlew -p lucene/analysis/common check testRegressions
@@ -0,0 +1,36 @@
+name: "Run checks: module lucene/distribution.tests"
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+      - 'main'
+      - 'branch_9x'
+
+  push:
+    branches:
+      - 'main'
+      - 'branch_9x'
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+
+jobs:
+  test:
+    timeout-minutes: 15
+
+    strategy:
+      matrix:
+        # ubuntu-latest is checked as part of run-checks-everything.yml
+        # windows-latest is slow and sometimes flaky.
+        os: [ macos-latest ]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/prepare-for-build
+
+      - name: Run 'gradlew lucene/distribution.tests test' (on ${{ matrix.os }})
+        run: ./gradlew -p lucene/distribution.tests test
@@ -23,23 +23,23 @@
          xmlns:asfext="http://projects.apache.org/ns/asfext#"
          xmlns:foaf="http://xmlns.com/foaf/0.1/">
   <!--
-    This file's canonical URL is: http://lucene.apache.org/core/doap.rdf
+    This file's canonical URL is: https://lucene.apache.org/core/doap.rdf

     Note that the canonical URL may redirect to other non-canonical locations.
   -->
-  <Project rdf:about="http://lucene.apache.org/core/">
+  <Project rdf:about="https://lucene.apache.org/core/">
     <created>2001-09-01</created>
     <license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
     <name>Apache Lucene Core</name>
-    <homepage rdf:resource="http://lucene.apache.org/core/" />
-    <asfext:pmc rdf:resource="http://lucene.apache.org" />
+    <homepage rdf:resource="https://lucene.apache.org/core/" />
+    <asfext:pmc rdf:resource="https://lucene.apache.org" />

     <shortdesc>Apache Lucene is a high-performance, full-featured text search engine library</shortdesc>
     <description>Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
     </description>
     <bug-database rdf:resource="https://github.com/apache/lucene/issues" />
-    <mailing-list rdf:resource="http://lucene.apache.org/core/discussion.html" />
-    <download-page rdf:resource="http://lucene.apache.org/core/downloads.html" />
+    <mailing-list rdf:resource="https://lucene.apache.org/core/discussion.html" />
+    <download-page rdf:resource="https://lucene.apache.org/core/downloads.html" />
     <programming-language>Java</programming-language>

     <!--
@@ -96,16 +96,15 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
     scriptutil.run('rm -rf %s' % bc_index_dir)
   print('done')

-def update_backcompat_tests(types, index_version, current_version):
-  print(' adding new indexes %s to backcompat tests...' % types, end='', flush=True)
+def update_backcompat_tests(index_version, current_version):
+  print(' adding new indexes to backcompat tests...', end='', flush=True)
   module = 'lucene/backward-codecs'
-  filename = '%s/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java' % module
+  filename = None
   if not current_version.is_back_compat_with(index_version):
-    matcher = re.compile(r'final String\[\] unsupportedNames = {|};')
-  elif 'sorted' in types:
-    matcher = re.compile(r'static final String\[\] oldSortedNames = {|};')
+    filename = '%s/src/test/org/apache/lucene/backward_index/unsupported_versions.txt' % module
   else:
-    matcher = re.compile(r'static final String\[\] oldNames = {|};')
+    filename = '%s/src/test/org/apache/lucene/backward_index/versions.txt' % module

   strip_dash_suffix_re = re.compile(r'-.*')

@@ -114,53 +113,25 @@ def update_backcompat_tests(types, index_version, current_version):
     x = re.sub(strip_dash_suffix_re, '', x) # remove the -suffix if any
     return scriptutil.Version.parse(x)

-  class Edit(object):
-    start = None
-
-    def __call__(self, buffer, match, line):
-      if self.start:
-        # find where this version should exist
-        i = len(buffer) - 1
-        previous_version_exists = not ('};' in line and buffer[-1].strip().endswith("{"))
-        if previous_version_exists: # Only look if there is a version here
-          v = find_version(buffer[i])
-          while i >= self.start and v.on_or_after(index_version):
-            i -= 1
-            v = find_version(buffer[i])
-          i += 1 # readjust since we skipped past by 1
-
-        # unfortunately python doesn't have a range remove from list...
-        # here we want to remove any previous references to the version we are adding
-        while i < len(buffer) and index_version.on_or_after(find_version(buffer[i])):
-          buffer.pop(i)
-
-        if i == len(buffer) and previous_version_exists and not buffer[-1].strip().endswith(","):
-          # add comma
-          buffer[-1] = buffer[-1].rstrip() + ",\n"
-
-        if previous_version_exists:
-          last = buffer[-1]
-          spaces = ' ' * (len(last) - len(last.lstrip()))
-        else:
-          spaces = ' '
-        for (j, t) in enumerate(types):
-          if t == 'sorted':
-            newline = spaces + ('"sorted.%s"') % index_version
-          else:
-            newline = spaces + ('"%s-%s"' % (index_version, t))
-          if j < len(types) - 1 or i < len(buffer):
-            newline += ','
-          buffer.insert(i, newline + '\n')
-          i += 1
-
-        buffer.append(line)
-        return True
-
-      if 'Names = {' in line:
-        self.start = len(buffer) # location of first index name
-      buffer.append(line)
-      return False
-
-  changed = scriptutil.update_file(filename, matcher, Edit())
+  def edit(buffer, match, line):
+    v = find_version(line)
+    changed = False
+    if v.on_or_after(index_version):
+      if not index_version.on_or_after(v):
+        buffer.append(('%s\n') % index_version)
+        changed = True
+    buffer.append(line)
+    return changed
+
+  def append(buffer, changed):
+    if changed:
+      return changed
+    if not buffer[len(buffer)-1].endswith('\n'):
+      buffer.append('\n')
+    buffer.append(('%s\n') % index_version)
+    return True
+
+  changed = scriptutil.update_file(filename, re.compile(r'.*'), edit, append)
   print('done' if changed else 'uptodate')

 def check_backcompat_tests():
@@ -251,9 +222,8 @@ def main():
     print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing')

   print('\nAdding backwards compatibility tests')
-  update_backcompat_tests(['cfs', 'nocfs'], c.version, current_version)
-  if should_make_sorted:
-    update_backcompat_tests(['sorted'], c.version, current_version)
+  update_backcompat_tests(c.version, current_version)

   print('\nTesting changes')
   check_backcompat_tests()
@@ -88,7 +88,7 @@ def run(cmd, cwd=None):
     raise e
   return output.decode('utf-8')

-def update_file(filename, line_re, edit):
+def update_file(filename, line_re, edit, append=None):
   infile = open(filename, 'r')
   buffer = []

@@ -102,6 +102,8 @@ def update_file(filename, line_re, edit):
         return False
       continue
     buffer.append(line)
+  if append:
+    changed = append(buffer, changed) # in the case did not change in edit but have an append function
   if not changed:
     raise Exception('Could not find %s in %s' % (line_re, filename))
   with open(filename, 'w') as f:
@@ -20,7 +20,7 @@ def resources = scriptResources(buildscript)
 configure(rootProject) {
   ext {
     // also change this in extractor tool: ExtractForeignAPI
-    vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21 ] as Set
+    vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21, JavaVersion.VERSION_22 ] as Set
   }
 }

@@ -28,7 +28,6 @@ configure(project(":lucene").subprojects) { prj ->

   spotless {
     java {
-      toggleOffOn() // obviously, only to be used sparingly.
       // TODO: Work out how to support multiple different header files (we have
       // classes in the codebase that have original headers). We currently use
       // Apache RAT to enforce headers so this is of lesser priority.
@@ -114,6 +114,8 @@ Improvements
 * GITHUB#12873: Expressions module now uses JEP 371 "Hidden Classes" with JEP 309
   "Dynamic Class-File Constants" to implement Javascript expressions. (Uwe Schindler)

+* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
+
 Optimizations
 ---------------------

@@ -176,6 +178,36 @@ Other

 * GITHUB#13001: Put Thread#sleep() on the list of forbidden APIs. (Shubham Chaudhary)

+======================== Lucene 9.11.0 =======================
+
+API Changes
+---------------------
+(No changes)
+
+New Features
+---------------------
+(No changes)
+
+Improvements
+---------------------
+
+* GITHUB#13092: `static final Map` constants have been made immutable (Dmitry Cherniachenko)
+
+* GITHUB#13041: TokenizedPhraseQueryNode code cleanup (Dmitry Cherniachenko)
+
+Optimizations
+---------------------
+(No changes)
+
+Bug Fixes
+---------------------
+(No changes)
+
+Other
+---------------------
+
+* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
+
 ======================== Lucene 9.10.0 =======================

 API Changes
@@ -200,6 +232,17 @@ New Features
 * GITHUB#12336: Index additional data per facet label in the taxonomy. (Shai Erera, Egor Potemkin, Mike McCandless,
   Stefan Vodita)

+* GITHUB#12706: Add support for the final release of Java foreign memory API in Java 22 (and later).
+  Lucene's MMapDirectory will now mmap Lucene indexes in chunks of 16 GiB (instead of 1 GiB) starting
+  from Java 19. Indexes closed while queries are running can no longer crash the JVM.
+  Support for vectorized implementations of VectorUtil based on jdk.incubator.vector APIs was added
+  for exactly Java 22. Therefore, applications started with command line parameter
+  "java --add-modules jdk.incubator.vector" will automatically use the new vectorized implementations
+  if running on a supported platform (Java 20/21/22 on x86 CPUs with AVX2 or later or ARM NEON CPUs).
+  This is an opt-in feature and requires explicit Java command line flag! When enabled, Lucene logs
+  a notice using java.util.logging. Please test thoroughly and report bugs/slowness to Lucene's mailing
+  list. (Uwe Schindler, Chris Hegarty)
+
 Improvements
 ---------------------

@@ -219,8 +262,6 @@ Improvements
   Tests are running with random byte order to ensure that the order does not affect correctness
   of code. Native order was enabled for LZ4 compression. (Uwe Schindler)

-* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
-
 Optimizations
 ---------------------

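The GITHUB#12706 entry above stresses that the vectorized VectorUtil code paths are opt-in: the JVM must be started with `--add-modules jdk.incubator.vector`. As a purely illustrative aside (the class below is hypothetical and is not part of this commit or of Lucene's API), an application can verify at startup whether the incubator module was actually enabled:

```java
// Hypothetical startup check: reports whether the incubating vector module
// was enabled via "java --add-modules jdk.incubator.vector". ModuleLayer is
// standard JDK API, so this compiles without the incubator module present.
public class VectorModuleCheck {
  public static void main(String[] args) {
    boolean enabled = ModuleLayer.boot().findModule("jdk.incubator.vector").isPresent();
    System.out.println(enabled
        ? "jdk.incubator.vector is enabled; vectorized implementations can be used."
        : "jdk.incubator.vector is absent; Lucene falls back to scalar implementations.");
  }
}
```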
@@ -19,6 +19,10 @@

 ## Migration from Lucene 9.x to Lucene 10.0

+### OpenNLP dependency upgrade
+
+[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using and be running on Java 17.
+
 ### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829)

 For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
@@ -147,12 +151,6 @@ may throw `IOException` on index problems, bubbling up unexpectedly to the calle
 `(Reverse)PathHierarchyTokenizer` now produces sequential (instead of overlapping) tokens with accurate
 offsets, making positional queries and highlighters possible for fields tokenized with this tokenizer.

-## Migration from Lucene 9.9 to Lucene 9.10
-
-### OpenNLP dependency upgrade
-
-[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using and be running on Java 17.
-
 ## Migration from Lucene 9.0 to Lucene 9.1

 ### Test framework package migration and module (LUCENE-10301)
@@ -59,11 +59,11 @@ public class MinHashFilter extends TokenFilter {
   private final List<List<FixedSizeTreeSet<LongPair>>> minHashSets;

-  private int hashSetSize = DEFAULT_HASH_SET_SIZE;
+  private final int hashSetSize;

-  private int bucketCount = DEFAULT_BUCKET_COUNT;
+  private final int bucketCount;

-  private int hashCount = DEFAULT_HASH_COUNT;
+  private final int hashCount;

   private boolean requiresInitialisation = true;

@@ -32,13 +32,13 @@ public class MinHashFilterFactory extends TokenFilterFactory {
   /** SPI name */
   public static final String NAME = "minHash";

-  private int hashCount = MinHashFilter.DEFAULT_HASH_COUNT;
+  private final int hashCount;

-  private int bucketCount = MinHashFilter.DEFAULT_BUCKET_COUNT;
+  private final int bucketCount;

-  private int hashSetSize = MinHashFilter.DEFAULT_HASH_SET_SIZE;
+  private final int hashSetSize;

-  private boolean withRotation;
+  private final boolean withRotation;

   /** Create a {@link MinHashFilterFactory}. */
   public MinHashFilterFactory(Map<String, String> args) {
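The MinHashFilter and MinHashFilterFactory hunks above, and the factory hunks that follow, all apply the same refactoring: a mutable field pre-initialized with a default becomes a final field assigned exactly once in the constructor. A minimal sketch of the pattern, with hypothetical names (the real factories parse many more settings from the args map):

```java
import java.util.Map;

// Sketch of the final-field pattern used in the hunks above: the default is
// applied inside the constructor instead of at the field declaration, so the
// field can be declared final and is never reassigned afterwards.
class ExampleFilterFactory {
  private static final int DEFAULT_HASH_COUNT = 1;

  private final int hashCount; // was: private int hashCount = DEFAULT_HASH_COUNT;

  ExampleFilterFactory(Map<String, String> args) {
    // Read the setting once; fall back to the default when absent.
    String value = args.get("hashCount");
    this.hashCount = (value == null) ? DEFAULT_HASH_COUNT : Integer.parseInt(value);
  }
}
```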
@@ -67,7 +67,7 @@ public class WordDelimiterGraphFilterFactory extends TokenFilterFactory
   private final int flags;
   byte[] typeTable = null;
   private CharArraySet protectedWords = null;
-  private boolean adjustOffsets = false;
+  private final boolean adjustOffsets;

   /** Creates a new WordDelimiterGraphFilterFactory */
   public WordDelimiterGraphFilterFactory(Map<String, String> args) {
@@ -89,7 +89,7 @@ public final class DutchAnalyzer extends Analyzer {
   private final CharArraySet stoptable;

   /** Contains words that should be indexed but not stemmed. */
-  private CharArraySet excltable = CharArraySet.EMPTY_SET;
+  private final CharArraySet excltable;

   private final StemmerOverrideMap stemdict;

@@ -41,8 +41,8 @@ public class PatternCaptureGroupFilterFactory extends TokenFilterFactory {
   /** SPI name */
   public static final String NAME = "patternCaptureGroup";

-  private Pattern pattern;
-  private boolean preserveOriginal = true;
+  private final Pattern pattern;
+  private final boolean preserveOriginal;

   public PatternCaptureGroupFilterFactory(Map<String, String> args) {
     super(args);
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.shingle;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.Objects;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -175,7 +176,7 @@ public final class ShingleFilter extends TokenFilter {
    * @param tokenType token tokenType
    */
   public void setTokenType(String tokenType) {
-    this.tokenType = tokenType;
+    this.tokenType = Objects.requireNonNull(tokenType, "tokenType");
   }

   /**
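The one-line change above turns a silent null assignment into a fail-fast check: `Objects.requireNonNull` throws a `NullPointerException` with the given message at the call site, instead of letting the null surface later when the stored token type is dereferenced. A minimal sketch of the idiom (hypothetical class, not the actual ShingleFilter):

```java
import java.util.Objects;

// Sketch of the fail-fast setter idiom: reject null eagerly, with a message
// naming the offending parameter, rather than storing it for later failure.
class TokenTypeHolder {
  private String tokenType = "shingle";

  public void setTokenType(String tokenType) {
    this.tokenType = Objects.requireNonNull(tokenType, "tokenType");
  }
}
```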
@@ -114,7 +114,7 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
    * /箱根山-箱根/成田空港-成田/ requests "箱根" and "成田" to be in the result in NBEST output.
    */
   private final String nbestExamples;
-  private int nbestCost = -1;
+  private int nbestCost;

   /** Creates a new JapaneseTokenizerFactory */
   public JapaneseTokenizerFactory(Map<String, String> args) {
@@ -17,103 +17,107 @@
 package org.apache.lucene.analysis.ja.dict;

 import java.io.IOException;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.Map;

 /** Utility class for english translations of morphological data, used only for debugging. */
 public class ToStringUtil {
   // a translation map for parts of speech, only used for reflectWith
-  private static final HashMap<String, String> posTranslations = new HashMap<>();
+  private static final Map<String, String> posTranslations;

   static {
-    posTranslations.put("名詞", "noun");
-    posTranslations.put("名詞-一般", "noun-common");
-    posTranslations.put("名詞-固有名詞", "noun-proper");
-    posTranslations.put("名詞-固有名詞-一般", "noun-proper-misc");
-    posTranslations.put("名詞-固有名詞-人名", "noun-proper-person");
-    posTranslations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
-    posTranslations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
-    posTranslations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
-    posTranslations.put("名詞-固有名詞-組織", "noun-proper-organization");
-    posTranslations.put("名詞-固有名詞-地域", "noun-proper-place");
-    posTranslations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
-    posTranslations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
-    posTranslations.put("名詞-代名詞", "noun-pronoun");
-    posTranslations.put("名詞-代名詞-一般", "noun-pronoun-misc");
-    posTranslations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
-    posTranslations.put("名詞-副詞可能", "noun-adverbial");
-    posTranslations.put("名詞-サ変接続", "noun-verbal");
-    posTranslations.put("名詞-形容動詞語幹", "noun-adjective-base");
-    posTranslations.put("名詞-数", "noun-numeric");
-    posTranslations.put("名詞-非自立", "noun-affix");
-    posTranslations.put("名詞-非自立-一般", "noun-affix-misc");
-    posTranslations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
-    posTranslations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
-    posTranslations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
-    posTranslations.put("名詞-特殊", "noun-special");
-    posTranslations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
-    posTranslations.put("名詞-接尾", "noun-suffix");
-    posTranslations.put("名詞-接尾-一般", "noun-suffix-misc");
-    posTranslations.put("名詞-接尾-人名", "noun-suffix-person");
-    posTranslations.put("名詞-接尾-地域", "noun-suffix-place");
-    posTranslations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
-    posTranslations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
-    posTranslations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
-    posTranslations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
-    posTranslations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
-    posTranslations.put("名詞-接尾-特殊", "noun-suffix-special");
-    posTranslations.put("名詞-接続詞的", "noun-suffix-conjunctive");
-    posTranslations.put("名詞-動詞非自立的", "noun-verbal_aux");
-    posTranslations.put("名詞-引用文字列", "noun-quotation");
-    posTranslations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
-    posTranslations.put("接頭詞", "prefix");
-    posTranslations.put("接頭詞-名詞接続", "prefix-nominal");
-    posTranslations.put("接頭詞-動詞接続", "prefix-verbal");
-    posTranslations.put("接頭詞-形容詞接続", "prefix-adjectival");
-    posTranslations.put("接頭詞-数接続", "prefix-numerical");
-    posTranslations.put("動詞", "verb");
-    posTranslations.put("動詞-自立", "verb-main");
-    posTranslations.put("動詞-非自立", "verb-auxiliary");
-    posTranslations.put("動詞-接尾", "verb-suffix");
-    posTranslations.put("形容詞", "adjective");
-    posTranslations.put("形容詞-自立", "adjective-main");
-    posTranslations.put("形容詞-非自立", "adjective-auxiliary");
-    posTranslations.put("形容詞-接尾", "adjective-suffix");
-    posTranslations.put("副詞", "adverb");
-    posTranslations.put("副詞-一般", "adverb-misc");
-    posTranslations.put("副詞-助詞類接続", "adverb-particle_conjunction");
-    posTranslations.put("連体詞", "adnominal");
-    posTranslations.put("接続詞", "conjunction");
-    posTranslations.put("助詞", "particle");
-    posTranslations.put("助詞-格助詞", "particle-case");
-    posTranslations.put("助詞-格助詞-一般", "particle-case-misc");
-    posTranslations.put("助詞-格助詞-引用", "particle-case-quote");
-    posTranslations.put("助詞-格助詞-連語", "particle-case-compound");
-    posTranslations.put("助詞-接続助詞", "particle-conjunctive");
-    posTranslations.put("助詞-係助詞", "particle-dependency");
-    posTranslations.put("助詞-副助詞", "particle-adverbial");
-    posTranslations.put("助詞-間投助詞", "particle-interjective");
-    posTranslations.put("助詞-並立助詞", "particle-coordinate");
-    posTranslations.put("助詞-終助詞", "particle-final");
-    posTranslations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
-    posTranslations.put("助詞-連体化", "particle-adnominalizer");
-    posTranslations.put("助詞-副詞化", "particle-adnominalizer");
-    posTranslations.put("助詞-特殊", "particle-special");
-    posTranslations.put("助動詞", "auxiliary-verb");
-    posTranslations.put("感動詞", "interjection");
-    posTranslations.put("記号", "symbol");
-    posTranslations.put("記号-一般", "symbol-misc");
-    posTranslations.put("記号-句点", "symbol-period");
-    posTranslations.put("記号-読点", "symbol-comma");
-    posTranslations.put("記号-空白", "symbol-space");
-    posTranslations.put("記号-括弧開", "symbol-open_bracket");
-    posTranslations.put("記号-括弧閉", "symbol-close_bracket");
-    posTranslations.put("記号-アルファベット", "symbol-alphabetic");
-    posTranslations.put("その他", "other");
-    posTranslations.put("その他-間投", "other-interjection");
-    posTranslations.put("フィラー", "filler");
-    posTranslations.put("非言語音", "non-verbal");
-    posTranslations.put("語断片", "fragment");
-    posTranslations.put("未知語", "unknown");
+    Map<String, String> translations = new HashMap<>();
+    translations.put("名詞", "noun");
+    translations.put("名詞-一般", "noun-common");
+    translations.put("名詞-固有名詞", "noun-proper");
+    translations.put("名詞-固有名詞-一般", "noun-proper-misc");
+    translations.put("名詞-固有名詞-人名", "noun-proper-person");
+    translations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
+    translations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
+    translations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
+    translations.put("名詞-固有名詞-組織", "noun-proper-organization");
+    translations.put("名詞-固有名詞-地域", "noun-proper-place");
+    translations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
+    translations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
+    translations.put("名詞-代名詞", "noun-pronoun");
+    translations.put("名詞-代名詞-一般", "noun-pronoun-misc");
+    translations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
+    translations.put("名詞-副詞可能", "noun-adverbial");
+    translations.put("名詞-サ変接続", "noun-verbal");
+    translations.put("名詞-形容動詞語幹", "noun-adjective-base");
+    translations.put("名詞-数", "noun-numeric");
+    translations.put("名詞-非自立", "noun-affix");
+    translations.put("名詞-非自立-一般", "noun-affix-misc");
+    translations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
+    translations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
+    translations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
+    translations.put("名詞-特殊", "noun-special");
+    translations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
+    translations.put("名詞-接尾", "noun-suffix");
+    translations.put("名詞-接尾-一般", "noun-suffix-misc");
+    translations.put("名詞-接尾-人名", "noun-suffix-person");
+    translations.put("名詞-接尾-地域", "noun-suffix-place");
+    translations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
+    translations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
+    translations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
+    translations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
+    translations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
+    translations.put("名詞-接尾-特殊", "noun-suffix-special");
+    translations.put("名詞-接続詞的", "noun-suffix-conjunctive");
+    translations.put("名詞-動詞非自立的", "noun-verbal_aux");
+    translations.put("名詞-引用文字列", "noun-quotation");
+    translations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
+    translations.put("接頭詞", "prefix");
+    translations.put("接頭詞-名詞接続", "prefix-nominal");
+    translations.put("接頭詞-動詞接続", "prefix-verbal");
+    translations.put("接頭詞-形容詞接続", "prefix-adjectival");
+    translations.put("接頭詞-数接続", "prefix-numerical");
+    translations.put("動詞", "verb");
+    translations.put("動詞-自立", "verb-main");
+    translations.put("動詞-非自立", "verb-auxiliary");
+    translations.put("動詞-接尾", "verb-suffix");
+    translations.put("形容詞", "adjective");
+    translations.put("形容詞-自立", "adjective-main");
+    translations.put("形容詞-非自立", "adjective-auxiliary");
+    translations.put("形容詞-接尾", "adjective-suffix");
+    translations.put("副詞", "adverb");
+    translations.put("副詞-一般", "adverb-misc");
+    translations.put("副詞-助詞類接続", "adverb-particle_conjunction");
+    translations.put("連体詞", "adnominal");
+    translations.put("接続詞", "conjunction");
+    translations.put("助詞", "particle");
+    translations.put("助詞-格助詞", "particle-case");
+    translations.put("助詞-格助詞-一般", "particle-case-misc");
+    translations.put("助詞-格助詞-引用", "particle-case-quote");
+    translations.put("助詞-格助詞-連語", "particle-case-compound");
+    translations.put("助詞-接続助詞", "particle-conjunctive");
+    translations.put("助詞-係助詞", "particle-dependency");
+    translations.put("助詞-副助詞", "particle-adverbial");
+    translations.put("助詞-間投助詞", "particle-interjective");
+    translations.put("助詞-並立助詞", "particle-coordinate");
+    translations.put("助詞-終助詞", "particle-final");
+    translations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
+    translations.put("助詞-連体化", "particle-adnominalizer");
+    translations.put("助詞-副詞化", "particle-adnominalizer");
+    translations.put("助詞-特殊", "particle-special");
+    translations.put("助動詞", "auxiliary-verb");
+    translations.put("感動詞", "interjection");
+    translations.put("記号", "symbol");
+    translations.put("記号-一般", "symbol-misc");
+    translations.put("記号-句点", "symbol-period");
+    translations.put("記号-読点", "symbol-comma");
+    translations.put("記号-空白", "symbol-space");
+    translations.put("記号-括弧開", "symbol-open_bracket");
+    translations.put("記号-括弧閉", "symbol-close_bracket");
+    translations.put("記号-アルファベット", "symbol-alphabetic");
+    translations.put("その他", "other");
+    translations.put("その他-間投", "other-interjection");
+    translations.put("フィラー", "filler");
+    translations.put("非言語音", "non-verbal");
+    translations.put("語断片", "fragment");
+    translations.put("未知語", "unknown");
+    posTranslations = Collections.unmodifiableMap(translations);
   }

   /** Get the english form of a POS tag */
@@ -122,67 +126,69 @@ public class ToStringUtil {
   }

   // a translation map for inflection types, only used for reflectWith
-  private static final HashMap<String, String> inflTypeTranslations = new HashMap<>();
+  private static final Map<String, String> inflTypeTranslations;

   static {
-    inflTypeTranslations.put("*", "*");
-    inflTypeTranslations.put("形容詞・アウオ段", "adj-group-a-o-u");
-    inflTypeTranslations.put("形容詞・イ段", "adj-group-i");
-    inflTypeTranslations.put("形容詞・イイ", "adj-group-ii");
-    inflTypeTranslations.put("不変化型", "non-inflectional");
-    inflTypeTranslations.put("特殊・タ", "special-da");
-    inflTypeTranslations.put("特殊・ダ", "special-ta");
-    inflTypeTranslations.put("文語・ゴトシ", "classical-gotoshi");
-    inflTypeTranslations.put("特殊・ジャ", "special-ja");
-    inflTypeTranslations.put("特殊・ナイ", "special-nai");
-    inflTypeTranslations.put("五段・ラ行特殊", "5-row-cons-r-special");
-    inflTypeTranslations.put("特殊・ヌ", "special-nu");
-    inflTypeTranslations.put("文語・キ", "classical-ki");
-    inflTypeTranslations.put("特殊・タイ", "special-tai");
-    inflTypeTranslations.put("文語・ベシ", "classical-beshi");
-    inflTypeTranslations.put("特殊・ヤ", "special-ya");
-    inflTypeTranslations.put("文語・マジ", "classical-maji");
-    inflTypeTranslations.put("下二・タ行", "2-row-lower-cons-t");
-    inflTypeTranslations.put("特殊・デス", "special-desu");
-    inflTypeTranslations.put("特殊・マス", "special-masu");
-    inflTypeTranslations.put("五段・ラ行アル", "5-row-aru");
-    inflTypeTranslations.put("文語・ナリ", "classical-nari");
-    inflTypeTranslations.put("文語・リ", "classical-ri");
-    inflTypeTranslations.put("文語・ケリ", "classical-keri");
-    inflTypeTranslations.put("文語・ル", "classical-ru");
-    inflTypeTranslations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
-    inflTypeTranslations.put("五段・サ行", "5-row-cons-s");
-    inflTypeTranslations.put("一段", "1-row");
-    inflTypeTranslations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
-    inflTypeTranslations.put("五段・マ行", "5-row-cons-m");
-    inflTypeTranslations.put("五段・タ行", "5-row-cons-t");
-    inflTypeTranslations.put("五段・ラ行", "5-row-cons-r");
-    inflTypeTranslations.put("サ変・−スル", "irregular-suffix-suru");
-    inflTypeTranslations.put("五段・ガ行", "5-row-cons-g");
-    inflTypeTranslations.put("サ変・−ズル", "irregular-suffix-zuru");
-    inflTypeTranslations.put("五段・バ行", "5-row-cons-b");
-    inflTypeTranslations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
-    inflTypeTranslations.put("下二・ダ行", "2-row-lower-cons-d");
-    inflTypeTranslations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
-    inflTypeTranslations.put("上二・ダ行", "2-row-upper-cons-d");
-    inflTypeTranslations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
-    inflTypeTranslations.put("一段・得ル", "1-row-eru");
-    inflTypeTranslations.put("四段・タ行", "4-row-cons-t");
-    inflTypeTranslations.put("五段・ナ行", "5-row-cons-n");
-    inflTypeTranslations.put("下二・ハ行", "2-row-lower-cons-h");
-    inflTypeTranslations.put("四段・ハ行", "4-row-cons-h");
-    inflTypeTranslations.put("四段・バ行", "4-row-cons-b");
-    inflTypeTranslations.put("サ変・スル", "irregular-suru");
-    inflTypeTranslations.put("上二・ハ行", "2-row-upper-cons-h");
-    inflTypeTranslations.put("下二・マ行", "2-row-lower-cons-m");
-    inflTypeTranslations.put("四段・サ行", "4-row-cons-s");
-    inflTypeTranslations.put("下二・ガ行", "2-row-lower-cons-g");
-    inflTypeTranslations.put("カ変・来ル", "kuru-kanji");
-    inflTypeTranslations.put("一段・クレル", "1-row-kureru");
-    inflTypeTranslations.put("下二・得", "2-row-lower-u");
-    inflTypeTranslations.put("カ変・クル", "kuru-kana");
-    inflTypeTranslations.put("ラ変", "irregular-cons-r");
-    inflTypeTranslations.put("下二・カ行", "2-row-lower-cons-k");
+    Map<String, String> translations = new HashMap<>();
+    translations.put("*", "*");
+    translations.put("形容詞・アウオ段", "adj-group-a-o-u");
+    translations.put("形容詞・イ段", "adj-group-i");
+    translations.put("形容詞・イイ", "adj-group-ii");
+    translations.put("不変化型", "non-inflectional");
+    translations.put("特殊・タ", "special-da");
+    translations.put("特殊・ダ", "special-ta");
+    translations.put("文語・ゴトシ", "classical-gotoshi");
+    translations.put("特殊・ジャ", "special-ja");
+    translations.put("特殊・ナイ", "special-nai");
+    translations.put("五段・ラ行特殊", "5-row-cons-r-special");
+    translations.put("特殊・ヌ", "special-nu");
+    translations.put("文語・キ", "classical-ki");
+    translations.put("特殊・タイ", "special-tai");
+    translations.put("文語・ベシ", "classical-beshi");
+    translations.put("特殊・ヤ", "special-ya");
+    translations.put("文語・マジ", "classical-maji");
+    translations.put("下二・タ行", "2-row-lower-cons-t");
+    translations.put("特殊・デス", "special-desu");
+    translations.put("特殊・マス", "special-masu");
+    translations.put("五段・ラ行アル", "5-row-aru");
+    translations.put("文語・ナリ", "classical-nari");
+    translations.put("文語・リ", "classical-ri");
+    translations.put("文語・ケリ", "classical-keri");
+    translations.put("文語・ル", "classical-ru");
+    translations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
+    translations.put("五段・サ行", "5-row-cons-s");
+    translations.put("一段", "1-row");
+    translations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
+    translations.put("五段・マ行", "5-row-cons-m");
+    translations.put("五段・タ行", "5-row-cons-t");
+    translations.put("五段・ラ行", "5-row-cons-r");
+    translations.put("サ変・−スル", "irregular-suffix-suru");
+    translations.put("五段・ガ行", "5-row-cons-g");
+    translations.put("サ変・−ズル", "irregular-suffix-zuru");
+    translations.put("五段・バ行", "5-row-cons-b");
+    translations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
+    translations.put("下二・ダ行", "2-row-lower-cons-d");
+    translations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
+    translations.put("上二・ダ行", "2-row-upper-cons-d");
+    translations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
+    translations.put("一段・得ル", "1-row-eru");
+    translations.put("四段・タ行", "4-row-cons-t");
+    translations.put("五段・ナ行", "5-row-cons-n");
+    translations.put("下二・ハ行", "2-row-lower-cons-h");
+    translations.put("四段・ハ行", "4-row-cons-h");
+    translations.put("四段・バ行", "4-row-cons-b");
+    translations.put("サ変・スル", "irregular-suru");
+    translations.put("上二・ハ行", "2-row-upper-cons-h");
+    translations.put("下二・マ行", "2-row-lower-cons-m");
+    translations.put("四段・サ行", "4-row-cons-s");
+    translations.put("下二・ガ行", "2-row-lower-cons-g");
+    translations.put("カ変・来ル", "kuru-kanji");
+    translations.put("一段・クレル", "1-row-kureru");
+    translations.put("下二・得", "2-row-lower-u");
+    translations.put("カ変・クル", "kuru-kana");
+    translations.put("ラ変", "irregular-cons-r");
+    translations.put("下二・カ行", "2-row-lower-cons-k");
+    inflTypeTranslations = Collections.unmodifiableMap(translations);
   }

   /** Get the english form of inflection type */
@@ -191,37 +197,39 @@
   }

   // a translation map for inflection forms, only used for reflectWith
-  private static final HashMap<String, String> inflFormTranslations = new HashMap<>();
+  private static final Map<String, String> inflFormTranslations;

   static {
-    inflFormTranslations.put("*", "*");
-    inflFormTranslations.put("基本形", "base");
-    inflFormTranslations.put("文語基本形", "classical-base");
-    inflFormTranslations.put("未然ヌ接続", "imperfective-nu-connection");
-    inflFormTranslations.put("未然ウ接続", "imperfective-u-connection");
-    inflFormTranslations.put("連用タ接続", "conjunctive-ta-connection");
-    inflFormTranslations.put("連用テ接続", "conjunctive-te-connection");
-    inflFormTranslations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
-    inflFormTranslations.put("体言接続", "uninflected-connection");
-    inflFormTranslations.put("仮定形", "subjunctive");
-    inflFormTranslations.put("命令e", "imperative-e");
-    inflFormTranslations.put("仮定縮約1", "conditional-contracted-1");
-    inflFormTranslations.put("仮定縮約2", "conditional-contracted-2");
-    inflFormTranslations.put("ガル接続", "garu-connection");
-    inflFormTranslations.put("未然形", "imperfective");
-    inflFormTranslations.put("連用形", "conjunctive");
-    inflFormTranslations.put("音便基本形", "onbin-base");
-    inflFormTranslations.put("連用デ接続", "conjunctive-de-connection");
-    inflFormTranslations.put("未然特殊", "imperfective-special");
-    inflFormTranslations.put("命令i", "imperative-i");
-    inflFormTranslations.put("連用ニ接続", "conjunctive-ni-connection");
-    inflFormTranslations.put("命令yo", "imperative-yo");
-    inflFormTranslations.put("体言接続特殊", "adnominal-special");
-    inflFormTranslations.put("命令ro", "imperative-ro");
-    inflFormTranslations.put("体言接続特殊2", "uninflected-special-connection-2");
-    inflFormTranslations.put("未然レル接続", "imperfective-reru-connection");
-    inflFormTranslations.put("現代基本形", "modern-base");
-    inflFormTranslations.put("基本形-促音便", "base-onbin"); // not sure about this
+    Map<String, String> translations = new HashMap<>();
+    translations.put("*", "*");
+    translations.put("基本形", "base");
+    translations.put("文語基本形", "classical-base");
+    translations.put("未然ヌ接続", "imperfective-nu-connection");
+    translations.put("未然ウ接続", "imperfective-u-connection");
+    translations.put("連用タ接続", "conjunctive-ta-connection");
+    translations.put("連用テ接続", "conjunctive-te-connection");
+    translations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
+    translations.put("体言接続", "uninflected-connection");
+    translations.put("仮定形", "subjunctive");
+    translations.put("命令e", "imperative-e");
+    translations.put("仮定縮約1", "conditional-contracted-1");
+    translations.put("仮定縮約2", "conditional-contracted-2");
+    translations.put("ガル接続", "garu-connection");
+    translations.put("未然形", "imperfective");
+    translations.put("連用形", "conjunctive");
+    translations.put("音便基本形", "onbin-base");
+    translations.put("連用デ接続", "conjunctive-de-connection");
+    translations.put("未然特殊", "imperfective-special");
+    translations.put("命令i", "imperative-i");
+    translations.put("連用ニ接続", "conjunctive-ni-connection");
+    translations.put("命令yo", "imperative-yo");
+    translations.put("体言接続特殊", "adnominal-special");
+    translations.put("命令ro", "imperative-ro");
+    translations.put("体言接続特殊2", "uninflected-special-connection-2");
+    translations.put("未然レル接続", "imperfective-reru-connection");
+    translations.put("現代基本形", "modern-base");
+    translations.put("基本形-促音便", "base-onbin"); // not sure about this
+    inflFormTranslations = Collections.unmodifiableMap(translations);
   }

   /** Get the english form of inflected form */
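The three ToStringUtil hunks above all follow the idiom from GITHUB#13092: populate a local `HashMap` inside the static initializer, then publish it once through `Collections.unmodifiableMap`, so the `static final Map` constant can never be mutated after class initialization. A minimal self-contained sketch of the idiom (hypothetical class, two representative entries taken from the hunks):

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Sketch of the immutable-constant idiom applied in ToStringUtil: build the
// map in a local variable, then assign an unmodifiable view to the final field.
class Translations {
  private static final Map<String, String> POS_TRANSLATIONS;

  static {
    Map<String, String> translations = new HashMap<>();
    translations.put("名詞", "noun");
    translations.put("動詞", "verb");
    POS_TRANSLATIONS = Collections.unmodifiableMap(translations);
  }

  static String translatePos(String pos) {
    return POS_TRANSLATIONS.getOrDefault(pos, pos);
  }
}
```

Any later call like `POS_TRANSLATIONS.put(...)` now fails with `UnsupportedOperationException` instead of silently mutating a shared constant.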
@@ -44,7 +44,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
   private int sentenceStart = 0;
   private int sentenceIndex = -1;

-  private NLPTokenizerOp tokenizerOp = null;
+  private final NLPTokenizerOp tokenizerOp;

   public OpenNLPTokenizer(
       AttributeFactory factory, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp)
@@ -23,7 +23,7 @@ import opennlp.tools.chunker.ChunkerModel;
 
 /** Supply OpenNLP Chunking tool Requires binary models from OpenNLP project on SourceForge. */
 public class NLPChunkerOp {
-  private ChunkerME chunker = null;
+  private final ChunkerME chunker;
 
   public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
     chunker = new ChunkerME(chunkerModel);
@@ -27,7 +27,7 @@ import opennlp.tools.postag.POSTaggerME;
  * SourceForge.
  */
 public class NLPPOSTaggerOp {
-  private POSTagger tagger = null;
+  private final POSTagger tagger;
 
   public NLPPOSTaggerOp(POSModel model) throws IOException {
     tagger = new POSTaggerME(model);
@@ -32,10 +32,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  */
 public final class DaitchMokotoffSoundexFilter extends TokenFilter {
   /** true if encoded tokens should be added as synonyms */
-  protected boolean inject = true;
+  private final boolean inject;
 
   /** phonetic encoder */
-  protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
+  private final DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
 
   // output is a string such as ab|ac|...
   private static final Pattern pattern = Pattern.compile("([^|]+)");
@@ -32,13 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  */
 public final class PhoneticFilter extends TokenFilter {
   /** true if encoded tokens should be added as synonyms */
-  protected boolean inject = true;
+  private final boolean inject;
 
   /** phonetic encoder */
-  protected Encoder encoder = null;
+  private final Encoder encoder;
 
   /** captured state, non-null when <code>inject=true</code> and a token is buffered */
-  protected State save = null;
+  private State save = null;
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
@@ -73,7 +73,7 @@ public class Trie {
   List<CharSequence> cmds = new ArrayList<>();
   int root;
 
-  boolean forward = false;
+  boolean forward;
 
   /**
    * Constructor for the Trie object.
@@ -191,7 +191,7 @@ public final class FieldReader extends Terms {
   @Override
   public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
     // if (DEBUG) System.out.println("  FieldReader.intersect startTerm=" +
-    // BlockTreeTermsWriter.brToString(startTerm));
+    // ToStringUtils.bytesRefToString(startTerm));
     // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
     // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
     // can we optimize knowing that...?
@@ -543,19 +543,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
       }
     }
 
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
   private void copyTerm() {
     final int len = currentFrame.prefix + currentFrame.suffix;
     if (term.bytes.length < len) {
@@ -354,24 +354,6 @@ public final class Lucene40BlockTreeTermsReader extends FieldsProducer {
     return fieldMap.size();
   }
 
-  // for debugging
-  String brToString(BytesRef b) {
-    if (b == null) {
-      return "null";
-    } else {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (
-          @SuppressWarnings("unused")
-          Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-  }
-
   @Override
   public void checkIntegrity() throws IOException {
     // terms index
@@ -256,8 +256,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
     f.arc = arc;
     if (f.fpOrig == fp && f.nextEnt != -1) {
-      // if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + "
-      // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+      // if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp +
+      // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
       // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
       // term.length + " vs prefix=" + f.prefix);
       // if (f.prefix > targetBeforeCurrentLength) {
@@ -279,7 +279,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       // final int sav = term.length;
       // term.length = length;
       // System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-      // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+      // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
       // term.length = sav;
       // }
     }
@@ -299,27 +299,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     return true;
   }
 
-  /*
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRefBuilder b) {
-    return brToString(b.get());
-  }
-  */
-
   @Override
   public boolean seekExact(BytesRef target) throws IOException {
 
@@ -337,8 +316,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
 
     // if (DEBUG) {
     // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
-    // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
-    // + termExists + ") validIndexPrefix=" + validIndexPrefix);
+    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+    // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
+    // ") validIndexPrefix=" + validIndexPrefix);
     // printSeekState(System.out);
     // }
 
@@ -496,8 +476,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       }
 
       // if (DEBUG) {
-      // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + "
-      // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+      // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output +
+      // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
       // targetBeforeCurrentLength);
       // }
 
@@ -528,7 +508,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
           term.setByteAt(targetUpto, (byte) targetLabel);
           term.setLength(1 + targetUpto);
           // if (DEBUG) {
-          // System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+          // System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }
@@ -544,7 +524,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
         } else {
           // if (DEBUG) {
           // System.out.println("  got " + result + "; return NOT_FOUND term=" +
-          // brToString(term));
+          // ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }
@@ -587,7 +567,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
         termExists = false;
         term.setLength(targetUpto);
         // if (DEBUG) {
-        // System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+        // System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
         // }
         return false;
       }
@@ -623,7 +603,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
 
     // if (DEBUG) {
     // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
-    // fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
+    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) +
+    // " current=" + ToStringUtils.bytesRefToString(term)
     // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
     // printSeekState(System.out);
     // }
@@ -667,9 +648,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
       // if (DEBUG) {
       // System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
-      // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
-      // vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
-      // " output=" + output);
+      // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+      // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
+      // + " output=" + output);
       // }
       if (cmp != 0) {
         break;
@@ -781,8 +762,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       }
 
       // if (DEBUG) {
-      // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + "
-      // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+      // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output +
+      // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
       // targetBeforeCurrentLength);
       // }
 
@@ -818,7 +799,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
 
       if (next() != null) {
         // if (DEBUG) {
-        // System.out.println("  return NOT_FOUND term=" + brToString(term));
+        // System.out.println("  return NOT_FOUND term=" +
+        // ToStringUtils.bytesRefToString(term));
         // }
         return SeekStatus.NOT_FOUND;
       } else {
@@ -829,7 +811,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       }
     } else {
       // if (DEBUG) {
-      // System.out.println("  return " + result + " term=" + brToString(term));
+      // System.out.println("  return " + result + " term=" +
+      // ToStringUtils.bytesRefToString(term));
       // }
       return result;
     }
@@ -1029,9 +1012,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
 
     assert !eof;
     // if (DEBUG) {
-    // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
-    // termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
-    // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+    // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
+    // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
+    // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
+    // " validIndexPrefix=" + validIndexPrefix);
     // printSeekState(System.out);
     // }
 
@@ -1095,8 +1079,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
         // try to scan to the right floor frame:
         currentFrame.loadBlock();
       } else {
-        // if (DEBUG) System.out.println("  return term=" + brToString(term) + " currentFrame.ord="
-        // + currentFrame.ord);
+        // if (DEBUG) System.out.println("  return term=" + ToStringUtils.bytesRefToString(term) +
+        // " currentFrame.ord=" + currentFrame.ord);
         return term.get();
       }
     }
@@ -317,8 +317,8 @@ final class SegmentTermsEnumFrame {
   }
 
   public void nextLeaf() {
-    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
-    // entCount=" + entCount);
+    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt +
+    // " entCount=" + entCount);
     assert nextEnt != -1 && nextEnt < entCount
         : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
     nextEnt++;
@@ -410,8 +410,8 @@ final class SegmentTermsEnumFrame {
       newFP = fpOrig + (code >>> 1);
       hasTerms = (code & 1) != 0;
       // if (DEBUG) {
-      // System.out.println("    label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
-      // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+      // System.out.println("    label=" + toHex(nextFloorLabel) + " fp=" + newFP +
+      // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
       // }
 
       isLastInFloor = numFollowFloorBlocks == 1;
@@ -566,28 +566,14 @@ final class SegmentTermsEnumFrame {
   private long subCode;
   CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
 
-  // for debugging
-  /*
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-  */
-
   // Target's prefix matches this block's prefix; we
   // scan the entries check if the suffix matches.
   public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
 
-    // if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
-    // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
-    // brToString(term));
+    // if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
+    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
+    // ToStringUtils.bytesRefToString(target) +
+    // " term=" + ToStringUtils.bytesRefToString(term));
 
     assert nextEnt != -1;
 
@@ -617,7 +603,7 @@ final class SegmentTermsEnumFrame {
       // suffixBytesRef.offset = suffixesReader.getPosition();
       // suffixBytesRef.length = suffix;
       // System.out.println("      cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
-      // + brToString(suffixBytesRef));
+      // + ToStringUtils.bytesRefToString(suffixBytesRef));
       // }
 
       startBytePos = suffixesReader.getPosition();
@@ -682,8 +668,9 @@ final class SegmentTermsEnumFrame {
   public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
 
     // if (DEBUG) System.out.println("    scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
-    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
-    // brToString(target));
+    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
+    // ToStringUtils.bytesRefToString(target) +
+    // " term=" + ToStringUtils.bytesRefToString(term));
 
     assert nextEnt != -1;
 
|
||||||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||||
// suffixBytesRef.length = suffix;
|
// suffixBytesRef.length = suffix;
|
||||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
||||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
|
||||||
|
// ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
final int termLen = prefix + suffix;
|
final int termLen = prefix + suffix;
|
||||||
|
@@ -743,8 +731,8 @@ final class SegmentTermsEnumFrame {
         // return NOT_FOUND:
         fillTerm();
 
-        // if (DEBUG) System.out.println("        maybe done exactOnly=" + exactOnly + "
-        // ste.termExists=" + ste.termExists);
+        // if (DEBUG) System.out.println("        maybe done exactOnly=" + exactOnly +
+        // " ste.termExists=" + ste.termExists);
 
         if (!exactOnly && !ste.termExists) {
           // System.out.println("  now pushFrame");
@@ -46,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.compress.LZ4;
 import org.apache.lucene.util.compress.LowercaseAsciiCompression;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
@@ -349,7 +350,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
       }
 
       // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
-      // brToString(term));
+      // ToStringUtils.bytesRefToString(term));
       termsWriter.write(term, termsEnum, norms);
     }
 
@@ -388,33 +389,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
 
     @Override
     public String toString() {
-      return "TERM: " + brToString(termBytes);
+      return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
     }
   }
 
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    if (b == null) {
-      return "(null)";
-    } else {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-  }
-
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(byte[] b) {
-    return brToString(new BytesRef(b));
-  }
-
   private static final class PendingBlock extends PendingEntry {
     public final BytesRef prefix;
     public final long fp;
@@ -442,7 +420,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
 
     @Override
     public String toString() {
-      return "BLOCK: prefix=" + brToString(prefix);
+      return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
     }
 
     public void compileIndex(
@@ -600,8 +578,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
     // if (DEBUG2) {
     //   BytesRef br = new BytesRef(lastTerm.bytes());
     //   br.length = prefixLength;
-    //   System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
-    // + count);
+    //   System.out.println("writeBlocks: seg=" + segment + " prefix=" +
+    // ToStringUtils.bytesRefToString(br) + " count=" + count);
     // }
 
     // Root block better write all remaining pending entries:
@@ -754,9 +732,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
     prefix.length = prefixLength;
 
     // if (DEBUG2) System.out.println("    writeBlock field=" + fieldInfo.name + " prefix=" +
-    // brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
-    // pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
-    // " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
+    // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
+    // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
+    // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
+    // hasSubBlocks);
 
     // Write block header:
     int numEntries = end - start;
@@ -769,7 +748,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
 
     /*
     if (DEBUG) {
-      System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
+      System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
+        pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
+        " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
     }
     */
 
|
||||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||||
// suffixBytes.length = suffix;
|
// suffixBytes.length = suffix;
|
||||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
// System.out.println(" write term suffix=" +
|
||||||
|
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// For leaf block we write suffix straight
|
// For leaf block we write suffix straight
|
||||||
|
@ -837,7 +819,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||||
// suffixBytes.length = suffix;
|
// suffixBytes.length = suffix;
|
||||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
// System.out.println(" write term suffix=" +
|
||||||
|
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// For non-leaf block we borrow 1 bit to record
|
// For non-leaf block we borrow 1 bit to record
|
||||||
|
@ -879,8 +862,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||||
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||||
// suffixBytes.length = suffix;
|
// suffixBytes.length = suffix;
|
||||||
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
|
// System.out.println(" write sub-block suffix=" +
|
||||||
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
// ToStringUtils.bytesRefToString(suffixBytes) +
|
||||||
|
// " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
assert floorLeadLabel == -1
|
assert floorLeadLabel == -1
|
||||||
|
@@ -998,7 +982,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
    if (DEBUG) {
      int[] tmp = new int[lastTerm.length];
      System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
-     System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
+     System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
+       " pending.size()=" + pending.size());
    }
    */
 
@@ -1051,8 +1036,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
       // we are closing:
       int prefixTopSize = pending.size() - prefixStarts[i];
       if (prefixTopSize >= minItemsInBlock) {
-        // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
-        // minItemsInBlock=" + minItemsInBlock);
+        // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
+        // " minItemsInBlock=" + minItemsInBlock);
         writeBlocks(i + 1, prefixTopSize);
         prefixStarts[i] -= prefixTopSize - 1;
       }
@@ -19,19 +19,25 @@ package org.apache.lucene.backward_index;
 import com.carrotsearch.randomizedtesting.annotations.Name;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.LineNumberReader;
 import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Set;
+import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.LeafReaderContext;
@@ -47,26 +53,31 @@ import org.junit.Before;
 
 public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
 
-  protected final Version version;
-  private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
-  protected final String indexPattern;
+  static final Set<String> OLD_VERSIONS;
   protected static final Set<Version> BINARY_SUPPORTED_VERSIONS;
 
-  static {
-    String[] oldVersions =
-        new String[] {
-          "8.0.0", "8.0.0", "8.1.0", "8.1.0", "8.1.1", "8.1.1", "8.2.0", "8.2.0", "8.3.0", "8.3.0",
-          "8.3.1", "8.3.1", "8.4.0", "8.4.0", "8.4.1", "8.4.1", "8.5.0", "8.5.0", "8.5.1", "8.5.1",
-          "8.5.2", "8.5.2", "8.6.0", "8.6.0", "8.6.1", "8.6.1", "8.6.2", "8.6.2", "8.6.3", "8.6.3",
-          "8.7.0", "8.7.0", "8.8.0", "8.8.0", "8.8.1", "8.8.1", "8.8.2", "8.8.2", "8.9.0", "8.9.0",
-          "8.10.0", "8.10.0", "8.10.1", "8.10.1", "8.11.0", "8.11.0", "8.11.1", "8.11.1", "8.11.2",
-          "8.11.2", "8.11.3", "8.11.3", "9.0.0", "9.1.0", "9.2.0", "9.3.0", "9.4.0", "9.4.1",
-          "9.4.2", "9.5.0", "9.6.0", "9.7.0", "9.8.0", "9.9.0", "9.9.1", "9.9.2", "9.10.0",
-          "10.0.0",
-        };
+  private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
 
+  protected final Version version;
+  protected final String indexPattern;
+
+  static {
+    String name = "versions.txt";
+    try (LineNumberReader in =
+        new LineNumberReader(
+            IOUtils.getDecodingReader(
+                IOUtils.requireResourceNonNull(
+                    BackwardsCompatibilityTestBase.class.getResourceAsStream(name), name),
+                StandardCharsets.UTF_8))) {
+      OLD_VERSIONS =
+          in.lines()
+              .filter(Predicate.not(String::isBlank))
+              .collect(Collectors.toCollection(LinkedHashSet::new));
+    } catch (IOException exception) {
+      throw new RuntimeException("failed to load resource", exception);
+    }
     Set<Version> binaryVersions = new HashSet<>();
-    for (String version : oldVersions) {
+    for (String version : OLD_VERSIONS) {
       try {
         Version v = Version.parse(version);
         assertTrue("Unsupported binary version: " + v, v.major >= Version.MIN_SUPPORTED_MAJOR - 1);
@@ -75,8 +86,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
         throw new RuntimeException(ex);
       }
     }
-    List<Version> allCurrentVersions = getAllCurrentVersions();
-    for (Version version : allCurrentVersions) {
+    for (Version version : getAllCurrentReleasedVersions()) {
       // make sure we never miss a version.
       assertTrue("Version: " + version + " missing", binaryVersions.remove(version));
     }
@@ -181,19 +192,51 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
     return versions;
   }
 
+  private static List<Version> getAllCurrentReleasedVersions() {
+    List<Version> currentReleasedVersions = getAllCurrentVersions();
+
+    // The latest version from the current major is always under development.
+    assertTrue(currentReleasedVersions.remove(Version.LATEST));
+    // The latest minor from the previous major is also under development.
+    assertTrue(currentReleasedVersions.remove(LATEST_PREVIOUS_MAJOR));
+
+    // In addition to those, we may need to remove one more version in case a release is in
+    // progress, and the version constant has been added but backward-compatibility indexes have not
+    // been checked in yet.
+    List<Version> missingVersions = new ArrayList<>();
+    for (Iterator<Version> it = currentReleasedVersions.iterator(); it.hasNext(); ) {
+      Version version = it.next();
+      String indexName = String.format(Locale.ROOT, "index.%s-cfs.zip", version);
+      if (TestAncientIndicesCompatibility.class.getResource(indexName) == null) {
+        missingVersions.add(version);
+        it.remove();
+      }
+    }
+
+    if (missingVersions.size() > 1) {
+      throw new AssertionError(
+          "More than one version is missing backward-compatibility data: " + missingVersions);
+    }
+    return currentReleasedVersions;
+  }
+
+  /** Get all versions that are released, plus the latest version which is unreleased. */
+  public static List<Version> getAllCurrentReleasedVersionsAndCurrent() {
+    List<Version> versions = new ArrayList<>(getAllCurrentReleasedVersions());
+    versions.add(Version.LATEST);
+    return versions;
+  }
+
   public static Iterable<Object[]> allVersion(String name, String... suffixes) {
     List<Object> patterns = new ArrayList<>();
     for (String suffix : suffixes) {
       patterns.add(createPattern(name, suffix));
     }
     List<Object[]> versionAndPatterns = new ArrayList<>();
-    List<Version> versionList = getAllCurrentVersions();
+    List<Version> versionList = getAllCurrentReleasedVersionsAndCurrent();
     for (Version v : versionList) {
-      if (v.equals(LATEST_PREVIOUS_MAJOR)
-          == false) { // the latest prev-major has not yet been released
-        for (Object p : patterns) {
-          versionAndPatterns.add(new Object[] {v, p});
-        }
+      for (Object p : patterns) {
+        versionAndPatterns.add(new Object[] {v, p});
       }
     }
     return versionAndPatterns;
@@ -21,8 +21,16 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.LineNumberReader;
 import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexFormatTooOldException;
@@ -36,274 +44,57 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.store.BaseDirectoryWrapper;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.IOUtils;
 
-@SuppressWarnings("deprecation")
 public class TestAncientIndicesCompatibility extends LuceneTestCase {
+  static final Set<String> UNSUPPORTED_INDEXES;
 
-  static final String[] unsupportedNames = {
-    "1.9.0-cfs",
-    "1.9.0-nocfs",
-    "2.0.0-cfs",
-    "2.0.0-nocfs",
-    "2.1.0-cfs",
-    "2.1.0-nocfs",
-    "2.2.0-cfs",
-    "2.2.0-nocfs",
-    "2.3.0-cfs",
-    "2.3.0-nocfs",
-    "2.4.0-cfs",
-    "2.4.0-nocfs",
-    "2.4.1-cfs",
-    "2.4.1-nocfs",
-    "2.9.0-cfs",
-    "2.9.0-nocfs",
-    "2.9.1-cfs",
-    "2.9.1-nocfs",
-    "2.9.2-cfs",
-    "2.9.2-nocfs",
-    "2.9.3-cfs",
-    "2.9.3-nocfs",
-    "2.9.4-cfs",
-    "2.9.4-nocfs",
-    "3.0.0-cfs",
-    "3.0.0-nocfs",
-    "3.0.1-cfs",
-    "3.0.1-nocfs",
-    "3.0.2-cfs",
-    "3.0.2-nocfs",
-    "3.0.3-cfs",
-    "3.0.3-nocfs",
-    "3.1.0-cfs",
-    "3.1.0-nocfs",
-    "3.2.0-cfs",
-    "3.2.0-nocfs",
-    "3.3.0-cfs",
-    "3.3.0-nocfs",
-    "3.4.0-cfs",
-    "3.4.0-nocfs",
-    "3.5.0-cfs",
-    "3.5.0-nocfs",
-    "3.6.0-cfs",
-    "3.6.0-nocfs",
-    "3.6.1-cfs",
-    "3.6.1-nocfs",
-    "3.6.2-cfs",
-    "3.6.2-nocfs",
-    "4.0.0-cfs",
-    "4.0.0-cfs",
-    "4.0.0-nocfs",
-    "4.0.0.1-cfs",
-    "4.0.0.1-nocfs",
-    "4.0.0.2-cfs",
-    "4.0.0.2-nocfs",
-    "4.1.0-cfs",
-    "4.1.0-nocfs",
-    "4.2.0-cfs",
-    "4.2.0-nocfs",
-    "4.2.1-cfs",
-    "4.2.1-nocfs",
-    "4.3.0-cfs",
-    "4.3.0-nocfs",
-    "4.3.1-cfs",
-    "4.3.1-nocfs",
-    "4.4.0-cfs",
-    "4.4.0-nocfs",
-    "4.5.0-cfs",
-    "4.5.0-nocfs",
-    "4.5.1-cfs",
-    "4.5.1-nocfs",
-    "4.6.0-cfs",
-    "4.6.0-nocfs",
-    "4.6.1-cfs",
-    "4.6.1-nocfs",
-    "4.7.0-cfs",
-    "4.7.0-nocfs",
-    "4.7.1-cfs",
-    "4.7.1-nocfs",
-    "4.7.2-cfs",
-    "4.7.2-nocfs",
-    "4.8.0-cfs",
-    "4.8.0-nocfs",
-    "4.8.1-cfs",
-    "4.8.1-nocfs",
-    "4.9.0-cfs",
-    "4.9.0-nocfs",
-    "4.9.1-cfs",
-    "4.9.1-nocfs",
-    "4.10.0-cfs",
-    "4.10.0-nocfs",
-    "4.10.1-cfs",
-    "4.10.1-nocfs",
-    "4.10.2-cfs",
-    "4.10.2-nocfs",
-    "4.10.3-cfs",
-    "4.10.3-nocfs",
-    "4.10.4-cfs",
-    "4.10.4-nocfs",
-    "5x-with-4x-segments-cfs",
-    "5x-with-4x-segments-nocfs",
-    "5.0.0.singlesegment-cfs",
-    "5.0.0.singlesegment-nocfs",
-    "5.0.0-cfs",
-    "5.0.0-nocfs",
-    "5.1.0-cfs",
-    "5.1.0-nocfs",
-    "5.2.0-cfs",
-    "5.2.0-nocfs",
-    "5.2.1-cfs",
-    "5.2.1-nocfs",
-    "5.3.0-cfs",
-    "5.3.0-nocfs",
-    "5.3.1-cfs",
-    "5.3.1-nocfs",
-    "5.3.2-cfs",
-    "5.3.2-nocfs",
-    "5.4.0-cfs",
-    "5.4.0-nocfs",
-    "5.4.1-cfs",
-    "5.4.1-nocfs",
-    "5.5.0-cfs",
-    "5.5.0-nocfs",
-    "5.5.1-cfs",
-    "5.5.1-nocfs",
-    "5.5.2-cfs",
-    "5.5.2-nocfs",
-    "5.5.3-cfs",
-    "5.5.3-nocfs",
-    "5.5.4-cfs",
-    "5.5.4-nocfs",
-    "5.5.5-cfs",
-    "5.5.5-nocfs",
-    "6.0.0-cfs",
-    "6.0.0-nocfs",
-    "6.0.1-cfs",
-    "6.0.1-nocfs",
-    "6.1.0-cfs",
-    "6.1.0-nocfs",
-    "6.2.0-cfs",
-    "6.2.0-nocfs",
-    "6.2.1-cfs",
-    "6.2.1-nocfs",
-    "6.3.0-cfs",
-    "6.3.0-nocfs",
-    "6.4.0-cfs",
-    "6.4.0-nocfs",
-    "6.4.1-cfs",
-    "6.4.1-nocfs",
-    "6.4.2-cfs",
-    "6.4.2-nocfs",
-    "6.5.0-cfs",
-    "6.5.0-nocfs",
-    "6.5.1-cfs",
-    "6.5.1-nocfs",
-    "6.6.0-cfs",
-    "6.6.0-nocfs",
-    "6.6.1-cfs",
-    "6.6.1-nocfs",
-    "6.6.2-cfs",
-    "6.6.2-nocfs",
-    "6.6.3-cfs",
-    "6.6.3-nocfs",
-    "6.6.4-cfs",
-    "6.6.4-nocfs",
-    "6.6.5-cfs",
-    "6.6.5-nocfs",
-    "6.6.6-cfs",
-    "6.6.6-nocfs",
-    "7.0.0-cfs",
-    "7.0.0-nocfs",
-    "7.0.1-cfs",
-    "7.0.1-nocfs",
-    "7.1.0-cfs",
-    "7.1.0-nocfs",
-    "7.2.0-cfs",
-    "7.2.0-nocfs",
-    "7.2.1-cfs",
-    "7.2.1-nocfs",
-    "7.3.0-cfs",
-    "7.3.0-nocfs",
-    "7.3.1-cfs",
-    "7.3.1-nocfs",
-    "7.4.0-cfs",
-    "7.4.0-nocfs",
-    "7.5.0-cfs",
-    "7.5.0-nocfs",
-    "7.6.0-cfs",
-    "7.6.0-nocfs",
-    "7.7.0-cfs",
-    "7.7.0-nocfs",
-    "7.7.1-cfs",
-    "7.7.1-nocfs",
-    "7.7.2-cfs",
-    "7.7.2-nocfs",
-    "7.7.3-cfs",
-    "7.7.3-nocfs",
-    "8.0.0-cfs",
-    "8.0.0-nocfs",
-    "8.1.0-cfs",
-    "8.1.0-nocfs",
-    "8.1.1-cfs",
-    "8.1.1-nocfs",
-    "8.2.0-cfs",
-    "8.2.0-nocfs",
-    "8.3.0-cfs",
-    "8.3.0-nocfs",
-    "8.3.1-cfs",
-    "8.3.1-nocfs",
-    "8.4.0-cfs",
-    "8.4.0-nocfs",
-    "8.4.1-cfs",
-    "8.4.1-nocfs",
-    "8.5.0-cfs",
-    "8.5.0-nocfs",
-    "8.5.1-cfs",
-    "8.5.1-nocfs",
-    "8.5.2-cfs",
-    "8.5.2-nocfs",
-    "8.6.0-cfs",
-    "8.6.0-nocfs",
-    "8.6.1-cfs",
-    "8.6.1-nocfs",
-    "8.6.2-cfs",
-    "8.6.2-nocfs",
-    "8.6.3-cfs",
-    "8.6.3-nocfs",
-    "8.7.0-cfs",
-    "8.7.0-nocfs",
-    "8.8.0-cfs",
-    "8.8.0-nocfs",
-    "8.8.1-cfs",
-    "8.8.1-nocfs",
-    "8.8.2-cfs",
-    "8.8.2-nocfs",
-    "8.9.0-cfs",
-    "8.9.0-nocfs",
-    "8.10.0-cfs",
-    "8.10.0-nocfs",
-    "8.10.1-cfs",
-    "8.10.1-nocfs",
-    "8.11.0-cfs",
-    "8.11.0-nocfs",
-    "8.11.1-cfs",
-    "8.11.1-nocfs",
-    "8.11.2-cfs",
-    "8.11.2-nocfs",
-    "8.11.3-cfs",
-    "8.11.3-nocfs"
-  };
+  static {
+    String name = "unsupported_versions.txt";
+    Set<String> indices;
+    try (LineNumberReader in =
+        new LineNumberReader(
+            IOUtils.getDecodingReader(
+                IOUtils.requireResourceNonNull(
+                    TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
+                StandardCharsets.UTF_8))) {
+      indices =
+          in.lines()
+              .filter(Predicate.not(String::isBlank))
+              .flatMap(version -> Stream.of(version + "-cfs", version + "-nocfs"))
+              .collect(Collectors.toCollection(LinkedHashSet::new));
+    } catch (IOException exception) {
+      throw new RuntimeException("failed to load resource", exception);
+    }
+
+    name = "unsupported_indices.txt";
+    try (LineNumberReader in =
+        new LineNumberReader(
+            IOUtils.getDecodingReader(
+                IOUtils.requireResourceNonNull(
+                    TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
+                StandardCharsets.UTF_8))) {
+      indices.addAll(
+          in.lines()
+              .filter(Predicate.not(String::isBlank))
+              .collect(Collectors.toCollection(LinkedHashSet::new)));
+    } catch (IOException exception) {
+      throw new RuntimeException("failed to load resource", exception);
+    }
+    UNSUPPORTED_INDEXES = Collections.unmodifiableSet(indices);
+  }
 
   /**
    * This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate
    * on too old indexes!
    */
   public void testUnsupportedOldIndexes() throws Exception {
-    for (int i = 0; i < unsupportedNames.length; i++) {
+    for (String version : UNSUPPORTED_INDEXES) {
       if (VERBOSE) {
-        System.out.println("TEST: index " + unsupportedNames[i]);
+        System.out.println("TEST: index " + version);
       }
-      Path oldIndexDir = createTempDir(unsupportedNames[i]);
-      TestUtil.unzip(
-          getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir);
+      Path oldIndexDir = createTempDir(version);
+      TestUtil.unzip(getDataInputStream("unsupported." + version + ".zip"), oldIndexDir);
       BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir);
       // don't checkindex, these are intentionally not supported
       dir.setCheckIndexOnClose(false);
@@ -312,7 +103,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
       IndexWriter writer = null;
       try {
         reader = DirectoryReader.open(dir);
-        fail("DirectoryReader.open should not pass for " + unsupportedNames[i]);
+        fail("DirectoryReader.open should not pass for " + version);
       } catch (IndexFormatTooOldException e) {
         if (e.getReason() != null) {
           assertNull(e.getVersion());
@@ -353,7 +144,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
         writer =
             new IndexWriter(
                 dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false));
-        fail("IndexWriter creation should not pass for " + unsupportedNames[i]);
+        fail("IndexWriter creation should not pass for " + version);
       } catch (IndexFormatTooOldException e) {
         if (e.getReason() != null) {
           assertNull(e.getVersion());
@@ -406,7 +197,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
       CheckIndex checker = new CheckIndex(dir);
       checker.setInfoStream(new PrintStream(bos, false, UTF_8));
       CheckIndex.Status indexStatus = checker.checkIndex();
-      if (unsupportedNames[i].startsWith("8.")) {
+      if (version.startsWith("8.")) {
         assertTrue(indexStatus.clean);
       } else {
         assertFalse(indexStatus.clean);
@@ -101,8 +101,6 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
       KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
   private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
 
-  static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1;
-
   /**
    * A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See
    * {@link #testVersionsFactory()} for details on the values provided to the framework.
@@ -62,7 +62,6 @@ public class TestBinaryBackwardsCompatibility extends BackwardsCompatibilityTest
 
   @Nightly
   public void testReadNMinusTwoCommit() throws IOException {
-
     try (BaseDirectoryWrapper dir = newDirectory(directory)) {
       IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
       StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();
@@ -55,6 +55,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
 
   static final String INDEX_NAME = "sorted";
   static final String SUFFIX = "";
+  private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
+  private static final String PARENT_FIELD_NAME = "___parent";
 
   public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
     super(version, pattern);
@ -79,8 +81,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
||||||
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
|
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
|
||||||
.setIndexSort(sort)
|
.setIndexSort(sort)
|
||||||
.setMergePolicy(newLogMergePolicy());
|
.setMergePolicy(newLogMergePolicy());
|
||||||
if (this.version.onOrAfter(Version.LUCENE_10_0_0)) {
|
if (this.version.onOrAfter(FIRST_PARENT_DOC_VERSION)) {
|
||||||
indexWriterConfig.setParentField("___parent");
|
indexWriterConfig.setParentField(PARENT_FIELD_NAME);
|
||||||
}
|
}
|
||||||
// open writer
|
// open writer
|
||||||
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
|
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
|
||||||
|
@ -89,7 +91,10 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
||||||
Document child = new Document();
|
Document child = new Document();
|
||||||
child.add(new StringField("relation", "child", Field.Store.NO));
|
child.add(new StringField("relation", "child", Field.Store.NO));
|
||||||
child.add(new StringField("bid", "" + i, Field.Store.NO));
|
child.add(new StringField("bid", "" + i, Field.Store.NO));
|
||||||
child.add(new NumericDocValuesField("dateDV", i));
|
if (version.onOrAfter(FIRST_PARENT_DOC_VERSION)
|
||||||
|
== false) { // only add this to earlier versions
|
||||||
|
child.add(new NumericDocValuesField("dateDV", i));
|
||||||
|
}
|
||||||
Document parent = new Document();
|
Document parent = new Document();
|
||||||
parent.add(new StringField("relation", "parent", Field.Store.NO));
|
parent.add(new StringField("relation", "parent", Field.Store.NO));
|
||||||
parent.add(new StringField("bid", "" + i, Field.Store.NO));
|
parent.add(new StringField("bid", "" + i, Field.Store.NO));
|
||||||
|
@ -158,6 +163,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
|
||||||
conf.setUseCompoundFile(false);
|
conf.setUseCompoundFile(false);
|
||||||
conf.setCodec(TestUtil.getDefaultCodec());
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
conf.setParentField("___parent");
|
conf.setParentField("___parent");
|
||||||
|
conf.setParentField(PARENT_FIELD_NAME);
|
||||||
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
|
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
|
||||||
IndexWriter writer = new IndexWriter(directory, conf);
|
IndexWriter writer = new IndexWriter(directory, conf);
|
||||||
LineFileDocs docs = new LineFileDocs(new Random(0));
|
LineFileDocs docs = new LineFileDocs(new Random(0));
|
||||||
|
|
|
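Note: the gating above keys on FIRST_PARENT_DOC_VERSION, so the parent field is configured only for new-enough index versions while the dateDV child field is written only for older ones. A condensed, hedged sketch of the writer-side configuration (analyzer, directory, and field names are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.util.Version;

public class SortedParentIndexSketch {
  public static void main(String[] args) throws Exception {
    Version version = Version.LATEST;
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
    if (version.onOrAfter(Version.LUCENE_9_10_0)) {
      // Sorted indexes holding document blocks reserve a parent field.
      conf.setParentField("___parent");
    }
    try (IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), conf)) {
      // addDocuments(...) blocks of child docs followed by their parent would go here.
    }
  }
}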
@@ -0,0 +1,4 @@
+5x-with-4x-segments-cfs
+5x-with-4x-segments-nocfs
+5.0.0.singlesegment-cfs
+5.0.0.singlesegment-nocfs
@@ -0,0 +1,122 @@
+1.9.0
+2.0.0
+2.1.0
+2.2.0
+2.3.0
+2.4.0
+2.4.1
+2.9.0
+2.9.1
+2.9.2
+2.9.3
+2.9.4
+3.0.0
+3.0.1
+3.0.2
+3.0.3
+3.1.0
+3.2.0
+3.3.0
+3.4.0
+3.5.0
+3.6.0
+3.6.1
+3.6.2
+4.0.0
+4.0.0.1
+4.0.0.2
+4.1.0
+4.2.0
+4.2.1
+4.3.0
+4.3.1
+4.4.0
+4.5.0
+4.5.1
+4.6.0
+4.6.1
+4.7.0
+4.7.1
+4.7.2
+4.8.0
+4.8.1
+4.9.0
+4.9.1
+4.10.0
+4.10.1
+4.10.2
+4.10.3
+4.10.4
+5.0.0
+5.1.0
+5.2.0
+5.2.1
+5.3.0
+5.3.1
+5.3.2
+5.4.0
+5.4.1
+5.5.0
+5.5.1
+5.5.2
+5.5.3
+5.5.4
+5.5.5
+6.0.0
+6.0.1
+6.1.0
+6.2.0
+6.2.1
+6.3.0
+6.4.0
+6.4.1
+6.4.2
+6.5.0
+6.5.1
+6.6.0
+6.6.1
+6.6.2
+6.6.3
+6.6.4
+6.6.5
+6.6.6
+7.0.0
+7.0.1
+7.1.0
+7.2.0
+7.2.1
+7.3.0
+7.3.1
+7.4.0
+7.5.0
+7.6.0
+7.7.0
+7.7.1
+7.7.2
+7.7.3
+8.0.0
+8.1.0
+8.1.1
+8.2.0
+8.3.0
+8.3.1
+8.4.0
+8.4.1
+8.5.0
+8.5.1
+8.5.2
+8.6.0
+8.6.1
+8.6.2
+8.6.3
+8.7.0
+8.8.0
+8.8.1
+8.8.2
+8.9.0
+8.10.0
+8.10.1
+8.11.0
+8.11.1
+8.11.2
+8.11.3
@@ -0,0 +1,40 @@
+8.0.0
+8.1.0
+8.1.1
+8.2.0
+8.3.0
+8.3.1
+8.4.0
+8.4.1
+8.5.0
+8.5.1
+8.5.2
+8.6.0
+8.6.1
+8.6.2
+8.6.3
+8.7.0
+8.8.0
+8.8.1
+8.8.2
+8.9.0
+8.10.0
+8.10.1
+8.11.0
+8.11.1
+8.11.2
+8.11.3
+9.0.0
+9.1.0
+9.2.0
+9.3.0
+9.4.0
+9.4.1
+9.4.2
+9.5.0
+9.6.0
+9.7.0
+9.8.0
+9.9.0
+9.9.1
+9.9.2
@@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandle;
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.MethodType;
 import java.text.ParseException;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Objects;
@@ -67,7 +68,7 @@ public class ExpressionsBenchmark {
           lookup.findStatic(
               lookup.lookupClass(), "ident", MethodType.methodType(double.class, double.class)));
       m.put("mh_identity", MethodHandles.identity(double.class));
-      return m;
+      return Collections.unmodifiableMap(m);
     } catch (ReflectiveOperationException e) {
       throw new AssertionError(e);
     }
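Note: returning Collections.unmodifiableMap(m) instead of the raw HashMap keeps the statically built table safe to share. The same publish-an-immutable-view idiom, reduced to a self-contained sketch with illustrative names:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

final class HandleTable {
  private HandleTable() {}

  // Build privately, publish read-only: callers see the entries but cannot
  // mutate shared static state behind the benchmark's back.
  static Map<String, Double> build() {
    Map<String, Double> m = new HashMap<>();
    m.put("pi", Math.PI);
    m.put("e", Math.E);
    return Collections.unmodifiableMap(m);
  }
}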
@@ -21,7 +21,6 @@ import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 import javax.xml.XMLConstants;
@@ -68,7 +67,7 @@ public class EnwikiContentSource extends ContentSource {
   private boolean stopped = false;
   private String[] tuple;
   private NoMoreDataException nmde;
-  private StringBuilder contents = new StringBuilder();
+  private final StringBuilder contents = new StringBuilder();
   private String title;
   private String body;
   private String time;
@@ -262,7 +261,6 @@ public class EnwikiContentSource extends ContentSource {
     }
   }

-  private static final Map<String, Integer> ELEMENTS = new HashMap<>();
   private static final int TITLE = 0;
   private static final int DATE = TITLE + 1;
   private static final int BODY = DATE + 1;
@@ -272,24 +270,24 @@ public class EnwikiContentSource extends ContentSource {
   // should not be part of the tuple, we should define them after LENGTH.
   private static final int PAGE = LENGTH + 1;

+  private static final Map<String, Integer> ELEMENTS =
+      Map.of(
+          "page", PAGE,
+          "text", BODY,
+          "timestamp", DATE,
+          "title", TITLE,
+          "id", ID);
+
   private static final String[] months = {
     "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
   };

-  static {
-    ELEMENTS.put("page", Integer.valueOf(PAGE));
-    ELEMENTS.put("text", Integer.valueOf(BODY));
-    ELEMENTS.put("timestamp", Integer.valueOf(DATE));
-    ELEMENTS.put("title", Integer.valueOf(TITLE));
-    ELEMENTS.put("id", Integer.valueOf(ID));
-  }
-
   /**
    * Returns the type of the element if defined, otherwise returns -1. This method is useful in
    * startElement and endElement, by not needing to compare the element qualified name over and
    * over.
    */
-  private static final int getElementType(String elem) {
+  private static int getElementType(String elem) {
     Integer val = ELEMENTS.get(elem);
     return val == null ? -1 : val.intValue();
   }
@@ -297,7 +295,7 @@ public class EnwikiContentSource extends ContentSource {
     private Path file;
     private boolean keepImages = true;
     private InputStream is;
-    private Parser parser = new Parser();
+    private final Parser parser = new Parser();

     @Override
     public void close() throws IOException {
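Note: Map.of (Java 9+) replaces the mutable-HashMap-plus-static-initializer idiom with one immutable expression, and it rejects null and duplicate keys at construction time. A self-contained sketch with illustrative constants, including a getOrDefault variant of the lookup:

import java.util.Map;

final class ElementTypes {
  private static final int TITLE = 0;
  private static final int DATE = TITLE + 1;
  private static final int BODY = DATE + 1;
  private static final int ID = BODY + 1;
  private static final int PAGE = ID + 1;

  private static final Map<String, Integer> ELEMENTS =
      Map.of("page", PAGE, "text", BODY, "timestamp", DATE, "title", TITLE, "id", ID);

  // Same contract as the patched getElementType: -1 when the element is unknown.
  // Map.of never holds nulls, so getOrDefault expresses the fallback directly.
  static int getElementType(String elem) {
    return ELEMENTS.getOrDefault(elem, -1);
  }
}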
@@ -18,6 +18,8 @@ package org.apache.lucene.benchmark.byTask.feeds;

 import java.io.IOException;
 import java.nio.file.Path;
+import java.util.Collections;
+import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -40,22 +42,28 @@ public abstract class TrecDocParser {
   /** trec parser type used for unknown extensions */
   public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;

-  static final Map<ParsePathType, TrecDocParser> pathType2parser = new HashMap<>();
+  static final Map<ParsePathType, TrecDocParser> pathType2Parser;

   static {
-    pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
-    pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
-    pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
-    pathType2parser.put(ParsePathType.FT, new TrecFTParser());
-    pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
+    pathType2Parser =
+        Collections.unmodifiableMap(
+            new EnumMap<>(
+                Map.of(
+                    ParsePathType.GOV2, new TrecGov2Parser(),
+                    ParsePathType.FBIS, new TrecFBISParser(),
+                    ParsePathType.FR94, new TrecFR94Parser(),
+                    ParsePathType.FT, new TrecFTParser(),
+                    ParsePathType.LATIMES, new TrecLATimesParser())));
   }

-  static final Map<String, ParsePathType> pathName2Type = new HashMap<>();
+  static final Map<String, ParsePathType> pathName2Type;

   static {
+    Map<String, ParsePathType> name2Type = new HashMap<>();
     for (ParsePathType ppt : ParsePathType.values()) {
-      pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
+      name2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
     }
+    pathName2Type = Collections.unmodifiableMap(name2Type);
   }

   /** max length of walk up from file to its ancestors when looking for a known path type */
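Note: wrapping the Map.of literal in an EnumMap stores the entries in an ordinal-indexed array before freezing them, which is compact and fast for enum keys. The idiom in isolation, with an illustrative enum and values:

import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

final class ParserRegistry {
  enum PathType { GOV2, FBIS }

  // Map.of supplies the entries, EnumMap re-indexes them by ordinal, and
  // unmodifiableMap makes the static field safe to publish.
  static final Map<PathType, String> PARSERS =
      Collections.unmodifiableMap(
          new EnumMap<>(Map.of(PathType.GOV2, "TrecGov2Parser", PathType.FBIS, "TrecFBISParser")));
}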
@@ -32,6 +32,6 @@ public class TrecParserByPath extends TrecDocParser {
       StringBuilder docBuf,
       ParsePathType pathType)
       throws IOException {
-    return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
+    return pathType2Parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
   }
 }
@@ -43,7 +43,7 @@ public class TaskSequence extends PerfTask {
   private boolean resetExhausted = false;
   private PerfTask[] tasksArray;
   private boolean anyExhaustibleTasks;
-  private boolean collapsable = false; // to not collapse external sequence named in alg.
+  private final boolean collapsable; // to not collapse external sequence named in alg.

   private boolean fixedTime; // true if we run for fixed time
   private double runTimeSec; // how long to run for
@@ -23,7 +23,6 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 import org.apache.commons.compress.compressors.CompressorException;
@@ -70,15 +69,9 @@ public class StreamUtils {
     }
   }

-  private static final Map<String, Type> extensionToType = new HashMap<>();
-
-  static {
-    // these in are lower case, we will lower case at the test as well
-    extensionToType.put(".bz2", Type.BZIP2);
-    extensionToType.put(".bzip", Type.BZIP2);
-    extensionToType.put(".gz", Type.GZIP);
-    extensionToType.put(".gzip", Type.GZIP);
-  }
+  // these are in lower case, we will lower case at the test as well
+  private static final Map<String, Type> extensionToType =
+      Map.of(".bz2", Type.BZIP2, ".bzip", Type.BZIP2, ".gz", Type.GZIP, ".gzip", Type.GZIP);

   /**
    * Returns an {@link InputStream} over the requested file. This method attempts to identify the
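Note: the new extensionToType table keeps lower-case keys and, per the corrected comment, the caller lower-cases before the lookup. A sketch of that lookup path; the Type enum and file-name handling here are illustrative, not the StreamUtils source:

import java.util.Locale;
import java.util.Map;

final class CompressionSniffer {
  enum Type { BZIP2, GZIP, PLAIN }

  private static final Map<String, Type> EXTENSION_TO_TYPE =
      Map.of(".bz2", Type.BZIP2, ".bzip", Type.BZIP2, ".gz", Type.GZIP, ".gzip", Type.GZIP);

  // Lower-case with a fixed locale before testing, matching the table's keys.
  static Type sniff(String fileName) {
    String name = fileName.toLowerCase(Locale.ROOT);
    int dot = name.lastIndexOf('.');
    if (dot >= 0) {
      Type t = EXTENSION_TO_TYPE.get(name.substring(dot));
      if (t != null) {
        return t;
      }
    }
    return Type.PLAIN;
  }
}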
@@ -36,7 +36,7 @@ public class TestTrecContentSource extends LuceneTestCase {
   /** A TrecDocMaker which works on a String and not files. */
   private static class StringableTrecSource extends TrecContentSource {

-    private String docs = null;
+    private final String docs;

     public StringableTrecSource(String docs, boolean forever) {
       this.docs = docs;
@@ -230,24 +230,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
     return fields.size();
   }

-  // for debugging
-  String brToString(BytesRef b) {
-    if (b == null) {
-      return "null";
-    } else {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (
-          @SuppressWarnings("unused")
-          Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-  }
-
   @Override
   public void checkIntegrity() throws IOException {
     // term dictionary
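Note: this and several hunks below delete per-class copies of the same debug helper in favor of the shared ToStringUtils.bytesRefToString. Judging from the deleted copies, the consolidated method's contract is along these lines (a sketch inferred from the removed code, not the actual ToStringUtils source):

import org.apache.lucene.util.BytesRef;

final class BytesRefDebug {
  // Render UTF-8 text plus the raw bytes; if the bytes are not valid UTF-8
  // (e.g. a prefix that ends mid-codepoint), fall back to BytesRef's hex form.
  static String bytesRefToString(BytesRef b) {
    if (b == null) {
      return "(null)";
    }
    try {
      return b.utf8ToString() + " " + b;
    } catch (
        @SuppressWarnings("unused")
        Throwable t) {
      return b.toString();
    }
  }
}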
@@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FSTCompiler;
@@ -288,29 +289,10 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

     @Override
     public String toString() {
-      return brToString(termBytes);
+      return ToStringUtils.bytesRefToString(termBytes);
     }
   }

-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(byte[] b) {
-    return brToString(new BytesRef(b));
-  }
-
   private static final class SubIndex {
     public final FST<Output> index;
     public final long termOrdStart;
@@ -353,7 +335,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

     @Override
     public String toString() {
-      return "BLOCK: " + brToString(prefix);
+      return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
     }

     public void compileIndex(
@@ -457,9 +439,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
         Output newOutput =
             FST_OUTPUTS.newOutput(
                 output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
-        // System.out.println("  append sub=" + indexEnt.input + " output=" + indexEnt.output + "
-        // termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" +
-        // newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
+        // System.out.println("  append sub=" + indexEnt.input + " output=" + indexEnt.output +
+        // " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
+        // + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
         fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
       }
     }
@@ -642,8 +624,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

       long startFP = out.getFilePointer();

-      // if (DEBUG) System.out.println("  writeBlock fp=" + startFP + " isFloor=" + isFloor + "
-      // floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
+      // if (DEBUG) System.out.println("  writeBlock fp=" + startFP + " isFloor=" + isFloor +
+      // " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
       // hasTerms + " hasSubBlocks=" + hasSubBlocks);

       boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
@@ -662,11 +644,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
       out.writeVInt(code);

       // if (DEBUG) {
-      // System.out.println("    writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
-      // pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
-      // brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
-      // floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
-      // isLastInFloor);
+      // System.out.println("    writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
+      // " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
+      // ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
+      // (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
+      // " isLastInFloor=" + isLastInFloor);
       // }

       final List<SubIndex> subIndices;
@@ -784,7 +766,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
        BytesRef suffixBytes = new BytesRef(suffix);
        System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
        suffixBytes.length = suffix;
-       System.out.println("      write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
+       System.out.println("      write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
+         " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
      }
      */
@@ -842,7 +825,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
      if (DEBUG) {
        int[] tmp = new int[lastTerm.length];
        System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
-       System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
+       System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" +
+         Arrays.toString(tmp) + " pending.size()=" + pending.size());
      }
    */
@@ -885,8 +869,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
         // we are closing:
         int prefixTopSize = pending.size() - prefixStarts[i];
         if (prefixTopSize >= minItemsInBlock) {
-          // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
-          // minItemsInBlock=" + minItemsInBlock);
+          // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
+          // " minItemsInBlock=" + minItemsInBlock);
           writeBlocks(i + 1, prefixTopSize);
           prefixStarts[i] -= prefixTopSize - 1;
         }
@@ -61,7 +61,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
       throws IOException {
     // if (DEBUG) {
     // System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" +
-    // brToString(compiled.commonSuffixRef));
+    // ToStringUtils.bytesRefToString(compiled.commonSuffixRef));
     // }
     this.fr = fr;
     this.byteRunnable = compiled.getByteRunnable();
@@ -283,13 +283,15 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
             currentFrame.loadNextFloorBlock();
             continue;
           } else {
-            // if (DEBUG) System.out.println("  return term=" + brToString(term));
+            // if (DEBUG) System.out.println("  return term=" +
+            // ToStringUtils.bytesRefToString(term));
             return;
           }
         }
         continue;
       } else if (cmp == 0) {
-        // if (DEBUG) System.out.println("  return term=" + brToString(term));
+        // if (DEBUG) System.out.println("  return term=" +
+        // ToStringUtils.bytesRefToString(term));
         return;
       } else {
         // Fallback to prior entry: the semantics of
@@ -327,10 +329,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {

     // if (DEBUG) {
     // System.out.println("\nintEnum.next seg=" + segment);
-    // System.out.println("  frame ord=" + currentFrame.ord + " prefix=" + brToString(new
-    // BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + "
-    // lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" +
-    // (currentFrame.transitions.length == 0 ? "n/a" :
+    // System.out.println("  frame ord=" + currentFrame.ord + " prefix=" +
+    // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) +
+    // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
+    // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
     // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
     // currentFrame.outputPrefix);
     // }
@@ -343,9 +345,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
         // if (DEBUG) System.out.println("  next-floor-block");
         currentFrame.loadNextFloorBlock();
         // if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" +
-        // brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
-        // currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
-        // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
+        // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
+        // currentFrame.prefix)) +
+        // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
+        // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
        // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
        // currentFrame.outputPrefix);
       } else {
@@ -357,9 +360,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
         currentFrame = stack[currentFrame.ord - 1];
         assert currentFrame.lastSubFP == lastFP;
         // if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" +
-        // brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
-        // currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
-        // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
+        // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
+        // currentFrame.prefix)) +
+        // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
+        // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
         // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
         // currentFrame.outputPrefix);
       }
@@ -373,7 +377,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
     // suffixRef.length = currentFrame.suffix;
     // System.out.println("    " + (isSubBlock ? "sub-block" : "term") + " " +
     // currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" +
-    // brToString(suffixRef));
+    // ToStringUtils.bytesRefToString(suffixRef));
     // }

     if (currentFrame.suffix != 0) {
@@ -480,15 +484,16 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
           copyTerm();
           currentFrame = pushFrame(state);
           // if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" +
-          // brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
-          // currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
+          // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
+          // currentFrame.prefix)) +
+          // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
           // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
           // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
           // currentFrame.outputPrefix);
         } else if (byteRunnable.isAccept(state)) {
           copyTerm();
           // if (DEBUG) System.out.println("    term match to state=" + state + "; return term=" +
-          // brToString(term));
+          // ToStringUtils.bytesRefToString(term));
           assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
               : "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
           return term;
@@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Util;

@@ -174,11 +175,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
       throws IOException {
     final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
     f.arc = arc;
-    // System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + "
-    // nextEnt=" + f.nextEnt);
+    // System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp +
+    // " nextEnt=" + f.nextEnt);
     if (f.fpOrig == fp && f.nextEnt != -1) {
-      // if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + "
-      // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+      // if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp +
+      // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
      // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
      // term.length + " vs prefix=" + f.prefix);
      if (f.prefix > targetBeforeCurrentLength) {
@@ -204,7 +205,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
     // final int sav = term.length;
     // term.length = length;
     // System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-    // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+    // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
     // term.length = sav;
     // }
   }
@@ -224,19 +225,6 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
     return true;
   }

-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
   @Override
   public boolean seekExact(final BytesRef target) throws IOException {

@@ -250,7 +238,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {

     /*
     if (DEBUG) {
-      System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+      System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" +
+        ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
+        ") validIndexPrefix=" + validIndexPrefix);
       printSeekState(System.out);
     }
     */
@@ -411,8 +401,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
       positioned = true;

       // if (DEBUG) {
-      // System.out.println("    start index loop targetUpto=" + targetUpto + " output=" + output + "
-      // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+      // System.out.println("    start index loop targetUpto=" + targetUpto + " output=" + output +
+      // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
       // targetBeforeCurrentLength);
       // }

@@ -443,7 +433,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
           term.setByteAt(targetUpto, (byte) targetLabel);
           term.setLength(1 + targetUpto);
           // if (DEBUG) {
-          // System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+          // System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }
@@ -459,7 +449,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
         } else {
           // if (DEBUG) {
           // System.out.println("  got " + result + "; return NOT_FOUND term=" +
-          // brToString(term));
+          // ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }
@@ -502,7 +492,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
         termExists = false;
         term.setLength(targetUpto);
         // if (DEBUG) {
-        // System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+        // System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
         // }
         return false;
       }
@@ -537,8 +527,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {

     // if (DEBUG) {
     // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
-    // target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
-    // termExists + ") validIndexPrefix= " + validIndexPrefix);
+    // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
+    // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
     // printSeekState();
     // }

@@ -581,9 +571,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
       cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
       // if (DEBUG) {
       // System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
-      // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
-      // vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
-      // " output=" + output);
+      // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+      // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
+      // + " output=" + output);
       // }
       if (cmp != 0) {
         break;
@@ -697,8 +687,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
     positioned = true;

     // if (DEBUG) {
-    // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + "
-    // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+    // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output +
+    // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
     // targetBeforeCurrentLength);
     // }

@@ -733,7 +723,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {

       if (next() != null) {
         // if (DEBUG) {
-        // System.out.println("  return NOT_FOUND term=" + brToString(term) + " " + term);
+        // System.out.println("  return NOT_FOUND term=" +
+        // ToStringUtils.bytesRefToString(term));
         // }
         return SeekStatus.NOT_FOUND;
       } else {
@@ -744,7 +735,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
       }
     } else {
       // if (DEBUG) {
-      // System.out.println("  return " + result + " term=" + brToString(term) + " " + term);
+      // System.out.println("  return " + result + " term=" +
+      // ToStringUtils.bytesRefToString(term));
       // }
       return result;
     }
@@ -829,7 +821,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
                 + " prefixLen="
                 + f.prefix
                 + " prefix="
-                + brToString(prefix)
+                + ToStringUtils.bytesRefToString(prefix)
                 + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                 + " hasTerms="
                 + f.hasTerms
@@ -859,7 +851,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
                 + " prefixLen="
                 + f.prefix
                 + " prefix="
-                + brToString(prefix)
+                + ToStringUtils.bytesRefToString(prefix)
                 + " nextEnt="
                 + f.nextEnt
                 + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@@ -951,8 +943,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {

     assert !eof;
     // if (DEBUG) {
-    // System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
-    // termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
+    // System.out.println("\nBTTR.next seg=" + segment + " term=" +
+    // ToStringUtils.bytesRefToString(term) +
+    // " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
     // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
     // printSeekState();
     // }
@@ -1019,8 +1012,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
           // currentFrame.hasTerms = true;
           currentFrame.loadBlock();
         } else {
-          // if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term + "
-          // currentFrame.ord=" + currentFrame.ord);
+          // if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term +
+          // " currentFrame.ord=" + currentFrame.ord);
           positioned = true;
           return term.get();
         }
@@ -1235,8 +1228,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
     int low = 0;
     int high = arc.numArcs() - 1;
     int mid = 0;
-    // System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
-    // output=" + output);
+    // System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput +
+    // " output=" + output);
     boolean found = false;
     while (low <= high) {
       mid = (low + high) >>> 1;
@@ -119,8 +119,8 @@ final class OrdsSegmentTermsEnumFrame {
     numFollowFloorBlocks = floorDataReader.readVInt();
     nextFloorLabel = floorDataReader.readByte() & 0xff;
     nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
-    // System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + "
-    // shift=" + (nextFloorTermOrd-termOrdOrig));
+    // System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd +
+    // " shift=" + (nextFloorTermOrd-termOrdOrig));

     // if (DEBUG) {
     // System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new
@@ -289,8 +289,8 @@ final class OrdsSegmentTermsEnumFrame {

   // Decodes next entry; returns true if it's a sub-block
   public boolean nextLeaf() {
-    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
-    // entCount=" + entCount);
+    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt +
+    // " entCount=" + entCount);
     assert nextEnt != -1 && nextEnt < entCount
         : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
     nextEnt++;
@@ -306,8 +306,8 @@ final class OrdsSegmentTermsEnumFrame {
   }

   public boolean nextNonLeaf() {
-    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
-    // entCount=" + entCount);
+    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt +
+    // " entCount=" + entCount);
     assert nextEnt != -1 && nextEnt < entCount
         : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
     nextEnt++;
@@ -374,8 +374,8 @@ final class OrdsSegmentTermsEnumFrame {
         newFP = fpOrig + (code >>> 1);
         hasTerms = (code & 1) != 0;
         // if (DEBUG) {
-        // System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
-        // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+        // System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP +
+        // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
         // }

         isLastInFloor = numFollowFloorBlocks == 1;
@@ -440,8 +440,8 @@ final class OrdsSegmentTermsEnumFrame {
         newFP = fpOrig + (code >>> 1);
         hasTerms = (code & 1) != 0;
         // if (DEBUG) {
-        // System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
-        // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+        // System.out.println("    label=" + ((char) nextFloorLabel) + " fp=" + newFP +
+        // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
         // }

         isLastInFloor = numFollowFloorBlocks == 1;
@@ -495,8 +495,8 @@ final class OrdsSegmentTermsEnumFrame {
     boolean absolute = metaDataUpto == 0;
     assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt;

-    // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
-    // mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
+    // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
+    // " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);

     // TODO: better API would be "jump straight to term=N"???
     while (metaDataUpto < limit) {
@@ -593,10 +593,10 @@ final class OrdsSegmentTermsEnumFrame {
   // scan the entries check if the suffix matches.
   public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {

-    // if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
-    // nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
-    // OrdsSegmentTermsEnum.brToString(target) + " term=" +
-    // OrdsSegmentTermsEnum.brToString(ste.term));
+    // if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
+    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
+    // ToStringUtils.bytesRefToString(target) + " term=" +
+    // ToStringUtils.bytesRefToString(ste.term));

     assert nextEnt != -1;

@@ -627,7 +627,7 @@ final class OrdsSegmentTermsEnumFrame {
       // suffixBytesRef.offset = suffixesReader.getPosition();
       // suffixBytesRef.length = suffix;
       // System.out.println("      cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
-      // + OrdsSegmentTermsEnum.brToString(suffixBytesRef));
+      // + ToStringUtils.bytesRefToString(suffixBytesRef));
       // }

       final int termLen = prefix + suffix;
@@ -714,8 +714,8 @@ final class OrdsSegmentTermsEnumFrame {

     // if (DEBUG) System.out.println("    scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
     // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
-    // OrdsSegmentTermsEnum.brToString(target) + " term=" +
-    // OrdsSegmentTermsEnum.brToString(ste.term));
+    // ToStringUtils.bytesRefToString(target) + " term=" +
+    // ToStringUtils.bytesRefToString(ste.term));

     assert nextEnt != -1;

@@ -743,7 +743,8 @@ final class OrdsSegmentTermsEnumFrame {
       // suffixBytesRef.offset = suffixesReader.getPosition();
       // suffixBytesRef.length = suffix;
       // System.out.println("      cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
-      // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+      // (nextEnt-1) + " (of " + entCount + ") suffix=" +
+      // ToStringUtils.bytesRefToString(suffixBytesRef));
       // }

       ste.termExists = (code & 1) == 0;
@@ -210,7 +210,7 @@ public final class FieldReader extends Terms {
   @Override
   public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
     // if (DEBUG) System.out.println("  FieldReader.intersect startTerm=" +
-    // BlockTreeTermsWriter.brToString(startTerm));
+    // ToStringUtils.bytesRefToString(startTerm));
     // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
     // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
     // can we optimize knowing that...?
@@ -549,19 +549,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
     }
   }

-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
   private void copyTerm() {
     final int len = currentFrame.prefix + currentFrame.suffix;
     if (term.bytes.length < len) {
@@ -307,24 +307,6 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
     return fieldMap.size();
   }

-  // for debugging
-  String brToString(BytesRef b) {
-    if (b == null) {
-      return "null";
-    } else {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (
-          @SuppressWarnings("unused")
-          Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-  }
-
   @Override
   public void checkIntegrity() throws IOException {
     // terms index
@@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.compress.LZ4;
 import org.apache.lucene.util.compress.LowercaseAsciiCompression;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
@@ -394,7 +395,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
       }

       // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
-      // brToString(term));
+      // ToStringUtils.bytesRefToString(term));
       termsWriter.write(term, termsEnum, norms);
     }

@@ -433,33 +434,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {

     @Override
     public String toString() {
-      return "TERM: " + brToString(termBytes);
+      return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
     }
   }

-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    if (b == null) {
-      return "(null)";
-    } else {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-  }
-
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(byte[] b) {
-    return brToString(new BytesRef(b));
-  }
-
   /**
    * Encodes long value to variable length byte[], in MSB order. Use {@link
    * FieldReader#readMSBVLong} to decode.
@@ -506,7 +484,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {

     @Override
     public String toString() {
-      return "BLOCK: prefix=" + brToString(prefix);
+      return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
     }

     public void compileIndex(
@@ -689,8 +667,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
     // if (DEBUG2) {
     // BytesRef br = new BytesRef(lastTerm.bytes());
     // br.length = prefixLength;
-    // System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
-    // + count);
+    // System.out.println("writeBlocks: seg=" + segment + " prefix=" +
+    // ToStringUtils.bytesRefToString(br) + " count=" + count);
     // }

     // Root block better write all remaining pending entries:
@@ -843,9 +821,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
       prefix.length = prefixLength;

       // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
-      // brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
-      // pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
-      // " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
+      // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
+      // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
+      // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
+      // hasSubBlocks);

       // Write block header:
       int numEntries = end - start;
@@ -858,7 +837,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {

      /*
      if (DEBUG) {
-       System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
+       System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
+         pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
+         " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
      }
      */

@@ -893,7 +874,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
       // BytesRef suffixBytes = new BytesRef(suffix);
       // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
       // suffixBytes.length = suffix;
-      // System.out.println(" write term suffix=" + brToString(suffixBytes));
+      // System.out.println(" write term suffix=" +
+      // ToStringUtils.bytesRefToString(suffixBytes));
       // }

       // For leaf block we write suffix straight
@@ -926,7 +908,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
       // BytesRef suffixBytes = new BytesRef(suffix);
       // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
       // suffixBytes.length = suffix;
-      // System.out.println(" write term suffix=" + brToString(suffixBytes));
+      // System.out.println(" write term suffix=" +
+      // ToStringUtils.bytesRefToString(suffixBytes));
       // }

       // For non-leaf block we borrow 1 bit to record
@@ -968,8 +951,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
       // BytesRef suffixBytes = new BytesRef(suffix);
       // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
       // suffixBytes.length = suffix;
-      // System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
-      // subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
+      // System.out.println(" write sub-block suffix=" +
+      // ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" +
+      // (startFP-block.fp) + " floor=" + block.isFloor);
       // }

       assert floorLeadLabel == -1
@@ -1090,7 +1074,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
      if (DEBUG) {
        int[] tmp = new int[lastTerm.length];
        System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
-       System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
+       System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
+         " pending.size()=" + pending.size());
      }
      */

@@ -1143,8 +1128,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
         // we are closing:
         int prefixTopSize = pending.size() - prefixStarts[i];
         if (prefixTopSize >= minItemsInBlock) {
-          // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
-          // minItemsInBlock=" + minItemsInBlock);
+          // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
+          // " minItemsInBlock=" + minItemsInBlock);
           writeBlocks(i + 1, prefixTopSize);
           prefixStarts[i] -= prefixTopSize - 1;
         }
@@ -263,8 +263,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
     f.arc = arc;
     if (f.fpOrig == fp && f.nextEnt != -1) {
-      // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
-      // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+      // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
+      // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
       // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
       // term.length + " vs prefix=" + f.prefix);
       // if (f.prefix > targetBeforeCurrentLength) {
@@ -286,7 +286,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
      // final int sav = term.length;
      // term.length = length;
      // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-     // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+     // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
      // term.length = sav;
      // }
     }
@@ -306,27 +306,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     return true;
   }

-  /*
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRefBuilder b) {
-    return brToString(b.get());
-  }
-  */
-
   @Override
   public boolean seekExact(BytesRef target) throws IOException {

@@ -344,8 +323,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {

     // if (DEBUG) {
     // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
-    // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
-    // + termExists + ") validIndexPrefix=" + validIndexPrefix);
+    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+    // ToStringUtils.bytesRefToString(term) +
+    // " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
     // printSeekState(System.out);
     // }

@@ -499,8 +479,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     }

     // if (DEBUG) {
-    // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
-    // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+    // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+    // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
     // targetBeforeCurrentLength);
     // }

@@ -531,7 +511,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
           term.setByteAt(targetUpto, (byte) targetLabel);
           term.setLength(1 + targetUpto);
           // if (DEBUG) {
-          // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+          // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }
@@ -547,7 +527,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
         } else {
           // if (DEBUG) {
           // System.out.println(" got " + result + "; return NOT_FOUND term=" +
-          // brToString(term));
+          // ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }
@@ -586,7 +566,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
         termExists = false;
         term.setLength(targetUpto);
         // if (DEBUG) {
-        // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+        // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
         // }
         return false;
       }
@@ -622,8 +602,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {

     // if (DEBUG) {
     // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
-    // fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
-    // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
+    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+    // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
+    // ") validIndexPrefix= " + validIndexPrefix);
     // printSeekState(System.out);
     // }

@@ -663,9 +644,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
       // if (DEBUG) {
       // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
-      // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
-      // vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
-      // " output=" + output);
+      // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+      // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
+      // + " output=" + output);
       // }
       if (cmp != 0) {
         break;
@@ -771,8 +752,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
     }

     // if (DEBUG) {
-    // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
-    // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+    // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+    // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
     // targetBeforeCurrentLength);
     // }

@@ -808,7 +789,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {

       if (next() != null) {
         // if (DEBUG) {
-        // System.out.println(" return NOT_FOUND term=" + brToString(term));
+        // System.out.println(" return NOT_FOUND term=" +
+        // ToStringUtils.bytesRefToString(term));
         // }
         return SeekStatus.NOT_FOUND;
       } else {
@@ -819,7 +801,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
       }
     } else {
       // if (DEBUG) {
-      // System.out.println(" return " + result + " term=" + brToString(term));
+      // System.out.println(" return " + result + " term=" +
+      // ToStringUtils.bytesRefToString(term));
       // }
       return result;
     }
@@ -1015,9 +998,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {

     assert !eof;
     // if (DEBUG) {
-    // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
-    // termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
-    // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+    // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
+    // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
+    // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
+    // " validIndexPrefix=" + validIndexPrefix);
     // printSeekState(System.out);
     // }

@@ -1081,8 +1065,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
         // try to scan to the right floor frame:
         currentFrame.loadBlock();
       } else {
-        // if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
-        // + currentFrame.ord);
+        // if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
+        // " currentFrame.ord=" + currentFrame.ord);
         return term.get();
       }
     }
@@ -295,8 +295,8 @@ final class SegmentTermsEnumFrame {
   }

   public void nextLeaf() {
-    // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
-    // entCount=" + entCount);
+    // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
+    // " entCount=" + entCount);
     assert nextEnt != -1 && nextEnt < entCount
         : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
     nextEnt++;
@@ -388,8 +388,8 @@ final class SegmentTermsEnumFrame {
         newFP = fpOrig + (code >>> 1);
         hasTerms = (code & 1) != 0;
         // if (DEBUG) {
-        // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
-        // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+        // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
+        // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
         // }

         isLastInFloor = numFollowFloorBlocks == 1;
@@ -531,28 +531,14 @@ final class SegmentTermsEnumFrame {
   private long subCode;
   CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;

-  // for debugging
-  /*
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-  */
-
   // Target's prefix matches this block's prefix; we
   // scan the entries check if the suffix matches.
   public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {

-    // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
-    // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
-    // brToString(term));
+    // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
+    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
+    // ToStringUtils.bytesRefToString(target) +
+    // " term=" + ToStringUtils.bytesRefToString(term));

     assert nextEnt != -1;

@@ -582,7 +568,7 @@ final class SegmentTermsEnumFrame {
       // suffixBytesRef.offset = suffixesReader.getPosition();
       // suffixBytesRef.length = suffix;
       // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
-      // + brToString(suffixBytesRef));
+      // + ToStringUtils.bytesRefToString(suffixBytesRef));
       // }

       startBytePos = suffixesReader.getPosition();
@@ -647,8 +633,9 @@ final class SegmentTermsEnumFrame {
   public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {

     // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
-    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
-    // brToString(target));
+    // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
+    // ToStringUtils.bytesRefToString(target) +
+    // " term=" + ToStringUtils.bytesRefToString(term));

     assert nextEnt != -1;

@@ -676,7 +663,8 @@ final class SegmentTermsEnumFrame {
       // suffixBytesRef.offset = suffixesReader.getPosition();
       // suffixBytesRef.length = suffix;
       // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
-      // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+      // (nextEnt-1) + " (of " + entCount + ") suffix=" +
+      // ToStringUtils.bytesRefToString(suffixBytesRef));
       // }

       final int termLen = prefix + suffix;
@@ -708,8 +696,8 @@ final class SegmentTermsEnumFrame {
         // return NOT_FOUND:
         fillTerm();

-        // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
-        // ste.termExists=" + ste.termExists);
+        // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
+        // " ste.termExists=" + ste.termExists);

         if (!exactOnly && !ste.termExists) {
           // System.out.println(" now pushFrame");
@@ -166,6 +166,16 @@ public final class FeatureField extends Field {
     return stream;
   }

+  /**
+   * This is useful if you have multiple features sharing a name and you want to take action to
+   * deduplicate them.
+   *
+   * @return the feature value of this field.
+   */
+  public float getFeatureValue() {
+    return featureValue;
+  }
+
   private static final class FeatureTokenStream extends TokenStream {
     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
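The hunk above adds a public accessor for the value stored in a FeatureField. A minimal usage sketch, assuming the standard FeatureField constructor; the field and feature names here are invented for illustration:

    import org.apache.lucene.document.FeatureField;

    public class FeatureValueDemo {
      public static void main(String[] args) {
        // a static rank feature stored under the "features" field
        FeatureField f = new FeatureField("features", "pagerank", 42.0f);
        // the new accessor: read the current feature value back, e.g. to compare
        // two fields that share a feature name before deduplicating them
        System.out.println(f.getFeatureValue()); // 42.0
      }
    }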
@@ -21,6 +21,7 @@ import java.io.StreamTokenizer;
 import java.io.StringReader;
 import java.text.ParseException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -404,21 +405,23 @@ public class SimpleWKTShapeParser {
     ENVELOPE("envelope"); // not part of the actual WKB spec

     private final String shapeName;
-    private static final Map<String, ShapeType> shapeTypeMap = new HashMap<>();
+    private static final Map<String, ShapeType> shapeTypeMap;
     private static final String BBOX = "BBOX";

     static {
+      Map<String, ShapeType> shapeTypes = new HashMap<>();
       for (ShapeType type : values()) {
-        shapeTypeMap.put(type.shapeName, type);
+        shapeTypes.put(type.shapeName, type);
       }
-      shapeTypeMap.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
+      shapeTypes.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
+      shapeTypeMap = Collections.unmodifiableMap(shapeTypes);
     }

     ShapeType(String shapeName) {
       this.shapeName = shapeName;
     }

-    protected String typename() {
+    String typename() {
       return shapeName;
     }

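The rewritten initializer above builds the lookup table in a local map and publishes it once through Collections.unmodifiableMap, so shapeTypeMap can no longer be mutated after class initialization. The same build-then-publish idiom in isolation; the Color enum below is a hypothetical stand-in, not Lucene code:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    enum Color {
      RED("red"),
      GREEN("green");

      private final String name;
      private static final Map<String, Color> BY_NAME;

      static {
        // enum constants are constructed before this block runs, so values() is safe here
        Map<String, Color> byName = new HashMap<>();
        for (Color c : values()) {
          byName.put(c.name, c);
        }
        BY_NAME = Collections.unmodifiableMap(byName);
      }

      Color(String name) {
        this.name = name;
      }

      static Color byName(String name) {
        return BY_NAME.get(name);
      }
    }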
@@ -32,7 +32,7 @@ public final class FieldInfo {
   /** Internal field number */
   public final int number;

-  private DocValuesType docValuesType = DocValuesType.NONE;
+  private DocValuesType docValuesType;

   // True if any document indexed term vectors
   private boolean storeTermVector;
@@ -84,7 +84,7 @@ public class LiveIndexWriterConfig {
   protected volatile int perThreadHardLimitMB;

   /** True if segment flushes should use compound file format */
-  protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
+  protected volatile boolean useCompoundFile;

   /** True if calls to {@link IndexWriter#close()} should first do a commit. */
   protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;
@@ -597,12 +597,12 @@ public abstract class MergePolicy {
   * If the size of the merge segment exceeds this ratio of the total index size then it will remain
   * in non-compound format
   */
-  protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+  protected double noCFSRatio;

  /**
   * If the size of the merged segment exceeds this value then it will not use compound file format.
   */
-  protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
+  protected long maxCFSSegmentSize;

  /** Creates a new merge policy instance. */
  protected MergePolicy() {
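This hunk, together with the FieldInfo and LiveIndexWriterConfig hunks above (and several similar ones below), drops explicit field initializers; presumably the constructors assign these values, since otherwise removing non-default initializers such as DEFAULT_NO_CFS_RATIO would change behavior. For reference, the language rule the zero-value cases rely on:

    class Defaults {
      int i;      // initialized to 0
      long l;     // initialized to 0L
      double d;   // initialized to 0.0
      boolean b;  // initialized to false
      Object o;   // initialized to null
    }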
@@ -103,7 +103,7 @@ public abstract class VectorizationProvider {
   // visible for tests
   static VectorizationProvider lookup(boolean testMode) {
     final int runtimeVersion = Runtime.version().feature();
-    if (runtimeVersion >= 20 && runtimeVersion <= 21) {
+    if (runtimeVersion >= 20 && runtimeVersion <= 22) {
       // is locale sane (only buggy in Java 20)
       if (isAffectedByJDK8301190()) {
         LOG.warning(
@@ -169,9 +169,9 @@ public abstract class VectorizationProvider {
       } catch (ClassNotFoundException cnfe) {
         throw new LinkageError("PanamaVectorizationProvider is missing in Lucene JAR file", cnfe);
       }
-    } else if (runtimeVersion >= 22) {
+    } else if (runtimeVersion >= 23) {
       LOG.warning(
-          "You are running with Java 22 or later. To make full use of the Vector API, please update Apache Lucene.");
+          "You are running with Java 23 or later. To make full use of the Vector API, please update Apache Lucene.");
     } else if (lookupVectorModule().isPresent()) {
       LOG.warning(
           "Java vector incubator module was enabled by command line flags, but your Java version is too old: "
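The two hunks above only widen the supported window: the Panama-backed provider is now attempted on Java 20 through 22, and the "please update Apache Lucene" warning moves to Java 23 and later. A runnable sketch of the same gate, with the version numbers taken from the hunks and everything else simplified:

    public class VersionGateDemo {
      public static void main(String[] args) {
        int feature = Runtime.version().feature(); // e.g. 21 on a Java 21 runtime
        if (feature >= 20 && feature <= 22) {
          System.out.println("try the vectorized (Panama) implementation for this JDK");
        } else if (feature >= 23) {
          System.out.println("JDK is newer than this build supports: warn and fall back");
        } else {
          System.out.println("JDK too old for the incubating Vector API path");
        }
      }
    }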
@@ -120,7 +120,7 @@ final class WANDScorer extends Scorer {

   private final int scalingFactor;
   // scaled min competitive score
-  private long minCompetitiveScore = 0;
+  private long minCompetitiveScore;

   private final Scorer[] allScorers;

@@ -89,7 +89,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
   private boolean singleSort;

   /** Whether this comparator is allowed to skip documents. */
-  private boolean canSkipDocuments = true;
+  private boolean canSkipDocuments;

   /** Whether the collector is done with counting hits so that we can start skipping documents. */
   private boolean hitsThresholdReached = false;
@@ -346,7 +346,7 @@ public class MMapDirectory extends FSDirectory {
     }
     final var lookup = MethodHandles.lookup();
     final int runtimeVersion = Runtime.version().feature();
-    if (runtimeVersion >= 19 && runtimeVersion <= 21) {
+    if (runtimeVersion >= 19) {
       try {
         final var cls = lookup.findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider");
         // we use method handles, so we do not need to deal with setAccessible as we have private
@@ -366,9 +366,6 @@ public class MMapDirectory extends FSDirectory {
         throw new LinkageError(
             "MemorySegmentIndexInputProvider is missing in Lucene JAR file", cnfe);
       }
-    } else if (runtimeVersion >= 22) {
-      LOG.warning(
-          "You are running with Java 22 or later. To make full use of MMapDirectory, please update Apache Lucene.");
     }
     return new MappedByteBufferIndexInputProvider();
   }
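With the upper bound gone, any Java 19+ runtime takes the MemorySegment path, which makes the deleted "Java 22 or later" warning branch unreachable. The provider is still loaded reflectively through MethodHandles.lookup().findClass; a self-contained sketch of that call, using an arbitrary JDK class as the target:

    import java.lang.invoke.MethodHandles;

    public class FindClassDemo {
      public static void main(String[] args) throws ReflectiveOperationException {
        var lookup = MethodHandles.lookup();
        // unlike Class.forName, findClass performs access checking against this lookup
        Class<?> cls = lookup.findClass("java.util.ArrayList");
        System.out.println(cls.getName());
      }
    }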
@@ -130,17 +130,20 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
     return false;
   }

-  /** Interprets stored bytes as UTF8 bytes, returning the resulting string */
+  /**
+   * Interprets stored bytes as UTF-8 bytes, returning the resulting string. May throw an {@link
+   * AssertionError} or a {@link RuntimeException} if the data is not well-formed UTF-8.
+   */
   public String utf8ToString() {
     final char[] ref = new char[length];
     final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
     return new String(ref, 0, len);
   }

-  /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
+  /** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */
   @Override
   public String toString() {
-    StringBuilder sb = new StringBuilder();
+    StringBuilder sb = new StringBuilder(2 + 3 * length);
     sb.append('[');
     final int end = offset + length;
     for (int i = offset; i < end; i++) {
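The StringBuilder pre-sizing follows from the output shape in the corrected javadoc: every byte renders as two hex digits plus a separating space (at most 3 chars each) inside one pair of brackets (2 chars), so 2 + 3 * length bounds the result. A quick check of both methods:

    import org.apache.lucene.util.BytesRef;

    public class BytesRefDemo {
      public static void main(String[] args) {
        BytesRef br = new BytesRef("lucene");
        System.out.println(br.utf8ToString()); // lucene
        System.out.println(br);                // [6c 75 63 65 6e 65]
      }
    }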
@@ -253,7 +253,7 @@ public class RoaringDocIdSet extends DocIdSet {
   private class Iterator extends DocIdSetIterator {

     int block;
-    DocIdSetIterator sub = null;
+    DocIdSetIterator sub;
     int doc;

     Iterator() throws IOException {
@@ -32,6 +32,10 @@ public final class ToStringUtils {

   private static final char[] HEX = "0123456789abcdef".toCharArray();

+  /**
+   * Unlike {@link Long#toHexString(long)} returns a String with a "0x" prefix and all the leading
+   * zeros.
+   */
   public static String longHex(long x) {
     char[] asHex = new char[16];
     for (int i = 16; --i >= 0; x >>>= 4) {
@@ -39,4 +43,31 @@ public final class ToStringUtils {
     }
     return "0x" + new String(asHex);
   }
+
+  /**
+   * Builds a String with both textual representation of the {@link BytesRef} data and the bytes hex
+   * values. For example: {@code "hello [68 65 6c 6c 6f]"}. If the content is not a valid UTF-8
+   * sequence, only the bytes hex values are returned, as per {@link BytesRef#toString()}.
+   */
+  @SuppressWarnings("unused")
+  public static String bytesRefToString(BytesRef b) {
+    if (b == null) {
+      return "null";
+    }
+    try {
+      return b.utf8ToString() + " " + b;
+    } catch (AssertionError | RuntimeException t) {
+      // If BytesRef isn't actually UTF-8, or it's e.g. a prefix of UTF-8
+      // that ends mid-unicode-char, we fall back to hex:
+      return b.toString();
+    }
+  }
+
+  public static String bytesRefToString(BytesRefBuilder b) {
+    return bytesRefToString(b.get());
+  }
+
+  public static String bytesRefToString(byte[] b) {
+    return bytesRefToString(new BytesRef(b));
+  }
 }
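This new bytesRefToString is the single replacement for the per-class brToString helpers deleted throughout this commit. A usage sketch based on the javadoc above; the invalid byte value is an arbitrary example:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.ToStringUtils;

    public class BytesRefToStringDemo {
      public static void main(String[] args) {
        // valid UTF-8: text followed by hex, e.g. "hello [68 65 6c 6c 6f]"
        System.out.println(ToStringUtils.bytesRefToString(new BytesRef("hello")));
        // 0xff can never appear in well-formed UTF-8: per the javadoc, hex only, "[ff]"
        System.out.println(ToStringUtils.bytesRefToString(new byte[] {(byte) 0xff}));
        // null is handled explicitly; the cast selects the BytesRef overload
        System.out.println(ToStringUtils.bytesRefToString((BytesRef) null)); // null
      }
    }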
@@ -133,10 +133,17 @@ public final class Version {
   /**
    * Match settings and bugs in Lucene's 9.10.0 release.
    *
-   * @deprecated Use latest
+   * @deprecated (9.11.0) Use latest
    */
   @Deprecated public static final Version LUCENE_9_10_0 = new Version(9, 10, 0);

+  /**
+   * Match settings and bugs in Lucene's 9.11.0 release.
+   *
+   * @deprecated Use latest
+   */
+  @Deprecated public static final Version LUCENE_9_11_0 = new Version(9, 11, 0);
+
   /**
    * Match settings and bugs in Lucene's 10.0.0 release.
    *
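The "(9.11.0)" added to the 9.10.0 deprecation tag records which release superseded it, and the new LUCENE_9_11_0 constant slots in before 10.0.0. A trivial check using the existing onOrAfter comparison:

    import org.apache.lucene.util.Version;

    public class VersionDemo {
      public static void main(String[] args) {
        System.out.println(Version.LUCENE_9_11_0.onOrAfter(Version.LUCENE_9_10_0)); // true
      }
    }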
@@ -31,7 +31,7 @@ import org.apache.lucene.util.IntsRef;
  */
 public class LimitedFiniteStringsIterator extends FiniteStringsIterator {
   /** Maximum number of finite strings to create. */
-  private int limit = Integer.MAX_VALUE;
+  private final int limit;

   /** Number of generated finite strings. */
   private int count = 0;
@@ -108,10 +108,16 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces
     if (this.curSegment == null) {
       return new AlreadyClosedException("Already closed: " + this);
     }
-    // ISE can be thrown by MemorySegment and contains "closed" in message:
+    // in Java 22 or later we can check the isAlive status of all segments
+    // (see https://bugs.openjdk.org/browse/JDK-8310644):
+    if (Arrays.stream(segments).allMatch(s -> s.scope().isAlive()) == false) {
+      return new AlreadyClosedException("Already closed: " + this);
+    }
+    // fallback for Java 21: ISE can be thrown by MemorySegment and contains "closed" in message:
     if (e instanceof IllegalStateException
         && e.getMessage() != null
         && e.getMessage().contains("closed")) {
+      // the check is on message only, so preserve original cause for debugging:
       return new AlreadyClosedException("Already closed: " + this, e);
     }
     // otherwise rethrow unmodified NPE/ISE (as it possibly a bug with passing a null parameter to
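The added branch detects an already-closed input even when the pending exception is not the message-matching IllegalStateException, by asking each segment's scope whether it is still alive. A minimal sketch of that API, assuming the FFM API is available (final in Java 22, preview in 21); the allocation size is arbitrary:

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;

    public class ScopeAliveDemo {
      public static void main(String[] args) {
        MemorySegment segment;
        try (Arena arena = Arena.ofConfined()) {
          segment = arena.allocate(16);
          System.out.println(segment.scope().isAlive()); // true while the arena is open
        }
        System.out.println(segment.scope().isAlive()); // false: any access now throws ISE
      }
    }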
@@ -33,7 +33,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
   public MemorySegmentIndexInputProvider() {
     var log = Logger.getLogger(getClass().getName());
     log.info(
-        "Using MemorySegmentIndexInput with Java 21; to disable start with -D"
+        "Using MemorySegmentIndexInput with Java 21 or later; to disable start with -D"
             + MMapDirectory.ENABLE_MEMORY_SEGMENTS_SYSPROP
             + "=false");
   }
@@ -154,6 +154,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
     IndexWriter writer =
         new IndexWriter(
             directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp));
+    TestUtil.reduceOpenFiles(writer);

     Document doc = new Document();
     Field idField = newStringField("id", "", Field.Store.YES);
@@ -779,6 +780,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
     iwc.setMergePolicy(NoMergePolicy.INSTANCE);
     iwc.setMaxBufferedDocs(2);
+    iwc.setUseCompoundFile(true); // reduce open files
     IndexWriter w = new IndexWriter(dir, iwc);
     int numDocs = TEST_NIGHTLY ? 1000 : 100;
     for (int i = 0; i < numDocs; i++) {
@@ -67,7 +67,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
         assertAllBetween(last2, j, bd2, ids);
         last2 = j + 1;
       }
-      assertEquals(j + 1, queue.numGlobalTermDeletes());
+      assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
     }
     assertEquals(uniqueValues, bd1.deleteTerms.keySet());
     assertEquals(uniqueValues, bd2.deleteTerms.keySet());
@@ -258,6 +258,7 @@ public class TestIndexWriterThreadsToSegments extends LuceneTestCase {
     IndexWriterConfig iwc = newIndexWriterConfig(r, new MockAnalyzer(r));
     iwc.setCommitOnClose(false);
     final RandomIndexWriter w = new RandomIndexWriter(r, dir, iwc);
+    TestUtil.reduceOpenFiles(w.w);
     w.setDoRandomForceMerge(false);
     Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 30)];
     final CountDownLatch startingGun = new CountDownLatch(1);
@@ -48,9 +48,9 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {

   public void testCorrectImplementation() {
     final int runtimeVersion = Runtime.version().feature();
-    if (runtimeVersion >= 19 && runtimeVersion <= 21) {
+    if (runtimeVersion >= 19) {
       assertTrue(
-          "on Java 19, 20, and 21 we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
+          "on Java 19 or later we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
           isMemorySegmentImpl());
     } else {
       assertSame(MappedByteBufferIndexInputProvider.class, MMapDirectory.PROVIDER.getClass());
@@ -820,7 +820,7 @@ public final class JavascriptCompiler {
    */
   public static final Map<String, MethodHandle> DEFAULT_FUNCTIONS = loadDefaultFunctions();

-  private static final Map<String, MethodHandle> loadDefaultFunctions() {
+  private static Map<String, MethodHandle> loadDefaultFunctions() {
     final Map<String, MethodHandle> map = new HashMap<>();
     final Lookup publicLookup = MethodHandles.publicLookup();
     try {
@@ -852,7 +852,7 @@ public final class JavascriptCompiler {
     } catch (ReflectiveOperationException | IOException e) {
       throw new Error("Cannot resolve function", e);
     }
-    return Map.copyOf(map);
+    return Collections.unmodifiableMap(map);
   }

   /** Check Method signature for compatibility. */
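Map.copyOf and Collections.unmodifiableMap both reject mutation, but they are not interchangeable: copyOf takes an eager snapshot with unspecified iteration order, while unmodifiableMap is a read-only view over the original map. The hunk does not state why this swap was made; the observable difference, in isolation:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    public class UnmodifiableDemo {
      public static void main(String[] args) {
        Map<String, Integer> src = new HashMap<>();
        src.put("a", 1);
        Map<String, Integer> copy = Map.copyOf(src);
        Map<String, Integer> view = Collections.unmodifiableMap(src);
        src.put("b", 2);
        System.out.println(copy.containsKey("b")); // false: an eager snapshot
        System.out.println(view.containsKey("b")); // true: a live view
      }
    }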
@@ -123,7 +123,7 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
     private final PostingsEnum postingsEnum; // with offsets
     private final int freq;

-    private int posCounter = -1;
+    private int posCounter;

     public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException {
       this.term = Objects.requireNonNull(term);
@@ -23,6 +23,9 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
@@ -208,21 +211,23 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
       IndexSearcher searcher = new IndexSearcher(reader);
       BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
       Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter);
-      assertScorerResults(searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"});
+      assertScorerResults(
+          searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"}, 2);

       query = getParentJoinKnnQuery("field", new float[] {6, 6}, null, 3, parentFilter);
       assertScorerResults(
-          searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"});
+          searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);
       query =
           getParentJoinKnnQuery(
               "field", new float[] {6, 6}, new MatchAllDocsQuery(), 20, parentFilter);
       assertScorerResults(
-          searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"});
+          searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);

       query =
           getParentJoinKnnQuery(
               "field", new float[] {6, 6}, new MatchAllDocsQuery(), 1, parentFilter);
-      assertScorerResults(searcher, query, new float[] {1f / 3f}, new String[] {"5"});
+      assertScorerResults(
+          searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 1);
     }
   }
 }
@@ -324,7 +329,8 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
     assertEquals(expectedId, actualId);
   }

-  void assertScorerResults(IndexSearcher searcher, Query query, float[] scores, String[] ids)
+  void assertScorerResults(
+      IndexSearcher searcher, Query query, float[] possibleScores, String[] possibleIds, int count)
       throws IOException {
     IndexReader reader = searcher.getIndexReader();
     Query rewritten = query.rewrite(searcher);
@@ -334,11 +340,16 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
     assertEquals(-1, scorer.docID());
     expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
     DocIdSetIterator it = scorer.iterator();
-    for (int i = 0; i < scores.length; i++) {
+    Map<String, Float> idToScore =
+        IntStream.range(0, possibleIds.length)
+            .boxed()
+            .collect(Collectors.toMap(i -> possibleIds[i], i -> possibleScores[i]));
+    for (int i = 0; i < count; i++) {
       int docId = it.nextDoc();
       assertNotEquals(NO_MORE_DOCS, docId);
-      assertEquals(scores[i], scorer.score(), 0.0001);
-      assertIdMatches(reader, ids[i], docId);
+      String actualId = reader.storedFields().document(docId).get("id");
+      assertTrue(idToScore.containsKey(actualId));
+      assertEquals(idToScore.get(actualId), scorer.score(), 0.0001);
     }
   }
 }
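The rewritten assertion no longer pins the order in which hits come back: it zips the expected ids and scores into a map and verifies however many documents the scorer actually returns. The zip idiom on its own, with sample data copied from the test:

    import java.util.Map;
    import java.util.stream.Collectors;
    import java.util.stream.IntStream;

    public class ZipArraysDemo {
      public static void main(String[] args) {
        String[] ids = {"5", "7"};
        float[] scores = {1f / 3f, 1f / 3f};
        Map<String, Float> idToScore =
            IntStream.range(0, ids.length)
                .boxed()
                .collect(Collectors.toMap(i -> ids[i], i -> scores[i]));
        System.out.println(idToScore); // e.g. {5=0.33333334, 7=0.33333334}
      }
    }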
|
@ -81,7 +81,8 @@ public class TestParentBlockJoinFloatKnnVectorQuery extends ParentBlockJoinKnnVe
|
||||||
float score1 =
|
float score1 =
|
||||||
(float) ((1 + (2 * 2 + 3 * 4) / Math.sqrt((2 * 2 + 3 * 3) * (2 * 2 + 4 * 4))) / 2);
|
(float) ((1 + (2 * 2 + 3 * 4) / Math.sqrt((2 * 2 + 3 * 3) * (2 * 2 + 4 * 4))) / 2);
|
||||||
|
|
||||||
assertScorerResults(searcher, query, new float[] {score0, score1}, new String[] {"1", "2"});
|
assertScorerResults(
|
||||||
|
searcher, query, new float[] {score0, score1}, new String[] {"1", "2"}, 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -239,7 +239,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
   }
 
   // Test data - format is artist, song, weeks at top of charts
-  private static String[] hitsOfThe60s = {
+  private static final String[] hitsOfThe60s = {
     "1966\tSPENCER DAVIS GROUP\tKEEP ON RUNNING\t1",
     "1966\tOVERLANDERS\tMICHELLE\t3",
     "1966\tNANCY SINATRA\tTHESE BOOTS ARE MADE FOR WALKIN'\t4",

@@ -317,7 +317,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
     "1969\tARCHIES\tSUGAR, SUGAR\t4"
   };
 
-  private static final Map<String, Record> parsedRecords = new HashMap<String, Record>();
+  private static final Map<String, Record> parsedRecords = new HashMap<>();
   private Directory dir;
   private IndexReader reader;
   private IndexSearcher searcher;

@@ -452,7 +452,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
 
   private int getMaxNumRecordsPerArtist(ScoreDoc[] sd) throws IOException {
     int result = 0;
-    HashMap<String, Integer> artistCounts = new HashMap<String, Integer>();
+    HashMap<String, Integer> artistCounts = new HashMap<>();
     for (int i = 0; i < sd.length; i++) {
      Document doc = reader.storedFields().document(sd[i].doc);
       Record record = parsedRecords.get(doc.get("id"));
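
Each entry in `hitsOfThe60s` is one tab-separated record; note that the first column is the chart year, which the comment above the array leaves implicit. A hypothetical parse of a single row, for illustration only (the test's actual `Record` parsing is not shown in this diff):

    public class ChartRowSketch {
      public static void main(String[] args) {
        // Columns: year, artist, song, weeks at top of charts.
        String row = "1966\tSPENCER DAVIS GROUP\tKEEP ON RUNNING\t1";
        String[] cols = row.split("\t");
        int year = Integer.parseInt(cols[0]);
        int weeks = Integer.parseInt(cols[3]);
        System.out.println(cols[1] + " - " + cols[2] + " (" + year + ", " + weeks + " week(s))");
      }
    }
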
@@ -17,7 +17,9 @@
 package org.apache.lucene.queries.payloads;
 
 import java.nio.charset.StandardCharsets;
+import java.util.Collections;
 import java.util.EnumMap;
+import java.util.Map;
 import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
 import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
 import org.apache.lucene.util.ArrayUtil;

@@ -30,32 +32,45 @@ import org.apache.lucene.util.BytesRef;
  */
 public class PayloadMatcherFactory {
 
-  private static final EnumMap<PayloadType, EnumMap<MatchOperation, PayloadMatcher>>
+  private static final Map<PayloadType, Map<MatchOperation, PayloadMatcher>>
       payloadCheckerOpTypeMap;
 
   static {
-    payloadCheckerOpTypeMap = new EnumMap<>(PayloadType.class);
     // ints
-    EnumMap<MatchOperation, PayloadMatcher> intCheckers = new EnumMap<>(MatchOperation.class);
-    intCheckers.put(MatchOperation.LT, new LTIntPayloadMatcher());
-    intCheckers.put(MatchOperation.LTE, new LTEIntPayloadMatcher());
-    intCheckers.put(MatchOperation.GT, new GTIntPayloadMatcher());
-    intCheckers.put(MatchOperation.GTE, new GTEIntPayloadMatcher());
-    EnumMap<MatchOperation, PayloadMatcher> floatCheckers = new EnumMap<>(MatchOperation.class);
-    floatCheckers.put(MatchOperation.LT, new LTFloatPayloadMatcher());
-    floatCheckers.put(MatchOperation.LTE, new LTEFloatPayloadMatcher());
-    floatCheckers.put(MatchOperation.GT, new GTFloatPayloadMatcher());
-    floatCheckers.put(MatchOperation.GTE, new GTEFloatPayloadMatcher());
+    Map<MatchOperation, PayloadMatcher> intCheckers =
+        Collections.unmodifiableMap(
+            new EnumMap<>(
+                Map.of(
+                    MatchOperation.LT, new LTIntPayloadMatcher(),
+                    MatchOperation.LTE, new LTEIntPayloadMatcher(),
+                    MatchOperation.GT, new GTIntPayloadMatcher(),
+                    MatchOperation.GTE, new GTEIntPayloadMatcher())));
+    // floats
+    Map<MatchOperation, PayloadMatcher> floatCheckers =
+        Collections.unmodifiableMap(
+            new EnumMap<>(
+                Map.of(
+                    MatchOperation.LT, new LTFloatPayloadMatcher(),
+                    MatchOperation.LTE, new LTEFloatPayloadMatcher(),
+                    MatchOperation.GT, new GTFloatPayloadMatcher(),
+                    MatchOperation.GTE, new GTEFloatPayloadMatcher())));
     // strings
-    EnumMap<MatchOperation, PayloadMatcher> stringCheckers = new EnumMap<>(MatchOperation.class);
-    stringCheckers.put(MatchOperation.LT, new LTStringPayloadMatcher());
-    stringCheckers.put(MatchOperation.LTE, new LTEStringPayloadMatcher());
-    stringCheckers.put(MatchOperation.GT, new GTStringPayloadMatcher());
-    stringCheckers.put(MatchOperation.GTE, new GTEStringPayloadMatcher());
+    Map<MatchOperation, PayloadMatcher> stringCheckers =
+        Collections.unmodifiableMap(
+            new EnumMap<>(
+                Map.of(
+                    MatchOperation.LT, new LTStringPayloadMatcher(),
+                    MatchOperation.LTE, new LTEStringPayloadMatcher(),
+                    MatchOperation.GT, new GTStringPayloadMatcher(),
+                    MatchOperation.GTE, new GTEStringPayloadMatcher())));
     // load the matcher maps per payload type
-    payloadCheckerOpTypeMap.put(PayloadType.INT, intCheckers);
-    payloadCheckerOpTypeMap.put(PayloadType.FLOAT, floatCheckers);
-    payloadCheckerOpTypeMap.put(PayloadType.STRING, stringCheckers);
+    payloadCheckerOpTypeMap =
+        Collections.unmodifiableMap(
+            new EnumMap<>(
+                Map.of(
+                    PayloadType.INT, intCheckers,
+                    PayloadType.FLOAT, floatCheckers,
+                    PayloadType.STRING, stringCheckers)));
   }
 
   /**

@@ -75,7 +90,7 @@ public class PayloadMatcherFactory {
       return new EQPayloadMatcher();
     }
     // otherwise, we need to pay attention to the payload type and operation
-    EnumMap<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
+    Map<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
     if (opMap != null) {
       return opMap.get(op);
     } else {
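
The rewritten initializer builds each operation table in a single expression: `Map.of(...)` supplies the entries, `new EnumMap<>(...)` re-keys them into a compact array-backed map (Map.of iteration order is unspecified; EnumMap iterates in ordinal order), and `Collections.unmodifiableMap(...)` freezes the result, which also lets the fields widen from `EnumMap` to plain `Map`. The same idiom reduced to a standalone sketch (the enum and values are invented):

    import java.util.Collections;
    import java.util.EnumMap;
    import java.util.Map;

    public class EnumTableSketch {
      enum Op { LT, LTE, GT, GTE }

      // Immutable, EnumMap-backed lookup table built in one expression.
      private static final Map<Op, String> LABELS =
          Collections.unmodifiableMap(
              new EnumMap<>(
                  Map.of(
                      Op.LT, "<",
                      Op.LTE, "<=",
                      Op.GT, ">",
                      Op.GTE, ">=")));

      public static void main(String[] args) {
        System.out.println(LABELS.get(Op.GTE)); // >=
      }
    }
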
@@ -269,10 +269,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
             MatchOperation.GT);
     checkHits(
         stringGT2,
-        new int[] { // spotless:off
-          155, 255, 355, 455, 555, 655, 755, 855, 955,
-          1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
-        }); // spotless:on
+        alignedIntArray(
+            """
+            155, 255, 355, 455, 555, 655, 755, 855, 955,
+            1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
+            """));
     SpanQuery stringGTE2 =
         new SpanPayloadCheckQuery(
             new SpanNearQuery(new SpanQuery[] {termFifty, termFive}, 0, true),

@@ -281,10 +282,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
             MatchOperation.GTE);
     checkHits(
         stringGTE2,
-        new int[] { // spotless:off
-          55, 155, 255, 355, 455, 555, 655, 755, 855, 955,
-          1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
-        }); // spotless:on
+        alignedIntArray(
+            """
+            55, 155, 255, 355, 455, 555, 655, 755, 855, 955,
+            1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
+            """));
 
     SpanQuery stringLT2 =
         new SpanPayloadCheckQuery(

@@ -306,6 +308,23 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
     // sets "upto" back to zero between SpanOrQuery subclauses.
   }
 
+  /**
+   * Parses a comma-separated array of integers, ignoring white space around them. This allows for
+   * arbitrary alignment of integers in the source string to convey additional information about
+   * their mutual relations. For example:
+   *
+   * <pre>{@code
+   * var ints =
+   *     """
+   *      1,  2,  3,
+   *     11, 12, 13
+   *     """
+   * }</pre>
+   */
+  private static int[] alignedIntArray(String ints) {
+    return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
+  }
+
   public void testUnorderedPayloadChecks() throws Exception {
 
     SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));
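
The new helper removes the need for the `// spotless:off` / `// spotless:on` guards: the alignment now lives inside a text block, which the formatter leaves alone. A quick standalone check of the parsing (assuming `java.util.Arrays` is imported in the test file as well):

    import java.util.Arrays;

    public class AlignedIntArraySketch {
      // Same body as the helper added above.
      static int[] alignedIntArray(String ints) {
        return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
      }

      public static void main(String[] args) {
        int[] parsed =
            alignedIntArray(
                """
                 1,  2,  3,
                11, 12, 13
                """);
        System.out.println(Arrays.toString(parsed)); // [1, 2, 3, 11, 12, 13]
      }
    }
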
@@ -30,7 +30,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
  */
 public class BoostQueryNode extends QueryNodeImpl {
 
-  private float value = 0;
+  private float value;
 
   /**
    * Constructs a boost node
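
Dropping the explicit `= 0`, `= null`, and `= false` initializers here and in the following hunks is behavior-neutral: the JLS already initializes fields to exactly those defaults. The only cases that need care are non-default initializers (such as `Modifier.MOD_NONE` or `Type.SENTENCE` below), which are safe to drop only because every constructor assigns the field. A small illustration:

    public class Defaults {
      int count;          // 0
      float boost;        // 0.0f
      boolean ordered;    // false
      CharSequence field; // null

      public static void main(String[] args) {
        Defaults d = new Defaults();
        System.out.println(d.count + " " + d.boost + " " + d.ordered + " " + d.field); // 0 0.0 false null
      }
    }
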
@@ -84,7 +84,7 @@ public class ModifierQueryNode extends QueryNodeImpl {
     }
   }
 
-  private Modifier modifier = Modifier.MOD_NONE;
+  private Modifier modifier;
 
   /**
    * Used to store the modifier value on the original query string
@@ -25,9 +25,9 @@ import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
  */
 public class OpaqueQueryNode extends QueryNodeImpl {
 
-  private CharSequence schema = null;
+  private CharSequence schema;
 
-  private CharSequence value = null;
+  private CharSequence value;
 
   /**
    * @param schema - schema identifier
@@ -41,7 +41,7 @@ public class PathQueryNode extends QueryNodeImpl {
 
   /** Term text with a beginning and end position */
   public static class QueryText implements Cloneable {
-    CharSequence value = null;
+    CharSequence value;
 
     /** != null The term's begin position. */
     int begin;

@@ -97,7 +97,7 @@ public class PathQueryNode extends QueryNodeImpl {
     }
   }
 
-  private List<QueryText> values = null;
+  private List<QueryText> values;
 
   /**
    * @param pathElements - List of QueryText objects
@@ -25,7 +25,7 @@ import org.apache.lucene.search.PhraseQuery; // javadocs
 /** Query node for {@link PhraseQuery}'s slop factor. */
 public class PhraseSlopQueryNode extends QueryNodeImpl implements FieldableNode {
 
-  private int value = 0;
+  private int value;
 
   /**
    * @exception QueryNodeError throw in overridden method to disallow
@@ -57,9 +57,9 @@ public class ProximityQueryNode extends BooleanQueryNode {
 
   /** utility class containing the distance condition and number */
   public static class ProximityType {
-    int pDistance = 0;
+    int pDistance;
 
-    Type pType = null;
+    Type pType;
 
     public ProximityType(Type type) {
       this(type, 0);

@@ -71,10 +71,10 @@ public class ProximityQueryNode extends BooleanQueryNode {
     }
   }
 
-  private Type proximityType = Type.SENTENCE;
+  private Type proximityType;
   private int distance = -1;
-  private boolean inorder = false;
-  private CharSequence field = null;
+  private final boolean inorder;
+  private CharSequence field;
 
   /**
    * @param clauses - QueryNode children
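
`inorder` additionally becomes `final`, which is a stronger guarantee than just dropping `= false`: the compiler now enforces that every constructor assigns it exactly once (definite assignment). A minimal sketch of that rule, with a hypothetical class:

    public class ProximitySketch {
      private final boolean inorder; // must be definitely assigned by every constructor

      ProximitySketch(boolean inorder) {
        this.inorder = inorder; // removing this assignment would be a compile-time error
      }
    }
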
@@ -32,7 +32,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
  */
 public class SlopQueryNode extends QueryNodeImpl implements FieldableNode {
 
-  private int value = 0;
+  private int value;
 
   /**
    * @param query - QueryNode Tree with the phrase
@@ -32,10 +32,11 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
 
   @Override
   public String toString() {
-    if (getChildren() == null || getChildren().size() == 0) return "<tokenizedphrase/>";
+    List<QueryNode> children = getChildren();
+    if (children == null || children.isEmpty()) return "<tokenizedphrase/>";
     StringBuilder sb = new StringBuilder();
-    sb.append("<tokenizedtphrase>");
-    for (QueryNode child : getChildren()) {
+    sb.append("<tokenizedphrase>");
+    for (QueryNode child : children) {
       sb.append("\n");
       sb.append(child.toString());
     }

@@ -46,16 +47,15 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
   // This text representation is not re-parseable
   @Override
   public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
-    if (getChildren() == null || getChildren().size() == 0) return "";
+    List<QueryNode> children = getChildren();
+    if (children == null || children.isEmpty()) return "";
     StringBuilder sb = new StringBuilder();
     String filler = "";
-    for (QueryNode child : getChildren()) {
+    for (QueryNode child : children) {
       sb.append(filler).append(child.toQueryString(escapeSyntaxParser));
       filler = ",";
     }
-
-    return "[TP[" + sb.toString() + "]]";
+    return "[TP[" + sb + "]]";
   }
 
   @Override

@@ -70,27 +70,25 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
   @Override
   public CharSequence getField() {
     List<QueryNode> children = getChildren();
-
-    if (children == null || children.size() == 0) {
-      return null;
-
-    } else {
-      return ((FieldableNode) children.get(0)).getField();
+    if (children != null) {
+      for (QueryNode child : children) {
+        if (child instanceof FieldableNode) {
+          return ((FieldableNode) child).getField();
+        }
+      }
     }
+    return null;
   }
 
   @Override
   public void setField(CharSequence fieldName) {
     List<QueryNode> children = getChildren();
-
     if (children != null) {
-
-      for (QueryNode child : getChildren()) {
-
+      for (QueryNode child : children) {
         if (child instanceof FieldableNode) {
           ((FieldableNode) child).setField(fieldName);
         }
       }
     }
   }
-} // end class MultitermQueryNode
+}
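
Both methods above now read `getChildren()` once into a local, so the null/empty check and the loop are guaranteed to operate on the same list and the repeated virtual call disappears; the same file also loses the `<tokenizedtphrase>` typo and the stale `// end class MultitermQueryNode` comment. The pattern, reduced to a sketch with a hypothetical node type:

    import java.util.List;

    class NodeSketch {
      List<NodeSketch> children; // may be null for leaves

      List<NodeSketch> getChildren() {
        return children;
      }

      @Override
      public String toString() {
        List<NodeSketch> kids = getChildren(); // read once, reuse below
        if (kids == null || kids.isEmpty()) return "<tokenizedphrase/>";
        StringBuilder sb = new StringBuilder("<tokenizedphrase>");
        for (NodeSketch child : kids) {
          sb.append("\n").append(child);
        }
        return sb.toString();
      }
    }
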
@@ -34,7 +34,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
  */
 public class FieldBoostMapFCListener implements FieldConfigListener {
 
-  private QueryConfigHandler config = null;
+  private final QueryConfigHandler config;
 
   public FieldBoostMapFCListener(QueryConfigHandler config) {
     this.config = config;

@@ -36,7 +36,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
  */
 public class FieldDateResolutionFCListener implements FieldConfigListener {
 
-  private QueryConfigHandler config = null;
+  private final QueryConfigHandler config;
 
   public FieldDateResolutionFCListener(QueryConfigHandler config) {
     this.config = config;
@@ -30,6 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
 import org.apache.lucene.util.fst.Util;

@@ -175,8 +176,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
     final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
     f.arc = arc;
     if (f.fpOrig == fp && f.nextEnt != -1) {
-      // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
-      // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+      // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
+      // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
       // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
       // term.length + " vs prefix=" + f.prefix);
       if (f.prefix > targetBeforeCurrentLength) {

@@ -197,7 +198,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
     //   final int sav = term.length;
     //   term.length = length;
     //   System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
-    // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+    // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
     //   term.length = sav;
     // }
   }

@@ -222,19 +223,6 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
     return seekExact(target, 0);
   }
 
-  // for debugging
-  @SuppressWarnings("unused")
-  static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
-    }
-  }
-
   /** Get the version of the currently seek'd term; only valid if we are positioned. */
   public long getVersion() {
     return ((IDVersionTermState) currentFrame.state).idVersion;
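
The deleted `brToString` (and every commented-out debug line that called it) is replaced by the shared `ToStringUtils.bytesRefToString`. Judging only from the deleted code, the contract is: render the bytes as readable UTF-8 when possible, and fall back to the raw hex form when they are not valid UTF-8 (for example a slice ending mid-codepoint). A standalone sketch of that fallback behavior, assuming nothing about the real utility beyond what the deleted helper shows:

    import java.nio.ByteBuffer;
    import java.nio.charset.CharacterCodingException;
    import java.nio.charset.CodingErrorAction;
    import java.nio.charset.StandardCharsets;

    public class BytesRefDebugSketch {
      // The real BytesRef#utf8ToString throws on malformed UTF-8; this stand-in
      // decodes strictly to reproduce the same try/fallback shape.
      static String debugString(byte[] bytes) {
        try {
          return StandardCharsets.UTF_8
              .newDecoder()
              .onMalformedInput(CodingErrorAction.REPORT)
              .onUnmappableCharacter(CodingErrorAction.REPORT)
              .decode(ByteBuffer.wrap(bytes))
              .toString();
        } catch (CharacterCodingException e) {
          // Fall back to hex, like BytesRef#toString.
          StringBuilder sb = new StringBuilder("[");
          for (int i = 0; i < bytes.length; i++) {
            if (i > 0) sb.append(' ');
            sb.append(Integer.toHexString(bytes[i] & 0xff));
          }
          return sb.append(']').toString();
        }
      }

      public static void main(String[] args) {
        System.out.println(debugString("abc".getBytes(StandardCharsets.UTF_8))); // abc
        System.out.println(debugString(new byte[] {(byte) 0xC0, (byte) 0x80})); // [c0 80]
      }
    }
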
@@ -258,8 +246,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
 
     // if (DEBUG) {
     // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
-    // fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current="
-    // + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
+    // minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
+    // termExists + ") validIndexPrefix=" + validIndexPrefix);
     // printSeekState(System.out);
     // }
 

@@ -460,8 +449,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
       }
 
       // if (DEBUG) {
-      // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
-      // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+      // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+      // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
       // targetBeforeCurrentLength + " termExists=" + termExists);
       // }
 

@@ -492,7 +481,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
           term.setByteAt(targetUpto, (byte) targetLabel);
           term.setLength(1 + targetUpto);
           // if (DEBUG) {
-          // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+          // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }

@@ -520,10 +509,11 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
           // termExists = false;
           // }
           // if (DEBUG) {
-          // System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + "
-          // targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion +
-          // " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " +
-          // currentFrame.fp + " termExists=" + termExists);
+          // System.out.println(" FAST version NOT_FOUND term=" +
+          // ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
+          // " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
+          // validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
+          // " termExists=" + termExists);
           // }
           return false;
         }

@@ -553,7 +543,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
         } else {
           // if (DEBUG) {
           // System.out.println(" got " + result + "; return NOT_FOUND term=" +
-          // brToString(term));
+          // ToStringUtils.bytesRefToString(term));
           // }
           return false;
         }

@@ -604,7 +594,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
         termExists = false;
         term.setLength(targetUpto);
         // if (DEBUG) {
-        // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
+        // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
         // }
         return false;
       }

@@ -656,8 +646,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
 
     // if (DEBUG) {
     // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
-    // target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
-    // termExists + ") validIndexPrefix= " + validIndexPrefix);
+    // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
+    // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
     // printSeekState();
     // }
 

@@ -700,9 +690,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
         cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
         // if (DEBUG) {
         // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
-        // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
-        // vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
-        // " output=" + output);
+        // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+        // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
+        // + " output=" + output);
         // }
         if (cmp != 0) {
           break;

@@ -814,8 +804,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
       }
 
       // if (DEBUG) {
-      // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
-      // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+      // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+      // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
       // targetBeforeCurrentLength);
       // }
 

@@ -850,7 +840,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
 
       if (next() != null) {
         // if (DEBUG) {
-        // System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
+        // System.out.println(" return NOT_FOUND term=" +
+        // ToStringUtils.bytesRefToString(term));
         // }
         return SeekStatus.NOT_FOUND;
       } else {

@@ -861,7 +852,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
       }
     } else {
       // if (DEBUG) {
-      // System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
+      // System.out.println(" return " + result + " term=" +
+      // ToStringUtils.bytesRefToString(term));
       // }
       return result;
     }

@@ -946,7 +938,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
               + " prefixLen="
               + f.prefix
               + " prefix="
-              + brToString(prefix)
+              + ToStringUtils.bytesRefToString(prefix)
               + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
               + " hasTerms="
               + f.hasTerms

@@ -974,7 +966,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
               + " prefixLen="
               + f.prefix
               + " prefix="
-              + brToString(prefix)
+              + ToStringUtils.bytesRefToString(prefix)
               + " nextEnt="
               + f.nextEnt
               + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))

@@ -1063,9 +1055,10 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
 
     assert !eof;
     // if (DEBUG) {
-    // System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
-    // termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
-    // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+    // System.out.println("\nBTTR.next seg=" + segment + " term=" +
+    // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
+    // " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
+    // " validIndexPrefix=" + validIndexPrefix);
     // printSeekState();
     // }
 

@@ -1129,8 +1122,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
           // currentFrame.hasTerms = true;
           currentFrame.loadBlock();
         } else {
-          // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
-          // currentFrame.ord=" + currentFrame.ord);
+          // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
+          // " currentFrame.ord=" + currentFrame.ord);
           return term.get();
         }
       }
Some files were not shown because too many files have changed in this diff.