Merge branch 'main' into java_21

Commit 07f4b5b19f by ChrisHegarty, 2024-02-19 11:43:46 +00:00
105 changed files with 1317 additions and 1423 deletions

View File

@ -1,15 +0,0 @@
name: "Set up caches"
description: "Set up cached resources"
runs:
using: "composite"
steps:
- name: Cache/Restore cached gradle files
uses: actions/cache@v2
with:
path: |
~/.gradle/caches
~/.gradle/jdks
key: ${{ runner.os }}-gradle-caches-${{ hashFiles('versions.lock', '**/gradle-wrapper.properties') }}
restore-keys: |
${{ runner.os }}-gradle-caches-

View File

@ -0,0 +1,29 @@
# This composite action is included in other workflows to have a shared setup
# for java, gradle, caches, etc.
name: Prepare Lucene build
inputs:
java-version:
required: false
default: 17
description: "The default JDK version to set up."
java-distribution:
required: false
default: "temurin"
description: "The default JDK distribution type"
runs:
using: "composite"
steps:
- name: Set up Java (${{ inputs.java-distribution }}, ${{ inputs.java-version }})
uses: actions/setup-java@v4
with:
distribution: ${{ inputs.java-distribution }}
java-version: ${{ inputs.java-version }}
java-package: jdk
# This includes "smart" caching of the wrapper and dependencies.
- name: Set up Gradle
uses: gradle/actions/setup-gradle@v3

View File

@ -1,44 +0,0 @@
name: Distribution tests
on:
# Allow manual triggers for testing the action.
workflow_dispatch:
pull_request:
branches:
- 'main'
push:
branches:
- 'main'
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
test:
name: Run distribution tests
timeout-minutes: 15
runs-on: ${{ matrix.os }}
strategy:
matrix:
# we want to run the distribution tests on all major OSes, but it's occasionally too slow on windows (it hangs, or the forked process is never started; cause unknown).
#os: [ubuntu-latest, macos-latest, windows-latest]
os: [ubuntu-latest, macos-latest]
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: 21
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run all distribution tests including GUI tests (${{ matrix.os }})
run: ./gradlew -p lucene/distribution.tests test

View File

@ -1,84 +0,0 @@
name: Gradle Precommit Checks
on:
pull_request:
branches:
- '*'
push:
branches:
- main
- branch_9x
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
# This runs all validation checks without tests.
checks:
name: gradle check -x test (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 15
runs-on: ${{ matrix.os }}
strategy:
matrix:
# Operating systems to run on.
os: [ubuntu-latest]
# Test JVMs.
java: [ '21' ]
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run gradle check (without tests)
run: ./gradlew check -x test -Ptask.times=true --max-workers 2
# This runs all tests without any other validation checks.
tests:
name: gradle test (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 30
runs-on: ${{ matrix.os }}
strategy:
matrix:
# Operating systems to run on.
# windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
# macos-latest: a tad slower than ubuntu with pretty much the same coverage, so leaving it out.
os: [ubuntu-latest]
# Test JVMs.
java: [ '21' ]
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run gradle tests
run: ./gradlew test "-Ptask.times=true" --max-workers 2
- name: Echo settings
run: cat gradle.properties

View File

@ -1,35 +0,0 @@
name: Hunspell regression tests
on:
pull_request:
branches:
- 'main'
paths:
- '.github/workflows/hunspell.yml'
- 'lucene/analysis/common/**'
jobs:
test:
name: Run Hunspell regression tests
timeout-minutes: 15
runs-on: ubuntu-latest
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: 21
java-package: jdk
- name: Prepare caches
uses: ./.github/actions/gradle-caches
- name: Run regular and regression tests
run: ./gradlew -p lucene/analysis/common check testRegressions

View File

@ -12,8 +12,8 @@ on:
jobs:
stale:
runs-on: ubuntu-latest
permissions:
pull-requests: write

.github/workflows/run-checks-all.yml (new file, 67 lines)
View File

@ -0,0 +1,67 @@
name: "Run checks: all modules"
on:
workflow_dispatch:
pull_request:
branches:
- '*'
push:
branches:
- 'main'
- 'branch_9x'
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
# We split the workflow into two parallel jobs for efficiency:
# one runs all validation checks without tests,
# the other runs all tests without other validation checks.
jobs:
# This runs all validation checks without tests.
checks:
name: checks without tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 15
strategy:
matrix:
os: [ ubuntu-latest ]
java: [ '17' ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run gradle check (without tests)
run: ./gradlew check -x test -Ptask.times=true --max-workers 2
# This runs all tests without any other validation checks.
tests:
name: tests (JDK ${{ matrix.java }} on ${{ matrix.os }})
timeout-minutes: 30
strategy:
matrix:
# Operating systems to run on.
# windows-latest: fairly slow to build and results in odd errors (see LUCENE-10167)
# macos-latest: a tad slower than ubuntu with pretty much the same coverage, so leaving it out.
os: [ ubuntu-latest ]
java: [ '17' ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run gradle tests
run: ./gradlew test "-Ptask.times=true" --max-workers 2
- name: List automatically-initialized gradle.properties
run: cat gradle.properties

View File

@ -0,0 +1,37 @@
name: "Run checks: module lucene/analysis/common"
on:
workflow_dispatch:
pull_request:
branches:
- 'main'
- 'branch_9x'
paths:
- '.github/workflows/run-checks-mod-analysis-common.yml'
- 'lucene/analysis/common/**'
push:
branches:
- 'main'
- 'branch_9x'
paths:
- '.github/workflows/run-checks-mod-analysis-common.yml'
- 'lucene/analysis/common/**'
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
jobs:
test:
name: Extra regression tests
timeout-minutes: 15
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run 'gradlew lucene/analysis/common check testRegressions'
run: ./gradlew -p lucene/analysis/common check testRegressions

View File

@ -0,0 +1,36 @@
name: "Run checks: module lucene/distribution.tests"
on:
workflow_dispatch:
pull_request:
branches:
- 'main'
- 'branch_9x'
push:
branches:
- 'main'
- 'branch_9x'
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
jobs:
test:
timeout-minutes: 15
strategy:
matrix:
# ubuntu-latest is checked as part of run-checks-all.yml
# windows-latest is slow and sometimes flaky.
os: [ macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/prepare-for-build
- name: Run 'gradlew lucene/distribution.tests test' (on ${{ matrix.os }})
run: ./gradlew -p lucene/distribution.tests test

View File

@ -23,23 +23,23 @@
xmlns:asfext="http://projects.apache.org/ns/asfext#"
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<!--
This file's canonical URL is: http://lucene.apache.org/core/doap.rdf
This file's canonical URL is: https://lucene.apache.org/core/doap.rdf
Note that the canonical URL may redirect to other non-canonical locations.
-->
<Project rdf:about="http://lucene.apache.org/core/">
<Project rdf:about="https://lucene.apache.org/core/">
<created>2001-09-01</created>
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
<name>Apache Lucene Core</name>
<homepage rdf:resource="http://lucene.apache.org/core/" />
<asfext:pmc rdf:resource="http://lucene.apache.org" />
<homepage rdf:resource="https://lucene.apache.org/core/" />
<asfext:pmc rdf:resource="https://lucene.apache.org" />
<shortdesc>Apache Lucene is a high-performance, full-featured text search engine library</shortdesc>
<description>Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
</description>
<bug-database rdf:resource="https://github.com/apache/lucene/issues" />
<mailing-list rdf:resource="http://lucene.apache.org/core/discussion.html" />
<download-page rdf:resource="http://lucene.apache.org/core/downloads.html" />
<mailing-list rdf:resource="https://lucene.apache.org/core/discussion.html" />
<download-page rdf:resource="https://lucene.apache.org/core/downloads.html" />
<programming-language>Java</programming-language>
<!--

View File

@ -96,16 +96,15 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
scriptutil.run('rm -rf %s' % bc_index_dir)
print('done')
def update_backcompat_tests(types, index_version, current_version):
print(' adding new indexes %s to backcompat tests...' % types, end='', flush=True)
def update_backcompat_tests(index_version, current_version):
print(' adding new indexes to backcompat tests...', end='', flush=True)
module = 'lucene/backward-codecs'
filename = '%s/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java' % module
filename = None
if not current_version.is_back_compat_with(index_version):
matcher = re.compile(r'final String\[\] unsupportedNames = {|};')
elif 'sorted' in types:
matcher = re.compile(r'static final String\[\] oldSortedNames = {|};')
filename = '%s/src/test/org/apache/lucene/backward_index/unsupported_versions.txt' % module
else:
matcher = re.compile(r'static final String\[\] oldNames = {|};')
filename = '%s/src/test/org/apache/lucene/backward_index/versions.txt' % module
strip_dash_suffix_re = re.compile(r'-.*')
@ -114,53 +113,25 @@ def update_backcompat_tests(types, index_version, current_version):
x = re.sub(strip_dash_suffix_re, '', x) # remove the -suffix if any
return scriptutil.Version.parse(x)
class Edit(object):
start = None
def __call__(self, buffer, match, line):
if self.start:
# find where this version should exist
i = len(buffer) - 1
previous_version_exists = not ('};' in line and buffer[-1].strip().endswith("{"))
if previous_version_exists: # Only look if there is a version here
v = find_version(buffer[i])
while i >= self.start and v.on_or_after(index_version):
i -= 1
v = find_version(buffer[i])
i += 1 # readjust since we skipped past by 1
# unfortunately python doesn't have a range remove from list...
# here we want to remove any previous references to the version we are adding
while i < len(buffer) and index_version.on_or_after(find_version(buffer[i])):
buffer.pop(i)
if i == len(buffer) and previous_version_exists and not buffer[-1].strip().endswith(","):
# add comma
buffer[-1] = buffer[-1].rstrip() + ",\n"
if previous_version_exists:
last = buffer[-1]
spaces = ' ' * (len(last) - len(last.lstrip()))
else:
spaces = ' '
for (j, t) in enumerate(types):
if t == 'sorted':
newline = spaces + ('"sorted.%s"') % index_version
else:
newline = spaces + ('"%s-%s"' % (index_version, t))
if j < len(types) - 1 or i < len(buffer):
newline += ','
buffer.insert(i, newline + '\n')
i += 1
def edit(buffer, match, line):
v = find_version(line)
changed = False
if v.on_or_after(index_version):
if not index_version.on_or_after(v):
buffer.append(('%s\n') % index_version)
changed = True
buffer.append(line)
return changed
def append(buffer, changed):
if changed:
return changed
if not buffer[len(buffer)-1].endswith('\n'):
buffer.append('\n')
buffer.append(('%s\n') % index_version)
return True
if 'Names = {' in line:
self.start = len(buffer) # location of first index name
buffer.append(line)
return False
changed = scriptutil.update_file(filename, matcher, Edit())
changed = scriptutil.update_file(filename, re.compile(r'.*'), edit, append)
print('done' if changed else 'uptodate')
def check_backcompat_tests():
@ -251,9 +222,8 @@ def main():
print ('\nMANUAL UPDATE REQUIRED: edit TestGenerateBwcIndices to enable moreterms, dvupdates, and empty index testing')
print('\nAdding backwards compatibility tests')
update_backcompat_tests(['cfs', 'nocfs'], c.version, current_version)
if should_make_sorted:
update_backcompat_tests(['sorted'], c.version, current_version)
update_backcompat_tests(c.version, current_version)
print('\nTesting changes')
check_backcompat_tests()

View File

@ -88,7 +88,7 @@ def run(cmd, cwd=None):
raise e
return output.decode('utf-8')
def update_file(filename, line_re, edit):
def update_file(filename, line_re, edit, append=None):
infile = open(filename, 'r')
buffer = []
@ -102,6 +102,8 @@ def update_file(filename, line_re, edit):
return False
continue
buffer.append(line)
if append:
changed = append(buffer, changed) # in case edit() made no change but an append function was provided
if not changed:
raise Exception('Could not find %s in %s' % (line_re, filename))
with open(filename, 'w') as f:

View File

@ -20,7 +20,7 @@ def resources = scriptResources(buildscript)
configure(rootProject) {
ext {
// also change this in extractor tool: ExtractForeignAPI
vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21 ] as Set
vectorIncubatorJavaVersions = [ JavaVersion.VERSION_20, JavaVersion.VERSION_21, JavaVersion.VERSION_22 ] as Set
}
}

View File

@ -28,7 +28,6 @@ configure(project(":lucene").subprojects) { prj ->
spotless {
java {
toggleOffOn() // obviously, only to be used sparingly.
// TODO: Work out how to support multiple different header files (we have
// classes in the codebase that have original headers). We currently use
// Apache RAT to enforce headers so this is of lesser priority.

View File

@ -114,6 +114,8 @@ Improvements
* GITHUB#12873: Expressions module now uses JEP 371 "Hidden Classes" with JEP 309
"Dynamic Class-File Constants" to implement Javascript expressions. (Uwe Schindler)
* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
Optimizations
---------------------
@ -176,6 +178,36 @@ Other
* GITHUB#13001: Put Thread#sleep() on the list of forbidden APIs. (Shubham Chaudhary)
======================== Lucene 9.11.0 =======================
API Changes
---------------------
(No changes)
New Features
---------------------
(No changes)
Improvements
---------------------
* GITHUB#13092: `static final Map` constants have been made immutable (Dmitry Cherniachenko)
* GITHUB#13041: TokenizedPhraseQueryNode code cleanup (Dmitry Cherniachenko)
Optimizations
---------------------
(No changes)
Bug Fixes
---------------------
(No changes)
Other
---------------------
* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
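  A minimal sketch of the consolidated helper this entry refers to, assuming it
  mirrors the removed per-class brToString copies shown later in this diff (the
  method name comes from the ToStringUtils.bytesRefToString call sites):

    import org.apache.lucene.util.BytesRef;

    final class ToStringUtilsSketch {
      static String bytesRefToString(BytesRef b) {
        if (b == null) {
          return "(null)";
        }
        try {
          // Valid UTF-8: decoded text plus the raw bytes.
          return b.utf8ToString() + " " + b;
        } catch (@SuppressWarnings("unused") Throwable t) {
          // Not valid UTF-8 (e.g. a prefix ending mid-codepoint): hex fallback.
          return b.toString();
        }
      }
    }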
======================== Lucene 9.10.0 =======================
API Changes
@ -200,6 +232,17 @@ New Features
* GITHUB#12336: Index additional data per facet label in the taxonomy. (Shai Erera, Egor Potemkin, Mike McCandless,
Stefan Vodita)
* GITHUB#12706: Add support for the final release of Java foreign memory API in Java 22 (and later).
Lucene's MMapDirectory will now mmap Lucene indexes in chunks of 16 GiB (instead of 1 GiB) starting
from Java 19. Indexes closed while queries are running can no longer crash the JVM.
Support for vectorized implementations of VectorUtil based on jdk.incubator.vector APIs was added
for exactly Java 22. Therefore, applications started with command line parameter
"java --add-modules jdk.incubator.vector" will automatically use the new vectorized implementations
if running on a supported platform (Java 20/21/22 on x86 CPUs with AVX2 or later, or ARM NEON CPUs).
This is an opt-in feature and requires an explicit Java command line flag! When enabled, Lucene logs
a notice using java.util.logging. Please test thoroughly and report bugs/slowness to Lucene's mailing
list. (Uwe Schindler, Chris Hegarty)
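  A minimal sketch of an application picking these changes up, assuming an
  existing index on disk (the class name and path argument are illustrative):

    // Opt in to the vectorized implementations with:
    //   java --add-modules jdk.incubator.vector OpenIndex /path/to/index
    import java.nio.file.Path;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.MMapDirectory;

    public class OpenIndex {
      public static void main(String[] args) throws Exception {
        // MMapDirectory now maps the index in 16 GiB chunks on Java 19+.
        try (MMapDirectory dir = new MMapDirectory(Path.of(args[0]));
            DirectoryReader reader = DirectoryReader.open(dir)) {
          System.out.println("maxDoc=" + reader.maxDoc());
        }
      }
    }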
Improvements
---------------------
@ -219,8 +262,6 @@ Improvements
Tests are running with random byte order to ensure that the order does not affect correctness
of code. Native order was enabled for LZ4 compression. (Uwe Schindler)
* GITHUB#11657, LUCENE-10621: Upgrade to OpenNLP 2.3.2. (Christine Poerschke, Eric Pugh)
Optimizations
---------------------

View File

@ -19,6 +19,10 @@
## Migration from Lucene 9.x to Lucene 10.0
### OpenNLP dependency upgrade
[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate, you will need to update any deprecated OpenNLP methods you may be using, and you must be running on Java 17.
### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829)
For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
@ -147,12 +151,6 @@ may throw `IOException` on index problems, bubbling up unexpectedly to the calle
`(Reverse)PathHierarchyTokenizer` now produces sequential (instead of overlapping) tokens with accurate
offsets, making positional queries and highlighters possible for fields tokenized with this tokenizer.
## Migration from Lucene 9.9 to Lucene 9.10
### OpenNLP dependency upgrade
[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate, you will need to update any deprecated OpenNLP methods you may be using, and you must be running on Java 17.
## Migration from Lucene 9.0 to Lucene 9.1
### Test framework package migration and module (LUCENE-10301)

View File

@ -59,11 +59,11 @@ public class MinHashFilter extends TokenFilter {
private final List<List<FixedSizeTreeSet<LongPair>>> minHashSets;
private int hashSetSize = DEFAULT_HASH_SET_SIZE;
private final int hashSetSize;
private int bucketCount = DEFAULT_BUCKET_COUNT;
private final int bucketCount;
private int hashCount = DEFAULT_HASH_COUNT;
private final int hashCount;
private boolean requiresInitialisation = true;

View File

@ -32,13 +32,13 @@ public class MinHashFilterFactory extends TokenFilterFactory {
/** SPI name */
public static final String NAME = "minHash";
private int hashCount = MinHashFilter.DEFAULT_HASH_COUNT;
private final int hashCount;
private int bucketCount = MinHashFilter.DEFAULT_BUCKET_COUNT;
private final int bucketCount;
private int hashSetSize = MinHashFilter.DEFAULT_HASH_SET_SIZE;
private final int hashSetSize;
private boolean withRotation;
private final boolean withRotation;
/** Create a {@link MinHashFilterFactory}. */
public MinHashFilterFactory(Map<String, String> args) {

View File

@ -67,7 +67,7 @@ public class WordDelimiterGraphFilterFactory extends TokenFilterFactory
private final int flags;
byte[] typeTable = null;
private CharArraySet protectedWords = null;
private boolean adjustOffsets = false;
private final boolean adjustOffsets;
/** Creates a new WordDelimiterGraphFilterFactory */
public WordDelimiterGraphFilterFactory(Map<String, String> args) {

View File

@ -89,7 +89,7 @@ public final class DutchAnalyzer extends Analyzer {
private final CharArraySet stoptable;
/** Contains words that should be indexed but not stemmed. */
private CharArraySet excltable = CharArraySet.EMPTY_SET;
private final CharArraySet excltable;
private final StemmerOverrideMap stemdict;

View File

@ -41,8 +41,8 @@ public class PatternCaptureGroupFilterFactory extends TokenFilterFactory {
/** SPI name */
public static final String NAME = "patternCaptureGroup";
private Pattern pattern;
private boolean preserveOriginal = true;
private final Pattern pattern;
private final boolean preserveOriginal;
public PatternCaptureGroupFilterFactory(Map<String, String> args) {
super(args);

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.shingle;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -175,7 +176,7 @@ public final class ShingleFilter extends TokenFilter {
* @param tokenType token tokenType
*/
public void setTokenType(String tokenType) {
this.tokenType = tokenType;
this.tokenType = Objects.requireNonNull(tokenType, "tokenType");
}
/**

View File

@ -114,7 +114,7 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
* /箱根山-箱根/成田空港-成田/ requests "箱根" and "成田" to be in the result in NBEST output.
*/
private final String nbestExamples;
private int nbestCost = -1;
private int nbestCost;
/** Creates a new JapaneseTokenizerFactory */
public JapaneseTokenizerFactory(Map<String, String> args) {

View File

@ -17,103 +17,107 @@
package org.apache.lucene.analysis.ja.dict;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/** Utility class for english translations of morphological data, used only for debugging. */
public class ToStringUtil {
// a translation map for parts of speech, only used for reflectWith
private static final HashMap<String, String> posTranslations = new HashMap<>();
private static final Map<String, String> posTranslations;
static {
posTranslations.put("名詞", "noun");
posTranslations.put("名詞-一般", "noun-common");
posTranslations.put("名詞-固有名詞", "noun-proper");
posTranslations.put("名詞-固有名詞-一般", "noun-proper-misc");
posTranslations.put("名詞-固有名詞-人名", "noun-proper-person");
posTranslations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
posTranslations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
posTranslations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
posTranslations.put("名詞-固有名詞-組織", "noun-proper-organization");
posTranslations.put("名詞-固有名詞-地域", "noun-proper-place");
posTranslations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
posTranslations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
posTranslations.put("名詞-代名詞", "noun-pronoun");
posTranslations.put("名詞-代名詞-一般", "noun-pronoun-misc");
posTranslations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
posTranslations.put("名詞-副詞可能", "noun-adverbial");
posTranslations.put("名詞-サ変接続", "noun-verbal");
posTranslations.put("名詞-形容動詞語幹", "noun-adjective-base");
posTranslations.put("名詞-数", "noun-numeric");
posTranslations.put("名詞-非自立", "noun-affix");
posTranslations.put("名詞-非自立-一般", "noun-affix-misc");
posTranslations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
posTranslations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
posTranslations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
posTranslations.put("名詞-特殊", "noun-special");
posTranslations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
posTranslations.put("名詞-接尾", "noun-suffix");
posTranslations.put("名詞-接尾-一般", "noun-suffix-misc");
posTranslations.put("名詞-接尾-人名", "noun-suffix-person");
posTranslations.put("名詞-接尾-地域", "noun-suffix-place");
posTranslations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
posTranslations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
posTranslations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
posTranslations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
posTranslations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
posTranslations.put("名詞-接尾-特殊", "noun-suffix-special");
posTranslations.put("名詞-接続詞的", "noun-suffix-conjunctive");
posTranslations.put("名詞-動詞非自立的", "noun-verbal_aux");
posTranslations.put("名詞-引用文字列", "noun-quotation");
posTranslations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
posTranslations.put("接頭詞", "prefix");
posTranslations.put("接頭詞-名詞接続", "prefix-nominal");
posTranslations.put("接頭詞-動詞接続", "prefix-verbal");
posTranslations.put("接頭詞-形容詞接続", "prefix-adjectival");
posTranslations.put("接頭詞-数接続", "prefix-numerical");
posTranslations.put("動詞", "verb");
posTranslations.put("動詞-自立", "verb-main");
posTranslations.put("動詞-非自立", "verb-auxiliary");
posTranslations.put("動詞-接尾", "verb-suffix");
posTranslations.put("形容詞", "adjective");
posTranslations.put("形容詞-自立", "adjective-main");
posTranslations.put("形容詞-非自立", "adjective-auxiliary");
posTranslations.put("形容詞-接尾", "adjective-suffix");
posTranslations.put("副詞", "adverb");
posTranslations.put("副詞-一般", "adverb-misc");
posTranslations.put("副詞-助詞類接続", "adverb-particle_conjunction");
posTranslations.put("連体詞", "adnominal");
posTranslations.put("接続詞", "conjunction");
posTranslations.put("助詞", "particle");
posTranslations.put("助詞-格助詞", "particle-case");
posTranslations.put("助詞-格助詞-一般", "particle-case-misc");
posTranslations.put("助詞-格助詞-引用", "particle-case-quote");
posTranslations.put("助詞-格助詞-連語", "particle-case-compound");
posTranslations.put("助詞-接続助詞", "particle-conjunctive");
posTranslations.put("助詞-係助詞", "particle-dependency");
posTranslations.put("助詞-副助詞", "particle-adverbial");
posTranslations.put("助詞-間投助詞", "particle-interjective");
posTranslations.put("助詞-並立助詞", "particle-coordinate");
posTranslations.put("助詞-終助詞", "particle-final");
posTranslations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
posTranslations.put("助詞-連体化", "particle-adnominalizer");
posTranslations.put("助詞-副詞化", "particle-adnominalizer");
posTranslations.put("助詞-特殊", "particle-special");
posTranslations.put("助動詞", "auxiliary-verb");
posTranslations.put("感動詞", "interjection");
posTranslations.put("記号", "symbol");
posTranslations.put("記号-一般", "symbol-misc");
posTranslations.put("記号-句点", "symbol-period");
posTranslations.put("記号-読点", "symbol-comma");
posTranslations.put("記号-空白", "symbol-space");
posTranslations.put("記号-括弧開", "symbol-open_bracket");
posTranslations.put("記号-括弧閉", "symbol-close_bracket");
posTranslations.put("記号-アルファベット", "symbol-alphabetic");
posTranslations.put("その他", "other");
posTranslations.put("その他-間投", "other-interjection");
posTranslations.put("フィラー", "filler");
posTranslations.put("非言語音", "non-verbal");
posTranslations.put("語断片", "fragment");
posTranslations.put("未知語", "unknown");
Map<String, String> translations = new HashMap<>();
translations.put("名詞", "noun");
translations.put("名詞-一般", "noun-common");
translations.put("名詞-固有名詞", "noun-proper");
translations.put("名詞-固有名詞-一般", "noun-proper-misc");
translations.put("名詞-固有名詞-人名", "noun-proper-person");
translations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
translations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
translations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
translations.put("名詞-固有名詞-組織", "noun-proper-organization");
translations.put("名詞-固有名詞-地域", "noun-proper-place");
translations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
translations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
translations.put("名詞-代名詞", "noun-pronoun");
translations.put("名詞-代名詞-一般", "noun-pronoun-misc");
translations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
translations.put("名詞-副詞可能", "noun-adverbial");
translations.put("名詞-サ変接続", "noun-verbal");
translations.put("名詞-形容動詞語幹", "noun-adjective-base");
translations.put("名詞-数", "noun-numeric");
translations.put("名詞-非自立", "noun-affix");
translations.put("名詞-非自立-一般", "noun-affix-misc");
translations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
translations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
translations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
translations.put("名詞-特殊", "noun-special");
translations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
translations.put("名詞-接尾", "noun-suffix");
translations.put("名詞-接尾-一般", "noun-suffix-misc");
translations.put("名詞-接尾-人名", "noun-suffix-person");
translations.put("名詞-接尾-地域", "noun-suffix-place");
translations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
translations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
translations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
translations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
translations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
translations.put("名詞-接尾-特殊", "noun-suffix-special");
translations.put("名詞-接続詞的", "noun-suffix-conjunctive");
translations.put("名詞-動詞非自立的", "noun-verbal_aux");
translations.put("名詞-引用文字列", "noun-quotation");
translations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
translations.put("接頭詞", "prefix");
translations.put("接頭詞-名詞接続", "prefix-nominal");
translations.put("接頭詞-動詞接続", "prefix-verbal");
translations.put("接頭詞-形容詞接続", "prefix-adjectival");
translations.put("接頭詞-数接続", "prefix-numerical");
translations.put("動詞", "verb");
translations.put("動詞-自立", "verb-main");
translations.put("動詞-非自立", "verb-auxiliary");
translations.put("動詞-接尾", "verb-suffix");
translations.put("形容詞", "adjective");
translations.put("形容詞-自立", "adjective-main");
translations.put("形容詞-非自立", "adjective-auxiliary");
translations.put("形容詞-接尾", "adjective-suffix");
translations.put("副詞", "adverb");
translations.put("副詞-一般", "adverb-misc");
translations.put("副詞-助詞類接続", "adverb-particle_conjunction");
translations.put("連体詞", "adnominal");
translations.put("接続詞", "conjunction");
translations.put("助詞", "particle");
translations.put("助詞-格助詞", "particle-case");
translations.put("助詞-格助詞-一般", "particle-case-misc");
translations.put("助詞-格助詞-引用", "particle-case-quote");
translations.put("助詞-格助詞-連語", "particle-case-compound");
translations.put("助詞-接続助詞", "particle-conjunctive");
translations.put("助詞-係助詞", "particle-dependency");
translations.put("助詞-副助詞", "particle-adverbial");
translations.put("助詞-間投助詞", "particle-interjective");
translations.put("助詞-並立助詞", "particle-coordinate");
translations.put("助詞-終助詞", "particle-final");
translations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
translations.put("助詞-連体化", "particle-adnominalizer");
translations.put("助詞-副詞化", "particle-adnominalizer");
translations.put("助詞-特殊", "particle-special");
translations.put("助動詞", "auxiliary-verb");
translations.put("感動詞", "interjection");
translations.put("記号", "symbol");
translations.put("記号-一般", "symbol-misc");
translations.put("記号-句点", "symbol-period");
translations.put("記号-読点", "symbol-comma");
translations.put("記号-空白", "symbol-space");
translations.put("記号-括弧開", "symbol-open_bracket");
translations.put("記号-括弧閉", "symbol-close_bracket");
translations.put("記号-アルファベット", "symbol-alphabetic");
translations.put("その他", "other");
translations.put("その他-間投", "other-interjection");
translations.put("フィラー", "filler");
translations.put("非言語音", "non-verbal");
translations.put("語断片", "fragment");
translations.put("未知語", "unknown");
posTranslations = Collections.unmodifiableMap(translations);
}
/** Get the english form of a POS tag */
@ -122,67 +126,69 @@ public class ToStringUtil {
}
// a translation map for inflection types, only used for reflectWith
private static final HashMap<String, String> inflTypeTranslations = new HashMap<>();
private static final Map<String, String> inflTypeTranslations;
static {
inflTypeTranslations.put("*", "*");
inflTypeTranslations.put("形容詞・アウオ段", "adj-group-a-o-u");
inflTypeTranslations.put("形容詞・イ段", "adj-group-i");
inflTypeTranslations.put("形容詞・イイ", "adj-group-ii");
inflTypeTranslations.put("不変化型", "non-inflectional");
inflTypeTranslations.put("特殊・タ", "special-da");
inflTypeTranslations.put("特殊・ダ", "special-ta");
inflTypeTranslations.put("文語・ゴトシ", "classical-gotoshi");
inflTypeTranslations.put("特殊・ジャ", "special-ja");
inflTypeTranslations.put("特殊・ナイ", "special-nai");
inflTypeTranslations.put("五段・ラ行特殊", "5-row-cons-r-special");
inflTypeTranslations.put("特殊・ヌ", "special-nu");
inflTypeTranslations.put("文語・キ", "classical-ki");
inflTypeTranslations.put("特殊・タイ", "special-tai");
inflTypeTranslations.put("文語・ベシ", "classical-beshi");
inflTypeTranslations.put("特殊・ヤ", "special-ya");
inflTypeTranslations.put("文語・マジ", "classical-maji");
inflTypeTranslations.put("下二・タ行", "2-row-lower-cons-t");
inflTypeTranslations.put("特殊・デス", "special-desu");
inflTypeTranslations.put("特殊・マス", "special-masu");
inflTypeTranslations.put("五段・ラ行アル", "5-row-aru");
inflTypeTranslations.put("文語・ナリ", "classical-nari");
inflTypeTranslations.put("文語・リ", "classical-ri");
inflTypeTranslations.put("文語・ケリ", "classical-keri");
inflTypeTranslations.put("文語・ル", "classical-ru");
inflTypeTranslations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
inflTypeTranslations.put("五段・サ行", "5-row-cons-s");
inflTypeTranslations.put("一段", "1-row");
inflTypeTranslations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
inflTypeTranslations.put("五段・マ行", "5-row-cons-m");
inflTypeTranslations.put("五段・タ行", "5-row-cons-t");
inflTypeTranslations.put("五段・ラ行", "5-row-cons-r");
inflTypeTranslations.put("サ変・−スル", "irregular-suffix-suru");
inflTypeTranslations.put("五段・ガ行", "5-row-cons-g");
inflTypeTranslations.put("サ変・−ズル", "irregular-suffix-zuru");
inflTypeTranslations.put("五段・バ行", "5-row-cons-b");
inflTypeTranslations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
inflTypeTranslations.put("下二・ダ行", "2-row-lower-cons-d");
inflTypeTranslations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
inflTypeTranslations.put("上二・ダ行", "2-row-upper-cons-d");
inflTypeTranslations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
inflTypeTranslations.put("一段・得ル", "1-row-eru");
inflTypeTranslations.put("四段・タ行", "4-row-cons-t");
inflTypeTranslations.put("五段・ナ行", "5-row-cons-n");
inflTypeTranslations.put("下二・ハ行", "2-row-lower-cons-h");
inflTypeTranslations.put("四段・ハ行", "4-row-cons-h");
inflTypeTranslations.put("四段・バ行", "4-row-cons-b");
inflTypeTranslations.put("サ変・スル", "irregular-suru");
inflTypeTranslations.put("上二・ハ行", "2-row-upper-cons-h");
inflTypeTranslations.put("下二・マ行", "2-row-lower-cons-m");
inflTypeTranslations.put("四段・サ行", "4-row-cons-s");
inflTypeTranslations.put("下二・ガ行", "2-row-lower-cons-g");
inflTypeTranslations.put("カ変・来ル", "kuru-kanji");
inflTypeTranslations.put("一段・クレル", "1-row-kureru");
inflTypeTranslations.put("下二・得", "2-row-lower-u");
inflTypeTranslations.put("カ変・クル", "kuru-kana");
inflTypeTranslations.put("ラ変", "irregular-cons-r");
inflTypeTranslations.put("下二・カ行", "2-row-lower-cons-k");
Map<String, String> translations = new HashMap<>();
translations.put("*", "*");
translations.put("形容詞・アウオ段", "adj-group-a-o-u");
translations.put("形容詞・イ段", "adj-group-i");
translations.put("形容詞・イイ", "adj-group-ii");
translations.put("不変化型", "non-inflectional");
translations.put("特殊・タ", "special-da");
translations.put("特殊・ダ", "special-ta");
translations.put("文語・ゴトシ", "classical-gotoshi");
translations.put("特殊・ジャ", "special-ja");
translations.put("特殊・ナイ", "special-nai");
translations.put("五段・ラ行特殊", "5-row-cons-r-special");
translations.put("特殊・ヌ", "special-nu");
translations.put("文語・キ", "classical-ki");
translations.put("特殊・タイ", "special-tai");
translations.put("文語・ベシ", "classical-beshi");
translations.put("特殊・ヤ", "special-ya");
translations.put("文語・マジ", "classical-maji");
translations.put("下二・タ行", "2-row-lower-cons-t");
translations.put("特殊・デス", "special-desu");
translations.put("特殊・マス", "special-masu");
translations.put("五段・ラ行アル", "5-row-aru");
translations.put("文語・ナリ", "classical-nari");
translations.put("文語・リ", "classical-ri");
translations.put("文語・ケリ", "classical-keri");
translations.put("文語・ル", "classical-ru");
translations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
translations.put("五段・サ行", "5-row-cons-s");
translations.put("一段", "1-row");
translations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
translations.put("五段・マ行", "5-row-cons-m");
translations.put("五段・タ行", "5-row-cons-t");
translations.put("五段・ラ行", "5-row-cons-r");
translations.put("サ変・−スル", "irregular-suffix-suru");
translations.put("五段・ガ行", "5-row-cons-g");
translations.put("サ変・−ズル", "irregular-suffix-zuru");
translations.put("五段・バ行", "5-row-cons-b");
translations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
translations.put("下二・ダ行", "2-row-lower-cons-d");
translations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
translations.put("上二・ダ行", "2-row-upper-cons-d");
translations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
translations.put("一段・得ル", "1-row-eru");
translations.put("四段・タ行", "4-row-cons-t");
translations.put("五段・ナ行", "5-row-cons-n");
translations.put("下二・ハ行", "2-row-lower-cons-h");
translations.put("四段・ハ行", "4-row-cons-h");
translations.put("四段・バ行", "4-row-cons-b");
translations.put("サ変・スル", "irregular-suru");
translations.put("上二・ハ行", "2-row-upper-cons-h");
translations.put("下二・マ行", "2-row-lower-cons-m");
translations.put("四段・サ行", "4-row-cons-s");
translations.put("下二・ガ行", "2-row-lower-cons-g");
translations.put("カ変・来ル", "kuru-kanji");
translations.put("一段・クレル", "1-row-kureru");
translations.put("下二・得", "2-row-lower-u");
translations.put("カ変・クル", "kuru-kana");
translations.put("ラ変", "irregular-cons-r");
translations.put("下二・カ行", "2-row-lower-cons-k");
inflTypeTranslations = Collections.unmodifiableMap(translations);
}
/** Get the english form of inflection type */
@ -191,37 +197,39 @@ public class ToStringUtil {
}
// a translation map for inflection forms, only used for reflectWith
private static final HashMap<String, String> inflFormTranslations = new HashMap<>();
private static final Map<String, String> inflFormTranslations;
static {
inflFormTranslations.put("*", "*");
inflFormTranslations.put("基本形", "base");
inflFormTranslations.put("文語基本形", "classical-base");
inflFormTranslations.put("未然ヌ接続", "imperfective-nu-connection");
inflFormTranslations.put("未然ウ接続", "imperfective-u-connection");
inflFormTranslations.put("連用タ接続", "conjunctive-ta-connection");
inflFormTranslations.put("連用テ接続", "conjunctive-te-connection");
inflFormTranslations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
inflFormTranslations.put("体言接続", "uninflected-connection");
inflFormTranslations.put("仮定形", "subjunctive");
inflFormTranslations.put("命令e", "imperative-e");
inflFormTranslations.put("仮定縮約1", "conditional-contracted-1");
inflFormTranslations.put("仮定縮約2", "conditional-contracted-2");
inflFormTranslations.put("ガル接続", "garu-connection");
inflFormTranslations.put("未然形", "imperfective");
inflFormTranslations.put("連用形", "conjunctive");
inflFormTranslations.put("音便基本形", "onbin-base");
inflFormTranslations.put("連用デ接続", "conjunctive-de-connection");
inflFormTranslations.put("未然特殊", "imperfective-special");
inflFormTranslations.put("命令i", "imperative-i");
inflFormTranslations.put("連用ニ接続", "conjunctive-ni-connection");
inflFormTranslations.put("命令yo", "imperative-yo");
inflFormTranslations.put("体言接続特殊", "adnominal-special");
inflFormTranslations.put("命令ro", "imperative-ro");
inflFormTranslations.put("体言接続特殊2", "uninflected-special-connection-2");
inflFormTranslations.put("未然レル接続", "imperfective-reru-connection");
inflFormTranslations.put("現代基本形", "modern-base");
inflFormTranslations.put("基本形-促音便", "base-onbin"); // not sure about this
Map<String, String> translations = new HashMap<>();
translations.put("*", "*");
translations.put("基本形", "base");
translations.put("文語基本形", "classical-base");
translations.put("未然ヌ接続", "imperfective-nu-connection");
translations.put("未然ウ接続", "imperfective-u-connection");
translations.put("連用タ接続", "conjunctive-ta-connection");
translations.put("連用テ接続", "conjunctive-te-connection");
translations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
translations.put("体言接続", "uninflected-connection");
translations.put("仮定形", "subjunctive");
translations.put("命令e", "imperative-e");
translations.put("仮定縮約1", "conditional-contracted-1");
translations.put("仮定縮約2", "conditional-contracted-2");
translations.put("ガル接続", "garu-connection");
translations.put("未然形", "imperfective");
translations.put("連用形", "conjunctive");
translations.put("音便基本形", "onbin-base");
translations.put("連用デ接続", "conjunctive-de-connection");
translations.put("未然特殊", "imperfective-special");
translations.put("命令i", "imperative-i");
translations.put("連用ニ接続", "conjunctive-ni-connection");
translations.put("命令yo", "imperative-yo");
translations.put("体言接続特殊", "adnominal-special");
translations.put("命令ro", "imperative-ro");
translations.put("体言接続特殊2", "uninflected-special-connection-2");
translations.put("未然レル接続", "imperfective-reru-connection");
translations.put("現代基本形", "modern-base");
translations.put("基本形-促音便", "base-onbin"); // not sure about this
inflFormTranslations = Collections.unmodifiableMap(translations);
}
/** Get the english form of inflected form */

View File

@ -44,7 +44,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
private int sentenceStart = 0;
private int sentenceIndex = -1;
private NLPTokenizerOp tokenizerOp = null;
private final NLPTokenizerOp tokenizerOp;
public OpenNLPTokenizer(
AttributeFactory factory, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp)

View File

@ -23,7 +23,7 @@ import opennlp.tools.chunker.ChunkerModel;
/** Supply OpenNLP Chunking tool Requires binary models from OpenNLP project on SourceForge. */
public class NLPChunkerOp {
private ChunkerME chunker = null;
private final ChunkerME chunker;
public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
chunker = new ChunkerME(chunkerModel);

View File

@ -27,7 +27,7 @@ import opennlp.tools.postag.POSTaggerME;
* SourceForge.
*/
public class NLPPOSTaggerOp {
private POSTagger tagger = null;
private final POSTagger tagger;
public NLPPOSTaggerOp(POSModel model) throws IOException {
tagger = new POSTaggerME(model);

View File

@ -32,10 +32,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
*/
public final class DaitchMokotoffSoundexFilter extends TokenFilter {
/** true if encoded tokens should be added as synonyms */
protected boolean inject = true;
private final boolean inject;
/** phonetic encoder */
protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
private final DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
// output is a string such as ab|ac|...
private static final Pattern pattern = Pattern.compile("([^|]+)");
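// A minimal sketch of typical usage (assumed wiring, not from this commit):
// inject=true adds the encodings as synonyms alongside the original token,
// per the javadoc above.
// import org.apache.lucene.analysis.Analyzer;
// import org.apache.lucene.analysis.TokenStream;
// import org.apache.lucene.analysis.Tokenizer;
// import org.apache.lucene.analysis.core.WhitespaceTokenizer;
Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();
    TokenStream result = new DaitchMokotoffSoundexFilter(source, true); // inject=true
    return new TokenStreamComponents(source, result);
  }
};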

View File

@ -32,13 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
*/
public final class PhoneticFilter extends TokenFilter {
/** true if encoded tokens should be added as synonyms */
protected boolean inject = true;
private final boolean inject;
/** phonetic encoder */
protected Encoder encoder = null;
private final Encoder encoder;
/** captured state, non-null when <code>inject=true</code> and a token is buffered */
protected State save = null;
private State save = null;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);

View File

@ -73,7 +73,7 @@ public class Trie {
List<CharSequence> cmds = new ArrayList<>();
int root;
boolean forward = false;
boolean forward;
/**
* Constructor for the Trie object.

View File

@ -191,7 +191,7 @@ public final class FieldReader extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
// BlockTreeTermsWriter.brToString(startTerm));
// ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?

View File

@ -543,19 +543,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) {

View File

@ -354,24 +354,6 @@ public final class Lucene40BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// terms index

View File

@ -256,8 +256,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) {
@ -279,7 +279,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -299,27 +299,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
}
/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/
@Override
public boolean seekExact(BytesRef target) throws IOException {
@ -337,8 +316,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
// ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -496,8 +476,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -528,7 +508,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -544,7 +524,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -587,7 +567,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -623,7 +603,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) +
// " current=" + ToStringUtils.bytesRefToString(term)
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@ -667,9 +648,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -781,8 +762,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -818,7 +799,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term));
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -829,7 +811,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term));
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -1029,9 +1012,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -1095,8 +1079,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame:
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
// + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}

View File

@ -317,8 +317,8 @@ final class SegmentTermsEnumFrame {
}
public void nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -410,8 +410,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -566,28 +566,14 @@ final class SegmentTermsEnumFrame {
private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we
// scan the entries to check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -617,7 +603,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
startBytePos = suffixesReader.getPosition();
@ -682,8 +668,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(target));
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -711,7 +698,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -743,8 +731,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
// ste.termExists=" + ste.termExists);
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
// " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
// System.out.println(" now pushFrame");

View File

@ -46,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
@ -349,7 +350,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
}
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms);
}
@ -388,33 +389,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "TERM: " + brToString(termBytes);
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class PendingBlock extends PendingEntry {
public final BytesRef prefix;
public final long fp;
@ -442,7 +420,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: prefix=" + brToString(prefix);
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -600,8 +578,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
// + count);
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// ToStringUtils.bytesRefToString(br) + " count=" + count);
// }
// Root block better write all remaining pending entries:
@ -754,9 +732,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);
// Write block header:
int numEntries = end - start;
@ -769,7 +748,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
/*
if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
}
*/
@ -804,7 +785,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For leaf block we write suffix straight
@ -837,7 +819,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For non-leaf block we borrow 1 bit to record
@ -879,8 +862,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// System.out.println(" write sub-block suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes) +
// " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
assert floorLeadLabel == -1
@ -998,7 +982,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
}
*/
@ -1051,8 +1036,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}

View File

@ -19,19 +19,25 @@ package org.apache.lucene.backward_index;
import com.carrotsearch.randomizedtesting.annotations.Name;
import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
@ -47,26 +53,31 @@ import org.junit.Before;
public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
protected final Version version;
private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
protected final String indexPattern;
static final Set<String> OLD_VERSIONS;
protected static final Set<Version> BINARY_SUPPORTED_VERSIONS;
static {
String[] oldVersions =
new String[] {
"8.0.0", "8.0.0", "8.1.0", "8.1.0", "8.1.1", "8.1.1", "8.2.0", "8.2.0", "8.3.0", "8.3.0",
"8.3.1", "8.3.1", "8.4.0", "8.4.0", "8.4.1", "8.4.1", "8.5.0", "8.5.0", "8.5.1", "8.5.1",
"8.5.2", "8.5.2", "8.6.0", "8.6.0", "8.6.1", "8.6.1", "8.6.2", "8.6.2", "8.6.3", "8.6.3",
"8.7.0", "8.7.0", "8.8.0", "8.8.0", "8.8.1", "8.8.1", "8.8.2", "8.8.2", "8.9.0", "8.9.0",
"8.10.0", "8.10.0", "8.10.1", "8.10.1", "8.11.0", "8.11.0", "8.11.1", "8.11.1", "8.11.2",
"8.11.2", "8.11.3", "8.11.3", "9.0.0", "9.1.0", "9.2.0", "9.3.0", "9.4.0", "9.4.1",
"9.4.2", "9.5.0", "9.6.0", "9.7.0", "9.8.0", "9.9.0", "9.9.1", "9.9.2", "9.10.0",
"10.0.0",
};
private static final Version LATEST_PREVIOUS_MAJOR = getLatestPreviousMajorVersion();
protected final Version version;
protected final String indexPattern;
static {
String name = "versions.txt";
try (LineNumberReader in =
new LineNumberReader(
IOUtils.getDecodingReader(
IOUtils.requireResourceNonNull(
BackwardsCompatibilityTestBase.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8))) {
OLD_VERSIONS =
in.lines()
.filter(Predicate.not(String::isBlank))
.collect(Collectors.toCollection(LinkedHashSet::new));
} catch (IOException exception) {
throw new RuntimeException("failed to load resource", exception);
}
Set<Version> binaryVersions = new HashSet<>();
for (String version : oldVersions) {
for (String version : OLD_VERSIONS) {
try {
Version v = Version.parse(version);
assertTrue("Unsupported binary version: " + v, v.major >= Version.MIN_SUPPORTED_MAJOR - 1);
@ -75,8 +86,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
throw new RuntimeException(ex);
}
}
List<Version> allCurrentVersions = getAllCurrentVersions();
for (Version version : allCurrentVersions) {
for (Version version : getAllCurrentReleasedVersions()) {
// make sure we never miss a version.
assertTrue("Version: " + version + " missing", binaryVersions.remove(version));
}
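// The hard-coded oldVersions array above now lives in a versions.txt classpath
// resource. A standalone sketch of the same loading idiom (names illustrative;
// assumes the usual java.io imports):
//   try (BufferedReader in = new BufferedReader(new InputStreamReader(
//       Objects.requireNonNull(owner.getResourceAsStream("versions.txt")),
//       StandardCharsets.UTF_8))) {
//     // LinkedHashSet keeps file order while dropping accidental duplicates.
//     versions = in.lines()
//         .filter(Predicate.not(String::isBlank))
//         .collect(Collectors.toCollection(LinkedHashSet::new));
//   }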
@ -181,21 +192,53 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
return versions;
}
private static List<Version> getAllCurrentReleasedVersions() {
List<Version> currentReleasedVersions = getAllCurrentVersions();
// The latest version from the current major is always under development.
assertTrue(currentReleasedVersions.remove(Version.LATEST));
// The latest minor from the previous major is also under development.
assertTrue(currentReleasedVersions.remove(LATEST_PREVIOUS_MAJOR));
// In addition to those, we may need to remove one more version in case a release is in
// progress, and the version constant has been added but backward-compatibility indexes have not
// been checked in yet.
List<Version> missingVersions = new ArrayList<>();
for (Iterator<Version> it = currentReleasedVersions.iterator(); it.hasNext(); ) {
Version version = it.next();
String indexName = String.format(Locale.ROOT, "index.%s-cfs.zip", version);
if (TestAncientIndicesCompatibility.class.getResource(indexName) == null) {
missingVersions.add(version);
it.remove();
}
}
if (missingVersions.size() > 1) {
throw new AssertionError(
"More than one version is missing backward-compatibility data: " + missingVersions);
}
return currentReleasedVersions;
}
/** Get all versions that are released, plus the latest version which is unreleased. */
public static List<Version> getAllCurrentReleasedVersionsAndCurrent() {
List<Version> versions = new ArrayList<>(getAllCurrentReleasedVersions());
versions.add(Version.LATEST);
return versions;
}
public static Iterable<Object[]> allVersion(String name, String... suffixes) {
List<Object> patterns = new ArrayList<>();
for (String suffix : suffixes) {
patterns.add(createPattern(name, suffix));
}
List<Object[]> versionAndPatterns = new ArrayList<>();
List<Version> versionList = getAllCurrentVersions();
List<Version> versionList = getAllCurrentReleasedVersionsAndCurrent();
for (Version v : versionList) {
if (v.equals(LATEST_PREVIOUS_MAJOR)
== false) { // the latest prev-major has not yet been released
for (Object p : patterns) {
versionAndPatterns.add(new Object[] {v, p});
}
}
}
return versionAndPatterns;
}
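// A plausible concrete subclass hook into allVersion, following the parameter-
// constructor pattern this base class expects (class name and index prefix are
// invented; real subclasses also override the index-creation methods):
public class TestFooBackwardsCompatibility extends BackwardsCompatibilityTestBase {
  public TestFooBackwardsCompatibility(@Name("version") Version version,
      @Name("pattern") String pattern) {
    super(version, pattern);
  }
  @ParametersFactory
  public static Iterable<Object[]> testVersionsFactory() {
    // One (version, pattern) pair per released version (plus the unreleased latest)
    // and per suffix.
    return allVersion("foo", "-cfs", "-nocfs");
  }
}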

View File

@ -21,8 +21,16 @@ import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexFormatTooOldException;
@ -36,274 +44,57 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.IOUtils;
@SuppressWarnings("deprecation")
public class TestAncientIndicesCompatibility extends LuceneTestCase {
static final Set<String> UNSUPPORTED_INDEXES;
static final String[] unsupportedNames = {
"1.9.0-cfs",
"1.9.0-nocfs",
"2.0.0-cfs",
"2.0.0-nocfs",
"2.1.0-cfs",
"2.1.0-nocfs",
"2.2.0-cfs",
"2.2.0-nocfs",
"2.3.0-cfs",
"2.3.0-nocfs",
"2.4.0-cfs",
"2.4.0-nocfs",
"2.4.1-cfs",
"2.4.1-nocfs",
"2.9.0-cfs",
"2.9.0-nocfs",
"2.9.1-cfs",
"2.9.1-nocfs",
"2.9.2-cfs",
"2.9.2-nocfs",
"2.9.3-cfs",
"2.9.3-nocfs",
"2.9.4-cfs",
"2.9.4-nocfs",
"3.0.0-cfs",
"3.0.0-nocfs",
"3.0.1-cfs",
"3.0.1-nocfs",
"3.0.2-cfs",
"3.0.2-nocfs",
"3.0.3-cfs",
"3.0.3-nocfs",
"3.1.0-cfs",
"3.1.0-nocfs",
"3.2.0-cfs",
"3.2.0-nocfs",
"3.3.0-cfs",
"3.3.0-nocfs",
"3.4.0-cfs",
"3.4.0-nocfs",
"3.5.0-cfs",
"3.5.0-nocfs",
"3.6.0-cfs",
"3.6.0-nocfs",
"3.6.1-cfs",
"3.6.1-nocfs",
"3.6.2-cfs",
"3.6.2-nocfs",
"4.0.0-cfs",
"4.0.0-cfs",
"4.0.0-nocfs",
"4.0.0.1-cfs",
"4.0.0.1-nocfs",
"4.0.0.2-cfs",
"4.0.0.2-nocfs",
"4.1.0-cfs",
"4.1.0-nocfs",
"4.2.0-cfs",
"4.2.0-nocfs",
"4.2.1-cfs",
"4.2.1-nocfs",
"4.3.0-cfs",
"4.3.0-nocfs",
"4.3.1-cfs",
"4.3.1-nocfs",
"4.4.0-cfs",
"4.4.0-nocfs",
"4.5.0-cfs",
"4.5.0-nocfs",
"4.5.1-cfs",
"4.5.1-nocfs",
"4.6.0-cfs",
"4.6.0-nocfs",
"4.6.1-cfs",
"4.6.1-nocfs",
"4.7.0-cfs",
"4.7.0-nocfs",
"4.7.1-cfs",
"4.7.1-nocfs",
"4.7.2-cfs",
"4.7.2-nocfs",
"4.8.0-cfs",
"4.8.0-nocfs",
"4.8.1-cfs",
"4.8.1-nocfs",
"4.9.0-cfs",
"4.9.0-nocfs",
"4.9.1-cfs",
"4.9.1-nocfs",
"4.10.0-cfs",
"4.10.0-nocfs",
"4.10.1-cfs",
"4.10.1-nocfs",
"4.10.2-cfs",
"4.10.2-nocfs",
"4.10.3-cfs",
"4.10.3-nocfs",
"4.10.4-cfs",
"4.10.4-nocfs",
"5x-with-4x-segments-cfs",
"5x-with-4x-segments-nocfs",
"5.0.0.singlesegment-cfs",
"5.0.0.singlesegment-nocfs",
"5.0.0-cfs",
"5.0.0-nocfs",
"5.1.0-cfs",
"5.1.0-nocfs",
"5.2.0-cfs",
"5.2.0-nocfs",
"5.2.1-cfs",
"5.2.1-nocfs",
"5.3.0-cfs",
"5.3.0-nocfs",
"5.3.1-cfs",
"5.3.1-nocfs",
"5.3.2-cfs",
"5.3.2-nocfs",
"5.4.0-cfs",
"5.4.0-nocfs",
"5.4.1-cfs",
"5.4.1-nocfs",
"5.5.0-cfs",
"5.5.0-nocfs",
"5.5.1-cfs",
"5.5.1-nocfs",
"5.5.2-cfs",
"5.5.2-nocfs",
"5.5.3-cfs",
"5.5.3-nocfs",
"5.5.4-cfs",
"5.5.4-nocfs",
"5.5.5-cfs",
"5.5.5-nocfs",
"6.0.0-cfs",
"6.0.0-nocfs",
"6.0.1-cfs",
"6.0.1-nocfs",
"6.1.0-cfs",
"6.1.0-nocfs",
"6.2.0-cfs",
"6.2.0-nocfs",
"6.2.1-cfs",
"6.2.1-nocfs",
"6.3.0-cfs",
"6.3.0-nocfs",
"6.4.0-cfs",
"6.4.0-nocfs",
"6.4.1-cfs",
"6.4.1-nocfs",
"6.4.2-cfs",
"6.4.2-nocfs",
"6.5.0-cfs",
"6.5.0-nocfs",
"6.5.1-cfs",
"6.5.1-nocfs",
"6.6.0-cfs",
"6.6.0-nocfs",
"6.6.1-cfs",
"6.6.1-nocfs",
"6.6.2-cfs",
"6.6.2-nocfs",
"6.6.3-cfs",
"6.6.3-nocfs",
"6.6.4-cfs",
"6.6.4-nocfs",
"6.6.5-cfs",
"6.6.5-nocfs",
"6.6.6-cfs",
"6.6.6-nocfs",
"7.0.0-cfs",
"7.0.0-nocfs",
"7.0.1-cfs",
"7.0.1-nocfs",
"7.1.0-cfs",
"7.1.0-nocfs",
"7.2.0-cfs",
"7.2.0-nocfs",
"7.2.1-cfs",
"7.2.1-nocfs",
"7.3.0-cfs",
"7.3.0-nocfs",
"7.3.1-cfs",
"7.3.1-nocfs",
"7.4.0-cfs",
"7.4.0-nocfs",
"7.5.0-cfs",
"7.5.0-nocfs",
"7.6.0-cfs",
"7.6.0-nocfs",
"7.7.0-cfs",
"7.7.0-nocfs",
"7.7.1-cfs",
"7.7.1-nocfs",
"7.7.2-cfs",
"7.7.2-nocfs",
"7.7.3-cfs",
"7.7.3-nocfs",
"8.0.0-cfs",
"8.0.0-nocfs",
"8.1.0-cfs",
"8.1.0-nocfs",
"8.1.1-cfs",
"8.1.1-nocfs",
"8.2.0-cfs",
"8.2.0-nocfs",
"8.3.0-cfs",
"8.3.0-nocfs",
"8.3.1-cfs",
"8.3.1-nocfs",
"8.4.0-cfs",
"8.4.0-nocfs",
"8.4.1-cfs",
"8.4.1-nocfs",
"8.5.0-cfs",
"8.5.0-nocfs",
"8.5.1-cfs",
"8.5.1-nocfs",
"8.5.2-cfs",
"8.5.2-nocfs",
"8.6.0-cfs",
"8.6.0-nocfs",
"8.6.1-cfs",
"8.6.1-nocfs",
"8.6.2-cfs",
"8.6.2-nocfs",
"8.6.3-cfs",
"8.6.3-nocfs",
"8.7.0-cfs",
"8.7.0-nocfs",
"8.8.0-cfs",
"8.8.0-nocfs",
"8.8.1-cfs",
"8.8.1-nocfs",
"8.8.2-cfs",
"8.8.2-nocfs",
"8.9.0-cfs",
"8.9.0-nocfs",
"8.10.0-cfs",
"8.10.0-nocfs",
"8.10.1-cfs",
"8.10.1-nocfs",
"8.11.0-cfs",
"8.11.0-nocfs",
"8.11.1-cfs",
"8.11.1-nocfs",
"8.11.2-cfs",
"8.11.2-nocfs",
"8.11.3-cfs",
"8.11.3-nocfs"
};
static {
String name = "unsupported_versions.txt";
Set<String> indices;
try (LineNumberReader in =
new LineNumberReader(
IOUtils.getDecodingReader(
IOUtils.requireResourceNonNull(
TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8))) {
indices =
in.lines()
.filter(Predicate.not(String::isBlank))
.flatMap(version -> Stream.of(version + "-cfs", version + "-nocfs"))
.collect(Collectors.toCollection(LinkedHashSet::new));
} catch (IOException exception) {
throw new RuntimeException("failed to load resource", exception);
}
name = "unsupported_indices.txt";
try (LineNumberReader in =
new LineNumberReader(
IOUtils.getDecodingReader(
IOUtils.requireResourceNonNull(
TestAncientIndicesCompatibility.class.getResourceAsStream(name), name),
StandardCharsets.UTF_8))) {
indices.addAll(
in.lines()
.filter(Predicate.not(String::isBlank))
.collect(Collectors.toCollection(LinkedHashSet::new)));
} catch (IOException exception) {
throw new RuntimeException("failed to load resource", exception);
}
UNSUPPORTED_INDEXES = Collections.unmodifiableSet(indices);
}
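// A minimal standalone illustration of the suffix expansion above, with a
// shortened input list (versions here are just examples):
static void demoSuffixExpansion() {
  Set<String> indices =
      Stream.of("1.9.0", "2.0.0")
          .flatMap(v -> Stream.of(v + "-cfs", v + "-nocfs"))
          .collect(Collectors.toCollection(LinkedHashSet::new));
  // Prints: [1.9.0-cfs, 1.9.0-nocfs, 2.0.0-cfs, 2.0.0-nocfs]
  System.out.println(indices);
}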
/**
* This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and
* operate on indexes that are too old.
*/
public void testUnsupportedOldIndexes() throws Exception {
for (int i = 0; i < unsupportedNames.length; i++) {
for (String version : UNSUPPORTED_INDEXES) {
if (VERBOSE) {
System.out.println("TEST: index " + unsupportedNames[i]);
System.out.println("TEST: index " + version);
}
Path oldIndexDir = createTempDir(unsupportedNames[i]);
TestUtil.unzip(
getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir);
Path oldIndexDir = createTempDir(version);
TestUtil.unzip(getDataInputStream("unsupported." + version + ".zip"), oldIndexDir);
BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir);
// don't checkindex, these are intentionally not supported
dir.setCheckIndexOnClose(false);
@ -312,7 +103,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
IndexWriter writer = null;
try {
reader = DirectoryReader.open(dir);
fail("DirectoryReader.open should not pass for " + unsupportedNames[i]);
fail("DirectoryReader.open should not pass for " + version);
} catch (IndexFormatTooOldException e) {
if (e.getReason() != null) {
assertNull(e.getVersion());
@ -353,7 +144,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
writer =
new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false));
fail("IndexWriter creation should not pass for " + unsupportedNames[i]);
fail("IndexWriter creation should not pass for " + version);
} catch (IndexFormatTooOldException e) {
if (e.getReason() != null) {
assertNull(e.getVersion());
@ -406,7 +197,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, UTF_8));
CheckIndex.Status indexStatus = checker.checkIndex();
if (unsupportedNames[i].startsWith("8.")) {
if (version.startsWith("8.")) {
assertTrue(indexStatus.clean);
} else {
assertFalse(indexStatus.clean);

View File

@ -101,8 +101,6 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1;
/**
* A parameter constructor for {@link com.carrotsearch.randomizedtesting.RandomizedRunner}. See
* {@link #testVersionsFactory()} for details on the values provided to the framework.

View File

@ -62,7 +62,6 @@ public class TestBinaryBackwardsCompatibility extends BackwardsCompatibilityTest
@Nightly
public void testReadNMinusTwoCommit() throws IOException {
try (BaseDirectoryWrapper dir = newDirectory(directory)) {
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();

View File

@ -55,6 +55,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
static final String INDEX_NAME = "sorted";
static final String SUFFIX = "";
private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
private static final String PARENT_FIELD_NAME = "___parent";
public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
super(version, pattern);
@ -79,8 +81,8 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
.setIndexSort(sort)
.setMergePolicy(newLogMergePolicy());
if (this.version.onOrAfter(Version.LUCENE_10_0_0)) {
indexWriterConfig.setParentField("___parent");
if (this.version.onOrAfter(FIRST_PARENT_DOC_VERSION)) {
indexWriterConfig.setParentField(PARENT_FIELD_NAME);
}
// open writer
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
@ -89,7 +91,10 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
Document child = new Document();
child.add(new StringField("relation", "child", Field.Store.NO));
child.add(new StringField("bid", "" + i, Field.Store.NO));
if (version.onOrAfter(FIRST_PARENT_DOC_VERSION)
== false) { // only add this to earlier versions
child.add(new NumericDocValuesField("dateDV", i));
}
Document parent = new Document();
parent.add(new StringField("relation", "parent", Field.Store.NO));
parent.add(new StringField("bid", "" + i, Field.Store.NO));
@ -158,6 +163,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
conf.setUseCompoundFile(false);
conf.setCodec(TestUtil.getDefaultCodec());
conf.setParentField("___parent");
conf.setParentField(PARENT_FIELD_NAME);
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
IndexWriter writer = new IndexWriter(directory, conf);
LineFileDocs docs = new LineFileDocs(new Random(0));

View File

@ -0,0 +1,4 @@
5x-with-4x-segments-cfs
5x-with-4x-segments-nocfs
5.0.0.singlesegment-cfs
5.0.0.singlesegment-nocfs

View File

@ -0,0 +1,122 @@
1.9.0
2.0.0
2.1.0
2.2.0
2.3.0
2.4.0
2.4.1
2.9.0
2.9.1
2.9.2
2.9.3
2.9.4
3.0.0
3.0.1
3.0.2
3.0.3
3.1.0
3.2.0
3.3.0
3.4.0
3.5.0
3.6.0
3.6.1
3.6.2
4.0.0
4.0.0.1
4.0.0.2
4.1.0
4.2.0
4.2.1
4.3.0
4.3.1
4.4.0
4.5.0
4.5.1
4.6.0
4.6.1
4.7.0
4.7.1
4.7.2
4.8.0
4.8.1
4.9.0
4.9.1
4.10.0
4.10.1
4.10.2
4.10.3
4.10.4
5.0.0
5.1.0
5.2.0
5.2.1
5.3.0
5.3.1
5.3.2
5.4.0
5.4.1
5.5.0
5.5.1
5.5.2
5.5.3
5.5.4
5.5.5
6.0.0
6.0.1
6.1.0
6.2.0
6.2.1
6.3.0
6.4.0
6.4.1
6.4.2
6.5.0
6.5.1
6.6.0
6.6.1
6.6.2
6.6.3
6.6.4
6.6.5
6.6.6
7.0.0
7.0.1
7.1.0
7.2.0
7.2.1
7.3.0
7.3.1
7.4.0
7.5.0
7.6.0
7.7.0
7.7.1
7.7.2
7.7.3
8.0.0
8.1.0
8.1.1
8.2.0
8.3.0
8.3.1
8.4.0
8.4.1
8.5.0
8.5.1
8.5.2
8.6.0
8.6.1
8.6.2
8.6.3
8.7.0
8.8.0
8.8.1
8.8.2
8.9.0
8.10.0
8.10.1
8.11.0
8.11.1
8.11.2
8.11.3

View File

@ -0,0 +1,40 @@
8.0.0
8.1.0
8.1.1
8.2.0
8.3.0
8.3.1
8.4.0
8.4.1
8.5.0
8.5.1
8.5.2
8.6.0
8.6.1
8.6.2
8.6.3
8.7.0
8.8.0
8.8.1
8.8.2
8.9.0
8.10.0
8.10.1
8.11.0
8.11.1
8.11.2
8.11.3
9.0.0
9.1.0
9.2.0
9.3.0
9.4.0
9.4.1
9.4.2
9.5.0
9.6.0
9.7.0
9.8.0
9.9.0
9.9.1
9.9.2

View File

@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.text.ParseException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
@ -67,7 +68,7 @@ public class ExpressionsBenchmark {
lookup.findStatic(
lookup.lookupClass(), "ident", MethodType.methodType(double.class, double.class)));
m.put("mh_identity", MethodHandles.identity(double.class));
return m;
return Collections.unmodifiableMap(m);
} catch (ReflectiveOperationException e) {
throw new AssertionError(e);
}
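// Returning Collections.unmodifiableMap instead of the raw HashMap protects the
// cached handle table from accidental mutation. Sketch of the trade-off (Map.copyOf
// is an alternative this code does not use):
//   Map<String, MethodHandle> view = Collections.unmodifiableMap(m); // wrapper, no copy
//   Map<String, MethodHandle> copy = Map.copyOf(m); // defensive copy; rejects nulls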

View File

@ -21,7 +21,6 @@ import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.xml.XMLConstants;
@ -68,7 +67,7 @@ public class EnwikiContentSource extends ContentSource {
private boolean stopped = false;
private String[] tuple;
private NoMoreDataException nmde;
private StringBuilder contents = new StringBuilder();
private final StringBuilder contents = new StringBuilder();
private String title;
private String body;
private String time;
@ -262,7 +261,6 @@ public class EnwikiContentSource extends ContentSource {
}
}
private static final Map<String, Integer> ELEMENTS = new HashMap<>();
private static final int TITLE = 0;
private static final int DATE = TITLE + 1;
private static final int BODY = DATE + 1;
@ -272,24 +270,24 @@ public class EnwikiContentSource extends ContentSource {
// should not be part of the tuple, we should define them after LENGTH.
private static final int PAGE = LENGTH + 1;
private static final Map<String, Integer> ELEMENTS =
Map.of(
"page", PAGE,
"text", BODY,
"timestamp", DATE,
"title", TITLE,
"id", ID);
private static final String[] months = {
"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
};
static {
ELEMENTS.put("page", Integer.valueOf(PAGE));
ELEMENTS.put("text", Integer.valueOf(BODY));
ELEMENTS.put("timestamp", Integer.valueOf(DATE));
ELEMENTS.put("title", Integer.valueOf(TITLE));
ELEMENTS.put("id", Integer.valueOf(ID));
}
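// Map.of, as now used for ELEMENTS, is capped at ten key/value pairs and rejects
// null or duplicate keys; a larger table would use Map.ofEntries instead
// (illustrative equivalent of the map above):
//   private static final Map<String, Integer> ELEMENTS =
//       Map.ofEntries(
//           Map.entry("page", PAGE),
//           Map.entry("text", BODY),
//           Map.entry("timestamp", DATE),
//           Map.entry("title", TITLE),
//           Map.entry("id", ID));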
/**
* Returns the type of the element if defined, otherwise -1. This is useful in startElement
* and endElement, since it avoids comparing the element's qualified name over and over.
*/
private static final int getElementType(String elem) {
private static int getElementType(String elem) {
Integer val = ELEMENTS.get(elem);
return val == null ? -1 : val.intValue();
}
@ -297,7 +295,7 @@ public class EnwikiContentSource extends ContentSource {
private Path file;
private boolean keepImages = true;
private InputStream is;
private Parser parser = new Parser();
private final Parser parser = new Parser();
@Override
public void close() throws IOException {

View File

@ -18,6 +18,8 @@ package org.apache.lucene.benchmark.byTask.feeds;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@ -40,22 +42,28 @@ public abstract class TrecDocParser {
/** trec parser type used for unknown extensions */
public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;
static final Map<ParsePathType, TrecDocParser> pathType2parser = new HashMap<>();
static final Map<ParsePathType, TrecDocParser> pathType2Parser;
static {
pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
pathType2parser.put(ParsePathType.FT, new TrecFTParser());
pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
pathType2Parser =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
ParsePathType.GOV2, new TrecGov2Parser(),
ParsePathType.FBIS, new TrecFBISParser(),
ParsePathType.FR94, new TrecFR94Parser(),
ParsePathType.FT, new TrecFTParser(),
ParsePathType.LATIMES, new TrecLATimesParser())));
}
static final Map<String, ParsePathType> pathName2Type = new HashMap<>();
static final Map<String, ParsePathType> pathName2Type;
static {
Map<String, ParsePathType> name2Type = new HashMap<>();
for (ParsePathType ppt : ParsePathType.values()) {
pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
name2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
}
pathName2Type = Collections.unmodifiableMap(name2Type);
}
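// Using an EnumMap here is a deliberate fit for enum keys: values sit in an array
// indexed by the constant's ordinal, so lookups skip hashing and iteration follows
// declaration order. The same idiom in miniature (value strings invented):
//   EnumMap<ParsePathType, String> m = new EnumMap<>(ParsePathType.class);
//   m.put(ParsePathType.GOV2, "gov2");
//   Map<ParsePathType, String> frozen = Collections.unmodifiableMap(m);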
/** max length of walk up from file to its ancestors when looking for a known path type */

View File

@ -32,6 +32,6 @@ public class TrecParserByPath extends TrecDocParser {
StringBuilder docBuf,
ParsePathType pathType)
throws IOException {
return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
return pathType2Parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
}
}

View File

@ -43,7 +43,7 @@ public class TaskSequence extends PerfTask {
private boolean resetExhausted = false;
private PerfTask[] tasksArray;
private boolean anyExhaustibleTasks;
private boolean collapsable = false; // to not collapse external sequence named in alg.
private final boolean collapsable; // to not collapse external sequence named in alg.
private boolean fixedTime; // true if we run for fixed time
private double runTimeSec; // how long to run for

View File

@ -23,7 +23,6 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.compress.compressors.CompressorException;
@ -70,15 +69,9 @@ public class StreamUtils {
}
}
private static final Map<String, Type> extensionToType = new HashMap<>();
static {
// these in are lower case, we will lower case at the test as well
extensionToType.put(".bz2", Type.BZIP2);
extensionToType.put(".bzip", Type.BZIP2);
extensionToType.put(".gz", Type.GZIP);
extensionToType.put(".gzip", Type.GZIP);
}
// these are in lower case; we lower-case the extension at the lookup as well
private static final Map<String, Type> extensionToType =
Map.of(".bz2", Type.BZIP2, ".bzip", Type.BZIP2, ".gz", Type.GZIP, ".gzip", Type.GZIP);
/**
* Returns an {@link InputStream} over the requested file. This method attempts to identify the

View File

@ -36,7 +36,7 @@ public class TestTrecContentSource extends LuceneTestCase {
/** A TrecDocMaker which works on a String and not files. */
private static class StringableTrecSource extends TrecContentSource {
private String docs = null;
private final String docs;
public StringableTrecSource(String docs, boolean forever) {
this.docs = docs;

View File

@ -230,24 +230,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
return fields.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// term dictionary

View File

@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
@ -288,29 +289,10 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return brToString(termBytes);
return ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class SubIndex {
public final FST<Output> index;
public final long termOrdStart;
@ -353,7 +335,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: " + brToString(prefix);
return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -457,9 +439,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
Output newOutput =
FST_OUTPUTS.newOutput(
output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + "
// termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" +
// newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
// " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
// + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
}
}
@ -642,8 +624,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
long startFP = out.getFilePointer();
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + "
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
// " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// hasTerms + " hasSubBlocks=" + hasSubBlocks);
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
@ -662,11 +644,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(code);
// if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
// isLastInFloor);
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
// " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
// (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
// " isLastInFloor=" + isLastInFloor);
// }
final List<SubIndex> subIndices;
@ -784,7 +766,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
}
*/
@ -842,7 +825,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" +
Arrays.toString(tmp) + " pending.size()=" + pending.size());
}
*/
@ -885,8 +869,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}

View File

@ -61,7 +61,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
throws IOException {
// if (DEBUG) {
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" +
// brToString(compiled.commonSuffixRef));
// ToStringUtils.bytesRefToString(compiled.commonSuffixRef));
// }
this.fr = fr;
this.byteRunnable = compiled.getByteRunnable();
@ -283,13 +283,15 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame.loadNextFloorBlock();
continue;
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term));
// if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return;
}
}
continue;
} else if (cmp == 0) {
// if (DEBUG) System.out.println(" return term=" + brToString(term));
// if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return;
} else {
// Fallback to prior entry: the semantics of
@ -327,10 +329,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nintEnum.next seg=" + segment);
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new
// BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + "
// lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" +
// (currentFrame.transitions.length == 0 ? "n/a" :
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" +
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
// }
@ -343,9 +345,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) System.out.println(" next-floor-block");
currentFrame.loadNextFloorBlock();
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
} else {
@ -357,9 +360,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame = stack[currentFrame.ord - 1];
assert currentFrame.lastSubFP == lastFP;
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
}
@ -373,7 +377,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// suffixRef.length = currentFrame.suffix;
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " +
// currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" +
// brToString(suffixRef));
// ToStringUtils.bytesRefToString(suffixRef));
// }
if (currentFrame.suffix != 0) {
@ -480,15 +484,16 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
copyTerm();
currentFrame = pushFrame(state);
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
} else if (byteRunnable.isAccept(state)) {
copyTerm();
// if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
return term;

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
@ -174,11 +175,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
throws IOException {
final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + "
// nextEnt=" + f.nextEnt);
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp +
// " nextEnt=" + f.nextEnt);
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
@ -204,7 +205,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -224,19 +225,6 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
return true;
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
@Override
public boolean seekExact(final BytesRef target) throws IOException {
@ -250,7 +238,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
/*
if (DEBUG) {
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" +
ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
") validIndexPrefix=" + validIndexPrefix);
printSeekState(System.out);
}
*/
@ -411,8 +401,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true;
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -443,7 +433,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -459,7 +449,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -502,7 +492,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -537,8 +527,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix= " + validIndexPrefix);
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState();
// }
@ -581,9 +571,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -697,8 +687,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true;
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -733,7 +723,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -744,7 +735,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -829,7 +821,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms="
+ f.hasTerms
@ -859,7 +851,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ " nextEnt="
+ f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@ -951,8 +943,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
// ToStringUtils.bytesRefToString(term) +
// " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
@ -1019,8 +1012,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
// currentFrame.ord=" + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
// " currentFrame.ord=" + currentFrame.ord);
positioned = true;
return term.get();
}
@ -1235,8 +1228,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
int low = 0;
int high = arc.numArcs() - 1;
int mid = 0;
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
// output=" + output);
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput +
// " output=" + output);
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
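// The unsigned shift is the overflow-safe midpoint idiom: when low + high exceeds
// Integer.MAX_VALUE, the signed (low + high) / 2 turns negative, while
// (low + high) >>> 1 still yields the correct midpoint. Tiny demonstration:
//   int low = 2_000_000_000, high = 2_100_000_000;
//   int bad  = (low + high) / 2;   // -97_483_648: the sum wrapped around
//   int good = (low + high) >>> 1; // 2_050_000_000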

View File

@ -119,8 +119,8 @@ final class OrdsSegmentTermsEnumFrame {
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + "
// shift=" + (nextFloorTermOrd-termOrdOrig));
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd +
// " shift=" + (nextFloorTermOrd-termOrdOrig));
// if (DEBUG) {
// System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new
@ -289,8 +289,8 @@ final class OrdsSegmentTermsEnumFrame {
// Decodes next entry; returns true if it's a sub-block
public boolean nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
nextEnt++;
@ -306,8 +306,8 @@ final class OrdsSegmentTermsEnumFrame {
}
public boolean nextNonLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -374,8 +374,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -440,8 +440,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -495,8 +495,8 @@ final class OrdsSegmentTermsEnumFrame {
boolean absolute = metaDataUpto == 0;
assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt;
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
// " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@ -593,10 +593,10 @@ final class OrdsSegmentTermsEnumFrame {
// scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1;
@ -627,7 +627,7 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + OrdsSegmentTermsEnum.brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -714,8 +714,8 @@ final class OrdsSegmentTermsEnumFrame {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term));
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1;
@ -743,7 +743,8 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
ste.termExists = (code & 1) == 0;

View File

@ -210,7 +210,7 @@ public final class FieldReader extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
// BlockTreeTermsWriter.brToString(startTerm));
// ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?

View File

@ -549,19 +549,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) {

View File

@ -307,24 +307,6 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// terms index

View File

@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
@ -394,7 +395,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
}
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms);
}
@ -433,33 +434,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "TERM: " + brToString(termBytes);
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
/**
* Encodes long value to variable length byte[], in MSB order. Use {@link
* FieldReader#readMSBVLong} to decode.
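// Sketch of the general MSB-first varint idea (7-bit groups, most significant group
// first, continuation bit on every byte but the last); an illustration only, not
// necessarily the exact Lucene90 wire format:
static byte[] msbVLongSketch(long v) {
  assert v >= 0;
  int bits = 64 - Long.numberOfLeadingZeros(v);
  int groups = Math.max(1, (bits + 6) / 7);
  byte[] out = new byte[groups];
  for (int i = groups - 1; i >= 0; i--) { // fill the least-significant group last
    out[i] = (byte) (v & 0x7F);
    v >>>= 7;
  }
  for (int i = 0; i < groups - 1; i++) {
    out[i] |= (byte) 0x80; // continuation bit on all but the final byte
  }
  return out;
}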
@ -506,7 +484,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: prefix=" + brToString(prefix);
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -689,8 +667,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
// + count);
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// ToStringUtils.bytesRefToString(br) + " count=" + count);
// }
// Root block better write all remaining pending entries:
@ -843,9 +821,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);
// Write block header:
int numEntries = end - start;
@ -858,7 +837,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
/*
if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
}
*/
@ -893,7 +874,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For leaf block we write suffix straight
@ -926,7 +908,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For non-leaf block we borrow 1 bit to record
@ -968,8 +951,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// System.out.println(" write sub-block suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" +
// (startFP-block.fp) + " floor=" + block.isFloor);
// }
assert floorLeadLabel == -1
@ -1090,7 +1074,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
}
*/
@ -1143,8 +1128,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}


@ -263,8 +263,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) {
@ -286,7 +286,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -306,27 +306,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
}
/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/
@Override
public boolean seekExact(BytesRef target) throws IOException {
@ -344,8 +323,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -499,8 +479,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -531,7 +511,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -547,7 +527,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -586,7 +566,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -622,8 +602,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
// ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@ -663,9 +644,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -771,8 +752,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -808,7 +789,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term));
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -819,7 +801,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term));
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -1015,9 +998,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -1081,8 +1065,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame:
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
// + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}


@ -295,8 +295,8 @@ final class SegmentTermsEnumFrame {
}
public void nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -388,8 +388,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -531,28 +531,14 @@ final class SegmentTermsEnumFrame {
private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we
// scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -582,7 +568,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
startBytePos = suffixesReader.getPosition();
@ -647,8 +633,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(target));
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -676,7 +663,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -708,8 +696,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
// ste.termExists=" + ste.termExists);
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
// " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
// System.out.println(" now pushFrame");


@ -166,6 +166,16 @@ public final class FeatureField extends Field {
return stream;
}
/**
* This is useful if you have multiple features sharing a name and you want to deduplicate them.
*
* @return the feature value of this field.
*/
public float getFeatureValue() {
return featureValue;
}
private static final class FeatureTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
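
A hedged sketch (not part of this patch) of the deduplication that the new getFeatureValue() accessor enables; the keep-the-larger-value policy and the class name are illustrative assumptions:

import org.apache.lucene.document.FeatureField;

final class FeatureDedup {
  // One possible policy for two features sharing a feature name: keep
  // whichever carries the higher value, read via the new accessor.
  static FeatureField keepLarger(FeatureField a, FeatureField b) {
    return a.getFeatureValue() >= b.getFeatureValue() ? a : b;
  }
}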


@ -21,6 +21,7 @@ import java.io.StreamTokenizer;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@ -404,21 +405,23 @@ public class SimpleWKTShapeParser {
ENVELOPE("envelope"); // not part of the actual WKB spec
private final String shapeName;
private static final Map<String, ShapeType> shapeTypeMap = new HashMap<>();
private static final Map<String, ShapeType> shapeTypeMap;
private static final String BBOX = "BBOX";
static {
Map<String, ShapeType> shapeTypes = new HashMap<>();
for (ShapeType type : values()) {
shapeTypeMap.put(type.shapeName, type);
shapeTypes.put(type.shapeName, type);
}
shapeTypeMap.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
shapeTypes.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE);
shapeTypeMap = Collections.unmodifiableMap(shapeTypes);
}
ShapeType(String shapeName) {
this.shapeName = shapeName;
}
protected String typename() {
String typename() {
return shapeName;
}
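
The shapeTypeMap change above follows an idiom that several files in this commit adopt: populate a temporary map inside the static initializer, then publish it through Collections.unmodifiableMap so the field can be final and immutable. A minimal, self-contained sketch of the idiom (all names hypothetical):

import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

final class ImmutableEnumMapIdiom {
  enum Op { LT, LTE, GT, GTE }

  static final Map<Op, String> LABELS; // assigned exactly once below

  static {
    Map<Op, String> m = new EnumMap<>(Op.class);
    m.put(Op.LT, "<");
    m.put(Op.LTE, "<=");
    m.put(Op.GT, ">");
    m.put(Op.GTE, ">=");
    // mutation attempts on LABELS now throw UnsupportedOperationException
    LABELS = Collections.unmodifiableMap(m);
  }
}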


@ -32,7 +32,7 @@ public final class FieldInfo {
/** Internal field number */
public final int number;
private DocValuesType docValuesType = DocValuesType.NONE;
private DocValuesType docValuesType;
// True if any document indexed term vectors
private boolean storeTermVector;


@ -84,7 +84,7 @@ public class LiveIndexWriterConfig {
protected volatile int perThreadHardLimitMB;
/** True if segment flushes should use compound file format */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
protected volatile boolean useCompoundFile;
/** True if calls to {@link IndexWriter#close()} should first do a commit. */
protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;


@ -597,12 +597,12 @@ public abstract class MergePolicy {
* If the size of the merged segment exceeds this ratio of the total index size then it will
* remain in non-compound format.
*/
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
protected double noCFSRatio;
/**
* If the size of the merged segment exceeds this value then it will not use compound file format.
*/
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
protected long maxCFSSegmentSize;
/** Creates a new merge policy instance. */
protected MergePolicy() {


@ -103,7 +103,7 @@ public abstract class VectorizationProvider {
// visible for tests
static VectorizationProvider lookup(boolean testMode) {
final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 20 && runtimeVersion <= 21) {
if (runtimeVersion >= 20 && runtimeVersion <= 22) {
// is locale sane (only buggy in Java 20)
if (isAffectedByJDK8301190()) {
LOG.warning(
@ -169,9 +169,9 @@ public abstract class VectorizationProvider {
} catch (ClassNotFoundException cnfe) {
throw new LinkageError("PanamaVectorizationProvider is missing in Lucene JAR file", cnfe);
}
} else if (runtimeVersion >= 22) {
} else if (runtimeVersion >= 23) {
LOG.warning(
"You are running with Java 22 or later. To make full use of the Vector API, please update Apache Lucene.");
"You are running with Java 23 or later. To make full use of the Vector API, please update Apache Lucene.");
} else if (lookupVectorModule().isPresent()) {
LOG.warning(
"Java vector incubator module was enabled by command line flags, but your Java version is too old: "


@ -120,7 +120,7 @@ final class WANDScorer extends Scorer {
private final int scalingFactor;
// scaled min competitive score
private long minCompetitiveScore = 0;
private long minCompetitiveScore;
private final Scorer[] allScorers;


@ -89,7 +89,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
private boolean singleSort;
/** Whether this comparator is allowed to skip documents. */
private boolean canSkipDocuments = true;
private boolean canSkipDocuments;
/** Whether the collector is done with counting hits so that we can start skipping documents. */
private boolean hitsThresholdReached = false;


@ -346,7 +346,7 @@ public class MMapDirectory extends FSDirectory {
}
final var lookup = MethodHandles.lookup();
final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 19 && runtimeVersion <= 21) {
if (runtimeVersion >= 19) {
try {
final var cls = lookup.findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider");
// we use method handles, so we do not need to deal with setAccessible as we have private
@ -366,9 +366,6 @@ public class MMapDirectory extends FSDirectory {
throw new LinkageError(
"MemorySegmentIndexInputProvider is missing in Lucene JAR file", cnfe);
}
} else if (runtimeVersion >= 22) {
LOG.warning(
"You are running with Java 22 or later. To make full use of MMapDirectory, please update Apache Lucene.");
}
return new MappedByteBufferIndexInputProvider();
}


@ -130,17 +130,20 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
return false;
}
/** Interprets stored bytes as UTF8 bytes, returning the resulting string */
/**
* Interprets stored bytes as UTF-8 bytes, returning the resulting string. May throw an {@link
* AssertionError} or a {@link RuntimeException} if the data is not well-formed UTF-8.
*/
public String utf8ToString() {
final char[] ref = new char[length];
final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return new String(ref, 0, len);
}
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
/** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
StringBuilder sb = new StringBuilder(2 + 3 * length);
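// capacity: 2 bracket chars plus at most 3 chars per byte (two hex digits and a separator)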
sb.append('[');
final int end = offset + length;
for (int i = offset; i < end; i++) {


@ -253,7 +253,7 @@ public class RoaringDocIdSet extends DocIdSet {
private class Iterator extends DocIdSetIterator {
int block;
DocIdSetIterator sub = null;
DocIdSetIterator sub;
int doc;
Iterator() throws IOException {


@ -32,6 +32,10 @@ public final class ToStringUtils {
private static final char[] HEX = "0123456789abcdef".toCharArray();
/**
* Unlike {@link Long#toHexString(long)}, this returns a String with a "0x" prefix and all the
* leading zeros.
*/
public static String longHex(long x) {
char[] asHex = new char[16];
for (int i = 16; --i >= 0; x >>>= 4) {
@ -39,4 +43,31 @@ public final class ToStringUtils {
}
return "0x" + new String(asHex);
}
/**
* Builds a String with both the textual representation of the {@link BytesRef} data and the
* bytes' hex values. For example: {@code "hello [68 65 6c 6c 6f]"}. If the content is not a
* valid UTF-8 sequence, only the hex values are returned, as per {@link BytesRef#toString()}.
*/
@SuppressWarnings("unused")
public static String bytesRefToString(BytesRef b) {
if (b == null) {
return "null";
}
try {
return b.utf8ToString() + " " + b;
} catch (AssertionError | RuntimeException t) {
// If BytesRef isn't actually UTF-8, or it's e.g. a prefix of UTF-8
// that ends mid-unicode-char, we fall back to hex:
return b.toString();
}
}
public static String bytesRefToString(BytesRefBuilder b) {
return bytesRefToString(b.get());
}
public static String bytesRefToString(byte[] b) {
return bytesRefToString(new BytesRef(b));
}
}
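
A hedged usage sketch of the helpers above; the class name and sample inputs are illustrative, and the expected outputs follow the formats documented in the javadocs:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

public class ToStringUtilsDemo {
  public static void main(String[] args) {
    // "0x" prefix with all 16 hex digits, leading zeros kept: 0x00000000000000ff
    System.out.println(ToStringUtils.longHex(255L));
    // Well-formed UTF-8: text plus hex dump, e.g. lucene [6c 75 63 65 6e 65]
    System.out.println(ToStringUtils.bytesRefToString(new BytesRef("lucene")));
    // A lone continuation byte is not well-formed UTF-8; when decoding fails
    // (e.g. with assertions enabled) only the hex dump is returned: [80]
    System.out.println(ToStringUtils.bytesRefToString(new BytesRef(new byte[] {(byte) 0x80})));
  }
}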


@ -133,10 +133,17 @@ public final class Version {
/**
* Match settings and bugs in Lucene's 9.10.0 release.
*
* @deprecated Use latest
* @deprecated (9.11.0) Use latest
*/
@Deprecated public static final Version LUCENE_9_10_0 = new Version(9, 10, 0);
/**
* Match settings and bugs in Lucene's 9.11.0 release.
*
* @deprecated Use latest
*/
@Deprecated public static final Version LUCENE_9_11_0 = new Version(9, 11, 0);
/**
* Match settings and bugs in Lucene's 10.0.0 release.
*


@ -31,7 +31,7 @@ import org.apache.lucene.util.IntsRef;
*/
public class LimitedFiniteStringsIterator extends FiniteStringsIterator {
/** Maximum number of finite strings to create. */
private int limit = Integer.MAX_VALUE;
private final int limit;
/** Number of generated finite strings. */
private int count = 0;


@ -108,10 +108,16 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces
if (this.curSegment == null) {
return new AlreadyClosedException("Already closed: " + this);
}
// ISE can be thrown by MemorySegment and contains "closed" in message:
// in Java 22 or later we can check the isAlive status of all segments
// (see https://bugs.openjdk.org/browse/JDK-8310644):
if (Arrays.stream(segments).allMatch(s -> s.scope().isAlive()) == false) {
return new AlreadyClosedException("Already closed: " + this);
}
// fallback for Java 21: ISE can be thrown by MemorySegment and contains "closed" in message:
if (e instanceof IllegalStateException
&& e.getMessage() != null
&& e.getMessage().contains("closed")) {
// the check is on message only, so preserve original cause for debugging:
return new AlreadyClosedException("Already closed: " + this, e);
}
// otherwise rethrow unmodified NPE/ISE (as it is possibly a bug with passing a null parameter to
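
A hedged, standalone sketch of the liveness check the new code performs, using the java.lang.foreign API available since Java 21; the class name and allocation size are illustrative:

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;

public class ScopeLivenessDemo {
  public static void main(String[] args) {
    Arena arena = Arena.ofConfined();
    MemorySegment segment = arena.allocate(16);
    System.out.println(segment.scope().isAlive()); // true while the arena is open
    arena.close();
    System.out.println(segment.scope().isAlive()); // false once closed, like a closed input
  }
}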


@ -33,7 +33,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
public MemorySegmentIndexInputProvider() {
var log = Logger.getLogger(getClass().getName());
log.info(
"Using MemorySegmentIndexInput with Java 21; to disable start with -D"
"Using MemorySegmentIndexInput with Java 21 or later; to disable start with -D"
+ MMapDirectory.ENABLE_MEMORY_SEGMENTS_SYSPROP
+ "=false");
}


@ -154,6 +154,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
IndexWriter writer =
new IndexWriter(
directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp));
TestUtil.reduceOpenFiles(writer);
Document doc = new Document();
Field idField = newStringField("id", "", Field.Store.YES);
@ -779,6 +780,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(NoMergePolicy.INSTANCE);
iwc.setMaxBufferedDocs(2);
iwc.setUseCompoundFile(true); // reduce open files
IndexWriter w = new IndexWriter(dir, iwc);
int numDocs = TEST_NIGHTLY ? 1000 : 100;
for (int i = 0; i < numDocs; i++) {


@ -67,7 +67,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
assertAllBetween(last2, j, bd2, ids);
last2 = j + 1;
}
assertEquals(j + 1, queue.numGlobalTermDeletes());
assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
}
assertEquals(uniqueValues, bd1.deleteTerms.keySet());
assertEquals(uniqueValues, bd2.deleteTerms.keySet());


@ -258,6 +258,7 @@ public class TestIndexWriterThreadsToSegments extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig(r, new MockAnalyzer(r));
iwc.setCommitOnClose(false);
final RandomIndexWriter w = new RandomIndexWriter(r, dir, iwc);
TestUtil.reduceOpenFiles(w.w);
w.setDoRandomForceMerge(false);
Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 30)];
final CountDownLatch startingGun = new CountDownLatch(1);


@ -48,9 +48,9 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
public void testCorrectImplementation() {
final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 19 && runtimeVersion <= 21) {
if (runtimeVersion >= 19) {
assertTrue(
"on Java 19, 20, and 21 we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
"on Java 19 or later we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
isMemorySegmentImpl());
} else {
assertSame(MappedByteBufferIndexInputProvider.class, MMapDirectory.PROVIDER.getClass());


@ -820,7 +820,7 @@ public final class JavascriptCompiler {
*/
public static final Map<String, MethodHandle> DEFAULT_FUNCTIONS = loadDefaultFunctions();
private static final Map<String, MethodHandle> loadDefaultFunctions() {
private static Map<String, MethodHandle> loadDefaultFunctions() {
final Map<String, MethodHandle> map = new HashMap<>();
final Lookup publicLookup = MethodHandles.publicLookup();
try {
@ -852,7 +852,7 @@ public final class JavascriptCompiler {
} catch (ReflectiveOperationException | IOException e) {
throw new Error("Cannot resolve function", e);
}
return Map.copyOf(map);
return Collections.unmodifiableMap(map);
}
/** Check Method signature for compatibility. */


@ -123,7 +123,7 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
private final PostingsEnum postingsEnum; // with offsets
private final int freq;
private int posCounter = -1;
private int posCounter;
public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException {
this.term = Objects.requireNonNull(term);


@ -23,6 +23,9 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
@ -208,21 +211,23 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
IndexSearcher searcher = new IndexSearcher(reader);
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter);
assertScorerResults(searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"});
assertScorerResults(
searcher, query, new float[] {1f, 1f / 51f}, new String[] {"2", "7"}, 2);
query = getParentJoinKnnQuery("field", new float[] {6, 6}, null, 3, parentFilter);
assertScorerResults(
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"});
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);
query =
getParentJoinKnnQuery(
"field", new float[] {6, 6}, new MatchAllDocsQuery(), 20, parentFilter);
assertScorerResults(
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"});
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 2);
query =
getParentJoinKnnQuery(
"field", new float[] {6, 6}, new MatchAllDocsQuery(), 1, parentFilter);
assertScorerResults(searcher, query, new float[] {1f / 3f}, new String[] {"5"});
assertScorerResults(
searcher, query, new float[] {1f / 3f, 1f / 3f}, new String[] {"5", "7"}, 1);
}
}
}
@ -324,7 +329,8 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
assertEquals(expectedId, actualId);
}
void assertScorerResults(IndexSearcher searcher, Query query, float[] scores, String[] ids)
void assertScorerResults(
IndexSearcher searcher, Query query, float[] possibleScores, String[] possibleIds, int count)
throws IOException {
IndexReader reader = searcher.getIndexReader();
Query rewritten = query.rewrite(searcher);
@ -334,11 +340,16 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
assertEquals(-1, scorer.docID());
expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
DocIdSetIterator it = scorer.iterator();
for (int i = 0; i < scores.length; i++) {
Map<String, Float> idToScore =
IntStream.range(0, possibleIds.length)
.boxed()
.collect(Collectors.toMap(i -> possibleIds[i], i -> possibleScores[i]));
for (int i = 0; i < count; i++) {
int docId = it.nextDoc();
assertNotEquals(NO_MORE_DOCS, docId);
assertEquals(scores[i], scorer.score(), 0.0001);
assertIdMatches(reader, ids[i], docId);
String actualId = reader.storedFields().document(docId).get("id");
assertTrue(idToScore.containsKey(actualId));
assertEquals(idToScore.get(actualId), scorer.score(), 0.0001);
}
}
}


@ -81,7 +81,8 @@ public class TestParentBlockJoinFloatKnnVectorQuery extends ParentBlockJoinKnnVe
float score1 =
(float) ((1 + (2 * 2 + 3 * 4) / Math.sqrt((2 * 2 + 3 * 3) * (2 * 2 + 4 * 4))) / 2);
assertScorerResults(searcher, query, new float[] {score0, score1}, new String[] {"1", "2"});
assertScorerResults(
searcher, query, new float[] {score0, score1}, new String[] {"1", "2"}, 2);
}
}
}


@ -239,7 +239,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
}
// Test data - format is artist, song, weeks at top of charts
private static String[] hitsOfThe60s = {
private static final String[] hitsOfThe60s = {
"1966\tSPENCER DAVIS GROUP\tKEEP ON RUNNING\t1",
"1966\tOVERLANDERS\tMICHELLE\t3",
"1966\tNANCY SINATRA\tTHESE BOOTS ARE MADE FOR WALKIN'\t4",
@ -317,7 +317,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
"1969\tARCHIES\tSUGAR, SUGAR\t4"
};
private static final Map<String, Record> parsedRecords = new HashMap<String, Record>();
private static final Map<String, Record> parsedRecords = new HashMap<>();
private Directory dir;
private IndexReader reader;
private IndexSearcher searcher;
@ -452,7 +452,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
private int getMaxNumRecordsPerArtist(ScoreDoc[] sd) throws IOException {
int result = 0;
HashMap<String, Integer> artistCounts = new HashMap<String, Integer>();
HashMap<String, Integer> artistCounts = new HashMap<>();
for (int i = 0; i < sd.length; i++) {
Document doc = reader.storedFields().document(sd[i].doc);
Record record = parsedRecords.get(doc.get("id"));


@ -17,7 +17,9 @@
package org.apache.lucene.queries.payloads;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
import org.apache.lucene.util.ArrayUtil;
@ -30,32 +32,45 @@ import org.apache.lucene.util.BytesRef;
*/
public class PayloadMatcherFactory {
private static final EnumMap<PayloadType, EnumMap<MatchOperation, PayloadMatcher>>
private static final Map<PayloadType, Map<MatchOperation, PayloadMatcher>>
payloadCheckerOpTypeMap;
static {
payloadCheckerOpTypeMap = new EnumMap<>(PayloadType.class);
// ints
EnumMap<MatchOperation, PayloadMatcher> intCheckers = new EnumMap<>(MatchOperation.class);
intCheckers.put(MatchOperation.LT, new LTIntPayloadMatcher());
intCheckers.put(MatchOperation.LTE, new LTEIntPayloadMatcher());
intCheckers.put(MatchOperation.GT, new GTIntPayloadMatcher());
intCheckers.put(MatchOperation.GTE, new GTEIntPayloadMatcher());
EnumMap<MatchOperation, PayloadMatcher> floatCheckers = new EnumMap<>(MatchOperation.class);
floatCheckers.put(MatchOperation.LT, new LTFloatPayloadMatcher());
floatCheckers.put(MatchOperation.LTE, new LTEFloatPayloadMatcher());
floatCheckers.put(MatchOperation.GT, new GTFloatPayloadMatcher());
floatCheckers.put(MatchOperation.GTE, new GTEFloatPayloadMatcher());
Map<MatchOperation, PayloadMatcher> intCheckers =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
MatchOperation.LT, new LTIntPayloadMatcher(),
MatchOperation.LTE, new LTEIntPayloadMatcher(),
MatchOperation.GT, new GTIntPayloadMatcher(),
MatchOperation.GTE, new GTEIntPayloadMatcher())));
// floats
Map<MatchOperation, PayloadMatcher> floatCheckers =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
MatchOperation.LT, new LTFloatPayloadMatcher(),
MatchOperation.LTE, new LTEFloatPayloadMatcher(),
MatchOperation.GT, new GTFloatPayloadMatcher(),
MatchOperation.GTE, new GTEFloatPayloadMatcher())));
// strings
EnumMap<MatchOperation, PayloadMatcher> stringCheckers = new EnumMap<>(MatchOperation.class);
stringCheckers.put(MatchOperation.LT, new LTStringPayloadMatcher());
stringCheckers.put(MatchOperation.LTE, new LTEStringPayloadMatcher());
stringCheckers.put(MatchOperation.GT, new GTStringPayloadMatcher());
stringCheckers.put(MatchOperation.GTE, new GTEStringPayloadMatcher());
Map<MatchOperation, PayloadMatcher> stringCheckers =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
MatchOperation.LT, new LTStringPayloadMatcher(),
MatchOperation.LTE, new LTEStringPayloadMatcher(),
MatchOperation.GT, new GTStringPayloadMatcher(),
MatchOperation.GTE, new GTEStringPayloadMatcher())));
// load the matcher maps per payload type
payloadCheckerOpTypeMap.put(PayloadType.INT, intCheckers);
payloadCheckerOpTypeMap.put(PayloadType.FLOAT, floatCheckers);
payloadCheckerOpTypeMap.put(PayloadType.STRING, stringCheckers);
payloadCheckerOpTypeMap =
Collections.unmodifiableMap(
new EnumMap<>(
Map.of(
PayloadType.INT, intCheckers,
PayloadType.FLOAT, floatCheckers,
PayloadType.STRING, stringCheckers)));
}
/**
@ -75,7 +90,7 @@ public class PayloadMatcherFactory {
return new EQPayloadMatcher();
}
// otherwise, we need to pay attention to the payload type and operation
EnumMap<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
Map<MatchOperation, PayloadMatcher> opMap = payloadCheckerOpTypeMap.get(payloadType);
if (opMap != null) {
return opMap.get(op);
} else {


@ -269,10 +269,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
MatchOperation.GT);
checkHits(
stringGT2,
new int[] { // spotless:off
alignedIntArray(
"""
155, 255, 355, 455, 555, 655, 755, 855, 955,
1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
}); // spotless:on
"""));
SpanQuery stringGTE2 =
new SpanPayloadCheckQuery(
new SpanNearQuery(new SpanQuery[] {termFifty, termFive}, 0, true),
@ -281,10 +282,11 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
MatchOperation.GTE);
checkHits(
stringGTE2,
new int[] { // spotless:off
alignedIntArray(
"""
55, 155, 255, 355, 455, 555, 655, 755, 855, 955,
1055, 1155, 1255, 1355, 1455, 1555, 1655, 1755, 1855, 1955
}); // spotless:on
"""));
SpanQuery stringLT2 =
new SpanPayloadCheckQuery(
@ -306,6 +308,23 @@ public class TestPayloadCheckQuery extends LuceneTestCase {
// sets "upto" back to zero between SpanOrQuery subclauses.
}
/**
* Parses a comma-separated array of integers, ignoring any whitespace around them. This allows
* the integers to be aligned arbitrarily in the source string, conveying additional information
* about how they relate to one another. For example:
*
* <pre>{@code
* var ints =
* """
* 1, 2, 3,
* 11, 12, 13
* """
* }</pre>
*/
private static int[] alignedIntArray(String ints) {
return Arrays.stream(ints.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray();
}
public void testUnorderedPayloadChecks() throws Exception {
SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));


@ -30,7 +30,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
*/
public class BoostQueryNode extends QueryNodeImpl {
private float value = 0;
private float value;
/**
* Constructs a boost node


@ -84,7 +84,7 @@ public class ModifierQueryNode extends QueryNodeImpl {
}
}
private Modifier modifier = Modifier.MOD_NONE;
private Modifier modifier;
/**
* Used to store the modifier value on the original query string


@ -25,9 +25,9 @@ import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
*/
public class OpaqueQueryNode extends QueryNodeImpl {
private CharSequence schema = null;
private CharSequence schema;
private CharSequence value = null;
private CharSequence value;
/**
* @param schema - schema identifier


@ -41,7 +41,7 @@ public class PathQueryNode extends QueryNodeImpl {
/** Term text with a beginning and end position */
public static class QueryText implements Cloneable {
CharSequence value = null;
CharSequence value;
/** The term's begin position. */
int begin;
@ -97,7 +97,7 @@ public class PathQueryNode extends QueryNodeImpl {
}
}
private List<QueryText> values = null;
private List<QueryText> values;
/**
* @param pathElements - List of QueryText objects


@ -25,7 +25,7 @@ import org.apache.lucene.search.PhraseQuery; // javadocs
/** Query node for {@link PhraseQuery}'s slop factor. */
public class PhraseSlopQueryNode extends QueryNodeImpl implements FieldableNode {
private int value = 0;
private int value;
/**
* @exception QueryNodeError throw in overridden method to disallow


@ -57,9 +57,9 @@ public class ProximityQueryNode extends BooleanQueryNode {
/** utility class containing the distance condition and number */
public static class ProximityType {
int pDistance = 0;
int pDistance;
Type pType = null;
Type pType;
public ProximityType(Type type) {
this(type, 0);
@ -71,10 +71,10 @@ public class ProximityQueryNode extends BooleanQueryNode {
}
}
private Type proximityType = Type.SENTENCE;
private Type proximityType;
private int distance = -1;
private boolean inorder = false;
private CharSequence field = null;
private final boolean inorder;
private CharSequence field;
/**
* @param clauses - QueryNode children


@ -32,7 +32,7 @@ import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
*/
public class SlopQueryNode extends QueryNodeImpl implements FieldableNode {
private int value = 0;
private int value;
/**
* @param query - QueryNode Tree with the phrase


@ -32,10 +32,11 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
@Override
public String toString() {
if (getChildren() == null || getChildren().size() == 0) return "<tokenizedphrase/>";
List<QueryNode> children = getChildren();
if (children == null || children.isEmpty()) return "<tokenizedphrase/>";
StringBuilder sb = new StringBuilder();
sb.append("<tokenizedtphrase>");
for (QueryNode child : getChildren()) {
sb.append("<tokenizedphrase>");
for (QueryNode child : children) {
sb.append("\n");
sb.append(child.toString());
}
@ -46,16 +47,15 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
// This text representation is not re-parseable
@Override
public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
if (getChildren() == null || getChildren().size() == 0) return "";
List<QueryNode> children = getChildren();
if (children == null || children.isEmpty()) return "";
StringBuilder sb = new StringBuilder();
String filler = "";
for (QueryNode child : getChildren()) {
for (QueryNode child : children) {
sb.append(filler).append(child.toQueryString(escapeSyntaxParser));
filler = ",";
}
return "[TP[" + sb.toString() + "]]";
return "[TP[" + sb + "]]";
}
@Override
@ -70,27 +70,25 @@ public class TokenizedPhraseQueryNode extends QueryNodeImpl implements Fieldable
@Override
public CharSequence getField() {
List<QueryNode> children = getChildren();
if (children == null || children.size() == 0) {
return null;
} else {
return ((FieldableNode) children.get(0)).getField();
if (children != null) {
for (QueryNode child : children) {
if (child instanceof FieldableNode) {
return ((FieldableNode) child).getField();
}
}
}
return null;
}
@Override
public void setField(CharSequence fieldName) {
List<QueryNode> children = getChildren();
if (children != null) {
for (QueryNode child : getChildren()) {
for (QueryNode child : children) {
if (child instanceof FieldableNode) {
((FieldableNode) child).setField(fieldName);
}
}
}
}
} // end class MultitermQueryNode
}


@ -34,7 +34,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
*/
public class FieldBoostMapFCListener implements FieldConfigListener {
private QueryConfigHandler config = null;
private final QueryConfigHandler config;
public FieldBoostMapFCListener(QueryConfigHandler config) {
this.config = config;


@ -36,7 +36,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfi
*/
public class FieldDateResolutionFCListener implements FieldConfigListener {
private QueryConfigHandler config = null;
private final QueryConfigHandler config;
public FieldDateResolutionFCListener(QueryConfigHandler config) {
this.config = config;


@ -30,6 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util;
@ -175,8 +176,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
@ -197,7 +198,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -222,19 +223,6 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
return seekExact(target, 0);
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
/** Get the version of the currently seek'd term; only valid if we are positioned. */
public long getVersion() {
return ((IDVersionTermState) currentFrame.state).idVersion;
@ -258,8 +246,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current="
// + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
// minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -460,8 +449,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength + " termExists=" + termExists);
// }
@ -492,7 +481,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -520,10 +509,11 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// termExists = false;
// }
// if (DEBUG) {
// System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + "
// targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion +
// " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " +
// currentFrame.fp + " termExists=" + termExists);
// System.out.println(" FAST version NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
// " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
// validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
// " termExists=" + termExists);
// }
return false;
}
@ -553,7 +543,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -604,7 +594,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -656,8 +646,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix= " + validIndexPrefix);
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState();
// }
@ -700,9 +690,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -814,8 +804,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -850,7 +840,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -861,7 +852,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -946,7 +938,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms="
+ f.hasTerms
@ -974,7 +966,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ " nextEnt="
+ f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@ -1063,9 +1055,10 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
// " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
@ -1129,8 +1122,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
// currentFrame.ord=" + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}
