Merge branch 'main' into java_21

This commit is contained in:
ChrisHegarty 2023-11-03 20:33:26 +00:00
commit ef1db18096
67 changed files with 656 additions and 555 deletions

View File

@ -27,6 +27,7 @@ import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
/**
* Split the Reuters SGML documents into Simple Text files containing:
@ -44,9 +45,10 @@ public class ExtractReuters {
public void extract() throws IOException {
long count = 0;
Files.createDirectories(outputDir);
if (Files.list(outputDir).count() > 0) {
throw new IOException("The output directory must be empty: " + outputDir);
try(Stream<Path> files = Files.list(outputDir)) {
if (files.count() > 0) {
throw new IOException("The output directory must be empty: " + outputDir);
}
}
try (DirectoryStream<Path> stream = Files.newDirectoryStream(reutersDir, "*.sgm")) {

View File

@ -63,7 +63,6 @@ except:
import scriptutil
from consolemenu import ConsoleMenu
from consolemenu.items import FunctionItem, SubmenuItem, ExitItem
from consolemenu.screen import Screen
from scriptutil import BranchType, Version, download, run
# Lucene-to-Java version mapping
@ -654,8 +653,8 @@ class TodoGroup(SecretYamlObject):
return "%s%s (%d/%d)" % (prefix, self.title, self.num_done(), self.num_applies())
def get_submenu(self):
menu = UpdatableConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
screen=MyScreen())
menu = ConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
clear_screen=False)
menu.exit_item = CustomExitItem("Return")
for todo in self.get_todos():
if todo.applies(state.release_type):
@ -663,7 +662,7 @@ class TodoGroup(SecretYamlObject):
return menu
def get_menu_item(self):
item = UpdatableSubmenuItem(self.get_title, self.get_submenu())
item = SubmenuItem(self.get_title, self.get_submenu())
return item
def get_todos(self):
@ -820,7 +819,7 @@ class Todo(SecretYamlObject):
print("ERROR while executing todo %s (%s)" % (self.get_title(), e))
def get_menu_item(self):
return UpdatableFunctionItem(self.get_title, self.display_and_confirm)
return FunctionItem(self.get_title, self.display_and_confirm)
def clone(self):
clone = Todo(self.id, self.title, description=self.description)
@ -1234,104 +1233,6 @@ def pause(fun=None):
input("\nPress ENTER to continue...")
# Custom classes for ConsoleMenu, to make menu texts dynamic
# Needed until https://github.com/aegirhall/console-menu/pull/25 is released
# See https://pypi.org/project/console-menu/ for other docs
class UpdatableConsoleMenu(ConsoleMenu):
def __repr__(self):
return "%s: %s. %d items" % (self.get_title(), self.get_subtitle(), len(self.items))
def draw(self):
"""
Refreshes the screen and redraws the menu. Should be called whenever something changes that needs to be redrawn.
"""
self.screen.printf(self.formatter.format(title=self.get_title(), subtitle=self.get_subtitle(), items=self.items,
prologue_text=self.get_prologue_text(), epilogue_text=self.get_epilogue_text()))
# Getters to get text in case method reference
def get_title(self):
return self.title() if callable(self.title) else self.title
def get_subtitle(self):
return self.subtitle() if callable(self.subtitle) else self.subtitle
def get_prologue_text(self):
return self.prologue_text() if callable(self.prologue_text) else self.prologue_text
def get_epilogue_text(self):
return self.epilogue_text() if callable(self.epilogue_text) else self.epilogue_text
class UpdatableSubmenuItem(SubmenuItem):
def __init__(self, text, submenu, menu=None, should_exit=False):
"""
:ivar ConsoleMenu self.submenu: The submenu to be opened when this item is selected
"""
super(UpdatableSubmenuItem, self).__init__(text=text, menu=menu, should_exit=should_exit, submenu=submenu)
if menu:
self.get_submenu().parent = menu
def show(self, index):
return "%2d - %s" % (index + 1, self.get_text())
# Getters to get text in case method reference
def get_text(self):
return self.text() if callable(self.text) else self.text
def set_menu(self, menu):
"""
Sets the menu of this item.
Should be used instead of directly accessing the menu attribute for this class.
:param ConsoleMenu menu: the menu
"""
self.menu = menu
self.get_submenu().parent = menu
def action(self):
"""
This class overrides this method
"""
self.get_submenu().start()
def clean_up(self):
"""
This class overrides this method
"""
self.get_submenu().join()
self.menu.clear_screen()
self.menu.resume()
def get_return(self):
"""
:return: The returned value in the submenu
"""
return self.get_submenu().returned_value
def get_submenu(self):
"""
We unwrap the submenu variable in case it is a reference to a method that returns a submenu
"""
return self.submenu if not callable(self.submenu) else self.submenu()
class UpdatableFunctionItem(FunctionItem):
def show(self, index):
return "%2d - %s" % (index + 1, self.get_text())
# Getters to get text in case method reference
def get_text(self):
return self.text() if callable(self.text) else self.text
class MyScreen(Screen):
def clear(self):
return
class CustomExitItem(ExitItem):
def show(self, index):
return super(CustomExitItem, self).show(index)
@ -1346,6 +1247,13 @@ def main():
global templates
print("Lucene releaseWizard v%s" % getScriptVersion())
try:
ConsoleMenu(clear_screen=True)
except Exception as e:
sys.exit("You need to install 'consolemenu' package version 0.7.1 for the Wizard to function. Please run 'pip "
"install -r requirements.txt'")
c = parse_config()
if c.dry:
@ -1402,18 +1310,18 @@ def main():
lucene_news_file = os.path.join(state.get_website_git_folder(), 'content', 'core', 'core_news',
"%s-%s-available.md" % (state.get_release_date_iso(), state.release_version.replace(".", "-")))
main_menu = UpdatableConsoleMenu(title="Lucene ReleaseWizard",
main_menu = ConsoleMenu(title="Lucene ReleaseWizard",
subtitle=get_releasing_text,
prologue_text="Welcome to the release wizard. From here you can manage the process including creating new RCs. "
"All changes are persisted, so you can exit any time and continue later. Make sure to read the Help section.",
epilogue_text="® 2022 The Lucene project. Licensed under the Apache License 2.0\nScript version v%s)" % getScriptVersion(),
screen=MyScreen())
clear_screen=False)
todo_menu = UpdatableConsoleMenu(title=get_releasing_text,
todo_menu = ConsoleMenu(title=get_releasing_text,
subtitle=get_subtitle,
prologue_text=None,
epilogue_text=None,
screen=MyScreen())
clear_screen=False)
todo_menu.exit_item = CustomExitItem("Return")
for todo_group in state.todo_groups:
@ -1422,14 +1330,14 @@ def main():
menu_item.set_menu(todo_menu)
todo_menu.append_item(menu_item)
main_menu.append_item(UpdatableSubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
main_menu.append_item(UpdatableFunctionItem(get_start_new_rc_menu_title, start_new_rc))
main_menu.append_item(UpdatableFunctionItem('Clear and restart current RC', state.clear_rc))
main_menu.append_item(UpdatableFunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
main_menu.append_item(UpdatableFunctionItem('Start release for a different version', release_other_version))
main_menu.append_item(UpdatableFunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
# main_menu.append_item(UpdatableFunctionItem('Dump YAML', dump_yaml))
main_menu.append_item(UpdatableFunctionItem('Help', help))
main_menu.append_item(SubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
main_menu.append_item(FunctionItem(get_start_new_rc_menu_title, start_new_rc))
main_menu.append_item(FunctionItem('Clear and restart current RC', state.clear_rc))
main_menu.append_item(FunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
main_menu.append_item(FunctionItem('Start release for a different version', release_other_version))
main_menu.append_item(FunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
# main_menu.append_item(FunctionItem('Dump YAML', dump_yaml))
main_menu.append_item(FunctionItem('Help', help))
main_menu.show()

View File

@ -521,7 +521,7 @@ groups:
addition wait a couple more days? Merges of bug fixes into the branch
may become more difficult.
* Only Github issues with Milestone {{ release_version_major }}.{{ release_version_minor }}
and priority "Blocker" will delay a release candidate build.
will delay a release candidate build.
----
types:
- major
@ -979,8 +979,8 @@ groups:
title: Publish docs, changes and javadocs
description: |
Ensure your refrigerator has at least 2 beers - the svn import operation can take a while,
depending on your upload bandwidth. We'll publish this directly to the production tree.
At the end of the task, the two links below shall work.
depending on your upload bandwidth. We'll publish this directly to the production tree. At
the end of the task, the two links below shall work.
links:
- http://lucene.apache.org/core/{{ version }}
vars:
@ -1126,12 +1126,18 @@ groups:
comment: Push all changes
logfile: push-website.log
post_description: |
Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3
and view the staged site at https://lucene.staged.apache.org
Verify that correct links and versions are mentioned in download pages, download buttons etc.
If you find anything wrong, then commit and push any changes and check again.
Next step is to merge the changes to branch 'production' in order to publish the site.
Wait a few minutes for the build to happen. You can follow the site build at
https://ci2.apache.org/#/builders/3 and view the staged site at
https://lucene.staged.apache.org Verify that correct links and versions are mentioned in
download pages, download buttons etc. If you find anything wrong, then commit and push any
changes and check again. You may find that the publish fails, leaving a directory listing
instead a beautiful website. If this happens, check the "builder" link and click through into
its details to find possible error messages produced by the website publication process. You
may have produced malformed Markdown. Or the website publish may just fail for some reason out
of your control. If this happens, you can attempt to retrigger the publishing with some
innocuous changes. Next step is to merge the changes to branch 'production' in order to
publish the site. Before doing this, you may want to replenish your stock of beers, or get
stronger stuff.
links:
- https://ci2.apache.org/#/builders/3
- https://lucene.staged.apache.org
@ -1159,7 +1165,8 @@ groups:
post_description: |
Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3
Verify on https://lucene.apache.org that the site is OK.
Verify on https://lucene.apache.org that the site is OK. It really should be, but see staging
site publication instructions for possible debugging/recovery options if it is not.
You can now also verify that http://lucene.apache.org/core/api/core/ redirects to the latest version
links:

View File

@ -1,8 +1,8 @@
six>=1.11.0
Jinja2>=2.10.1
PyYAML>=5.1
holidays>=0.9.10
ics>=0.4
console-menu>=0.5.1
PyGithub
jira
six~=1.16.0
Jinja2~=3.1.1
PyYAML~=6.0
holidays~=0.16
ics~=0.7.2
console-menu~=0.7.1
PyGithub~=1.56
jira~=3.4.1

View File

@ -67,6 +67,12 @@ allprojects {
tasks.named(sourceSet.getCompileJavaTaskName()).configure({ JavaCompile task ->
task.dependsOn modularPaths.compileModulePathConfiguration
// GH-12742: add the modular path as inputs so that if anything changes, the task
// is not up to date and is re-run. I [dw] believe this should be a @Classpath parameter
// on the task itself... but I don't know how to implement this on an existing class.
// this is a workaround but should work just fine though.
task.inputs.files(modularPaths.compileModulePathConfiguration)
// LUCENE-10327: don't allow gradle to emit an empty sourcepath as it would break
// compilation of modules.
task.options.setSourcepath(sourceSet.java.sourceDirectories)

View File

@ -67,7 +67,7 @@ allprojects {
// seed, repetition and amplification.
[propName: 'tests.seed', value: { -> rootSeed }, description: "Sets the master randomization seed."],
[propName: 'tests.iters', value: null, description: "Duplicate (re-run) each test case N times."],
[propName: 'tests.multiplier', value: 1, description: "Value multiplier for randomized tests."],
[propName: 'tests.multiplier', value: null, description: "Value multiplier for randomized tests."],
[propName: 'tests.maxfailures', value: null, description: "Skip tests after a given number of failures."],
[propName: 'tests.timeoutSuite', value: null, description: "Timeout (in millis) for an entire suite."],
[propName: 'tests.failfast', value: "false", description: "Stop the build early on failure.", buildOnly: true],

View File

@ -62,9 +62,11 @@ API Changes
* GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera)
* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
of the two (Anh Dung Bui)
* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
New Features
---------------------
@ -208,6 +210,9 @@ Improvements
* GITHUB#12689: TaskExecutor to cancel all tasks on exception to avoid needless computation. (Luca Cavanna)
* GITHUB#12754: Refactor lookup of Hotspot VM options and do not initialize constants with NULL
if SecurityManager prevents access. (Uwe Schindler)
Optimizations
---------------------
* GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)
@ -251,6 +256,11 @@ Optimizations
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
specialized bulk scorer. (Adrien Grand)
* GITHUB#1052: Faster merging of terms enums. (Adrien Grand)
* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)
Changes in runtime behavior
---------------------
@ -278,7 +288,14 @@ Bug Fixes
Build
---------------------
* GITHUB#12752: tests.multiplier could be omitted in test failure reproduce lines (esp. in
nightly mode). (Dawid Weiss)
* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
* GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
* GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden)
Other

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.dict;
package org.apache.lucene.analysis.util;
import java.util.ArrayList;
import java.util.regex.Matcher;
@ -69,7 +69,7 @@ public final class CSVUtil {
return new String[0];
}
return result.toArray(new String[result.size()]);
return result.toArray(new String[0]);
}
private static String unQuoteUnEscape(String original) {
@ -83,7 +83,7 @@ public final class CSVUtil {
}
// Unescape
if (result.indexOf(ESCAPED_QUOTE) >= 0) {
if (result.contains(ESCAPED_QUOTE)) {
result = result.replace(ESCAPED_QUOTE, "\"");
}
}

View File

@ -14,10 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja;
package org.apache.lucene.analysis.util;
import java.io.IOException;
import org.apache.lucene.analysis.ja.dict.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
/*

View File

@ -156,19 +156,19 @@ public class GenerateUTR30DataFiles {
}
private static void getNFKCDataFilesFromIcuProject(String releaseTag) throws IOException {
URI icuTagsURL = URI.create(ICU_GIT_TAG_URL + "/");
URI icuReleaseTagURL = icuTagsURL.resolve(releaseTag + "/");
URI norm2url = icuReleaseTagURL.resolve(ICU_DATA_NORM2_PATH + "/");
URI icuTagsURI = URI.create(ICU_GIT_TAG_URL + "/");
URI icuReleaseTagURI = icuTagsURI.resolve(releaseTag + "/");
URI norm2uri = icuReleaseTagURI.resolve(ICU_DATA_NORM2_PATH + "/");
System.err.print("Downloading " + NFKC_TXT + " ... ");
download(norm2url.resolve(NFKC_TXT), NFKC_TXT);
download(norm2uri.resolve(NFKC_TXT), NFKC_TXT);
System.err.println("done.");
System.err.print("Downloading " + NFKC_CF_TXT + " ... ");
download(norm2url.resolve(NFKC_CF_TXT), NFKC_CF_TXT);
download(norm2uri.resolve(NFKC_CF_TXT), NFKC_CF_TXT);
System.err.println("done.");
System.err.print("Downloading " + NFKC_CF_TXT + " and making diacritic rules one-way ... ");
URLConnection connection = openConnection(norm2url.resolve(NFC_TXT).toURL());
URLConnection connection = openConnection(norm2uri.resolve(NFC_TXT).toURL());
try (BufferedReader reader =
new BufferedReader(
new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));

View File

@ -28,6 +28,7 @@ import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;

View File

@ -25,6 +25,7 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";

View File

@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.analysis.morph.Dictionary;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;

View File

@ -19,6 +19,8 @@ package org.apache.lucene.analysis.ja.dict;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR;
import org.apache.lucene.analysis.util.CSVUtil;
/** Morphological information for user dictionary. */
final class UserMorphData implements JaMorphData {
public static final int WORD_COST = -100000;

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ja.dict;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;

View File

@ -1,93 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ko.dict;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Utility class for parsing CSV text */
public final class CSVUtil {
private static final char QUOTE = '"';
private static final char COMMA = ',';
private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$");
private static final String ESCAPED_QUOTE = "\"\"";
private CSVUtil() {} // no instance!!!
/**
* Parse CSV line
*
* @param line line containing csv-encoded data
* @return Array of values
*/
public static String[] parse(String line) {
boolean insideQuote = false;
ArrayList<String> result = new ArrayList<>();
int quoteCount = 0;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < line.length(); i++) {
char c = line.charAt(i);
if (c == QUOTE) {
insideQuote = !insideQuote;
quoteCount++;
}
if (c == COMMA && !insideQuote) {
String value = sb.toString();
value = unQuoteUnEscape(value);
result.add(value);
sb.setLength(0);
continue;
}
sb.append(c);
}
result.add(sb.toString());
// Validate
if (quoteCount % 2 != 0) {
return new String[0];
}
return result.toArray(new String[0]);
}
private static String unQuoteUnEscape(String original) {
String result = original;
// Unquote
if (result.indexOf('\"') >= 0) {
Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
if (m.matches()) {
result = m.group(1);
}
// Unescape
if (result.contains(ESCAPED_QUOTE)) {
result = result.replace(ESCAPED_QUOTE, "\"");
}
}
return result;
}
}

View File

@ -28,6 +28,7 @@ import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;

View File

@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;

View File

@ -25,6 +25,7 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ko.dict;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;

View File

@ -92,7 +92,7 @@ public final class PForUtil {
out.writeBytes(exceptions, exceptions.length);
}
/** Decode 128 integers into {@code ints}. */
/** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;

View File

@ -24,8 +24,14 @@ import org.openjdk.jmh.annotations.*;
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 5, time = 3)
// first iteration is complete garbage, so make sure we really warmup
@Warmup(iterations = 4, time = 1)
// real iterations. not useful to spend tons of time here, better to fork more
@Measurement(iterations = 5, time = 1)
// engage some noise reduction
@Fork(
value = 3,
jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
public class VectorUtilBenchmark {
private byte[] bytesA;
@ -36,7 +42,7 @@ public class VectorUtilBenchmark {
@Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
int size;
@Setup(Level.Trial)
@Setup(Level.Iteration)
public void init() {
ThreadLocalRandom random = ThreadLocalRandom.current();
@ -56,84 +62,72 @@ public class VectorUtilBenchmark {
}
@Benchmark
@Fork(value = 1)
public float binaryCosineScalar() {
return VectorUtil.cosine(bytesA, bytesB);
}
@Benchmark
@Fork(
value = 1,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float binaryCosineVector() {
return VectorUtil.cosine(bytesA, bytesB);
}
@Benchmark
@Fork(value = 1)
public int binaryDotProductScalar() {
return VectorUtil.dotProduct(bytesA, bytesB);
}
@Benchmark
@Fork(
value = 1,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public int binaryDotProductVector() {
return VectorUtil.dotProduct(bytesA, bytesB);
}
@Benchmark
@Fork(value = 1)
public int binarySquareScalar() {
return VectorUtil.squareDistance(bytesA, bytesB);
}
@Benchmark
@Fork(
value = 1,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public int binarySquareVector() {
return VectorUtil.squareDistance(bytesA, bytesB);
}
@Benchmark
@Fork(value = 1)
public float floatCosineScalar() {
return VectorUtil.cosine(floatsA, floatsB);
}
@Benchmark
@Fork(
value = 1,
value = 15,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float floatCosineVector() {
return VectorUtil.cosine(floatsA, floatsB);
}
@Benchmark
@Fork(value = 1)
public float floatDotProductScalar() {
return VectorUtil.dotProduct(floatsA, floatsB);
}
@Benchmark
@Fork(
value = 1,
value = 15,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float floatDotProductVector() {
return VectorUtil.dotProduct(floatsA, floatsB);
}
@Benchmark
@Fork(value = 1)
public float floatSquareScalar() {
return VectorUtil.squareDistance(floatsA, floatsB);
}
@Benchmark
@Fork(
value = 1,
value = 15,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float floatSquareVector() {
return VectorUtil.squareDistance(floatsA, floatsB);

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory

View File

@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory

View File

@ -37,8 +37,8 @@ doc.term.vector=vector:true:true:false:false
log.step=500
log.step.DeleteDoc=100
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -20,7 +20,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,

View File

@ -30,7 +30,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=1000
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -30,7 +30,8 @@ doc.term.vector.offsets=false
doc.term.vector.positions=false
log.step=2000
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -42,8 +42,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -16,7 +16,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams,

View File

@ -30,7 +30,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
#docs.dir=reuters-111
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource

View File

@ -31,7 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=100000
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -18,7 +18,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
-AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java))

View File

@ -23,9 +23,9 @@ import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
/**
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use
@ -42,17 +42,13 @@ public class NewAnalyzerTask extends PerfTask {
public static final Analyzer createAnalyzer(String className) throws Exception {
final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
try {
// first try to use a ctor with version parameter (needed for many new Analyzers that have no
// default one anymore
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
return cnstr.newInstance(Version.LATEST);
} catch (
@SuppressWarnings("unused")
NoSuchMethodException nsme) {
// otherwise use default ctor
return clazz.getConstructor().newInstance();
Constructor<? extends Analyzer> cnstr;
if (className.equals("org.apache.lucene.analysis.core.StopAnalyzer")) {
cnstr = clazz.getConstructor(CharArraySet.class);
return cnstr.newInstance(CharArraySet.EMPTY_SET);
}
cnstr = clazz.getConstructor();
return cnstr.newInstance();
}
@Override

View File

@ -116,7 +116,7 @@ final class PForUtil {
out.writeBytes(exceptions, exceptions.length);
}
/** Decode 128 integers into {@code ints}. */
/** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;

View File

@ -136,17 +136,16 @@ public final class MultiTerms extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
final List<TermsEnumIndex> termsEnums = new ArrayList<>();
for (int i = 0; i < subs.length; i++) {
final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
if (termsEnum != null) {
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
termsEnums.add(new TermsEnumIndex(termsEnum, i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices)
.reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
@ -181,17 +180,16 @@ public final class MultiTerms extends Terms {
@Override
public TermsEnum iterator() throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
final List<TermsEnumIndex> termsEnums = new ArrayList<>();
for (int i = 0; i < subs.length; i++) {
final TermsEnum termsEnum = subs[i].iterator();
if (termsEnum != null) {
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
termsEnums.add(new TermsEnumIndex(termsEnum, i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices)
.reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}

View File

@ -36,7 +36,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
new Comparator<TermsEnumWithSlice>() {
@Override
public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
return o1.index - o2.index;
return o1.subIndex - o2.subIndex;
}
};
@ -56,17 +56,6 @@ public final class MultiTermsEnum extends BaseTermsEnum {
private int numSubs;
private BytesRef current;
static class TermsEnumIndex {
public static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
this.termsEnum = termsEnum;
this.subIndex = subIndex;
}
}
/** Returns how many sub-reader slices contain the current term. @see #getMatchArray */
public int getMatchCount() {
return numTop;
@ -114,10 +103,10 @@ public final class MultiTermsEnum extends BaseTermsEnum {
final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i];
assert termsEnumIndex != null;
final BytesRef term = termsEnumIndex.termsEnum.next();
final BytesRef term = termsEnumIndex.next();
if (term != null) {
final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex];
entry.reset(termsEnumIndex.termsEnum, term);
entry.reset(termsEnumIndex);
queue.add(entry);
currentSubs[numSubs++] = entry;
} else {
@ -154,7 +143,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
// Doing so is a waste because this sub will simply
// seek to the same spot.
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
final BytesRef curTerm = currentSubs[i].term();
if (curTerm != null) {
final int cmp = term.compareTo(curTerm);
if (cmp == 0) {
@ -162,19 +151,19 @@ public final class MultiTermsEnum extends BaseTermsEnum {
} else if (cmp < 0) {
status = false;
} else {
status = currentSubs[i].terms.seekExact(term);
status = currentSubs[i].seekExact(term);
}
} else {
status = false;
}
} else {
status = currentSubs[i].terms.seekExact(term);
status = currentSubs[i].seekExact(term);
}
if (status) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
assert term.equals(currentSubs[i].current);
current = currentSubs[i].term();
assert term.equals(currentSubs[i].term());
}
}
@ -206,7 +195,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
// Doing so is a waste because this sub will simply
// seek to the same spot.
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
final BytesRef curTerm = currentSubs[i].term();
if (curTerm != null) {
final int cmp = term.compareTo(curTerm);
if (cmp == 0) {
@ -214,28 +203,25 @@ public final class MultiTermsEnum extends BaseTermsEnum {
} else if (cmp < 0) {
status = SeekStatus.NOT_FOUND;
} else {
status = currentSubs[i].terms.seekCeil(term);
status = currentSubs[i].seekCeil(term);
}
} else {
status = SeekStatus.END;
}
} else {
status = currentSubs[i].terms.seekCeil(term);
status = currentSubs[i].seekCeil(term);
}
if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
current = currentSubs[i].term();
queue.add(currentSubs[i]);
} else {
if (status == SeekStatus.NOT_FOUND) {
currentSubs[i].current = currentSubs[i].terms.term();
assert currentSubs[i].current != null;
assert currentSubs[i].term() != null;
queue.add(currentSubs[i]);
} else {
assert status == SeekStatus.END;
// enum exhausted
currentSubs[i].current = null;
}
}
}
@ -269,15 +255,14 @@ public final class MultiTermsEnum extends BaseTermsEnum {
// top term
assert numTop == 0;
numTop = queue.fillTop(top);
current = top[0].current;
current = top[0].term();
}
private void pushTop() throws IOException {
// call next() on each top, and reorder queue
for (int i = 0; i < numTop; i++) {
TermsEnumWithSlice top = queue.top();
top.current = top.terms.next();
if (top.current == null) {
if (top.next() == null) {
queue.pop();
} else {
queue.updateTop();
@ -320,7 +305,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
public int docFreq() throws IOException {
int sum = 0;
for (int i = 0; i < numTop; i++) {
sum += top[i].terms.docFreq();
sum += top[i].termsEnum.docFreq();
}
return sum;
}
@ -329,7 +314,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
public long totalTermFreq() throws IOException {
long sum = 0;
for (int i = 0; i < numTop; i++) {
final long v = top[i].terms.totalTermFreq();
final long v = top[i].termsEnum.totalTermFreq();
assert v != -1;
sum += v;
}
@ -359,12 +344,12 @@ public final class MultiTermsEnum extends BaseTermsEnum {
final TermsEnumWithSlice entry = top[i];
assert entry.index < docsEnum.subPostingsEnums.length
: entry.index + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length;
assert entry.subIndex < docsEnum.subPostingsEnums.length
: entry.subIndex + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length;
final PostingsEnum subPostingsEnum =
entry.terms.postings(docsEnum.subPostingsEnums[entry.index], flags);
entry.termsEnum.postings(docsEnum.subPostingsEnums[entry.subIndex], flags);
assert subPostingsEnum != null;
docsEnum.subPostingsEnums[entry.index] = subPostingsEnum;
docsEnum.subPostingsEnums[entry.subIndex] = subPostingsEnum;
subDocs[upto].postingsEnum = subPostingsEnum;
subDocs[upto].slice = entry.subSlice;
upto++;
@ -379,26 +364,18 @@ public final class MultiTermsEnum extends BaseTermsEnum {
return new SlowImpactsEnum(postings(null, flags));
}
static final class TermsEnumWithSlice {
static final class TermsEnumWithSlice extends TermsEnumIndex {
private final ReaderSlice subSlice;
TermsEnum terms;
public BytesRef current;
final int index;
public TermsEnumWithSlice(int index, ReaderSlice subSlice) {
super(null, index);
this.subSlice = subSlice;
this.index = index;
assert subSlice.length >= 0 : "length=" + subSlice.length;
}
public void reset(TermsEnum terms, BytesRef term) {
this.terms = terms;
current = term;
}
@Override
public String toString() {
return subSlice.toString() + ":" + terms;
return subSlice.toString() + ":" + super.toString();
}
}
@ -413,7 +390,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
@Override
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
return termsA.current.compareTo(termsB.current) < 0;
return termsA.compareTermTo(termsB) < 0;
}
/**
@ -435,7 +412,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
final int leftChild = index << 1;
for (int child = leftChild, end = Math.min(size, leftChild + 1); child <= end; ++child) {
TermsEnumWithSlice te = get(child);
if (te.current.equals(tops[0].current)) {
if (te.compareTermTo(tops[0]) == 0) {
tops[numTop++] = te;
stack[stackLen++] = child;
}

View File

@ -24,8 +24,6 @@ import java.util.Collection;
import java.util.List;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue;
@ -48,19 +46,15 @@ public class OrdinalMap implements Accountable {
// need it
// TODO: use more efficient packed ints structures?
private static class TermsEnumIndex {
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;
private static class TermsEnumPriorityQueue extends PriorityQueue<TermsEnumIndex> {
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
this.termsEnum = termsEnum;
this.subIndex = subIndex;
TermsEnumPriorityQueue(int size) {
super(size);
}
public BytesRef next() throws IOException {
currentTerm = termsEnum.next();
return currentTerm;
@Override
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return a.compareTermTo(b) < 0;
}
}
@ -227,13 +221,7 @@ public class OrdinalMap implements Accountable {
long[] segmentOrds = new long[subs.length];
// Just merge-sorts by term:
PriorityQueue<TermsEnumIndex> queue =
new PriorityQueue<TermsEnumIndex>(subs.length) {
@Override
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return a.currentTerm.compareTo(b.currentTerm) < 0;
}
};
TermsEnumPriorityQueue queue = new TermsEnumPriorityQueue(subs.length);
for (int i = 0; i < subs.length; i++) {
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
@ -242,19 +230,18 @@ public class OrdinalMap implements Accountable {
}
}
BytesRefBuilder scratch = new BytesRefBuilder();
TermsEnumIndex.TermState topState = new TermsEnumIndex.TermState();
long globalOrd = 0;
while (queue.size() != 0) {
TermsEnumIndex top = queue.top();
scratch.copyBytes(top.currentTerm);
topState.copyFrom(top);
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
// Advance past this term, recording the per-segment ord deltas:
while (true) {
top = queue.top();
long segmentOrd = top.termsEnum.ord();
long delta = globalOrd - segmentOrd;
int segmentIndex = top.subIndex;
@ -284,10 +271,11 @@ public class OrdinalMap implements Accountable {
if (queue.size() == 0) {
break;
}
top = queue.top();
} else {
queue.updateTop();
top = queue.updateTop();
}
if (queue.top().currentTerm.equals(scratch.get()) == false) {
if (top.termEquals(topState) == false) {
break;
}
}

View File

@ -18,8 +18,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@ -555,8 +553,6 @@ final class ReadersAndUpdates {
FieldInfos fieldInfos = null;
boolean any = false;
for (List<DocValuesFieldUpdates> updates : pendingDVUpdates.values()) {
// Sort by increasing delGen:
Collections.sort(updates, Comparator.comparingLong(a -> a.delGen));
for (DocValuesFieldUpdates update : updates) {
if (update.delGen <= maxDelGen && update.any()) {
any = true;

View File

@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
/**
 * Wrapper around a {@link TermsEnum} and an integer that identifies it. All operations that move
 * the current position of the {@link TermsEnum} must be performed via this wrapper class, not
 * directly on the wrapped {@link TermsEnum}.
 */
class TermsEnumIndex {

  // Shared empty array so callers converting a List<TermsEnumIndex> don't allocate.
  static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];

  /**
   * Copy the first 8 bytes of the given term as a comparable unsigned long. In case the term has
   * less than 8 bytes, missing bytes will be replaced with zeroes. Note that two terms that produce
   * the same long could still be different due to the fact that missing bytes are replaced with
   * zeroes, e.g. {@code [1, 0]} and {@code [1]} get mapped to the same long.
   */
  static long prefix8ToComparableUnsignedLong(BytesRef term) {
    // Use Big Endian so that longs are comparable
    if (term.length >= Long.BYTES) {
      // Fast path: a single 8-byte big-endian read covers the whole prefix.
      return (long) BitUtil.VH_BE_LONG.get(term.bytes, term.offset);
    } else {
      // Fewer than 8 bytes available: assemble the prefix from at most one int read, one
      // short read and one byte read, in big-endian order.
      long l;
      int o;
      if (Integer.BYTES <= term.length) {
        l = (int) BitUtil.VH_BE_INT.get(term.bytes, term.offset);
        o = Integer.BYTES;
      } else {
        l = 0;
        o = 0;
      }
      if (o + Short.BYTES <= term.length) {
        l =
            (l << Short.SIZE)
                | Short.toUnsignedLong(
                    (short) BitUtil.VH_BE_SHORT.get(term.bytes, term.offset + o));
        o += Short.BYTES;
      }
      if (o < term.length) {
        l = (l << Byte.SIZE) | Byte.toUnsignedLong(term.bytes[term.offset + o]);
      }
      // Left-align the assembled bytes into the most-significant positions so that short
      // terms compare consistently with longer ones (missing low bytes become zeroes).
      l <<= (Long.BYTES - term.length) << 3;
      return l;
    }
  }

  // Identifier of the wrapped enum, e.g. the index of the sub-reader/segment it came from.
  final int subIndex;
  TermsEnum termsEnum;
  // Cached result of termsEnum.term(); kept in sync by setTerm() on every positioning call.
  private BytesRef currentTerm;
  // Big-endian 8-byte prefix of currentTerm, used as a fast path for comparisons.
  private long currentTermPrefix8;

  TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
    this.termsEnum = termsEnum;
    this.subIndex = subIndex;
  }

  /** Returns the current term, or {@code null} when the enum is exhausted or not positioned. */
  BytesRef term() {
    return currentTerm;
  }

  // Single place where the cached term and its comparison prefix are updated together.
  private void setTerm(BytesRef term) {
    currentTerm = term;
    if (currentTerm == null) {
      currentTermPrefix8 = 0;
    } else {
      currentTermPrefix8 = prefix8ToComparableUnsignedLong(currentTerm);
    }
  }

  /** Advances the wrapped enum and returns the new term, or {@code null} if exhausted. */
  BytesRef next() throws IOException {
    BytesRef term = termsEnum.next();
    setTerm(term);
    return term;
  }

  /** Seeks to the smallest term {@code >= term}; caches {@code null} when the enum hits END. */
  SeekStatus seekCeil(BytesRef term) throws IOException {
    SeekStatus status = termsEnum.seekCeil(term);
    if (status == SeekStatus.END) {
      setTerm(null);
    } else {
      setTerm(termsEnum.term());
    }
    return status;
  }

  /** Seeks exactly to {@code term}; on a miss the cached term becomes {@code null}. */
  boolean seekExact(BytesRef term) throws IOException {
    boolean found = termsEnum.seekExact(term);
    if (found) {
      setTerm(termsEnum.term());
    } else {
      setTerm(null);
    }
    return found;
  }

  /** Seeks to the given ordinal and refreshes the cached term. */
  void seekExact(long ord) throws IOException {
    termsEnum.seekExact(ord);
    setTerm(termsEnum.term());
  }

  /** Makes this wrapper an exact copy of {@code tei}'s enum and cached position. */
  void reset(TermsEnumIndex tei) throws IOException {
    termsEnum = tei.termsEnum;
    currentTerm = tei.currentTerm;
    currentTermPrefix8 = tei.currentTermPrefix8;
  }

  /**
   * Compares the current terms of the two wrappers as unsigned byte sequences. Uses the cached
   * 8-byte prefixes as a fast path; the assert verifies the fast path agrees with the full
   * byte-wise comparison.
   */
  int compareTermTo(TermsEnumIndex that) {
    if (currentTermPrefix8 != that.currentTermPrefix8) {
      int cmp = Long.compareUnsigned(currentTermPrefix8, that.currentTermPrefix8);
      assert Integer.signum(cmp)
          == Integer.signum(
              Arrays.compareUnsigned(
                  currentTerm.bytes,
                  currentTerm.offset,
                  currentTerm.offset + currentTerm.length,
                  that.currentTerm.bytes,
                  that.currentTerm.offset,
                  that.currentTerm.offset + that.currentTerm.length));
      return cmp;
    }
    // Equal prefixes: fall back to comparing the full byte ranges.
    return Arrays.compareUnsigned(
        currentTerm.bytes,
        currentTerm.offset,
        currentTerm.offset + currentTerm.length,
        that.currentTerm.bytes,
        that.currentTerm.offset,
        that.currentTerm.offset + that.currentTerm.length);
  }

  @Override
  public String toString() {
    return Objects.toString(termsEnum);
  }

  /** Wrapper around a term that allows for quick equals comparisons. */
  static class TermState {
    // Private copy of the term bytes (owned by this object, not the enum).
    private final BytesRefBuilder term = new BytesRefBuilder();
    // Cached 8-byte prefix of the copied term, mirrors TermsEnumIndex.currentTermPrefix8.
    private long termPrefix8;

    /** Snapshots {@code tei}'s current term (bytes and prefix) into this state. */
    void copyFrom(TermsEnumIndex tei) {
      term.copyBytes(tei.term());
      termPrefix8 = tei.currentTermPrefix8;
    }
  }

  /** Returns true if this wrapper's current term equals the snapshot in {@code that}. */
  boolean termEquals(TermState that) {
    // Prefix mismatch proves inequality without touching the byte arrays.
    if (currentTermPrefix8 != that.termPrefix8) {
      return false;
    }
    return Arrays.equals(
        currentTerm.bytes,
        currentTerm.offset,
        currentTerm.offset + currentTerm.length,
        that.term.bytes(),
        0,
        that.term.length());
  }
}

View File

@ -21,8 +21,6 @@ import java.lang.Runtime.Version;
import java.lang.StackWalker.StackFrame;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
@ -31,7 +29,7 @@ import java.util.Set;
import java.util.function.Predicate;
import java.util.logging.Logger;
import java.util.stream.Stream;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.VectorUtil;
/**
@ -129,7 +127,7 @@ public abstract class VectorizationProvider {
"Vector bitsize and/or integer vectors enforcement; using default vectorization provider outside of testMode");
return new DefaultVectorizationProvider();
}
if (isClientVM()) {
if (Constants.IS_CLIENT_VM) {
LOG.warning("C2 compiler is disabled; Java vector incubator API can't be enabled");
return new DefaultVectorizationProvider();
}
@ -188,23 +186,6 @@ public abstract class VectorizationProvider {
&& !Objects.equals("I", "i".toUpperCase(Locale.getDefault()));
}
@SuppressWarnings("removal")
@SuppressForbidden(reason = "security manager")
private static boolean isClientVM() {
try {
final PrivilegedAction<Boolean> action =
() -> System.getProperty("java.vm.info", "").contains("emulated-client");
return AccessController.doPrivileged(action);
} catch (
@SuppressWarnings("unused")
SecurityException e) {
LOG.warning(
"SecurityManager denies permission to 'java.vm.info' system property, so state of C2 compiler can't be detected. "
+ "In case of performance issues allow access to this property.");
return false;
}
}
// add all possible callers here as FQCN:
private static final Set<String> VALID_CALLERS = Set.of("org.apache.lucene.util.VectorUtil");

View File

@ -475,7 +475,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
private class CompetitiveIterator extends DocIdSetIterator {
private static final int MAX_TERMS = 128;
private static final int MAX_TERMS = 1024;
private final LeafReaderContext context;
private final int maxDoc;

View File

@ -16,18 +16,25 @@
*/
package org.apache.lucene.util;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Objects;
import java.util.logging.Logger;
/** Some useful constants. */
public final class Constants {
private Constants() {} // can't construct
private static final String UNKNOWN = "Unknown";
/** JVM vendor info. */
public static final String JVM_VENDOR = System.getProperty("java.vm.vendor");
public static final String JVM_VENDOR = getSysProp("java.vm.vendor", UNKNOWN);
/** JVM vendor name. */
public static final String JVM_NAME = System.getProperty("java.vm.name");
public static final String JVM_NAME = getSysProp("java.vm.name", UNKNOWN);
/** The value of <code>System.getProperty("os.name")</code>. * */
public static final String OS_NAME = System.getProperty("os.name");
public static final String OS_NAME = getSysProp("os.name", UNKNOWN);
/** True iff running on Linux. */
public static final boolean LINUX = OS_NAME.startsWith("Linux");
@ -45,36 +52,67 @@ public final class Constants {
public static final boolean FREE_BSD = OS_NAME.startsWith("FreeBSD");
/** The value of <code>System.getProperty("os.arch")</code>. */
public static final String OS_ARCH = System.getProperty("os.arch");
public static final String OS_ARCH = getSysProp("os.arch", UNKNOWN);
/** The value of <code>System.getProperty("os.version")</code>. */
public static final String OS_VERSION = System.getProperty("os.version");
public static final String OS_VERSION = getSysProp("os.version", UNKNOWN);
/** The value of <code>System.getProperty("java.vendor")</code>. */
public static final String JAVA_VENDOR = System.getProperty("java.vendor");
public static final String JAVA_VENDOR = getSysProp("java.vendor", UNKNOWN);
/** True iff the Java runtime is a client runtime and C2 compiler is not enabled */
public static final boolean IS_CLIENT_VM =
getSysProp("java.vm.info", "").contains("emulated-client");
/** True iff running on a 64bit JVM */
public static final boolean JRE_IS_64BIT;
public static final boolean JRE_IS_64BIT = is64Bit();
static {
boolean is64Bit = false;
String datamodel = null;
/** true iff we know fast FMA is supported, to deliver less error */
public static final boolean HAS_FAST_FMA =
(IS_CLIENT_VM == false)
&& Objects.equals(OS_ARCH, "amd64")
&& HotspotVMOptions.get("UseFMA").map(Boolean::valueOf).orElse(false);
private static boolean is64Bit() {
final String datamodel = getSysProp("sun.arch.data.model");
if (datamodel != null) {
return datamodel.contains("64");
} else {
return (OS_ARCH != null && OS_ARCH.contains("64"));
}
}
private static String getSysProp(String property) {
try {
datamodel = System.getProperty("sun.arch.data.model");
if (datamodel != null) {
is64Bit = datamodel.contains("64");
}
return doPrivileged(() -> System.getProperty(property));
} catch (
@SuppressWarnings("unused")
SecurityException ex) {
SecurityException se) {
logSecurityWarning(property);
return null;
}
if (datamodel == null) {
if (OS_ARCH != null && OS_ARCH.contains("64")) {
is64Bit = true;
} else {
is64Bit = false;
}
}
private static String getSysProp(String property, String def) {
try {
return doPrivileged(() -> System.getProperty(property, def));
} catch (
@SuppressWarnings("unused")
SecurityException se) {
logSecurityWarning(property);
return def;
}
JRE_IS_64BIT = is64Bit;
}
private static void logSecurityWarning(String property) {
var log = Logger.getLogger(Constants.class.getName());
log.warning("SecurityManager prevented access to system property: " + property);
}
// Extracted to a method to be able to apply the SuppressForbidden annotation
@SuppressWarnings("removal")
@SuppressForbidden(reason = "security manager")
private static <T> T doPrivileged(PrivilegedAction<T> action) {
return AccessController.doPrivileged(action);
}
}

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.lang.reflect.Method;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.logging.Logger;
/** Accessor to get Hotspot VM Options (if available). */
final class HotspotVMOptions {
  private HotspotVMOptions() {} // static-only utility; never instantiated

  // Looked up reflectively: the management factory is not part of java.base.
  private static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
  private static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";

  /** True if the Java VM is based on Hotspot and has the Hotspot MX bean readable by Lucene */
  public static final boolean IS_HOTSPOT;

  // Resolves an option name to its value; falls back to a constant-empty function
  // when the diagnostic bean is unavailable.
  private static final Function<String, Optional<String>> ACCESSOR;

  static {
    boolean hotspotDetected = false;
    Function<String, Optional<String>> optionReader = optionName -> Optional.empty();
    try {
      final Class<?> diagnosticBeanClass = Class.forName(HOTSPOT_BEAN_CLASS);
      // we use reflection for this, because the management factory is not part
      // of java.base module:
      final Object diagnosticBean =
          Class.forName(MANAGEMENT_FACTORY_CLASS)
              .getMethod("getPlatformMXBean", Class.class)
              .invoke(null, diagnosticBeanClass);
      if (diagnosticBean != null) {
        final Method vmOptionLookup = diagnosticBeanClass.getMethod("getVMOption", String.class);
        final Method valueExtractor = vmOptionLookup.getReturnType().getMethod("getValue");
        hotspotDetected = true;
        optionReader =
            optionName -> {
              try {
                final Object option = vmOptionLookup.invoke(diagnosticBean, optionName);
                return Optional.of(valueExtractor.invoke(option).toString());
              } catch (@SuppressWarnings("unused")
                  ReflectiveOperationException
                  | RuntimeException e) {
                // Unknown or unreadable option: report absence rather than failing.
                return Optional.empty();
              }
            };
      }
    } catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
      hotspotDetected = false;
      final Logger logger = Logger.getLogger(HotspotVMOptions.class.getName());
      final Module selfModule = HotspotVMOptions.class.getModule();
      final ModuleLayer moduleLayer = selfModule.getLayer();
      // classpath / unnamed module has no layer, so we need to check:
      if (moduleLayer != null
          && moduleLayer.findModule("jdk.management").map(selfModule::canRead).orElse(false)
              == false) {
        logger.warning(
            "Lucene cannot access JVM internals to optimize algorithms or calculate object sizes, unless the 'jdk.management' Java module "
                + "is readable [please add 'jdk.management' to modular application either by command line or its module descriptor].");
      } else {
        logger.warning(
            "Lucene cannot optimize algorithms or calculate object sizes for JVMs that are not based on Hotspot or a compatible implementation.");
      }
    }
    IS_HOTSPOT = hotspotDetected;
    ACCESSOR = optionReader;
  }

  /**
   * Returns an optional with the value of a Hotspot VM option. If the VM option does not exist or
   * is not readable, returns an empty optional.
   */
  public static Optional<String> get(String name) {
    return ACCESSOR.apply(Objects.requireNonNull(name, "name"));
  }
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.util;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.security.AccessControlException;
import java.security.AccessController;
@ -30,7 +29,6 @@ import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
@ -112,64 +110,16 @@ public final class RamUsageEstimator {
/** For testing only */
static final boolean JVM_IS_HOTSPOT_64BIT;
static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";
/** Initialize constants and try to collect information about the JVM internals. */
static {
if (Constants.JRE_IS_64BIT) {
if (Constants.JRE_IS_64BIT && HotspotVMOptions.IS_HOTSPOT) {
// Try to get compressed oops and object alignment (the default seems to be 8 on Hotspot);
// (this only works on 64 bit, on 32 bits the alignment and reference size is fixed):
boolean compressedOops = false;
int objectAlignment = 8;
boolean isHotspot = false;
try {
final Class<?> beanClazz = Class.forName(HOTSPOT_BEAN_CLASS);
// we use reflection for this, because the management factory is not part
// of Java 8's compact profile:
final Object hotSpotBean =
Class.forName(MANAGEMENT_FACTORY_CLASS)
.getMethod("getPlatformMXBean", Class.class)
.invoke(null, beanClazz);
if (hotSpotBean != null) {
isHotspot = true;
final Method getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
try {
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "UseCompressedOops");
compressedOops =
Boolean.parseBoolean(
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
isHotspot = false;
}
try {
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "ObjectAlignmentInBytes");
objectAlignment =
Integer.parseInt(
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
isHotspot = false;
}
}
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
isHotspot = false;
final Logger log = Logger.getLogger(RamUsageEstimator.class.getName());
final Module module = RamUsageEstimator.class.getModule();
final ModuleLayer layer = module.getLayer();
// classpath / unnamed module has no layer, so we need to check:
if (layer != null
&& layer.findModule("jdk.management").map(module::canRead).orElse(false) == false) {
log.warning(
"Lucene cannot correctly calculate object sizes on 64bit JVMs, unless the 'jdk.management' Java module "
+ "is readable [please add 'jdk.management' to modular application either by command line or its module descriptor]");
} else {
log.warning(
"Lucene cannot correctly calculate object sizes on 64bit JVMs that are not based on Hotspot or a compatible implementation.");
}
}
JVM_IS_HOTSPOT_64BIT = isHotspot;
COMPRESSED_REFS_ENABLED = compressedOops;
NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
JVM_IS_HOTSPOT_64BIT = true;
COMPRESSED_REFS_ENABLED =
HotspotVMOptions.get("UseCompressedOops").map(Boolean::valueOf).orElse(false);
NUM_BYTES_OBJECT_ALIGNMENT =
HotspotVMOptions.get("ObjectAlignmentInBytes").map(Integer::valueOf).orElse(8);
// reference size is 4, if we have compressed oops:
NUM_BYTES_OBJECT_REF = COMPRESSED_REFS_ENABLED ? 4 : 8;
// "best guess" based on reference size:

View File

@ -1128,6 +1128,10 @@ public class RegExp {
if (start != pos) m = Integer.parseInt(originalString.substring(start, pos));
} else m = n;
if (!match('}')) throw new IllegalArgumentException("expected '}' at position " + pos);
if (m != -1 && n > m) {
throw new IllegalArgumentException(
"invalid repetition range(out of order): " + n + ".." + m);
}
if (m == -1) e = makeRepeat(flags, e, n);
else e = makeRepeat(flags, e, n, m);
}

View File

@ -270,10 +270,6 @@ public class FSTCompiler<T> {
return directAddressingMaxOversizingFactor;
}
public long getTermCount() {
return frontier[0].inputCount;
}
public long getNodeCount() {
// 1+ in order to count the -1 implicit final node
return 1 + nodeCount;
@ -749,7 +745,6 @@ public class FSTCompiler<T> {
// format cannot represent the empty input since
// 'finalness' is stored on the incoming arc, not on
// the node
frontier[0].inputCount++;
frontier[0].isFinal = true;
fst.setEmptyOutput(output);
return;
@ -760,9 +755,6 @@ public class FSTCompiler<T> {
int pos2 = input.offset;
final int pos1Stop = Math.min(lastInput.length(), input.length);
while (true) {
frontier[pos1].inputCount++;
// System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" +
// frontier[pos1]);
if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
break;
}
@ -786,7 +778,6 @@ public class FSTCompiler<T> {
// init tail states for current input
for (int idx = prefixLenPlus1; idx <= input.length; idx++) {
frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]);
frontier[idx].inputCount++;
}
final UnCompiledNode<T> lastNode = frontier[input.length];
@@ -835,8 +826,6 @@ public class FSTCompiler<T> {
// save last input
lastInput.copyInts(input);
// System.out.println(" count[0]=" + frontier[0].inputCount);
}
private boolean validOutput(T output) {
@@ -906,10 +895,6 @@ public class FSTCompiler<T> {
T output;
boolean isFinal;
// TODO: remove this tracking? we used to use it for confusingly pruning NodeHash, but
// we switched to LRU by RAM usage instead:
long inputCount;
/** This node's depth, starting from the automaton root. */
final int depth;
@@ -935,7 +920,6 @@ public class FSTCompiler<T> {
numArcs = 0;
isFinal = false;
output = owner.NO_OUTPUT;
inputCount = 0;
// We don't clear the depth here because it never changes
// for nodes on the frontier (even when reused).

View File

@@ -77,41 +77,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
VectorizationProvider.TESTS_FORCE_INTEGER_VECTORS || (isAMD64withoutAVX2 == false);
}
private static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
private static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";
// best effort to see if FMA is fast (this is architecture-independent option)
// Best-effort probe of the JVM's UseFMA flag (architecture-independent option).
private static boolean hasFastFMA() {
  // FMA works on ARM but is a slight slowdown there, so only consider amd64.
  if (Constants.OS_ARCH.equals("amd64") == false) {
    return false;
  }
  try {
    final Class<?> diagnosticBeanClass = Class.forName(HOTSPOT_BEAN_CLASS);
    // Reflection avoids a hard dependency on the management API, which is not
    // part of Java 8's compact profile:
    final Object diagnosticBean =
        Class.forName(MANAGEMENT_FACTORY_CLASS)
            .getMethod("getPlatformMXBean", Class.class)
            .invoke(null, diagnosticBeanClass);
    if (diagnosticBean == null) {
      // Not a HotSpot-compatible VM; assume no fast FMA.
      return false;
    }
    final Object vmOption =
        diagnosticBeanClass.getMethod("getVMOption", String.class).invoke(diagnosticBean, "UseFMA");
    final Object optionValue = vmOption.getClass().getMethod("getValue").invoke(vmOption);
    return Boolean.parseBoolean(optionValue.toString());
  } catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
    // Any reflective failure means we cannot confirm FMA support.
    return false;
  }
}
// true if we know FMA is supported, to deliver less error
static final boolean HAS_FAST_FMA = hasFastFMA();
// the way FMA should work! if available use it, otherwise fall back to mul/add
private static FloatVector fma(FloatVector a, FloatVector b, FloatVector c) {
if (HAS_FAST_FMA) {
if (Constants.HAS_FAST_FMA) {
return a.fma(b, c);
} else {
return a.mul(b).add(c);

View File

@@ -21,6 +21,7 @@ import java.security.PrivilegedAction;
import java.util.Locale;
import java.util.logging.Logger;
import jdk.incubator.vector.FloatVector;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.SuppressForbidden;
/** A vectorization provider that leverages the Panama Vector API. */
@@ -62,7 +63,7 @@ final class PanamaVectorizationProvider extends VectorizationProvider {
Locale.ENGLISH,
"Java vector incubator API enabled; uses preferredBitSize=%d%s%s",
PanamaVectorUtilSupport.VECTOR_BITSIZE,
PanamaVectorUtilSupport.HAS_FAST_FMA ? "; FMA enabled" : "",
Constants.HAS_FAST_FMA ? "; FMA enabled" : "",
PanamaVectorUtilSupport.HAS_FAST_INTEGER_VECTORS
? ""
: "; floating-point vectors only"));

View File

@@ -459,7 +459,8 @@ public class TestDeletionPolicy extends LuceneTestCase {
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
.setIndexCommit(lastCommit));
.setIndexCommit(lastCommit)
.setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Should undo our rollback:
@@ -476,12 +477,13 @@ public class TestDeletionPolicy extends LuceneTestCase {
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
.setIndexCommit(lastCommit));
.setIndexCommit(lastCommit)
.setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Commits the rollback:
writer.close();
// Now 8 because we made another commit
// Now 7 because we made another commit
assertEquals(7, DirectoryReader.listCommits(dir).size());
r = DirectoryReader.open(dir);
@@ -507,7 +509,10 @@ public class TestDeletionPolicy extends LuceneTestCase {
// but this time keeping only the last commit:
writer =
new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexCommit(lastCommit)
.setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs);
// Reader still sees fully merged index, because writer

View File

@@ -2395,11 +2395,12 @@ public class TestIndexWriter extends LuceneTestCase {
writer.addDocument(doc);
assertTrue(writer.hasUncommittedChanges());
// Must commit, waitForMerges, commit again, to be
// certain that hasUncommittedChanges returns false:
writer.commit();
writer.waitForMerges();
writer.commit();
// Must commit and wait for merges as long as the commit triggers merges to be certain that
// hasUncommittedChanges returns false
do {
writer.waitForMerges();
writer.commit();
} while (writer.hasPendingMerges());
assertFalse(writer.hasUncommittedChanges());
writer.deleteDocuments(new Term("id", "xyz"));
assertTrue(writer.hasUncommittedChanges());

View File

@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
public class TestTermsEnumIndex extends LuceneTestCase {

  public void testPrefix8ToComparableUnsignedLong() {
    byte[] bytes = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};

    // An empty prefix maps to 0.
    assertEquals(0L, TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(bytes, 1, 0)));

    // For prefixes of length 1..9 starting at offset 3, the first min(length, 8)
    // bytes are packed big-endian into the long; any bytes past the 8th are ignored.
    for (int length = 1; length <= 9; length++) {
      long expected = 0L;
      for (int i = 0; i < Math.min(length, 8); i++) {
        expected |= (bytes[3 + i] & 0xFFL) << (56 - 8 * i);
      }
      assertEquals(
          expected, TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(bytes, 3, length)));
    }
  }
}

View File

@@ -86,6 +86,17 @@ public class TestRegExp extends LuceneTestCase {
}
}
public void testParseIllegalRepeatExp() {
  // A repetition range whose lower bound exceeds its upper bound must be rejected
  // at parse time with a message mentioning the ordering problem.
  IllegalArgumentException e =
      expectThrows(IllegalArgumentException.class, () -> new RegExp("a{99,11}"));
  assertTrue(e.getMessage().contains("out of order"));
}
static String randomDocValue(int minLength) {
String charPalette = "AAAaaaBbbCccc123456 \t";
StringBuilder sb = new StringBuilder();

View File

@@ -56,7 +56,7 @@ public class Test2BFST extends LuceneTestCase {
for (int iter = 0; iter < 1; iter++) {
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B
{
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
System.out.println("\nTEST: ~2.2B nodes; output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput();
final FSTCompiler<Object> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

View File

@@ -568,7 +568,6 @@ public class TestFSTs extends LuceneTestCase {
System.out.println(
((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms");
assert fstCompiler.getTermCount() == ord;
FST<T> fst = fstCompiler.compile();
long tEnd = System.nanoTime();
System.out.println(

View File

@@ -38,8 +38,8 @@ public final class URLLabel extends JLabel {
super(text);
try {
this.link = (URI.create(text)).toURL();
} catch (MalformedURLException e) {
this.link = (new URI(text)).toURL();
} catch (URISyntaxException | MalformedURLException e) {
throw new LukeException(e.getMessage(), e);
}

View File

@@ -476,7 +476,12 @@ public abstract class LuceneTestCase extends Assert {
* of iterations to scale your tests (for nightly builds).
*/
public static final int RANDOM_MULTIPLIER =
systemPropertyAsInt("tests.multiplier", TEST_NIGHTLY ? 2 : 1);
systemPropertyAsInt("tests.multiplier", defaultRandomMultiplier());
/** Returns the default random multiplier: 2 for {@link #TEST_NIGHTLY} runs, 1 otherwise. */
static int defaultRandomMultiplier() {
  if (TEST_NIGHTLY) {
    // Nightly builds scale randomized iteration counts up.
    return 2;
  }
  return 1;
}
/** Leave temporary files on disk, even on successful runs. */
public static final boolean LEAVE_TEMPORARY;

View File

@@ -189,7 +189,8 @@ public final class RunListenerPrintReproduceInfo extends RunListener {
addVmOpt(b, "tests.seed", RandomizedContext.current().getRunnerSeedAsString());
// Test groups and multipliers.
if (RANDOM_MULTIPLIER > 1) addVmOpt(b, "tests.multiplier", RANDOM_MULTIPLIER);
if (RANDOM_MULTIPLIER != LuceneTestCase.defaultRandomMultiplier())
addVmOpt(b, "tests.multiplier", RANDOM_MULTIPLIER);
if (TEST_NIGHTLY) addVmOpt(b, SYSPROP_NIGHTLY, TEST_NIGHTLY);
if (TEST_WEEKLY) addVmOpt(b, SYSPROP_WEEKLY, TEST_WEEKLY);
if (TEST_MONSTER) addVmOpt(b, SYSPROP_MONSTER, TEST_MONSTER);