Merge branch 'main' into java_21

This commit is contained in:
ChrisHegarty 2023-11-03 20:33:26 +00:00
commit ef1db18096
67 changed files with 656 additions and 555 deletions

View File

@ -27,6 +27,7 @@ import java.nio.file.Paths;
import java.nio.file.StandardCopyOption; import java.nio.file.StandardCopyOption;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Stream;
/** /**
* Split the Reuters SGML documents into Simple Text files containing: * Split the Reuters SGML documents into Simple Text files containing:
@ -44,10 +45,11 @@ public class ExtractReuters {
public void extract() throws IOException { public void extract() throws IOException {
long count = 0; long count = 0;
Files.createDirectories(outputDir); Files.createDirectories(outputDir);
try(Stream<Path> files = Files.list(outputDir)) {
if (Files.list(outputDir).count() > 0) { if (files.count() > 0) {
throw new IOException("The output directory must be empty: " + outputDir); throw new IOException("The output directory must be empty: " + outputDir);
} }
}
try (DirectoryStream<Path> stream = Files.newDirectoryStream(reutersDir, "*.sgm")) { try (DirectoryStream<Path> stream = Files.newDirectoryStream(reutersDir, "*.sgm")) {
for (Path sgmFile : stream) { for (Path sgmFile : stream) {

View File

@ -63,7 +63,6 @@ except:
import scriptutil import scriptutil
from consolemenu import ConsoleMenu from consolemenu import ConsoleMenu
from consolemenu.items import FunctionItem, SubmenuItem, ExitItem from consolemenu.items import FunctionItem, SubmenuItem, ExitItem
from consolemenu.screen import Screen
from scriptutil import BranchType, Version, download, run from scriptutil import BranchType, Version, download, run
# Lucene-to-Java version mapping # Lucene-to-Java version mapping
@ -654,8 +653,8 @@ class TodoGroup(SecretYamlObject):
return "%s%s (%d/%d)" % (prefix, self.title, self.num_done(), self.num_applies()) return "%s%s (%d/%d)" % (prefix, self.title, self.num_done(), self.num_applies())
def get_submenu(self): def get_submenu(self):
menu = UpdatableConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(), menu = ConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
screen=MyScreen()) clear_screen=False)
menu.exit_item = CustomExitItem("Return") menu.exit_item = CustomExitItem("Return")
for todo in self.get_todos(): for todo in self.get_todos():
if todo.applies(state.release_type): if todo.applies(state.release_type):
@ -663,7 +662,7 @@ class TodoGroup(SecretYamlObject):
return menu return menu
def get_menu_item(self): def get_menu_item(self):
item = UpdatableSubmenuItem(self.get_title, self.get_submenu()) item = SubmenuItem(self.get_title, self.get_submenu())
return item return item
def get_todos(self): def get_todos(self):
@ -820,7 +819,7 @@ class Todo(SecretYamlObject):
print("ERROR while executing todo %s (%s)" % (self.get_title(), e)) print("ERROR while executing todo %s (%s)" % (self.get_title(), e))
def get_menu_item(self): def get_menu_item(self):
return UpdatableFunctionItem(self.get_title, self.display_and_confirm) return FunctionItem(self.get_title, self.display_and_confirm)
def clone(self): def clone(self):
clone = Todo(self.id, self.title, description=self.description) clone = Todo(self.id, self.title, description=self.description)
@ -1234,104 +1233,6 @@ def pause(fun=None):
input("\nPress ENTER to continue...") input("\nPress ENTER to continue...")
# Custom classes for ConsoleMenu, to make menu texts dynamic
# Needed until https://github.com/aegirhall/console-menu/pull/25 is released
# See https://pypi.org/project/console-menu/ for other docs
class UpdatableConsoleMenu(ConsoleMenu):
def __repr__(self):
return "%s: %s. %d items" % (self.get_title(), self.get_subtitle(), len(self.items))
def draw(self):
"""
Refreshes the screen and redraws the menu. Should be called whenever something changes that needs to be redrawn.
"""
self.screen.printf(self.formatter.format(title=self.get_title(), subtitle=self.get_subtitle(), items=self.items,
prologue_text=self.get_prologue_text(), epilogue_text=self.get_epilogue_text()))
# Getters to get text in case method reference
def get_title(self):
return self.title() if callable(self.title) else self.title
def get_subtitle(self):
return self.subtitle() if callable(self.subtitle) else self.subtitle
def get_prologue_text(self):
return self.prologue_text() if callable(self.prologue_text) else self.prologue_text
def get_epilogue_text(self):
return self.epilogue_text() if callable(self.epilogue_text) else self.epilogue_text
class UpdatableSubmenuItem(SubmenuItem):
def __init__(self, text, submenu, menu=None, should_exit=False):
"""
:ivar ConsoleMenu self.submenu: The submenu to be opened when this item is selected
"""
super(UpdatableSubmenuItem, self).__init__(text=text, menu=menu, should_exit=should_exit, submenu=submenu)
if menu:
self.get_submenu().parent = menu
def show(self, index):
return "%2d - %s" % (index + 1, self.get_text())
# Getters to get text in case method reference
def get_text(self):
return self.text() if callable(self.text) else self.text
def set_menu(self, menu):
"""
Sets the menu of this item.
Should be used instead of directly accessing the menu attribute for this class.
:param ConsoleMenu menu: the menu
"""
self.menu = menu
self.get_submenu().parent = menu
def action(self):
"""
This class overrides this method
"""
self.get_submenu().start()
def clean_up(self):
"""
This class overrides this method
"""
self.get_submenu().join()
self.menu.clear_screen()
self.menu.resume()
def get_return(self):
"""
:return: The returned value in the submenu
"""
return self.get_submenu().returned_value
def get_submenu(self):
"""
We unwrap the submenu variable in case it is a reference to a method that returns a submenu
"""
return self.submenu if not callable(self.submenu) else self.submenu()
class UpdatableFunctionItem(FunctionItem):
def show(self, index):
return "%2d - %s" % (index + 1, self.get_text())
# Getters to get text in case method reference
def get_text(self):
return self.text() if callable(self.text) else self.text
class MyScreen(Screen):
def clear(self):
return
class CustomExitItem(ExitItem): class CustomExitItem(ExitItem):
def show(self, index): def show(self, index):
return super(CustomExitItem, self).show(index) return super(CustomExitItem, self).show(index)
@ -1346,6 +1247,13 @@ def main():
global templates global templates
print("Lucene releaseWizard v%s" % getScriptVersion()) print("Lucene releaseWizard v%s" % getScriptVersion())
try:
ConsoleMenu(clear_screen=True)
except Exception as e:
sys.exit("You need to install 'consolemenu' package version 0.7.1 for the Wizard to function. Please run 'pip "
"install -r requirements.txt'")
c = parse_config() c = parse_config()
if c.dry: if c.dry:
@ -1402,18 +1310,18 @@ def main():
lucene_news_file = os.path.join(state.get_website_git_folder(), 'content', 'core', 'core_news', lucene_news_file = os.path.join(state.get_website_git_folder(), 'content', 'core', 'core_news',
"%s-%s-available.md" % (state.get_release_date_iso(), state.release_version.replace(".", "-"))) "%s-%s-available.md" % (state.get_release_date_iso(), state.release_version.replace(".", "-")))
main_menu = UpdatableConsoleMenu(title="Lucene ReleaseWizard", main_menu = ConsoleMenu(title="Lucene ReleaseWizard",
subtitle=get_releasing_text, subtitle=get_releasing_text,
prologue_text="Welcome to the release wizard. From here you can manage the process including creating new RCs. " prologue_text="Welcome to the release wizard. From here you can manage the process including creating new RCs. "
"All changes are persisted, so you can exit any time and continue later. Make sure to read the Help section.", "All changes are persisted, so you can exit any time and continue later. Make sure to read the Help section.",
epilogue_text="® 2022 The Lucene project. Licensed under the Apache License 2.0\nScript version v%s)" % getScriptVersion(), epilogue_text="® 2022 The Lucene project. Licensed under the Apache License 2.0\nScript version v%s)" % getScriptVersion(),
screen=MyScreen()) clear_screen=False)
todo_menu = UpdatableConsoleMenu(title=get_releasing_text, todo_menu = ConsoleMenu(title=get_releasing_text,
subtitle=get_subtitle, subtitle=get_subtitle,
prologue_text=None, prologue_text=None,
epilogue_text=None, epilogue_text=None,
screen=MyScreen()) clear_screen=False)
todo_menu.exit_item = CustomExitItem("Return") todo_menu.exit_item = CustomExitItem("Return")
for todo_group in state.todo_groups: for todo_group in state.todo_groups:
@ -1422,14 +1330,14 @@ def main():
menu_item.set_menu(todo_menu) menu_item.set_menu(todo_menu)
todo_menu.append_item(menu_item) todo_menu.append_item(menu_item)
main_menu.append_item(UpdatableSubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu)) main_menu.append_item(SubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
main_menu.append_item(UpdatableFunctionItem(get_start_new_rc_menu_title, start_new_rc)) main_menu.append_item(FunctionItem(get_start_new_rc_menu_title, start_new_rc))
main_menu.append_item(UpdatableFunctionItem('Clear and restart current RC', state.clear_rc)) main_menu.append_item(FunctionItem('Clear and restart current RC', state.clear_rc))
main_menu.append_item(UpdatableFunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state)) main_menu.append_item(FunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
main_menu.append_item(UpdatableFunctionItem('Start release for a different version', release_other_version)) main_menu.append_item(FunctionItem('Start release for a different version', release_other_version))
main_menu.append_item(UpdatableFunctionItem('Generate Asciidoc guide for this release', generate_asciidoc)) main_menu.append_item(FunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
# main_menu.append_item(UpdatableFunctionItem('Dump YAML', dump_yaml)) # main_menu.append_item(FunctionItem('Dump YAML', dump_yaml))
main_menu.append_item(UpdatableFunctionItem('Help', help)) main_menu.append_item(FunctionItem('Help', help))
main_menu.show() main_menu.show()

View File

@ -521,7 +521,7 @@ groups:
addition wait a couple more days? Merges of bug fixes into the branch addition wait a couple more days? Merges of bug fixes into the branch
may become more difficult. may become more difficult.
* Only Github issues with Milestone {{ release_version_major }}.{{ release_version_minor }} * Only Github issues with Milestone {{ release_version_major }}.{{ release_version_minor }}
and priority "Blocker" will delay a release candidate build. will delay a release candidate build.
---- ----
types: types:
- major - major
@ -979,8 +979,8 @@ groups:
title: Publish docs, changes and javadocs title: Publish docs, changes and javadocs
description: | description: |
Ensure your refrigerator has at least 2 beers - the svn import operation can take a while, Ensure your refrigerator has at least 2 beers - the svn import operation can take a while,
depending on your upload bandwidth. We'll publish this directly to the production tree. depending on your upload bandwidth. We'll publish this directly to the production tree. At
At the end of the task, the two links below shall work. the end of the task, the two links below shall work.
links: links:
- http://lucene.apache.org/core/{{ version }} - http://lucene.apache.org/core/{{ version }}
vars: vars:
@ -1126,12 +1126,18 @@ groups:
comment: Push all changes comment: Push all changes
logfile: push-website.log logfile: push-website.log
post_description: | post_description: |
Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3 Wait a few minutes for the build to happen. You can follow the site build at
and view the staged site at https://lucene.staged.apache.org https://ci2.apache.org/#/builders/3 and view the staged site at
Verify that correct links and versions are mentioned in download pages, download buttons etc. https://lucene.staged.apache.org Verify that correct links and versions are mentioned in
If you find anything wrong, then commit and push any changes and check again. download pages, download buttons etc. If you find anything wrong, then commit and push any
changes and check again. You may find that the publish fails, leaving a directory listing
Next step is to merge the changes to branch 'production' in order to publish the site. instead a beautiful website. If this happens, check the "builder" link and click through into
its details to find possible error messages produced by the website publication process. You
may have produced malformed Markdown. Or the website publish may just fail for some reason out
of your control. If this happens, you can attempt to retrigger the publishing with some
innocuous changes. Next step is to merge the changes to branch 'production' in order to
publish the site. Before doing this, you may want to replenish your stock of beers, or get
stronger stuff.
links: links:
- https://ci2.apache.org/#/builders/3 - https://ci2.apache.org/#/builders/3
- https://lucene.staged.apache.org - https://lucene.staged.apache.org
@ -1159,7 +1165,8 @@ groups:
post_description: | post_description: |
Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3 Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3
Verify on https://lucene.apache.org that the site is OK. Verify on https://lucene.apache.org that the site is OK. It really should be, but see staging
site publication instructions for possible debugging/recovery options if it is not.
You can now also verify that http://lucene.apache.org/core/api/core/ redirects to the latest version You can now also verify that http://lucene.apache.org/core/api/core/ redirects to the latest version
links: links:

View File

@ -1,8 +1,8 @@
six>=1.11.0 six~=1.16.0
Jinja2>=2.10.1 Jinja2~=3.1.1
PyYAML>=5.1 PyYAML~=6.0
holidays>=0.9.10 holidays~=0.16
ics>=0.4 ics~=0.7.2
console-menu>=0.5.1 console-menu~=0.7.1
PyGithub PyGithub~=1.56
jira jira~=3.4.1

View File

@ -67,6 +67,12 @@ allprojects {
tasks.named(sourceSet.getCompileJavaTaskName()).configure({ JavaCompile task -> tasks.named(sourceSet.getCompileJavaTaskName()).configure({ JavaCompile task ->
task.dependsOn modularPaths.compileModulePathConfiguration task.dependsOn modularPaths.compileModulePathConfiguration
// GH-12742: add the modular path as inputs so that if anything changes, the task
// is not up to date and is re-run. I [dw] believe this should be a @Classpath parameter
// on the task itself... but I don't know how to implement this on an existing class.
// this is a workaround but should work just fine though.
task.inputs.files(modularPaths.compileModulePathConfiguration)
// LUCENE-10327: don't allow gradle to emit an empty sourcepath as it would break // LUCENE-10327: don't allow gradle to emit an empty sourcepath as it would break
// compilation of modules. // compilation of modules.
task.options.setSourcepath(sourceSet.java.sourceDirectories) task.options.setSourcepath(sourceSet.java.sourceDirectories)

View File

@ -67,7 +67,7 @@ allprojects {
// seed, repetition and amplification. // seed, repetition and amplification.
[propName: 'tests.seed', value: { -> rootSeed }, description: "Sets the master randomization seed."], [propName: 'tests.seed', value: { -> rootSeed }, description: "Sets the master randomization seed."],
[propName: 'tests.iters', value: null, description: "Duplicate (re-run) each test case N times."], [propName: 'tests.iters', value: null, description: "Duplicate (re-run) each test case N times."],
[propName: 'tests.multiplier', value: 1, description: "Value multiplier for randomized tests."], [propName: 'tests.multiplier', value: null, description: "Value multiplier for randomized tests."],
[propName: 'tests.maxfailures', value: null, description: "Skip tests after a given number of failures."], [propName: 'tests.maxfailures', value: null, description: "Skip tests after a given number of failures."],
[propName: 'tests.timeoutSuite', value: null, description: "Timeout (in millis) for an entire suite."], [propName: 'tests.timeoutSuite', value: null, description: "Timeout (in millis) for an entire suite."],
[propName: 'tests.failfast', value: "false", description: "Stop the build early on failure.", buildOnly: true], [propName: 'tests.failfast', value: "false", description: "Stop the build early on failure.", buildOnly: true],

View File

@ -62,9 +62,11 @@ API Changes
* GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera) * GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera)
* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods * GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
of the two (Anh Dung Bui) of the two (Anh Dung Bui)
* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
New Features New Features
--------------------- ---------------------
@ -208,6 +210,9 @@ Improvements
* GITHUB#12689: TaskExecutor to cancel all tasks on exception to avoid needless computation. (Luca Cavanna) * GITHUB#12689: TaskExecutor to cancel all tasks on exception to avoid needless computation. (Luca Cavanna)
* GITHUB#12754: Refactor lookup of Hotspot VM options and do not initialize constants with NULL
if SecurityManager prevents access. (Uwe Schindler)
Optimizations Optimizations
--------------------- ---------------------
* GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary) * GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)
@ -251,6 +256,11 @@ Optimizations
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a * GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
specialized bulk scorer. (Adrien Grand) specialized bulk scorer. (Adrien Grand)
* GITHUB#1052: Faster merging of terms enums. (Adrien Grand)
* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)
Changes in runtime behavior Changes in runtime behavior
--------------------- ---------------------
@ -278,7 +288,14 @@ Bug Fixes
Build Build
--------------------- ---------------------
* GITHUB#12752: tests.multiplier could be omitted in test failure reproduce lines (esp. in
nightly mode). (Dawid Weiss)
* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
* GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler) * GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
* GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden) * GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden)
Other Other

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.analysis.ja.dict; package org.apache.lucene.analysis.util;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -69,7 +69,7 @@ public final class CSVUtil {
return new String[0]; return new String[0];
} }
return result.toArray(new String[result.size()]); return result.toArray(new String[0]);
} }
private static String unQuoteUnEscape(String original) { private static String unQuoteUnEscape(String original) {
@ -83,7 +83,7 @@ public final class CSVUtil {
} }
// Unescape // Unescape
if (result.indexOf(ESCAPED_QUOTE) >= 0) { if (result.contains(ESCAPED_QUOTE)) {
result = result.replace(ESCAPED_QUOTE, "\""); result = result.replace(ESCAPED_QUOTE, "\"");
} }
} }

View File

@ -14,10 +14,9 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.analysis.ja; package org.apache.lucene.analysis.util;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.analysis.ja.dict.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
/* /*

View File

@ -156,19 +156,19 @@ public class GenerateUTR30DataFiles {
} }
private static void getNFKCDataFilesFromIcuProject(String releaseTag) throws IOException { private static void getNFKCDataFilesFromIcuProject(String releaseTag) throws IOException {
URI icuTagsURL = URI.create(ICU_GIT_TAG_URL + "/"); URI icuTagsURI = URI.create(ICU_GIT_TAG_URL + "/");
URI icuReleaseTagURL = icuTagsURL.resolve(releaseTag + "/"); URI icuReleaseTagURI = icuTagsURI.resolve(releaseTag + "/");
URI norm2url = icuReleaseTagURL.resolve(ICU_DATA_NORM2_PATH + "/"); URI norm2uri = icuReleaseTagURI.resolve(ICU_DATA_NORM2_PATH + "/");
System.err.print("Downloading " + NFKC_TXT + " ... "); System.err.print("Downloading " + NFKC_TXT + " ... ");
download(norm2url.resolve(NFKC_TXT), NFKC_TXT); download(norm2uri.resolve(NFKC_TXT), NFKC_TXT);
System.err.println("done."); System.err.println("done.");
System.err.print("Downloading " + NFKC_CF_TXT + " ... "); System.err.print("Downloading " + NFKC_CF_TXT + " ... ");
download(norm2url.resolve(NFKC_CF_TXT), NFKC_CF_TXT); download(norm2uri.resolve(NFKC_CF_TXT), NFKC_CF_TXT);
System.err.println("done."); System.err.println("done.");
System.err.print("Downloading " + NFKC_CF_TXT + " and making diacritic rules one-way ... "); System.err.print("Downloading " + NFKC_CF_TXT + " and making diacritic rules one-way ... ");
URLConnection connection = openConnection(norm2url.resolve(NFC_TXT).toURL()); URLConnection connection = openConnection(norm2uri.resolve(NFC_TXT).toURL());
try (BufferedReader reader = try (BufferedReader reader =
new BufferedReader( new BufferedReader(
new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8)); new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));

View File

@ -28,6 +28,7 @@ import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter; import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;

View File

@ -25,6 +25,7 @@ import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder { class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*"; private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";

View File

@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.analysis.morph.Dictionary;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;

View File

@ -19,6 +19,8 @@ package org.apache.lucene.analysis.ja.dict;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET; import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR; import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR;
import org.apache.lucene.analysis.util.CSVUtil;
/** Morphological information for user dictionary. */ /** Morphological information for user dictionary. */
final class UserMorphData implements JaMorphData { final class UserMorphData implements JaMorphData {
public static final int WORD_COST = -100000; public static final int WORD_COST = -100000;

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.analysis.ja.dict; package org.apache.lucene.analysis.ja.dict;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test; import org.junit.Test;

View File

@ -1,93 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ko.dict;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Utility class for parsing CSV text */
public final class CSVUtil {
private static final char QUOTE = '"';
private static final char COMMA = ',';
private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$");
private static final String ESCAPED_QUOTE = "\"\"";
private CSVUtil() {} // no instance!!!
/**
* Parse CSV line
*
* @param line line containing csv-encoded data
* @return Array of values
*/
public static String[] parse(String line) {
boolean insideQuote = false;
ArrayList<String> result = new ArrayList<>();
int quoteCount = 0;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < line.length(); i++) {
char c = line.charAt(i);
if (c == QUOTE) {
insideQuote = !insideQuote;
quoteCount++;
}
if (c == COMMA && !insideQuote) {
String value = sb.toString();
value = unQuoteUnEscape(value);
result.add(value);
sb.setLength(0);
continue;
}
sb.append(c);
}
result.add(sb.toString());
// Validate
if (quoteCount % 2 != 0) {
return new String[0];
}
return result.toArray(new String[0]);
}
private static String unQuoteUnEscape(String original) {
String result = original;
// Unquote
if (result.indexOf('\"') >= 0) {
Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
if (m.matches()) {
result = m.group(1);
}
// Unescape
if (result.contains(ESCAPED_QUOTE)) {
result = result.replace(ESCAPED_QUOTE, "\"");
}
}
return result;
}
}

View File

@ -28,6 +28,7 @@ import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;

View File

@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.ko.POS; import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter; import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;

View File

@ -25,6 +25,7 @@ import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.util.CSVUtil;
class UnknownDictionaryBuilder { class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*"; private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.analysis.ko.dict; package org.apache.lucene.analysis.ko.dict;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test; import org.junit.Test;

View File

@ -92,7 +92,7 @@ public final class PForUtil {
out.writeBytes(exceptions, exceptions.length); out.writeBytes(exceptions, exceptions.length);
} }
/** Decode 128 integers into {@code ints}. */ /** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException { void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte()); final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f; final int bitsPerValue = token & 0x1f;

View File

@ -24,8 +24,14 @@ import org.openjdk.jmh.annotations.*;
@BenchmarkMode(Mode.Throughput) @BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS) @OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark) @State(Scope.Benchmark)
@Warmup(iterations = 3, time = 3) // first iteration is complete garbage, so make sure we really warmup
@Measurement(iterations = 5, time = 3) @Warmup(iterations = 4, time = 1)
// real iterations. not useful to spend tons of time here, better to fork more
@Measurement(iterations = 5, time = 1)
// engage some noise reduction
@Fork(
value = 3,
jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
public class VectorUtilBenchmark { public class VectorUtilBenchmark {
private byte[] bytesA; private byte[] bytesA;
@ -36,7 +42,7 @@ public class VectorUtilBenchmark {
@Param({"1", "128", "207", "256", "300", "512", "702", "1024"}) @Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
int size; int size;
@Setup(Level.Trial) @Setup(Level.Iteration)
public void init() { public void init() {
ThreadLocalRandom random = ThreadLocalRandom.current(); ThreadLocalRandom random = ThreadLocalRandom.current();
@ -56,84 +62,72 @@ public class VectorUtilBenchmark {
} }
@Benchmark @Benchmark
@Fork(value = 1)
public float binaryCosineScalar() { public float binaryCosineScalar() {
return VectorUtil.cosine(bytesA, bytesB); return VectorUtil.cosine(bytesA, bytesB);
} }
@Benchmark @Benchmark
@Fork( @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
value = 1,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float binaryCosineVector() { public float binaryCosineVector() {
return VectorUtil.cosine(bytesA, bytesB); return VectorUtil.cosine(bytesA, bytesB);
} }
@Benchmark @Benchmark
@Fork(value = 1)
public int binaryDotProductScalar() { public int binaryDotProductScalar() {
return VectorUtil.dotProduct(bytesA, bytesB); return VectorUtil.dotProduct(bytesA, bytesB);
} }
@Benchmark @Benchmark
@Fork( @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
value = 1,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public int binaryDotProductVector() { public int binaryDotProductVector() {
return VectorUtil.dotProduct(bytesA, bytesB); return VectorUtil.dotProduct(bytesA, bytesB);
} }
@Benchmark @Benchmark
@Fork(value = 1)
public int binarySquareScalar() { public int binarySquareScalar() {
return VectorUtil.squareDistance(bytesA, bytesB); return VectorUtil.squareDistance(bytesA, bytesB);
} }
@Benchmark @Benchmark
@Fork( @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
value = 1,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public int binarySquareVector() { public int binarySquareVector() {
return VectorUtil.squareDistance(bytesA, bytesB); return VectorUtil.squareDistance(bytesA, bytesB);
} }
@Benchmark @Benchmark
@Fork(value = 1)
public float floatCosineScalar() { public float floatCosineScalar() {
return VectorUtil.cosine(floatsA, floatsB); return VectorUtil.cosine(floatsA, floatsB);
} }
@Benchmark @Benchmark
@Fork( @Fork(
value = 1, value = 15,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float floatCosineVector() { public float floatCosineVector() {
return VectorUtil.cosine(floatsA, floatsB); return VectorUtil.cosine(floatsA, floatsB);
} }
@Benchmark @Benchmark
@Fork(value = 1)
public float floatDotProductScalar() { public float floatDotProductScalar() {
return VectorUtil.dotProduct(floatsA, floatsB); return VectorUtil.dotProduct(floatsA, floatsB);
} }
@Benchmark @Benchmark
@Fork( @Fork(
value = 1, value = 15,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float floatDotProductVector() { public float floatDotProductVector() {
return VectorUtil.dotProduct(floatsA, floatsB); return VectorUtil.dotProduct(floatsA, floatsB);
} }
@Benchmark @Benchmark
@Fork(value = 1)
public float floatSquareScalar() { public float floatSquareScalar() {
return VectorUtil.squareDistance(floatsA, floatsB); return VectorUtil.squareDistance(floatsA, floatsB);
} }
@Benchmark @Benchmark
@Fork( @Fork(
value = 1, value = 15,
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public float floatSquareVector() { public float floatSquareVector() {
return VectorUtil.squareDistance(floatsA, floatsB); return VectorUtil.squareDistance(floatsA, floatsB);

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=500 log.step=500
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor # Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs # topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order # topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory directory=FSDirectory

View File

@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with a empty constructor # Fully Qualified Class Name of a Collector with a empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs # topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order # topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory directory=FSDirectory

View File

@ -37,8 +37,8 @@ doc.term.vector=vector:true:true:false:false
log.step=500 log.step=500
log.step.DeleteDoc=100 log.step.DeleteDoc=100
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -20,7 +20,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false doc.tokenized=false
doc.body.tokenized=true doc.body.tokenized=true
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer, -AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter, EnglishPossessiveFilter,LowerCaseFilter,StopFilter,

View File

@ -30,7 +30,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=1000 log.step=1000
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -30,7 +30,8 @@ doc.term.vector.offsets=false
doc.term.vector.positions=false doc.term.vector.positions=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=500 log.step=500
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -42,8 +42,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=500 log.step=500
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource #content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -16,7 +16,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false doc.tokenized=false
doc.body.tokenized=true doc.body.tokenized=true
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
log.step=1000 log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams, -AnalyzerFactory(name:shingle-bigrams-unigrams,

View File

@ -30,7 +30,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=500 log.step=500
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
#docs.dir=reuters-111 #docs.dir=reuters-111
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource

View File

@ -31,7 +31,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=100000 log.step=100000
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false doc.term.vector=false
log.step=2000 log.step=2000
docs.dir=reuters-out work.dir=data
#docs.dir=reuters-111 docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource #content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -18,7 +18,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false doc.tokenized=false
doc.body.tokenized=true doc.body.tokenized=true
docs.dir=reuters-out work.dir=data
docs.dir=reuters21578
-AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java)) -AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java))

View File

@ -23,9 +23,9 @@ import java.lang.reflect.Constructor;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory; import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
/** /**
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use * Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use
@ -42,17 +42,13 @@ public class NewAnalyzerTask extends PerfTask {
public static final Analyzer createAnalyzer(String className) throws Exception { public static final Analyzer createAnalyzer(String className) throws Exception {
final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class); final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
try { Constructor<? extends Analyzer> cnstr;
// first try to use a ctor with version parameter (needed for many new Analyzers that have no if (className.equals("org.apache.lucene.analysis.core.StopAnalyzer")) {
// default one anymore cnstr = clazz.getConstructor(CharArraySet.class);
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class); return cnstr.newInstance(CharArraySet.EMPTY_SET);
return cnstr.newInstance(Version.LATEST);
} catch (
@SuppressWarnings("unused")
NoSuchMethodException nsme) {
// otherwise use default ctor
return clazz.getConstructor().newInstance();
} }
cnstr = clazz.getConstructor();
return cnstr.newInstance();
} }
@Override @Override

View File

@ -116,7 +116,7 @@ final class PForUtil {
out.writeBytes(exceptions, exceptions.length); out.writeBytes(exceptions, exceptions.length);
} }
/** Decode 128 integers into {@code ints}. */ /** Decode 128 integers into {@code longs}. */
void decode(DataInput in, long[] longs) throws IOException { void decode(DataInput in, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte()); final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f; final int bitsPerValue = token & 0x1f;

View File

@ -136,17 +136,16 @@ public final class MultiTerms extends Terms {
@Override @Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>(); final List<TermsEnumIndex> termsEnums = new ArrayList<>();
for (int i = 0; i < subs.length; i++) { for (int i = 0; i < subs.length; i++) {
final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm); final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
if (termsEnum != null) { if (termsEnum != null) {
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i)); termsEnums.add(new TermsEnumIndex(termsEnum, i));
} }
} }
if (termsEnums.size() > 0) { if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices) return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(TermsEnumIndex.EMPTY_ARRAY));
.reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else { } else {
return TermsEnum.EMPTY; return TermsEnum.EMPTY;
} }
@ -181,17 +180,16 @@ public final class MultiTerms extends Terms {
@Override @Override
public TermsEnum iterator() throws IOException { public TermsEnum iterator() throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>(); final List<TermsEnumIndex> termsEnums = new ArrayList<>();
for (int i = 0; i < subs.length; i++) { for (int i = 0; i < subs.length; i++) {
final TermsEnum termsEnum = subs[i].iterator(); final TermsEnum termsEnum = subs[i].iterator();
if (termsEnum != null) { if (termsEnum != null) {
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i)); termsEnums.add(new TermsEnumIndex(termsEnum, i));
} }
} }
if (termsEnums.size() > 0) { if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices) return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(TermsEnumIndex.EMPTY_ARRAY));
.reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else { } else {
return TermsEnum.EMPTY; return TermsEnum.EMPTY;
} }

View File

@ -36,7 +36,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
new Comparator<TermsEnumWithSlice>() { new Comparator<TermsEnumWithSlice>() {
@Override @Override
public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) { public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
return o1.index - o2.index; return o1.subIndex - o2.subIndex;
} }
}; };
@ -56,17 +56,6 @@ public final class MultiTermsEnum extends BaseTermsEnum {
private int numSubs; private int numSubs;
private BytesRef current; private BytesRef current;
static class TermsEnumIndex {
public static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
this.termsEnum = termsEnum;
this.subIndex = subIndex;
}
}
/** Returns how many sub-reader slices contain the current term. @see #getMatchArray */ /** Returns how many sub-reader slices contain the current term. @see #getMatchArray */
public int getMatchCount() { public int getMatchCount() {
return numTop; return numTop;
@ -114,10 +103,10 @@ public final class MultiTermsEnum extends BaseTermsEnum {
final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i]; final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i];
assert termsEnumIndex != null; assert termsEnumIndex != null;
final BytesRef term = termsEnumIndex.termsEnum.next(); final BytesRef term = termsEnumIndex.next();
if (term != null) { if (term != null) {
final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex]; final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex];
entry.reset(termsEnumIndex.termsEnum, term); entry.reset(termsEnumIndex);
queue.add(entry); queue.add(entry);
currentSubs[numSubs++] = entry; currentSubs[numSubs++] = entry;
} else { } else {
@ -154,7 +143,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
// Doing so is a waste because this sub will simply // Doing so is a waste because this sub will simply
// seek to the same spot. // seek to the same spot.
if (seekOpt) { if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current; final BytesRef curTerm = currentSubs[i].term();
if (curTerm != null) { if (curTerm != null) {
final int cmp = term.compareTo(curTerm); final int cmp = term.compareTo(curTerm);
if (cmp == 0) { if (cmp == 0) {
@ -162,19 +151,19 @@ public final class MultiTermsEnum extends BaseTermsEnum {
} else if (cmp < 0) { } else if (cmp < 0) {
status = false; status = false;
} else { } else {
status = currentSubs[i].terms.seekExact(term); status = currentSubs[i].seekExact(term);
} }
} else { } else {
status = false; status = false;
} }
} else { } else {
status = currentSubs[i].terms.seekExact(term); status = currentSubs[i].seekExact(term);
} }
if (status) { if (status) {
top[numTop++] = currentSubs[i]; top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term(); current = currentSubs[i].term();
assert term.equals(currentSubs[i].current); assert term.equals(currentSubs[i].term());
} }
} }
@ -206,7 +195,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
// Doing so is a waste because this sub will simply // Doing so is a waste because this sub will simply
// seek to the same spot. // seek to the same spot.
if (seekOpt) { if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current; final BytesRef curTerm = currentSubs[i].term();
if (curTerm != null) { if (curTerm != null) {
final int cmp = term.compareTo(curTerm); final int cmp = term.compareTo(curTerm);
if (cmp == 0) { if (cmp == 0) {
@ -214,28 +203,25 @@ public final class MultiTermsEnum extends BaseTermsEnum {
} else if (cmp < 0) { } else if (cmp < 0) {
status = SeekStatus.NOT_FOUND; status = SeekStatus.NOT_FOUND;
} else { } else {
status = currentSubs[i].terms.seekCeil(term); status = currentSubs[i].seekCeil(term);
} }
} else { } else {
status = SeekStatus.END; status = SeekStatus.END;
} }
} else { } else {
status = currentSubs[i].terms.seekCeil(term); status = currentSubs[i].seekCeil(term);
} }
if (status == SeekStatus.FOUND) { if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i]; top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term(); current = currentSubs[i].term();
queue.add(currentSubs[i]); queue.add(currentSubs[i]);
} else { } else {
if (status == SeekStatus.NOT_FOUND) { if (status == SeekStatus.NOT_FOUND) {
currentSubs[i].current = currentSubs[i].terms.term(); assert currentSubs[i].term() != null;
assert currentSubs[i].current != null;
queue.add(currentSubs[i]); queue.add(currentSubs[i]);
} else { } else {
assert status == SeekStatus.END; assert status == SeekStatus.END;
// enum exhausted
currentSubs[i].current = null;
} }
} }
} }
@ -269,15 +255,14 @@ public final class MultiTermsEnum extends BaseTermsEnum {
// top term // top term
assert numTop == 0; assert numTop == 0;
numTop = queue.fillTop(top); numTop = queue.fillTop(top);
current = top[0].current; current = top[0].term();
} }
private void pushTop() throws IOException { private void pushTop() throws IOException {
// call next() on each top, and reorder queue // call next() on each top, and reorder queue
for (int i = 0; i < numTop; i++) { for (int i = 0; i < numTop; i++) {
TermsEnumWithSlice top = queue.top(); TermsEnumWithSlice top = queue.top();
top.current = top.terms.next(); if (top.next() == null) {
if (top.current == null) {
queue.pop(); queue.pop();
} else { } else {
queue.updateTop(); queue.updateTop();
@ -320,7 +305,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
public int docFreq() throws IOException { public int docFreq() throws IOException {
int sum = 0; int sum = 0;
for (int i = 0; i < numTop; i++) { for (int i = 0; i < numTop; i++) {
sum += top[i].terms.docFreq(); sum += top[i].termsEnum.docFreq();
} }
return sum; return sum;
} }
@ -329,7 +314,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
public long totalTermFreq() throws IOException { public long totalTermFreq() throws IOException {
long sum = 0; long sum = 0;
for (int i = 0; i < numTop; i++) { for (int i = 0; i < numTop; i++) {
final long v = top[i].terms.totalTermFreq(); final long v = top[i].termsEnum.totalTermFreq();
assert v != -1; assert v != -1;
sum += v; sum += v;
} }
@ -359,12 +344,12 @@ public final class MultiTermsEnum extends BaseTermsEnum {
final TermsEnumWithSlice entry = top[i]; final TermsEnumWithSlice entry = top[i];
assert entry.index < docsEnum.subPostingsEnums.length assert entry.subIndex < docsEnum.subPostingsEnums.length
: entry.index + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length; : entry.subIndex + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length;
final PostingsEnum subPostingsEnum = final PostingsEnum subPostingsEnum =
entry.terms.postings(docsEnum.subPostingsEnums[entry.index], flags); entry.termsEnum.postings(docsEnum.subPostingsEnums[entry.subIndex], flags);
assert subPostingsEnum != null; assert subPostingsEnum != null;
docsEnum.subPostingsEnums[entry.index] = subPostingsEnum; docsEnum.subPostingsEnums[entry.subIndex] = subPostingsEnum;
subDocs[upto].postingsEnum = subPostingsEnum; subDocs[upto].postingsEnum = subPostingsEnum;
subDocs[upto].slice = entry.subSlice; subDocs[upto].slice = entry.subSlice;
upto++; upto++;
@ -379,26 +364,18 @@ public final class MultiTermsEnum extends BaseTermsEnum {
return new SlowImpactsEnum(postings(null, flags)); return new SlowImpactsEnum(postings(null, flags));
} }
static final class TermsEnumWithSlice { static final class TermsEnumWithSlice extends TermsEnumIndex {
private final ReaderSlice subSlice; private final ReaderSlice subSlice;
TermsEnum terms;
public BytesRef current;
final int index;
public TermsEnumWithSlice(int index, ReaderSlice subSlice) { public TermsEnumWithSlice(int index, ReaderSlice subSlice) {
super(null, index);
this.subSlice = subSlice; this.subSlice = subSlice;
this.index = index;
assert subSlice.length >= 0 : "length=" + subSlice.length; assert subSlice.length >= 0 : "length=" + subSlice.length;
} }
public void reset(TermsEnum terms, BytesRef term) {
this.terms = terms;
current = term;
}
@Override @Override
public String toString() { public String toString() {
return subSlice.toString() + ":" + terms; return subSlice.toString() + ":" + super.toString();
} }
} }
@ -413,7 +390,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
@Override @Override
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) { protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
return termsA.current.compareTo(termsB.current) < 0; return termsA.compareTermTo(termsB) < 0;
} }
/** /**
@ -435,7 +412,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
final int leftChild = index << 1; final int leftChild = index << 1;
for (int child = leftChild, end = Math.min(size, leftChild + 1); child <= end; ++child) { for (int child = leftChild, end = Math.min(size, leftChild + 1); child <= end; ++child) {
TermsEnumWithSlice te = get(child); TermsEnumWithSlice te = get(child);
if (te.current.equals(tops[0].current)) { if (te.compareTermTo(tops[0]) == 0) {
tops[numTop++] = te; tops[numTop++] = te;
stack[stackLen++] = child; stack[stackLen++] = child;
} }

View File

@ -24,8 +24,6 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables; import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
@ -48,19 +46,15 @@ public class OrdinalMap implements Accountable {
// need it // need it
// TODO: use more efficient packed ints structures? // TODO: use more efficient packed ints structures?
private static class TermsEnumIndex { private static class TermsEnumPriorityQueue extends PriorityQueue<TermsEnumIndex> {
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) { TermsEnumPriorityQueue(int size) {
this.termsEnum = termsEnum; super(size);
this.subIndex = subIndex;
} }
public BytesRef next() throws IOException { @Override
currentTerm = termsEnum.next(); protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return currentTerm; return a.compareTermTo(b) < 0;
} }
} }
@ -227,13 +221,7 @@ public class OrdinalMap implements Accountable {
long[] segmentOrds = new long[subs.length]; long[] segmentOrds = new long[subs.length];
// Just merge-sorts by term: // Just merge-sorts by term:
PriorityQueue<TermsEnumIndex> queue = TermsEnumPriorityQueue queue = new TermsEnumPriorityQueue(subs.length);
new PriorityQueue<TermsEnumIndex>(subs.length) {
@Override
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return a.currentTerm.compareTo(b.currentTerm) < 0;
}
};
for (int i = 0; i < subs.length; i++) { for (int i = 0; i < subs.length; i++) {
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i); TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
@ -242,19 +230,18 @@ public class OrdinalMap implements Accountable {
} }
} }
BytesRefBuilder scratch = new BytesRefBuilder(); TermsEnumIndex.TermState topState = new TermsEnumIndex.TermState();
long globalOrd = 0; long globalOrd = 0;
while (queue.size() != 0) { while (queue.size() != 0) {
TermsEnumIndex top = queue.top(); TermsEnumIndex top = queue.top();
scratch.copyBytes(top.currentTerm); topState.copyFrom(top);
int firstSegmentIndex = Integer.MAX_VALUE; int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE; long globalOrdDelta = Long.MAX_VALUE;
// Advance past this term, recording the per-segment ord deltas: // Advance past this term, recording the per-segment ord deltas:
while (true) { while (true) {
top = queue.top();
long segmentOrd = top.termsEnum.ord(); long segmentOrd = top.termsEnum.ord();
long delta = globalOrd - segmentOrd; long delta = globalOrd - segmentOrd;
int segmentIndex = top.subIndex; int segmentIndex = top.subIndex;
@ -284,10 +271,11 @@ public class OrdinalMap implements Accountable {
if (queue.size() == 0) { if (queue.size() == 0) {
break; break;
} }
top = queue.top();
} else { } else {
queue.updateTop(); top = queue.updateTop();
} }
if (queue.top().currentTerm.equals(scratch.get()) == false) { if (top.termEquals(topState) == false) {
break; break;
} }
} }

View File

@ -18,8 +18,6 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -555,8 +553,6 @@ final class ReadersAndUpdates {
FieldInfos fieldInfos = null; FieldInfos fieldInfos = null;
boolean any = false; boolean any = false;
for (List<DocValuesFieldUpdates> updates : pendingDVUpdates.values()) { for (List<DocValuesFieldUpdates> updates : pendingDVUpdates.values()) {
// Sort by increasing delGen:
Collections.sort(updates, Comparator.comparingLong(a -> a.delGen));
for (DocValuesFieldUpdates update : updates) { for (DocValuesFieldUpdates update : updates) {
if (update.delGen <= maxDelGen && update.any()) { if (update.delGen <= maxDelGen && update.any()) {
any = true; any = true;

View File

@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
/**
* Wrapper around a {@link TermsEnum} and an integer that identifies it. All operations that move
* the current position of the {@link TermsEnum} must be performed via this wrapper class, not
* directly on the wrapped {@link TermsEnum}.
*/
class TermsEnumIndex {

  /** Shared empty array, avoids an allocation when a merge/union has no sub-enums. */
  static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];

  /**
   * Copy the first 8 bytes of the given term as a comparable unsigned long. In case the term has
   * less than 8 bytes, missing bytes will be replaced with zeroes. Note that two terms that produce
   * the same long could still be different due to the fact that missing bytes are replaced with
   * zeroes, e.g. {@code [1, 0]} and {@code [1]} get mapped to the same long.
   */
  static long prefix8ToComparableUnsignedLong(BytesRef term) {
    // Use Big Endian so that longs are comparable
    if (term.length >= Long.BYTES) {
      // Fast path: read 8 bytes at once via the big-endian VarHandle view.
      return (long) BitUtil.VH_BE_LONG.get(term.bytes, term.offset);
    } else {
      // Slow path: assemble the prefix from at most an int, a short and a byte,
      // then left-align it so shorter terms compare as if zero-padded.
      long l;
      int o;
      if (Integer.BYTES <= term.length) {
        l = (int) BitUtil.VH_BE_INT.get(term.bytes, term.offset);
        o = Integer.BYTES;
      } else {
        l = 0;
        o = 0;
      }
      if (o + Short.BYTES <= term.length) {
        l =
            (l << Short.SIZE)
                | Short.toUnsignedLong(
                    (short) BitUtil.VH_BE_SHORT.get(term.bytes, term.offset + o));
        o += Short.BYTES;
      }
      if (o < term.length) {
        l = (l << Byte.SIZE) | Byte.toUnsignedLong(term.bytes[term.offset + o]);
      }
      // Left-align: shift the payload into the high bits. Any sign-extension bits from the
      // (int) read end up shifted out, since the payload always lands in the top bytes.
      l <<= (Long.BYTES - term.length) << 3;
      return l;
    }
  }

  /** Identifies this sub within a collection of wrapped enums (e.g. a merge). */
  final int subIndex;
  TermsEnum termsEnum;
  // Cached current term of the wrapped enum, kept in sync by setTerm().
  private BytesRef currentTerm;
  // Comparable 8-byte prefix of currentTerm; 0 when currentTerm is null.
  private long currentTermPrefix8;

  TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
    this.termsEnum = termsEnum;
    this.subIndex = subIndex;
  }

  /** Returns the cached current term, or null if the enum is exhausted/unpositioned. */
  BytesRef term() {
    return currentTerm;
  }

  // Central place that keeps the cached term and its 8-byte prefix consistent.
  private void setTerm(BytesRef term) {
    currentTerm = term;
    if (currentTerm == null) {
      currentTermPrefix8 = 0;
    } else {
      currentTermPrefix8 = prefix8ToComparableUnsignedLong(currentTerm);
    }
  }

  /** Advances the wrapped enum and updates the cached term. */
  BytesRef next() throws IOException {
    BytesRef term = termsEnum.next();
    setTerm(term);
    return term;
  }

  /** Seeks to the smallest term &gt;= {@code term}; caches null when the enum is exhausted. */
  SeekStatus seekCeil(BytesRef term) throws IOException {
    SeekStatus status = termsEnum.seekCeil(term);
    if (status == SeekStatus.END) {
      setTerm(null);
    } else {
      setTerm(termsEnum.term());
    }
    return status;
  }

  /** Exact seek; caches the term on success, null on miss. */
  boolean seekExact(BytesRef term) throws IOException {
    boolean found = termsEnum.seekExact(term);
    if (found) {
      setTerm(termsEnum.term());
    } else {
      setTerm(null);
    }
    return found;
  }

  /** Ordinal-based exact seek (always positions the enum). */
  void seekExact(long ord) throws IOException {
    termsEnum.seekExact(ord);
    setTerm(termsEnum.term());
  }

  /** Adopts another wrapper's enum and cached position (the subIndex is unchanged). */
  void reset(TermsEnumIndex tei) throws IOException {
    termsEnum = tei.termsEnum;
    currentTerm = tei.currentTerm;
    currentTermPrefix8 = tei.currentTermPrefix8;
  }

  /**
   * Compares the current terms of two wrappers. Uses the cached 8-byte prefixes as a cheap
   * first pass; falls back to a full unsigned byte comparison only on a prefix tie.
   */
  int compareTermTo(TermsEnumIndex that) {
    if (currentTermPrefix8 != that.currentTermPrefix8) {
      int cmp = Long.compareUnsigned(currentTermPrefix8, that.currentTermPrefix8);
      // Sanity: the prefix comparison must agree with the full byte-wise comparison.
      assert Integer.signum(cmp)
          == Integer.signum(
              Arrays.compareUnsigned(
                  currentTerm.bytes,
                  currentTerm.offset,
                  currentTerm.offset + currentTerm.length,
                  that.currentTerm.bytes,
                  that.currentTerm.offset,
                  that.currentTerm.offset + that.currentTerm.length));
      return cmp;
    }
    return Arrays.compareUnsigned(
        currentTerm.bytes,
        currentTerm.offset,
        currentTerm.offset + currentTerm.length,
        that.currentTerm.bytes,
        that.currentTerm.offset,
        that.currentTerm.offset + that.currentTerm.length);
  }

  @Override
  public String toString() {
    return Objects.toString(termsEnum);
  }

  /** Wrapper around a term that allows for quick equals comparisons. */
  static class TermState {
    // Private copy of the term bytes (owned by this TermState).
    private final BytesRefBuilder term = new BytesRefBuilder();
    // Cached comparable 8-byte prefix of the copied term.
    private long termPrefix8;

    /** Snapshots the given wrapper's current term and prefix. */
    void copyFrom(TermsEnumIndex tei) {
      term.copyBytes(tei.term());
      termPrefix8 = tei.currentTermPrefix8;
    }
  }

  /** Fast equality check of the current term against a snapshot; prefix mismatch short-circuits. */
  boolean termEquals(TermState that) {
    if (currentTermPrefix8 != that.termPrefix8) {
      return false;
    }
    return Arrays.equals(
        currentTerm.bytes,
        currentTerm.offset,
        currentTerm.offset + currentTerm.length,
        that.term.bytes(),
        0,
        that.term.length());
  }
}

View File

@ -21,8 +21,6 @@ import java.lang.Runtime.Version;
import java.lang.StackWalker.StackFrame; import java.lang.StackWalker.StackFrame;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType; import java.lang.invoke.MethodType;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Locale; import java.util.Locale;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
@ -31,7 +29,7 @@ import java.util.Set;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.Constants;
import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.VectorUtil;
/** /**
@ -129,7 +127,7 @@ public abstract class VectorizationProvider {
"Vector bitsize and/or integer vectors enforcement; using default vectorization provider outside of testMode"); "Vector bitsize and/or integer vectors enforcement; using default vectorization provider outside of testMode");
return new DefaultVectorizationProvider(); return new DefaultVectorizationProvider();
} }
if (isClientVM()) { if (Constants.IS_CLIENT_VM) {
LOG.warning("C2 compiler is disabled; Java vector incubator API can't be enabled"); LOG.warning("C2 compiler is disabled; Java vector incubator API can't be enabled");
return new DefaultVectorizationProvider(); return new DefaultVectorizationProvider();
} }
@ -188,23 +186,6 @@ public abstract class VectorizationProvider {
&& !Objects.equals("I", "i".toUpperCase(Locale.getDefault())); && !Objects.equals("I", "i".toUpperCase(Locale.getDefault()));
} }
@SuppressWarnings("removal")
@SuppressForbidden(reason = "security manager")
private static boolean isClientVM() {
try {
final PrivilegedAction<Boolean> action =
() -> System.getProperty("java.vm.info", "").contains("emulated-client");
return AccessController.doPrivileged(action);
} catch (
@SuppressWarnings("unused")
SecurityException e) {
LOG.warning(
"SecurityManager denies permission to 'java.vm.info' system property, so state of C2 compiler can't be detected. "
+ "In case of performance issues allow access to this property.");
return false;
}
}
// add all possible callers here as FQCN: // add all possible callers here as FQCN:
private static final Set<String> VALID_CALLERS = Set.of("org.apache.lucene.util.VectorUtil"); private static final Set<String> VALID_CALLERS = Set.of("org.apache.lucene.util.VectorUtil");

View File

@ -475,7 +475,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
private class CompetitiveIterator extends DocIdSetIterator { private class CompetitiveIterator extends DocIdSetIterator {
private static final int MAX_TERMS = 128; private static final int MAX_TERMS = 1024;
private final LeafReaderContext context; private final LeafReaderContext context;
private final int maxDoc; private final int maxDoc;

View File

@ -16,18 +16,25 @@
*/ */
package org.apache.lucene.util; package org.apache.lucene.util;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Objects;
import java.util.logging.Logger;
/** Some useful constants. */ /** Some useful constants. */
public final class Constants { public final class Constants {
private Constants() {} // can't construct private Constants() {} // can't construct
private static final String UNKNOWN = "Unknown";
/** JVM vendor info. */ /** JVM vendor info. */
public static final String JVM_VENDOR = System.getProperty("java.vm.vendor"); public static final String JVM_VENDOR = getSysProp("java.vm.vendor", UNKNOWN);
/** JVM vendor name. */ /** JVM vendor name. */
public static final String JVM_NAME = System.getProperty("java.vm.name"); public static final String JVM_NAME = getSysProp("java.vm.name", UNKNOWN);
/** The value of <code>System.getProperty("os.name")</code>. * */ /** The value of <code>System.getProperty("os.name")</code>. * */
public static final String OS_NAME = System.getProperty("os.name"); public static final String OS_NAME = getSysProp("os.name", UNKNOWN);
/** True iff running on Linux. */ /** True iff running on Linux. */
public static final boolean LINUX = OS_NAME.startsWith("Linux"); public static final boolean LINUX = OS_NAME.startsWith("Linux");
@ -45,36 +52,67 @@ public final class Constants {
public static final boolean FREE_BSD = OS_NAME.startsWith("FreeBSD"); public static final boolean FREE_BSD = OS_NAME.startsWith("FreeBSD");
/** The value of <code>System.getProperty("os.arch")</code>. */ /** The value of <code>System.getProperty("os.arch")</code>. */
public static final String OS_ARCH = System.getProperty("os.arch"); public static final String OS_ARCH = getSysProp("os.arch", UNKNOWN);
/** The value of <code>System.getProperty("os.version")</code>. */ /** The value of <code>System.getProperty("os.version")</code>. */
public static final String OS_VERSION = System.getProperty("os.version"); public static final String OS_VERSION = getSysProp("os.version", UNKNOWN);
/** The value of <code>System.getProperty("java.vendor")</code>. */ /** The value of <code>System.getProperty("java.vendor")</code>. */
public static final String JAVA_VENDOR = System.getProperty("java.vendor"); public static final String JAVA_VENDOR = getSysProp("java.vendor", UNKNOWN);
/** True iff the Java runtime is a client runtime and C2 compiler is not enabled */
public static final boolean IS_CLIENT_VM =
getSysProp("java.vm.info", "").contains("emulated-client");
/** True iff running on a 64bit JVM */ /** True iff running on a 64bit JVM */
public static final boolean JRE_IS_64BIT; public static final boolean JRE_IS_64BIT = is64Bit();
static { /** true iff we know fast FMA is supported, to deliver less error */
boolean is64Bit = false; public static final boolean HAS_FAST_FMA =
String datamodel = null; (IS_CLIENT_VM == false)
try { && Objects.equals(OS_ARCH, "amd64")
datamodel = System.getProperty("sun.arch.data.model"); && HotspotVMOptions.get("UseFMA").map(Boolean::valueOf).orElse(false);
private static boolean is64Bit() {
final String datamodel = getSysProp("sun.arch.data.model");
if (datamodel != null) { if (datamodel != null) {
is64Bit = datamodel.contains("64"); return datamodel.contains("64");
} else {
return (OS_ARCH != null && OS_ARCH.contains("64"));
} }
}
private static String getSysProp(String property) {
try {
return doPrivileged(() -> System.getProperty(property));
} catch ( } catch (
@SuppressWarnings("unused") @SuppressWarnings("unused")
SecurityException ex) { SecurityException se) {
} logSecurityWarning(property);
if (datamodel == null) { return null;
if (OS_ARCH != null && OS_ARCH.contains("64")) {
is64Bit = true;
} else {
is64Bit = false;
} }
} }
JRE_IS_64BIT = is64Bit;
private static String getSysProp(String property, String def) {
try {
return doPrivileged(() -> System.getProperty(property, def));
} catch (
@SuppressWarnings("unused")
SecurityException se) {
logSecurityWarning(property);
return def;
}
}
private static void logSecurityWarning(String property) {
var log = Logger.getLogger(Constants.class.getName());
log.warning("SecurityManager prevented access to system property: " + property);
}
// Extracted to a method to be able to apply the SuppressForbidden annotation
@SuppressWarnings("removal")
@SuppressForbidden(reason = "security manager")
private static <T> T doPrivileged(PrivilegedAction<T> action) {
return AccessController.doPrivileged(action);
} }
} }

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.lang.reflect.Method;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.logging.Logger;
/** Accessor to get Hotspot VM Options (if available). */
/** Accessor to get Hotspot VM Options (if available). */
final class HotspotVMOptions {
  private HotspotVMOptions() {} // can't construct

  /** True if the Java VM is based on Hotspot and has the Hotspot MX bean readable by Lucene */
  public static final boolean IS_HOTSPOT;

  /**
   * Returns an optional with the value of a Hotspot VM option. If the VM option does not exist or
   * is not readable, returns an empty optional.
   */
  public static Optional<String> get(String name) {
    return ACCESSOR.apply(Objects.requireNonNull(name, "name"));
  }

  // Class names are looked up reflectively because the management modules may be absent
  // or unreadable at runtime (e.g. non-Hotspot JVMs, missing 'jdk.management' module).
  private static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
  private static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";

  // Maps an option name to its (optional) value; bound once in the static initializer.
  private static final Function<String, Optional<String>> ACCESSOR;

  static {
    // Defaults used when the Hotspot bean cannot be reached: not Hotspot, always-empty accessor.
    boolean isHotspot = false;
    Function<String, Optional<String>> accessor = name -> Optional.empty();
    try {
      final Class<?> beanClazz = Class.forName(HOTSPOT_BEAN_CLASS);
      // we use reflection for this, because the management factory is not part
      // of java.base module:
      final Object hotSpotBean =
          Class.forName(MANAGEMENT_FACTORY_CLASS)
              .getMethod("getPlatformMXBean", Class.class)
              .invoke(null, beanClazz);
      if (hotSpotBean != null) {
        // Resolve getVMOption(String) and VMOption#getValue() once; the accessor lambda
        // captures the resolved Method handles and the bean instance.
        final Method getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
        final Method getValueMethod = getVMOptionMethod.getReturnType().getMethod("getValue");
        isHotspot = true;
        accessor =
            name -> {
              try {
                final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, name);
                return Optional.of(getValueMethod.invoke(vmOption).toString());
              } catch (@SuppressWarnings("unused")
                  ReflectiveOperationException
                  | RuntimeException e) {
                // Unknown or unreadable option: report "not available" rather than failing.
                return Optional.empty();
              }
            };
      }
    } catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
      // Bootstrap failed entirely; log a hint about the most likely cause.
      isHotspot = false;
      final Logger log = Logger.getLogger(HotspotVMOptions.class.getName());
      final Module module = HotspotVMOptions.class.getModule();
      final ModuleLayer layer = module.getLayer();
      // classpath / unnamed module has no layer, so we need to check:
      if (layer != null
          && layer.findModule("jdk.management").map(module::canRead).orElse(false) == false) {
        log.warning(
            "Lucene cannot access JVM internals to optimize algorithms or calculate object sizes, unless the 'jdk.management' Java module "
                + "is readable [please add 'jdk.management' to modular application either by command line or its module descriptor].");
      } else {
        log.warning(
            "Lucene cannot optimize algorithms or calculate object sizes for JVMs that are not based on Hotspot or a compatible implementation.");
      }
    }
    IS_HOTSPOT = isHotspot;
    ACCESSOR = accessor;
  }
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.util;
import java.lang.reflect.Array; import java.lang.reflect.Array;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.security.AccessControlException; import java.security.AccessControlException;
import java.security.AccessController; import java.security.AccessController;
@ -30,7 +29,6 @@ import java.util.Collections;
import java.util.IdentityHashMap; import java.util.IdentityHashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.logging.Logger;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -112,64 +110,16 @@ public final class RamUsageEstimator {
/** For testing only */ /** For testing only */
static final boolean JVM_IS_HOTSPOT_64BIT; static final boolean JVM_IS_HOTSPOT_64BIT;
static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";
/** Initialize constants and try to collect information about the JVM internals. */ /** Initialize constants and try to collect information about the JVM internals. */
static { static {
if (Constants.JRE_IS_64BIT) { if (Constants.JRE_IS_64BIT && HotspotVMOptions.IS_HOTSPOT) {
// Try to get compressed oops and object alignment (the default seems to be 8 on Hotspot); // Try to get compressed oops and object alignment (the default seems to be 8 on Hotspot);
// (this only works on 64 bit, on 32 bits the alignment and reference size is fixed): // (this only works on 64 bit, on 32 bits the alignment and reference size is fixed):
boolean compressedOops = false; JVM_IS_HOTSPOT_64BIT = true;
int objectAlignment = 8; COMPRESSED_REFS_ENABLED =
boolean isHotspot = false; HotspotVMOptions.get("UseCompressedOops").map(Boolean::valueOf).orElse(false);
try { NUM_BYTES_OBJECT_ALIGNMENT =
final Class<?> beanClazz = Class.forName(HOTSPOT_BEAN_CLASS); HotspotVMOptions.get("ObjectAlignmentInBytes").map(Integer::valueOf).orElse(8);
// we use reflection for this, because the management factory is not part
// of Java 8's compact profile:
final Object hotSpotBean =
Class.forName(MANAGEMENT_FACTORY_CLASS)
.getMethod("getPlatformMXBean", Class.class)
.invoke(null, beanClazz);
if (hotSpotBean != null) {
isHotspot = true;
final Method getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
try {
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "UseCompressedOops");
compressedOops =
Boolean.parseBoolean(
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
isHotspot = false;
}
try {
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "ObjectAlignmentInBytes");
objectAlignment =
Integer.parseInt(
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
isHotspot = false;
}
}
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
isHotspot = false;
final Logger log = Logger.getLogger(RamUsageEstimator.class.getName());
final Module module = RamUsageEstimator.class.getModule();
final ModuleLayer layer = module.getLayer();
// classpath / unnamed module has no layer, so we need to check:
if (layer != null
&& layer.findModule("jdk.management").map(module::canRead).orElse(false) == false) {
log.warning(
"Lucene cannot correctly calculate object sizes on 64bit JVMs, unless the 'jdk.management' Java module "
+ "is readable [please add 'jdk.management' to modular application either by command line or its module descriptor]");
} else {
log.warning(
"Lucene cannot correctly calculate object sizes on 64bit JVMs that are not based on Hotspot or a compatible implementation.");
}
}
JVM_IS_HOTSPOT_64BIT = isHotspot;
COMPRESSED_REFS_ENABLED = compressedOops;
NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
// reference size is 4, if we have compressed oops: // reference size is 4, if we have compressed oops:
NUM_BYTES_OBJECT_REF = COMPRESSED_REFS_ENABLED ? 4 : 8; NUM_BYTES_OBJECT_REF = COMPRESSED_REFS_ENABLED ? 4 : 8;
// "best guess" based on reference size: // "best guess" based on reference size:

View File

@ -1128,6 +1128,10 @@ public class RegExp {
if (start != pos) m = Integer.parseInt(originalString.substring(start, pos)); if (start != pos) m = Integer.parseInt(originalString.substring(start, pos));
} else m = n; } else m = n;
if (!match('}')) throw new IllegalArgumentException("expected '}' at position " + pos); if (!match('}')) throw new IllegalArgumentException("expected '}' at position " + pos);
if (m != -1 && n > m) {
throw new IllegalArgumentException(
"invalid repetition range(out of order): " + n + ".." + m);
}
if (m == -1) e = makeRepeat(flags, e, n); if (m == -1) e = makeRepeat(flags, e, n);
else e = makeRepeat(flags, e, n, m); else e = makeRepeat(flags, e, n, m);
} }

View File

@ -270,10 +270,6 @@ public class FSTCompiler<T> {
return directAddressingMaxOversizingFactor; return directAddressingMaxOversizingFactor;
} }
public long getTermCount() {
return frontier[0].inputCount;
}
public long getNodeCount() { public long getNodeCount() {
// 1+ in order to count the -1 implicit final node // 1+ in order to count the -1 implicit final node
return 1 + nodeCount; return 1 + nodeCount;
@ -749,7 +745,6 @@ public class FSTCompiler<T> {
// format cannot represent the empty input since // format cannot represent the empty input since
// 'finalness' is stored on the incoming arc, not on // 'finalness' is stored on the incoming arc, not on
// the node // the node
frontier[0].inputCount++;
frontier[0].isFinal = true; frontier[0].isFinal = true;
fst.setEmptyOutput(output); fst.setEmptyOutput(output);
return; return;
@ -760,9 +755,6 @@ public class FSTCompiler<T> {
int pos2 = input.offset; int pos2 = input.offset;
final int pos1Stop = Math.min(lastInput.length(), input.length); final int pos1Stop = Math.min(lastInput.length(), input.length);
while (true) { while (true) {
frontier[pos1].inputCount++;
// System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" +
// frontier[pos1]);
if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) { if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
break; break;
} }
@ -786,7 +778,6 @@ public class FSTCompiler<T> {
// init tail states for current input // init tail states for current input
for (int idx = prefixLenPlus1; idx <= input.length; idx++) { for (int idx = prefixLenPlus1; idx <= input.length; idx++) {
frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]); frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]);
frontier[idx].inputCount++;
} }
final UnCompiledNode<T> lastNode = frontier[input.length]; final UnCompiledNode<T> lastNode = frontier[input.length];
@ -835,8 +826,6 @@ public class FSTCompiler<T> {
// save last input // save last input
lastInput.copyInts(input); lastInput.copyInts(input);
// System.out.println(" count[0]=" + frontier[0].inputCount);
} }
private boolean validOutput(T output) { private boolean validOutput(T output) {
@ -906,10 +895,6 @@ public class FSTCompiler<T> {
T output; T output;
boolean isFinal; boolean isFinal;
// TODO: remove this tracking? we used to use it for confusingly pruning NodeHash, but
// we switched to LRU by RAM usage instead:
long inputCount;
/** This node's depth, starting from the automaton root. */ /** This node's depth, starting from the automaton root. */
final int depth; final int depth;
@ -935,7 +920,6 @@ public class FSTCompiler<T> {
numArcs = 0; numArcs = 0;
isFinal = false; isFinal = false;
output = owner.NO_OUTPUT; output = owner.NO_OUTPUT;
inputCount = 0;
// We don't clear the depth here because it never changes // We don't clear the depth here because it never changes
// for nodes on the frontier (even when reused). // for nodes on the frontier (even when reused).

View File

@ -77,41 +77,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
VectorizationProvider.TESTS_FORCE_INTEGER_VECTORS || (isAMD64withoutAVX2 == false); VectorizationProvider.TESTS_FORCE_INTEGER_VECTORS || (isAMD64withoutAVX2 == false);
} }
private static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
private static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";
// best effort to see if FMA is fast (this is architecture-independent option)
private static boolean hasFastFMA() {
// on ARM cpus, FMA works fine but is a slight slowdown: don't use it.
if (Constants.OS_ARCH.equals("amd64") == false) {
return false;
}
try {
final Class<?> beanClazz = Class.forName(HOTSPOT_BEAN_CLASS);
// we use reflection for this, because the management factory is not part
// of Java 8's compact profile:
final Object hotSpotBean =
Class.forName(MANAGEMENT_FACTORY_CLASS)
.getMethod("getPlatformMXBean", Class.class)
.invoke(null, beanClazz);
if (hotSpotBean != null) {
final var getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "UseFMA");
return Boolean.parseBoolean(
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
}
return false;
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
return false;
}
}
// true if we know FMA is supported, to deliver less error
static final boolean HAS_FAST_FMA = hasFastFMA();
// the way FMA should work! if available use it, otherwise fall back to mul/add // the way FMA should work! if available use it, otherwise fall back to mul/add
private static FloatVector fma(FloatVector a, FloatVector b, FloatVector c) { private static FloatVector fma(FloatVector a, FloatVector b, FloatVector c) {
if (HAS_FAST_FMA) { if (Constants.HAS_FAST_FMA) {
return a.fma(b, c); return a.fma(b, c);
} else { } else {
return a.mul(b).add(c); return a.mul(b).add(c);

View File

@ -21,6 +21,7 @@ import java.security.PrivilegedAction;
import java.util.Locale; import java.util.Locale;
import java.util.logging.Logger; import java.util.logging.Logger;
import jdk.incubator.vector.FloatVector; import jdk.incubator.vector.FloatVector;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.SuppressForbidden;
/** A vectorization provider that leverages the Panama Vector API. */ /** A vectorization provider that leverages the Panama Vector API. */
@ -62,7 +63,7 @@ final class PanamaVectorizationProvider extends VectorizationProvider {
Locale.ENGLISH, Locale.ENGLISH,
"Java vector incubator API enabled; uses preferredBitSize=%d%s%s", "Java vector incubator API enabled; uses preferredBitSize=%d%s%s",
PanamaVectorUtilSupport.VECTOR_BITSIZE, PanamaVectorUtilSupport.VECTOR_BITSIZE,
PanamaVectorUtilSupport.HAS_FAST_FMA ? "; FMA enabled" : "", Constants.HAS_FAST_FMA ? "; FMA enabled" : "",
PanamaVectorUtilSupport.HAS_FAST_INTEGER_VECTORS PanamaVectorUtilSupport.HAS_FAST_INTEGER_VECTORS
? "" ? ""
: "; floating-point vectors only")); : "; floating-point vectors only"));

View File

@ -459,7 +459,8 @@ public class TestDeletionPolicy extends LuceneTestCase {
dir, dir,
newIndexWriterConfig(new MockAnalyzer(random())) newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy) .setIndexDeletionPolicy(policy)
.setIndexCommit(lastCommit)); .setIndexCommit(lastCommit)
.setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs); assertEquals(10, writer.getDocStats().numDocs);
// Should undo our rollback: // Should undo our rollback:
@ -476,12 +477,13 @@ public class TestDeletionPolicy extends LuceneTestCase {
dir, dir,
newIndexWriterConfig(new MockAnalyzer(random())) newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy) .setIndexDeletionPolicy(policy)
.setIndexCommit(lastCommit)); .setIndexCommit(lastCommit)
.setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs); assertEquals(10, writer.getDocStats().numDocs);
// Commits the rollback: // Commits the rollback:
writer.close(); writer.close();
// Now 8 because we made another commit // Now 7 because we made another commit
assertEquals(7, DirectoryReader.listCommits(dir).size()); assertEquals(7, DirectoryReader.listCommits(dir).size());
r = DirectoryReader.open(dir); r = DirectoryReader.open(dir);
@ -507,7 +509,10 @@ public class TestDeletionPolicy extends LuceneTestCase {
// but this time keeping only the last commit: // but this time keeping only the last commit:
writer = writer =
new IndexWriter( new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit)); dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexCommit(lastCommit)
.setMergePolicy(newLogMergePolicy(10)));
assertEquals(10, writer.getDocStats().numDocs); assertEquals(10, writer.getDocStats().numDocs);
// Reader still sees fully merged index, because writer // Reader still sees fully merged index, because writer

View File

@ -2395,11 +2395,12 @@ public class TestIndexWriter extends LuceneTestCase {
writer.addDocument(doc); writer.addDocument(doc);
assertTrue(writer.hasUncommittedChanges()); assertTrue(writer.hasUncommittedChanges());
// Must commit, waitForMerges, commit again, to be // Must commit and wait for merges as long as the commit triggers merges to be certain that
// certain that hasUncommittedChanges returns false: // hasUncommittedChanges returns false
writer.commit(); do {
writer.waitForMerges(); writer.waitForMerges();
writer.commit(); writer.commit();
} while (writer.hasPendingMerges());
assertFalse(writer.hasUncommittedChanges()); assertFalse(writer.hasUncommittedChanges());
writer.deleteDocuments(new Term("id", "xyz")); writer.deleteDocuments(new Term("id", "xyz"));
assertTrue(writer.hasUncommittedChanges()); assertTrue(writer.hasUncommittedChanges());

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
public class TestTermsEnumIndex extends LuceneTestCase {

  /**
   * Verifies {@link TermsEnumIndex#prefix8ToComparableUnsignedLong} against a straightforward
   * reference implementation for the empty term and for lengths 1 through 9 (lengths beyond 8
   * must truncate to the first 8 bytes).
   */
  public void testPrefix8ToComparableUnsignedLong() {
    byte[] bytes = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
    // Empty term maps to zero.
    assertEquals(
        referencePrefix8(bytes, 1, 0),
        TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(bytes, 1, 0)));
    // Lengths 1..9 at offset 3; length 9 must produce the same prefix as length 8.
    for (int length = 1; length <= 9; length++) {
      assertEquals(
          referencePrefix8(bytes, 3, length),
          TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(bytes, 3, length)));
    }
  }

  /**
   * Reference implementation: the first {@code min(length, 8)} bytes interpreted as an unsigned
   * big-endian value, left-aligned (zero-padded at the low end) in the returned long.
   */
  private static long referencePrefix8(byte[] bytes, int offset, int length) {
    int used = Math.min(length, Long.BYTES);
    long value = 0;
    for (int i = 0; i < used; i++) {
      value = (value << Byte.SIZE) | (bytes[offset + i] & 0xFFL);
    }
    return value << ((Long.BYTES - used) * Byte.SIZE);
  }
}

View File

@ -86,6 +86,17 @@ public class TestRegExp extends LuceneTestCase {
} }
} }
public void testParseIllegalRepeatExp() {
// out of order
IllegalArgumentException expected =
expectThrows(
IllegalArgumentException.class,
() -> {
new RegExp("a{99,11}");
});
assertTrue(expected.getMessage().contains("out of order"));
}
static String randomDocValue(int minLength) { static String randomDocValue(int minLength) {
String charPalette = "AAAaaaBbbCccc123456 \t"; String charPalette = "AAAaaaBbbCccc123456 \t";
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();

View File

@ -56,7 +56,7 @@ public class Test2BFST extends LuceneTestCase {
for (int iter = 0; iter < 1; iter++) { for (int iter = 0; iter < 1; iter++) {
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
{ {
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS"); System.out.println("\nTEST: ~2.2B nodes; output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton(); Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput(); Object NO_OUTPUT = outputs.getNoOutput();
final FSTCompiler<Object> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); final FSTCompiler<Object> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

View File

@ -568,7 +568,6 @@ public class TestFSTs extends LuceneTestCase {
System.out.println( System.out.println(
((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms"); ((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms");
assert fstCompiler.getTermCount() == ord;
FST<T> fst = fstCompiler.compile(); FST<T> fst = fstCompiler.compile();
long tEnd = System.nanoTime(); long tEnd = System.nanoTime();
System.out.println( System.out.println(

View File

@ -38,8 +38,8 @@ public final class URLLabel extends JLabel {
super(text); super(text);
try { try {
this.link = (URI.create(text)).toURL(); this.link = (new URI(text)).toURL();
} catch (MalformedURLException e) { } catch (URISyntaxException | MalformedURLException e) {
throw new LukeException(e.getMessage(), e); throw new LukeException(e.getMessage(), e);
} }

View File

@ -476,7 +476,12 @@ public abstract class LuceneTestCase extends Assert {
* of iterations to scale your tests (for nightly builds). * of iterations to scale your tests (for nightly builds).
*/ */
public static final int RANDOM_MULTIPLIER = public static final int RANDOM_MULTIPLIER =
systemPropertyAsInt("tests.multiplier", TEST_NIGHTLY ? 2 : 1); systemPropertyAsInt("tests.multiplier", defaultRandomMultiplier());
/** Compute the default value of the random multiplier (based on {@link #TEST_NIGHTLY}). */
static int defaultRandomMultiplier() {
return TEST_NIGHTLY ? 2 : 1;
}
/** Leave temporary files on disk, even on successful runs. */ /** Leave temporary files on disk, even on successful runs. */
public static final boolean LEAVE_TEMPORARY; public static final boolean LEAVE_TEMPORARY;

View File

@ -189,7 +189,8 @@ public final class RunListenerPrintReproduceInfo extends RunListener {
addVmOpt(b, "tests.seed", RandomizedContext.current().getRunnerSeedAsString()); addVmOpt(b, "tests.seed", RandomizedContext.current().getRunnerSeedAsString());
// Test groups and multipliers. // Test groups and multipliers.
if (RANDOM_MULTIPLIER > 1) addVmOpt(b, "tests.multiplier", RANDOM_MULTIPLIER); if (RANDOM_MULTIPLIER != LuceneTestCase.defaultRandomMultiplier())
addVmOpt(b, "tests.multiplier", RANDOM_MULTIPLIER);
if (TEST_NIGHTLY) addVmOpt(b, SYSPROP_NIGHTLY, TEST_NIGHTLY); if (TEST_NIGHTLY) addVmOpt(b, SYSPROP_NIGHTLY, TEST_NIGHTLY);
if (TEST_WEEKLY) addVmOpt(b, SYSPROP_WEEKLY, TEST_WEEKLY); if (TEST_WEEKLY) addVmOpt(b, SYSPROP_WEEKLY, TEST_WEEKLY);
if (TEST_MONSTER) addVmOpt(b, SYSPROP_MONSTER, TEST_MONSTER); if (TEST_MONSTER) addVmOpt(b, SYSPROP_MONSTER, TEST_MONSTER);