mirror of https://github.com/apache/lucene.git
Merge branch 'main' into java_21
This commit is contained in:
commit
ef1db18096
|
@ -27,6 +27,7 @@ import java.nio.file.Paths;
|
|||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Split the Reuters SGML documents into Simple Text files containing:
|
||||
|
@ -44,9 +45,10 @@ public class ExtractReuters {
|
|||
public void extract() throws IOException {
|
||||
long count = 0;
|
||||
Files.createDirectories(outputDir);
|
||||
|
||||
if (Files.list(outputDir).count() > 0) {
|
||||
throw new IOException("The output directory must be empty: " + outputDir);
|
||||
try(Stream<Path> files = Files.list(outputDir)) {
|
||||
if (files.count() > 0) {
|
||||
throw new IOException("The output directory must be empty: " + outputDir);
|
||||
}
|
||||
}
|
||||
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(reutersDir, "*.sgm")) {
|
||||
|
|
|
@ -63,7 +63,6 @@ except:
|
|||
import scriptutil
|
||||
from consolemenu import ConsoleMenu
|
||||
from consolemenu.items import FunctionItem, SubmenuItem, ExitItem
|
||||
from consolemenu.screen import Screen
|
||||
from scriptutil import BranchType, Version, download, run
|
||||
|
||||
# Lucene-to-Java version mapping
|
||||
|
@ -654,8 +653,8 @@ class TodoGroup(SecretYamlObject):
|
|||
return "%s%s (%d/%d)" % (prefix, self.title, self.num_done(), self.num_applies())
|
||||
|
||||
def get_submenu(self):
|
||||
menu = UpdatableConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
|
||||
screen=MyScreen())
|
||||
menu = ConsoleMenu(title=self.title, subtitle=self.get_subtitle, prologue_text=self.get_description(),
|
||||
clear_screen=False)
|
||||
menu.exit_item = CustomExitItem("Return")
|
||||
for todo in self.get_todos():
|
||||
if todo.applies(state.release_type):
|
||||
|
@ -663,7 +662,7 @@ class TodoGroup(SecretYamlObject):
|
|||
return menu
|
||||
|
||||
def get_menu_item(self):
|
||||
item = UpdatableSubmenuItem(self.get_title, self.get_submenu())
|
||||
item = SubmenuItem(self.get_title, self.get_submenu())
|
||||
return item
|
||||
|
||||
def get_todos(self):
|
||||
|
@ -820,7 +819,7 @@ class Todo(SecretYamlObject):
|
|||
print("ERROR while executing todo %s (%s)" % (self.get_title(), e))
|
||||
|
||||
def get_menu_item(self):
|
||||
return UpdatableFunctionItem(self.get_title, self.display_and_confirm)
|
||||
return FunctionItem(self.get_title, self.display_and_confirm)
|
||||
|
||||
def clone(self):
|
||||
clone = Todo(self.id, self.title, description=self.description)
|
||||
|
@ -1234,104 +1233,6 @@ def pause(fun=None):
|
|||
input("\nPress ENTER to continue...")
|
||||
|
||||
|
||||
# Custom classes for ConsoleMenu, to make menu texts dynamic
|
||||
# Needed until https://github.com/aegirhall/console-menu/pull/25 is released
|
||||
# See https://pypi.org/project/console-menu/ for other docs
|
||||
|
||||
class UpdatableConsoleMenu(ConsoleMenu):
|
||||
|
||||
def __repr__(self):
|
||||
return "%s: %s. %d items" % (self.get_title(), self.get_subtitle(), len(self.items))
|
||||
|
||||
def draw(self):
|
||||
"""
|
||||
Refreshes the screen and redraws the menu. Should be called whenever something changes that needs to be redrawn.
|
||||
"""
|
||||
self.screen.printf(self.formatter.format(title=self.get_title(), subtitle=self.get_subtitle(), items=self.items,
|
||||
prologue_text=self.get_prologue_text(), epilogue_text=self.get_epilogue_text()))
|
||||
|
||||
# Getters to get text in case method reference
|
||||
def get_title(self):
|
||||
return self.title() if callable(self.title) else self.title
|
||||
|
||||
def get_subtitle(self):
|
||||
return self.subtitle() if callable(self.subtitle) else self.subtitle
|
||||
|
||||
def get_prologue_text(self):
|
||||
return self.prologue_text() if callable(self.prologue_text) else self.prologue_text
|
||||
|
||||
def get_epilogue_text(self):
|
||||
return self.epilogue_text() if callable(self.epilogue_text) else self.epilogue_text
|
||||
|
||||
|
||||
class UpdatableSubmenuItem(SubmenuItem):
|
||||
def __init__(self, text, submenu, menu=None, should_exit=False):
|
||||
"""
|
||||
:ivar ConsoleMenu self.submenu: The submenu to be opened when this item is selected
|
||||
"""
|
||||
super(UpdatableSubmenuItem, self).__init__(text=text, menu=menu, should_exit=should_exit, submenu=submenu)
|
||||
|
||||
if menu:
|
||||
self.get_submenu().parent = menu
|
||||
|
||||
def show(self, index):
|
||||
return "%2d - %s" % (index + 1, self.get_text())
|
||||
|
||||
# Getters to get text in case method reference
|
||||
def get_text(self):
|
||||
return self.text() if callable(self.text) else self.text
|
||||
|
||||
def set_menu(self, menu):
|
||||
"""
|
||||
Sets the menu of this item.
|
||||
Should be used instead of directly accessing the menu attribute for this class.
|
||||
|
||||
:param ConsoleMenu menu: the menu
|
||||
"""
|
||||
self.menu = menu
|
||||
self.get_submenu().parent = menu
|
||||
|
||||
def action(self):
|
||||
"""
|
||||
This class overrides this method
|
||||
"""
|
||||
self.get_submenu().start()
|
||||
|
||||
def clean_up(self):
|
||||
"""
|
||||
This class overrides this method
|
||||
"""
|
||||
self.get_submenu().join()
|
||||
self.menu.clear_screen()
|
||||
self.menu.resume()
|
||||
|
||||
def get_return(self):
|
||||
"""
|
||||
:return: The returned value in the submenu
|
||||
"""
|
||||
return self.get_submenu().returned_value
|
||||
|
||||
def get_submenu(self):
|
||||
"""
|
||||
We unwrap the submenu variable in case it is a reference to a method that returns a submenu
|
||||
"""
|
||||
return self.submenu if not callable(self.submenu) else self.submenu()
|
||||
|
||||
|
||||
class UpdatableFunctionItem(FunctionItem):
|
||||
def show(self, index):
|
||||
return "%2d - %s" % (index + 1, self.get_text())
|
||||
|
||||
# Getters to get text in case method reference
|
||||
def get_text(self):
|
||||
return self.text() if callable(self.text) else self.text
|
||||
|
||||
|
||||
class MyScreen(Screen):
|
||||
def clear(self):
|
||||
return
|
||||
|
||||
|
||||
class CustomExitItem(ExitItem):
|
||||
def show(self, index):
|
||||
return super(CustomExitItem, self).show(index)
|
||||
|
@ -1346,6 +1247,13 @@ def main():
|
|||
global templates
|
||||
|
||||
print("Lucene releaseWizard v%s" % getScriptVersion())
|
||||
|
||||
try:
|
||||
ConsoleMenu(clear_screen=True)
|
||||
except Exception as e:
|
||||
sys.exit("You need to install 'consolemenu' package version 0.7.1 for the Wizard to function. Please run 'pip "
|
||||
"install -r requirements.txt'")
|
||||
|
||||
c = parse_config()
|
||||
|
||||
if c.dry:
|
||||
|
@ -1402,18 +1310,18 @@ def main():
|
|||
lucene_news_file = os.path.join(state.get_website_git_folder(), 'content', 'core', 'core_news',
|
||||
"%s-%s-available.md" % (state.get_release_date_iso(), state.release_version.replace(".", "-")))
|
||||
|
||||
main_menu = UpdatableConsoleMenu(title="Lucene ReleaseWizard",
|
||||
main_menu = ConsoleMenu(title="Lucene ReleaseWizard",
|
||||
subtitle=get_releasing_text,
|
||||
prologue_text="Welcome to the release wizard. From here you can manage the process including creating new RCs. "
|
||||
"All changes are persisted, so you can exit any time and continue later. Make sure to read the Help section.",
|
||||
epilogue_text="® 2022 The Lucene project. Licensed under the Apache License 2.0\nScript version v%s)" % getScriptVersion(),
|
||||
screen=MyScreen())
|
||||
clear_screen=False)
|
||||
|
||||
todo_menu = UpdatableConsoleMenu(title=get_releasing_text,
|
||||
todo_menu = ConsoleMenu(title=get_releasing_text,
|
||||
subtitle=get_subtitle,
|
||||
prologue_text=None,
|
||||
epilogue_text=None,
|
||||
screen=MyScreen())
|
||||
clear_screen=False)
|
||||
todo_menu.exit_item = CustomExitItem("Return")
|
||||
|
||||
for todo_group in state.todo_groups:
|
||||
|
@ -1422,14 +1330,14 @@ def main():
|
|||
menu_item.set_menu(todo_menu)
|
||||
todo_menu.append_item(menu_item)
|
||||
|
||||
main_menu.append_item(UpdatableSubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
|
||||
main_menu.append_item(UpdatableFunctionItem(get_start_new_rc_menu_title, start_new_rc))
|
||||
main_menu.append_item(UpdatableFunctionItem('Clear and restart current RC', state.clear_rc))
|
||||
main_menu.append_item(UpdatableFunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
|
||||
main_menu.append_item(UpdatableFunctionItem('Start release for a different version', release_other_version))
|
||||
main_menu.append_item(UpdatableFunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
|
||||
# main_menu.append_item(UpdatableFunctionItem('Dump YAML', dump_yaml))
|
||||
main_menu.append_item(UpdatableFunctionItem('Help', help))
|
||||
main_menu.append_item(SubmenuItem(get_todo_menuitem_title, todo_menu, menu=main_menu))
|
||||
main_menu.append_item(FunctionItem(get_start_new_rc_menu_title, start_new_rc))
|
||||
main_menu.append_item(FunctionItem('Clear and restart current RC', state.clear_rc))
|
||||
main_menu.append_item(FunctionItem("Clear all state, restart the %s release" % state.release_version, reset_state))
|
||||
main_menu.append_item(FunctionItem('Start release for a different version', release_other_version))
|
||||
main_menu.append_item(FunctionItem('Generate Asciidoc guide for this release', generate_asciidoc))
|
||||
# main_menu.append_item(FunctionItem('Dump YAML', dump_yaml))
|
||||
main_menu.append_item(FunctionItem('Help', help))
|
||||
|
||||
main_menu.show()
|
||||
|
||||
|
|
|
@ -521,7 +521,7 @@ groups:
|
|||
addition wait a couple more days? Merges of bug fixes into the branch
|
||||
may become more difficult.
|
||||
* Only GitHub issues with Milestone {{ release_version_major }}.{{ release_version_minor }}
|
||||
and priority "Blocker" will delay a release candidate build.
|
||||
will delay a release candidate build.
|
||||
----
|
||||
types:
|
||||
- major
|
||||
|
@ -979,8 +979,8 @@ groups:
|
|||
title: Publish docs, changes and javadocs
|
||||
description: |
|
||||
Ensure your refrigerator has at least 2 beers - the svn import operation can take a while,
|
||||
depending on your upload bandwidth. We'll publish this directly to the production tree.
|
||||
At the end of the task, the two links below shall work.
|
||||
depending on your upload bandwidth. We'll publish this directly to the production tree. At
|
||||
the end of the task, the two links below shall work.
|
||||
links:
|
||||
- http://lucene.apache.org/core/{{ version }}
|
||||
vars:
|
||||
|
@ -1126,12 +1126,18 @@ groups:
|
|||
comment: Push all changes
|
||||
logfile: push-website.log
|
||||
post_description: |
|
||||
Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3
|
||||
and view the staged site at https://lucene.staged.apache.org
|
||||
Verify that correct links and versions are mentioned in download pages, download buttons etc.
|
||||
If you find anything wrong, then commit and push any changes and check again.
|
||||
|
||||
Next step is to merge the changes to branch 'production' in order to publish the site.
|
||||
Wait a few minutes for the build to happen. You can follow the site build at
|
||||
https://ci2.apache.org/#/builders/3 and view the staged site at
|
||||
https://lucene.staged.apache.org. Verify that correct links and versions are mentioned in
|
||||
download pages, download buttons etc. If you find anything wrong, then commit and push any
|
||||
changes and check again. You may find that the publish fails, leaving a directory listing
|
||||
instead of a beautiful website. If this happens, check the "builder" link and click through into
|
||||
its details to find possible error messages produced by the website publication process. You
|
||||
may have produced malformed Markdown. Or the website publish may just fail for some reason out
|
||||
of your control. If this happens, you can attempt to retrigger the publishing with some
|
||||
innocuous changes. Next step is to merge the changes to branch 'production' in order to
|
||||
publish the site. Before doing this, you may want to replenish your stock of beers, or get
|
||||
stronger stuff.
|
||||
links:
|
||||
- https://ci2.apache.org/#/builders/3
|
||||
- https://lucene.staged.apache.org
|
||||
|
@ -1159,7 +1165,8 @@ groups:
|
|||
post_description: |
|
||||
Wait a few minutes for the build to happen. You can follow the site build at https://ci2.apache.org/#/builders/3
|
||||
|
||||
Verify on https://lucene.apache.org that the site is OK.
|
||||
Verify on https://lucene.apache.org that the site is OK. It really should be, but see staging
|
||||
site publication instructions for possible debugging/recovery options if it is not.
|
||||
|
||||
You can now also verify that http://lucene.apache.org/core/api/core/ redirects to the latest version
|
||||
links:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
six>=1.11.0
|
||||
Jinja2>=2.10.1
|
||||
PyYAML>=5.1
|
||||
holidays>=0.9.10
|
||||
ics>=0.4
|
||||
console-menu>=0.5.1
|
||||
PyGithub
|
||||
jira
|
||||
six~=1.16.0
|
||||
Jinja2~=3.1.1
|
||||
PyYAML~=6.0
|
||||
holidays~=0.16
|
||||
ics~=0.7.2
|
||||
console-menu~=0.7.1
|
||||
PyGithub~=1.56
|
||||
jira~=3.4.1
|
|
@ -67,6 +67,12 @@ allprojects {
|
|||
tasks.named(sourceSet.getCompileJavaTaskName()).configure({ JavaCompile task ->
|
||||
task.dependsOn modularPaths.compileModulePathConfiguration
|
||||
|
||||
// GH-12742: add the modular path as inputs so that if anything changes, the task
|
||||
// is not up to date and is re-run. I [dw] believe this should be a @Classpath parameter
|
||||
// on the task itself... but I don't know how to implement this on an existing class.
|
||||
// This is a workaround, but it should work just fine.
|
||||
task.inputs.files(modularPaths.compileModulePathConfiguration)
|
||||
|
||||
// LUCENE-10327: don't allow gradle to emit an empty sourcepath as it would break
|
||||
// compilation of modules.
|
||||
task.options.setSourcepath(sourceSet.java.sourceDirectories)
|
||||
|
|
|
@ -67,7 +67,7 @@ allprojects {
|
|||
// seed, repetition and amplification.
|
||||
[propName: 'tests.seed', value: { -> rootSeed }, description: "Sets the master randomization seed."],
|
||||
[propName: 'tests.iters', value: null, description: "Duplicate (re-run) each test case N times."],
|
||||
[propName: 'tests.multiplier', value: 1, description: "Value multiplier for randomized tests."],
|
||||
[propName: 'tests.multiplier', value: null, description: "Value multiplier for randomized tests."],
|
||||
[propName: 'tests.maxfailures', value: null, description: "Skip tests after a given number of failures."],
|
||||
[propName: 'tests.timeoutSuite', value: null, description: "Timeout (in millis) for an entire suite."],
|
||||
[propName: 'tests.failfast', value: "false", description: "Stop the build early on failure.", buildOnly: true],
|
||||
|
|
|
@ -62,9 +62,11 @@ API Changes
|
|||
|
||||
* GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera)
|
||||
|
||||
* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
|
||||
* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
|
||||
of the two (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
@ -208,6 +210,9 @@ Improvements
|
|||
|
||||
* GITHUB#12689: TaskExecutor to cancel all tasks on exception to avoid needless computation. (Luca Cavanna)
|
||||
|
||||
* GITHUB#12754: Refactor lookup of Hotspot VM options and do not initialize constants with NULL
|
||||
if SecurityManager prevents access. (Uwe Schindler)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)
|
||||
|
@ -251,6 +256,11 @@ Optimizations
|
|||
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
|
||||
specialized bulk scorer. (Adrien Grand)
|
||||
|
||||
* GITHUB#1052: Faster merging of terms enums. (Adrien Grand)
|
||||
|
||||
* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)
|
||||
|
||||
|
||||
Changes in runtime behavior
|
||||
---------------------
|
||||
|
||||
|
@ -278,7 +288,14 @@ Bug Fixes
|
|||
Build
|
||||
---------------------
|
||||
|
||||
* GITHUB#12752: tests.multiplier could be omitted in test failure reproduce lines (esp. in
|
||||
nightly mode). (Dawid Weiss)
|
||||
|
||||
* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
|
||||
leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
|
||||
|
||||
* GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
|
||||
|
||||
* GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden)
|
||||
|
||||
Other
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.ja.dict;
|
||||
package org.apache.lucene.analysis.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -69,7 +69,7 @@ public final class CSVUtil {
|
|||
return new String[0];
|
||||
}
|
||||
|
||||
return result.toArray(new String[result.size()]);
|
||||
return result.toArray(new String[0]);
|
||||
}
|
||||
|
||||
private static String unQuoteUnEscape(String original) {
|
||||
|
@ -83,7 +83,7 @@ public final class CSVUtil {
|
|||
}
|
||||
|
||||
// Unescape
|
||||
if (result.indexOf(ESCAPED_QUOTE) >= 0) {
|
||||
if (result.contains(ESCAPED_QUOTE)) {
|
||||
result = result.replace(ESCAPED_QUOTE, "\"");
|
||||
}
|
||||
}
|
|
@ -14,10 +14,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.ja;
|
||||
package org.apache.lucene.analysis.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.ja.dict.CSVUtil;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
||||
/*
|
|
@ -156,19 +156,19 @@ public class GenerateUTR30DataFiles {
|
|||
}
|
||||
|
||||
private static void getNFKCDataFilesFromIcuProject(String releaseTag) throws IOException {
|
||||
URI icuTagsURL = URI.create(ICU_GIT_TAG_URL + "/");
|
||||
URI icuReleaseTagURL = icuTagsURL.resolve(releaseTag + "/");
|
||||
URI norm2url = icuReleaseTagURL.resolve(ICU_DATA_NORM2_PATH + "/");
|
||||
URI icuTagsURI = URI.create(ICU_GIT_TAG_URL + "/");
|
||||
URI icuReleaseTagURI = icuTagsURI.resolve(releaseTag + "/");
|
||||
URI norm2uri = icuReleaseTagURI.resolve(ICU_DATA_NORM2_PATH + "/");
|
||||
|
||||
System.err.print("Downloading " + NFKC_TXT + " ... ");
|
||||
download(norm2url.resolve(NFKC_TXT), NFKC_TXT);
|
||||
download(norm2uri.resolve(NFKC_TXT), NFKC_TXT);
|
||||
System.err.println("done.");
|
||||
System.err.print("Downloading " + NFKC_CF_TXT + " ... ");
|
||||
download(norm2url.resolve(NFKC_CF_TXT), NFKC_CF_TXT);
|
||||
download(norm2uri.resolve(NFKC_CF_TXT), NFKC_CF_TXT);
|
||||
System.err.println("done.");
|
||||
|
||||
System.err.print("Downloading " + NFKC_CF_TXT + " and making diacritic rules one-way ... ");
|
||||
URLConnection connection = openConnection(norm2url.resolve(NFC_TXT).toURL());
|
||||
URLConnection connection = openConnection(norm2uri.resolve(NFC_TXT).toURL());
|
||||
try (BufferedReader reader =
|
||||
new BufferedReader(
|
||||
new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FSTCompiler;
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.io.OutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.nio.file.Path;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
|
||||
class UnknownDictionaryBuilder {
|
||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import org.apache.lucene.analysis.morph.Dictionary;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FSTCompiler;
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.analysis.ja.dict;
|
|||
import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET;
|
||||
import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR;
|
||||
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
|
||||
/** Morphological information for user dictionary. */
|
||||
final class UserMorphData implements JaMorphData {
|
||||
public static final int WORD_COST = -100000;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.ja.dict;
|
||||
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.ko.dict;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/** Utility class for parsing CSV text */
|
||||
public final class CSVUtil {
|
||||
private static final char QUOTE = '"';
|
||||
|
||||
private static final char COMMA = ',';
|
||||
|
||||
private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$");
|
||||
|
||||
private static final String ESCAPED_QUOTE = "\"\"";
|
||||
|
||||
private CSVUtil() {} // no instance!!!
|
||||
|
||||
/**
|
||||
* Parse CSV line
|
||||
*
|
||||
* @param line line containing csv-encoded data
|
||||
* @return Array of values
|
||||
*/
|
||||
public static String[] parse(String line) {
|
||||
boolean insideQuote = false;
|
||||
ArrayList<String> result = new ArrayList<>();
|
||||
int quoteCount = 0;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < line.length(); i++) {
|
||||
char c = line.charAt(i);
|
||||
|
||||
if (c == QUOTE) {
|
||||
insideQuote = !insideQuote;
|
||||
quoteCount++;
|
||||
}
|
||||
|
||||
if (c == COMMA && !insideQuote) {
|
||||
String value = sb.toString();
|
||||
value = unQuoteUnEscape(value);
|
||||
result.add(value);
|
||||
sb.setLength(0);
|
||||
continue;
|
||||
}
|
||||
|
||||
sb.append(c);
|
||||
}
|
||||
|
||||
result.add(sb.toString());
|
||||
|
||||
// Validate
|
||||
if (quoteCount % 2 != 0) {
|
||||
return new String[0];
|
||||
}
|
||||
|
||||
return result.toArray(new String[0]);
|
||||
}
|
||||
|
||||
private static String unQuoteUnEscape(String original) {
|
||||
String result = original;
|
||||
|
||||
// Unquote
|
||||
if (result.indexOf('\"') >= 0) {
|
||||
Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
|
||||
if (m.matches()) {
|
||||
result = m.group(1);
|
||||
}
|
||||
|
||||
// Unescape
|
||||
if (result.contains(ESCAPED_QUOTE)) {
|
||||
result = result.replace(ESCAPED_QUOTE, "\"");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -28,6 +28,7 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FSTCompiler;
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
import org.apache.lucene.analysis.ko.POS;
|
||||
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.nio.file.Path;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
|
||||
class UnknownDictionaryBuilder {
|
||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.ko.dict;
|
||||
|
||||
import org.apache.lucene.analysis.util.CSVUtil;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ public final class PForUtil {
|
|||
out.writeBytes(exceptions, exceptions.length);
|
||||
}
|
||||
|
||||
/** Decode 128 integers into {@code ints}. */
|
||||
/** Decode 128 integers into {@code longs}. */
|
||||
void decode(DataInput in, long[] longs) throws IOException {
|
||||
final int token = Byte.toUnsignedInt(in.readByte());
|
||||
final int bitsPerValue = token & 0x1f;
|
||||
|
|
|
@ -24,8 +24,14 @@ import org.openjdk.jmh.annotations.*;
|
|||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MICROSECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@Warmup(iterations = 3, time = 3)
|
||||
@Measurement(iterations = 5, time = 3)
|
||||
// first iteration is complete garbage, so make sure we really warm up
|
||||
@Warmup(iterations = 4, time = 1)
|
||||
// real iterations. not useful to spend tons of time here, better to fork more
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
// engage some noise reduction
|
||||
@Fork(
|
||||
value = 3,
|
||||
jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
|
||||
public class VectorUtilBenchmark {
|
||||
|
||||
private byte[] bytesA;
|
||||
|
@ -36,7 +42,7 @@ public class VectorUtilBenchmark {
|
|||
@Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
|
||||
int size;
|
||||
|
||||
@Setup(Level.Trial)
|
||||
@Setup(Level.Iteration)
|
||||
public void init() {
|
||||
ThreadLocalRandom random = ThreadLocalRandom.current();
|
||||
|
||||
|
@ -56,84 +62,72 @@ public class VectorUtilBenchmark {
|
|||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(value = 1)
|
||||
public float binaryCosineScalar() {
|
||||
return VectorUtil.cosine(bytesA, bytesB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(
|
||||
value = 1,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public float binaryCosineVector() {
|
||||
return VectorUtil.cosine(bytesA, bytesB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(value = 1)
|
||||
public int binaryDotProductScalar() {
|
||||
return VectorUtil.dotProduct(bytesA, bytesB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(
|
||||
value = 1,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public int binaryDotProductVector() {
|
||||
return VectorUtil.dotProduct(bytesA, bytesB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(value = 1)
|
||||
public int binarySquareScalar() {
|
||||
return VectorUtil.squareDistance(bytesA, bytesB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(
|
||||
value = 1,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public int binarySquareVector() {
|
||||
return VectorUtil.squareDistance(bytesA, bytesB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(value = 1)
|
||||
public float floatCosineScalar() {
|
||||
return VectorUtil.cosine(floatsA, floatsB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(
|
||||
value = 1,
|
||||
value = 15,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public float floatCosineVector() {
|
||||
return VectorUtil.cosine(floatsA, floatsB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(value = 1)
|
||||
public float floatDotProductScalar() {
|
||||
return VectorUtil.dotProduct(floatsA, floatsB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(
|
||||
value = 1,
|
||||
value = 15,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public float floatDotProductVector() {
|
||||
return VectorUtil.dotProduct(floatsA, floatsB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(value = 1)
|
||||
public float floatSquareScalar() {
|
||||
return VectorUtil.squareDistance(floatsA, floatsB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@Fork(
|
||||
value = 1,
|
||||
value = 15,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public float floatSquareVector() {
|
||||
return VectorUtil.squareDistance(floatsA, floatsB);
|
||||
|
|
|
@ -32,8 +32,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=500
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
# Fully Qualified Class Name of a Collector with an empty constructor
|
||||
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
|
||||
# topScoreDocUnordered - Like above, but allows out of order
|
||||
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
|
||||
collector.class=coll:topScoreDoc
|
||||
|
||||
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
|
||||
directory=FSDirectory
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
# Fully Qualified Class Name of a Collector with an empty constructor
|
||||
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
|
||||
# topScoreDocUnordered - Like above, but allows out of order
|
||||
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
|
||||
collector.class=coll:topScoreDoc
|
||||
|
||||
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
|
||||
directory=FSDirectory
|
||||
|
|
|
@ -37,8 +37,8 @@ doc.term.vector=vector:true:true:false:false
|
|||
log.step=500
|
||||
log.step.DeleteDoc=100
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
||||
|
|
|
@ -20,7 +20,8 @@
|
|||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
doc.tokenized=false
|
||||
doc.body.tokenized=true
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
|
||||
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
|
||||
|
|
|
@ -30,7 +30,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=1000
|
||||
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
||||
|
|
|
@ -30,7 +30,8 @@ doc.term.vector.offsets=false
|
|||
doc.term.vector.positions=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
||||
|
|
|
@ -32,8 +32,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -32,8 +32,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -32,8 +32,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -32,8 +32,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -31,8 +31,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=500
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -42,8 +42,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=500
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -16,7 +16,8 @@
|
|||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
doc.tokenized=false
|
||||
doc.body.tokenized=true
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
log.step=1000
|
||||
|
||||
-AnalyzerFactory(name:shingle-bigrams-unigrams,
|
||||
|
|
|
@ -30,7 +30,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=500
|
||||
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
#docs.dir=reuters-111
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
|
|
|
@ -31,7 +31,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=100000
|
||||
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource
|
||||
|
||||
|
|
|
@ -31,8 +31,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -31,8 +31,8 @@ doc.tokenized=true
|
|||
doc.term.vector=false
|
||||
log.step=2000
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
|
||||
doc.tokenized=false
|
||||
doc.body.tokenized=true
|
||||
docs.dir=reuters-out
|
||||
work.dir=data
|
||||
docs.dir=reuters21578
|
||||
|
||||
-AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java))
|
||||
|
||||
|
|
|
@ -23,9 +23,9 @@ import java.lang.reflect.Constructor;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use
|
||||
|
@ -42,17 +42,13 @@ public class NewAnalyzerTask extends PerfTask {
|
|||
|
||||
public static final Analyzer createAnalyzer(String className) throws Exception {
|
||||
final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
|
||||
try {
|
||||
// first try to use a ctor with version parameter (needed for many new Analyzers that have no
|
||||
// default one anymore
|
||||
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
|
||||
return cnstr.newInstance(Version.LATEST);
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
NoSuchMethodException nsme) {
|
||||
// otherwise use default ctor
|
||||
return clazz.getConstructor().newInstance();
|
||||
Constructor<? extends Analyzer> cnstr;
|
||||
if (className.equals("org.apache.lucene.analysis.core.StopAnalyzer")) {
|
||||
cnstr = clazz.getConstructor(CharArraySet.class);
|
||||
return cnstr.newInstance(CharArraySet.EMPTY_SET);
|
||||
}
|
||||
cnstr = clazz.getConstructor();
|
||||
return cnstr.newInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -116,7 +116,7 @@ final class PForUtil {
|
|||
out.writeBytes(exceptions, exceptions.length);
|
||||
}
|
||||
|
||||
/** Decode 128 integers into {@code ints}. */
|
||||
/** Decode 128 integers into {@code longs}. */
|
||||
void decode(DataInput in, long[] longs) throws IOException {
|
||||
final int token = Byte.toUnsignedInt(in.readByte());
|
||||
final int bitsPerValue = token & 0x1f;
|
||||
|
|
|
@ -136,17 +136,16 @@ public final class MultiTerms extends Terms {
|
|||
|
||||
@Override
|
||||
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
|
||||
final List<TermsEnumIndex> termsEnums = new ArrayList<>();
|
||||
for (int i = 0; i < subs.length; i++) {
|
||||
final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
|
||||
if (termsEnum != null) {
|
||||
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
|
||||
termsEnums.add(new TermsEnumIndex(termsEnum, i));
|
||||
}
|
||||
}
|
||||
|
||||
if (termsEnums.size() > 0) {
|
||||
return new MultiTermsEnum(subSlices)
|
||||
.reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
|
||||
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(TermsEnumIndex.EMPTY_ARRAY));
|
||||
} else {
|
||||
return TermsEnum.EMPTY;
|
||||
}
|
||||
|
@ -181,17 +180,16 @@ public final class MultiTerms extends Terms {
|
|||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
|
||||
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
|
||||
final List<TermsEnumIndex> termsEnums = new ArrayList<>();
|
||||
for (int i = 0; i < subs.length; i++) {
|
||||
final TermsEnum termsEnum = subs[i].iterator();
|
||||
if (termsEnum != null) {
|
||||
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
|
||||
termsEnums.add(new TermsEnumIndex(termsEnum, i));
|
||||
}
|
||||
}
|
||||
|
||||
if (termsEnums.size() > 0) {
|
||||
return new MultiTermsEnum(subSlices)
|
||||
.reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
|
||||
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(TermsEnumIndex.EMPTY_ARRAY));
|
||||
} else {
|
||||
return TermsEnum.EMPTY;
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
new Comparator<TermsEnumWithSlice>() {
|
||||
@Override
|
||||
public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
|
||||
return o1.index - o2.index;
|
||||
return o1.subIndex - o2.subIndex;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -56,17 +56,6 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
private int numSubs;
|
||||
private BytesRef current;
|
||||
|
||||
static class TermsEnumIndex {
|
||||
public static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
|
||||
final int subIndex;
|
||||
final TermsEnum termsEnum;
|
||||
|
||||
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
|
||||
this.termsEnum = termsEnum;
|
||||
this.subIndex = subIndex;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns how many sub-reader slices contain the current term. @see #getMatchArray */
|
||||
public int getMatchCount() {
|
||||
return numTop;
|
||||
|
@ -114,10 +103,10 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i];
|
||||
assert termsEnumIndex != null;
|
||||
|
||||
final BytesRef term = termsEnumIndex.termsEnum.next();
|
||||
final BytesRef term = termsEnumIndex.next();
|
||||
if (term != null) {
|
||||
final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex];
|
||||
entry.reset(termsEnumIndex.termsEnum, term);
|
||||
entry.reset(termsEnumIndex);
|
||||
queue.add(entry);
|
||||
currentSubs[numSubs++] = entry;
|
||||
} else {
|
||||
|
@ -154,7 +143,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
// Doing so is a waste because this sub will simply
|
||||
// seek to the same spot.
|
||||
if (seekOpt) {
|
||||
final BytesRef curTerm = currentSubs[i].current;
|
||||
final BytesRef curTerm = currentSubs[i].term();
|
||||
if (curTerm != null) {
|
||||
final int cmp = term.compareTo(curTerm);
|
||||
if (cmp == 0) {
|
||||
|
@ -162,19 +151,19 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
} else if (cmp < 0) {
|
||||
status = false;
|
||||
} else {
|
||||
status = currentSubs[i].terms.seekExact(term);
|
||||
status = currentSubs[i].seekExact(term);
|
||||
}
|
||||
} else {
|
||||
status = false;
|
||||
}
|
||||
} else {
|
||||
status = currentSubs[i].terms.seekExact(term);
|
||||
status = currentSubs[i].seekExact(term);
|
||||
}
|
||||
|
||||
if (status) {
|
||||
top[numTop++] = currentSubs[i];
|
||||
current = currentSubs[i].current = currentSubs[i].terms.term();
|
||||
assert term.equals(currentSubs[i].current);
|
||||
current = currentSubs[i].term();
|
||||
assert term.equals(currentSubs[i].term());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -206,7 +195,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
// Doing so is a waste because this sub will simply
|
||||
// seek to the same spot.
|
||||
if (seekOpt) {
|
||||
final BytesRef curTerm = currentSubs[i].current;
|
||||
final BytesRef curTerm = currentSubs[i].term();
|
||||
if (curTerm != null) {
|
||||
final int cmp = term.compareTo(curTerm);
|
||||
if (cmp == 0) {
|
||||
|
@ -214,28 +203,25 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
} else if (cmp < 0) {
|
||||
status = SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
status = currentSubs[i].terms.seekCeil(term);
|
||||
status = currentSubs[i].seekCeil(term);
|
||||
}
|
||||
} else {
|
||||
status = SeekStatus.END;
|
||||
}
|
||||
} else {
|
||||
status = currentSubs[i].terms.seekCeil(term);
|
||||
status = currentSubs[i].seekCeil(term);
|
||||
}
|
||||
|
||||
if (status == SeekStatus.FOUND) {
|
||||
top[numTop++] = currentSubs[i];
|
||||
current = currentSubs[i].current = currentSubs[i].terms.term();
|
||||
current = currentSubs[i].term();
|
||||
queue.add(currentSubs[i]);
|
||||
} else {
|
||||
if (status == SeekStatus.NOT_FOUND) {
|
||||
currentSubs[i].current = currentSubs[i].terms.term();
|
||||
assert currentSubs[i].current != null;
|
||||
assert currentSubs[i].term() != null;
|
||||
queue.add(currentSubs[i]);
|
||||
} else {
|
||||
assert status == SeekStatus.END;
|
||||
// enum exhausted
|
||||
currentSubs[i].current = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -269,15 +255,14 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
// top term
|
||||
assert numTop == 0;
|
||||
numTop = queue.fillTop(top);
|
||||
current = top[0].current;
|
||||
current = top[0].term();
|
||||
}
|
||||
|
||||
private void pushTop() throws IOException {
|
||||
// call next() on each top, and reorder queue
|
||||
for (int i = 0; i < numTop; i++) {
|
||||
TermsEnumWithSlice top = queue.top();
|
||||
top.current = top.terms.next();
|
||||
if (top.current == null) {
|
||||
if (top.next() == null) {
|
||||
queue.pop();
|
||||
} else {
|
||||
queue.updateTop();
|
||||
|
@ -320,7 +305,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
public int docFreq() throws IOException {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < numTop; i++) {
|
||||
sum += top[i].terms.docFreq();
|
||||
sum += top[i].termsEnum.docFreq();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
@ -329,7 +314,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
public long totalTermFreq() throws IOException {
|
||||
long sum = 0;
|
||||
for (int i = 0; i < numTop; i++) {
|
||||
final long v = top[i].terms.totalTermFreq();
|
||||
final long v = top[i].termsEnum.totalTermFreq();
|
||||
assert v != -1;
|
||||
sum += v;
|
||||
}
|
||||
|
@ -359,12 +344,12 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
|
||||
final TermsEnumWithSlice entry = top[i];
|
||||
|
||||
assert entry.index < docsEnum.subPostingsEnums.length
|
||||
: entry.index + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length;
|
||||
assert entry.subIndex < docsEnum.subPostingsEnums.length
|
||||
: entry.subIndex + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length;
|
||||
final PostingsEnum subPostingsEnum =
|
||||
entry.terms.postings(docsEnum.subPostingsEnums[entry.index], flags);
|
||||
entry.termsEnum.postings(docsEnum.subPostingsEnums[entry.subIndex], flags);
|
||||
assert subPostingsEnum != null;
|
||||
docsEnum.subPostingsEnums[entry.index] = subPostingsEnum;
|
||||
docsEnum.subPostingsEnums[entry.subIndex] = subPostingsEnum;
|
||||
subDocs[upto].postingsEnum = subPostingsEnum;
|
||||
subDocs[upto].slice = entry.subSlice;
|
||||
upto++;
|
||||
|
@ -379,26 +364,18 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
return new SlowImpactsEnum(postings(null, flags));
|
||||
}
|
||||
|
||||
static final class TermsEnumWithSlice {
|
||||
static final class TermsEnumWithSlice extends TermsEnumIndex {
|
||||
private final ReaderSlice subSlice;
|
||||
TermsEnum terms;
|
||||
public BytesRef current;
|
||||
final int index;
|
||||
|
||||
public TermsEnumWithSlice(int index, ReaderSlice subSlice) {
|
||||
super(null, index);
|
||||
this.subSlice = subSlice;
|
||||
this.index = index;
|
||||
assert subSlice.length >= 0 : "length=" + subSlice.length;
|
||||
}
|
||||
|
||||
public void reset(TermsEnum terms, BytesRef term) {
|
||||
this.terms = terms;
|
||||
current = term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return subSlice.toString() + ":" + terms;
|
||||
return subSlice.toString() + ":" + super.toString();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -413,7 +390,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
|
||||
@Override
|
||||
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
|
||||
return termsA.current.compareTo(termsB.current) < 0;
|
||||
return termsA.compareTermTo(termsB) < 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -435,7 +412,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
|
|||
final int leftChild = index << 1;
|
||||
for (int child = leftChild, end = Math.min(size, leftChild + 1); child <= end; ++child) {
|
||||
TermsEnumWithSlice te = get(child);
|
||||
if (te.current.equals(tops[0].current)) {
|
||||
if (te.compareTermTo(tops[0]) == 0) {
|
||||
tops[numTop++] = te;
|
||||
stack[stackLen++] = child;
|
||||
}
|
||||
|
|
|
@ -24,8 +24,6 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
@ -48,19 +46,15 @@ public class OrdinalMap implements Accountable {
|
|||
// need it
|
||||
// TODO: use more efficient packed ints structures?
|
||||
|
||||
private static class TermsEnumIndex {
|
||||
final int subIndex;
|
||||
final TermsEnum termsEnum;
|
||||
BytesRef currentTerm;
|
||||
private static class TermsEnumPriorityQueue extends PriorityQueue<TermsEnumIndex> {
|
||||
|
||||
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
|
||||
this.termsEnum = termsEnum;
|
||||
this.subIndex = subIndex;
|
||||
TermsEnumPriorityQueue(int size) {
|
||||
super(size);
|
||||
}
|
||||
|
||||
public BytesRef next() throws IOException {
|
||||
currentTerm = termsEnum.next();
|
||||
return currentTerm;
|
||||
@Override
|
||||
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
|
||||
return a.compareTermTo(b) < 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -227,13 +221,7 @@ public class OrdinalMap implements Accountable {
|
|||
long[] segmentOrds = new long[subs.length];
|
||||
|
||||
// Just merge-sorts by term:
|
||||
PriorityQueue<TermsEnumIndex> queue =
|
||||
new PriorityQueue<TermsEnumIndex>(subs.length) {
|
||||
@Override
|
||||
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
|
||||
return a.currentTerm.compareTo(b.currentTerm) < 0;
|
||||
}
|
||||
};
|
||||
TermsEnumPriorityQueue queue = new TermsEnumPriorityQueue(subs.length);
|
||||
|
||||
for (int i = 0; i < subs.length; i++) {
|
||||
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
|
||||
|
@ -242,19 +230,18 @@ public class OrdinalMap implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
TermsEnumIndex.TermState topState = new TermsEnumIndex.TermState();
|
||||
|
||||
long globalOrd = 0;
|
||||
while (queue.size() != 0) {
|
||||
TermsEnumIndex top = queue.top();
|
||||
scratch.copyBytes(top.currentTerm);
|
||||
topState.copyFrom(top);
|
||||
|
||||
int firstSegmentIndex = Integer.MAX_VALUE;
|
||||
long globalOrdDelta = Long.MAX_VALUE;
|
||||
|
||||
// Advance past this term, recording the per-segment ord deltas:
|
||||
while (true) {
|
||||
top = queue.top();
|
||||
long segmentOrd = top.termsEnum.ord();
|
||||
long delta = globalOrd - segmentOrd;
|
||||
int segmentIndex = top.subIndex;
|
||||
|
@ -284,10 +271,11 @@ public class OrdinalMap implements Accountable {
|
|||
if (queue.size() == 0) {
|
||||
break;
|
||||
}
|
||||
top = queue.top();
|
||||
} else {
|
||||
queue.updateTop();
|
||||
top = queue.updateTop();
|
||||
}
|
||||
if (queue.top().currentTerm.equals(scratch.get()) == false) {
|
||||
if (top.termEquals(topState) == false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,8 +18,6 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -555,8 +553,6 @@ final class ReadersAndUpdates {
|
|||
FieldInfos fieldInfos = null;
|
||||
boolean any = false;
|
||||
for (List<DocValuesFieldUpdates> updates : pendingDVUpdates.values()) {
|
||||
// Sort by increasing delGen:
|
||||
Collections.sort(updates, Comparator.comparingLong(a -> a.delGen));
|
||||
for (DocValuesFieldUpdates update : updates) {
|
||||
if (update.delGen <= maxDelGen && update.any()) {
|
||||
any = true;
|
||||
|
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
||||
/**
|
||||
* Wrapper around a {@link TermsEnum} and an integer that identifies it. All operations that move
|
||||
* the current position of the {@link TermsEnum} must be performed via this wrapper class, not
|
||||
* directly on the wrapped {@link TermsEnum}.
|
||||
*/
|
||||
class TermsEnumIndex {
|
||||
|
||||
static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
|
||||
|
||||
/**
|
||||
* Copy the first 8 bytes of the given term as a comparable unsigned long. In case the term has
|
||||
* less than 8 bytes, missing bytes will be replaced with zeroes. Note that two terms that produce
|
||||
* the same long could still be different due to the fact that missing bytes are replaced with
|
||||
* zeroes, e.g. {@code [1, 0]} and {@code [1]} get mapped to the same long.
|
||||
*/
|
||||
static long prefix8ToComparableUnsignedLong(BytesRef term) {
|
||||
// Use Big Endian so that longs are comparable
|
||||
if (term.length >= Long.BYTES) {
|
||||
return (long) BitUtil.VH_BE_LONG.get(term.bytes, term.offset);
|
||||
} else {
|
||||
long l;
|
||||
int o;
|
||||
if (Integer.BYTES <= term.length) {
|
||||
l = (int) BitUtil.VH_BE_INT.get(term.bytes, term.offset);
|
||||
o = Integer.BYTES;
|
||||
} else {
|
||||
l = 0;
|
||||
o = 0;
|
||||
}
|
||||
if (o + Short.BYTES <= term.length) {
|
||||
l =
|
||||
(l << Short.SIZE)
|
||||
| Short.toUnsignedLong(
|
||||
(short) BitUtil.VH_BE_SHORT.get(term.bytes, term.offset + o));
|
||||
o += Short.BYTES;
|
||||
}
|
||||
if (o < term.length) {
|
||||
l = (l << Byte.SIZE) | Byte.toUnsignedLong(term.bytes[term.offset + o]);
|
||||
}
|
||||
l <<= (Long.BYTES - term.length) << 3;
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
final int subIndex;
|
||||
TermsEnum termsEnum;
|
||||
private BytesRef currentTerm;
|
||||
private long currentTermPrefix8;
|
||||
|
||||
TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
|
||||
this.termsEnum = termsEnum;
|
||||
this.subIndex = subIndex;
|
||||
}
|
||||
|
||||
BytesRef term() {
|
||||
return currentTerm;
|
||||
}
|
||||
|
||||
private void setTerm(BytesRef term) {
|
||||
currentTerm = term;
|
||||
if (currentTerm == null) {
|
||||
currentTermPrefix8 = 0;
|
||||
} else {
|
||||
currentTermPrefix8 = prefix8ToComparableUnsignedLong(currentTerm);
|
||||
}
|
||||
}
|
||||
|
||||
BytesRef next() throws IOException {
|
||||
BytesRef term = termsEnum.next();
|
||||
setTerm(term);
|
||||
return term;
|
||||
}
|
||||
|
||||
SeekStatus seekCeil(BytesRef term) throws IOException {
|
||||
SeekStatus status = termsEnum.seekCeil(term);
|
||||
if (status == SeekStatus.END) {
|
||||
setTerm(null);
|
||||
} else {
|
||||
setTerm(termsEnum.term());
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
boolean seekExact(BytesRef term) throws IOException {
|
||||
boolean found = termsEnum.seekExact(term);
|
||||
if (found) {
|
||||
setTerm(termsEnum.term());
|
||||
} else {
|
||||
setTerm(null);
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
void seekExact(long ord) throws IOException {
|
||||
termsEnum.seekExact(ord);
|
||||
setTerm(termsEnum.term());
|
||||
}
|
||||
|
||||
void reset(TermsEnumIndex tei) throws IOException {
|
||||
termsEnum = tei.termsEnum;
|
||||
currentTerm = tei.currentTerm;
|
||||
currentTermPrefix8 = tei.currentTermPrefix8;
|
||||
}
|
||||
|
||||
int compareTermTo(TermsEnumIndex that) {
|
||||
if (currentTermPrefix8 != that.currentTermPrefix8) {
|
||||
int cmp = Long.compareUnsigned(currentTermPrefix8, that.currentTermPrefix8);
|
||||
assert Integer.signum(cmp)
|
||||
== Integer.signum(
|
||||
Arrays.compareUnsigned(
|
||||
currentTerm.bytes,
|
||||
currentTerm.offset,
|
||||
currentTerm.offset + currentTerm.length,
|
||||
that.currentTerm.bytes,
|
||||
that.currentTerm.offset,
|
||||
that.currentTerm.offset + that.currentTerm.length));
|
||||
return cmp;
|
||||
}
|
||||
|
||||
return Arrays.compareUnsigned(
|
||||
currentTerm.bytes,
|
||||
currentTerm.offset,
|
||||
currentTerm.offset + currentTerm.length,
|
||||
that.currentTerm.bytes,
|
||||
that.currentTerm.offset,
|
||||
that.currentTerm.offset + that.currentTerm.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toString(termsEnum);
|
||||
}
|
||||
|
||||
/** Wrapper around a term that allows for quick equals comparisons. */
|
||||
static class TermState {
|
||||
private final BytesRefBuilder term = new BytesRefBuilder();
|
||||
private long termPrefix8;
|
||||
|
||||
void copyFrom(TermsEnumIndex tei) {
|
||||
term.copyBytes(tei.term());
|
||||
termPrefix8 = tei.currentTermPrefix8;
|
||||
}
|
||||
}
|
||||
|
||||
boolean termEquals(TermState that) {
|
||||
if (currentTermPrefix8 != that.termPrefix8) {
|
||||
return false;
|
||||
}
|
||||
return Arrays.equals(
|
||||
currentTerm.bytes,
|
||||
currentTerm.offset,
|
||||
currentTerm.offset + currentTerm.length,
|
||||
that.term.bytes(),
|
||||
0,
|
||||
that.term.length());
|
||||
}
|
||||
}
|
|
@ -21,8 +21,6 @@ import java.lang.Runtime.Version;
|
|||
import java.lang.StackWalker.StackFrame;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.MethodType;
|
||||
import java.security.AccessController;
|
||||
import java.security.PrivilegedAction;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
@ -31,7 +29,7 @@ import java.util.Set;
|
|||
import java.util.function.Predicate;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.lucene.util.SuppressForbidden;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
|
||||
/**
|
||||
|
@ -129,7 +127,7 @@ public abstract class VectorizationProvider {
|
|||
"Vector bitsize and/or integer vectors enforcement; using default vectorization provider outside of testMode");
|
||||
return new DefaultVectorizationProvider();
|
||||
}
|
||||
if (isClientVM()) {
|
||||
if (Constants.IS_CLIENT_VM) {
|
||||
LOG.warning("C2 compiler is disabled; Java vector incubator API can't be enabled");
|
||||
return new DefaultVectorizationProvider();
|
||||
}
|
||||
|
@ -188,23 +186,6 @@ public abstract class VectorizationProvider {
|
|||
&& !Objects.equals("I", "i".toUpperCase(Locale.getDefault()));
|
||||
}
|
||||
|
||||
@SuppressWarnings("removal")
|
||||
@SuppressForbidden(reason = "security manager")
|
||||
private static boolean isClientVM() {
|
||||
try {
|
||||
final PrivilegedAction<Boolean> action =
|
||||
() -> System.getProperty("java.vm.info", "").contains("emulated-client");
|
||||
return AccessController.doPrivileged(action);
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
SecurityException e) {
|
||||
LOG.warning(
|
||||
"SecurityManager denies permission to 'java.vm.info' system property, so state of C2 compiler can't be detected. "
|
||||
+ "In case of performance issues allow access to this property.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// add all possible callers here as FQCN:
|
||||
private static final Set<String> VALID_CALLERS = Set.of("org.apache.lucene.util.VectorUtil");
|
||||
|
||||
|
|
|
@ -475,7 +475,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
|
|||
|
||||
private class CompetitiveIterator extends DocIdSetIterator {
|
||||
|
||||
private static final int MAX_TERMS = 128;
|
||||
private static final int MAX_TERMS = 1024;
|
||||
|
||||
private final LeafReaderContext context;
|
||||
private final int maxDoc;
|
||||
|
|
|
@ -16,18 +16,25 @@
|
|||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.security.AccessController;
|
||||
import java.security.PrivilegedAction;
|
||||
import java.util.Objects;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/** Some useful constants. */
|
||||
public final class Constants {
|
||||
private Constants() {} // can't construct
|
||||
|
||||
private static final String UNKNOWN = "Unknown";
|
||||
|
||||
/** JVM vendor info. */
|
||||
public static final String JVM_VENDOR = System.getProperty("java.vm.vendor");
|
||||
public static final String JVM_VENDOR = getSysProp("java.vm.vendor", UNKNOWN);
|
||||
|
||||
/** JVM vendor name. */
|
||||
public static final String JVM_NAME = System.getProperty("java.vm.name");
|
||||
public static final String JVM_NAME = getSysProp("java.vm.name", UNKNOWN);
|
||||
|
||||
/** The value of <code>System.getProperty("os.name")</code>. * */
|
||||
public static final String OS_NAME = System.getProperty("os.name");
|
||||
public static final String OS_NAME = getSysProp("os.name", UNKNOWN);
|
||||
|
||||
/** True iff running on Linux. */
|
||||
public static final boolean LINUX = OS_NAME.startsWith("Linux");
|
||||
|
@ -45,36 +52,67 @@ public final class Constants {
|
|||
public static final boolean FREE_BSD = OS_NAME.startsWith("FreeBSD");
|
||||
|
||||
/** The value of <code>System.getProperty("os.arch")</code>. */
|
||||
public static final String OS_ARCH = System.getProperty("os.arch");
|
||||
public static final String OS_ARCH = getSysProp("os.arch", UNKNOWN);
|
||||
|
||||
/** The value of <code>System.getProperty("os.version")</code>. */
|
||||
public static final String OS_VERSION = System.getProperty("os.version");
|
||||
public static final String OS_VERSION = getSysProp("os.version", UNKNOWN);
|
||||
|
||||
/** The value of <code>System.getProperty("java.vendor")</code>. */
|
||||
public static final String JAVA_VENDOR = System.getProperty("java.vendor");
|
||||
public static final String JAVA_VENDOR = getSysProp("java.vendor", UNKNOWN);
|
||||
|
||||
/** True iff the Java runtime is a client runtime and C2 compiler is not enabled */
|
||||
public static final boolean IS_CLIENT_VM =
|
||||
getSysProp("java.vm.info", "").contains("emulated-client");
|
||||
|
||||
/** True iff running on a 64bit JVM */
|
||||
public static final boolean JRE_IS_64BIT;
|
||||
public static final boolean JRE_IS_64BIT = is64Bit();
|
||||
|
||||
static {
|
||||
boolean is64Bit = false;
|
||||
String datamodel = null;
|
||||
/** true iff we know fast FMA is supported, to deliver less error */
|
||||
public static final boolean HAS_FAST_FMA =
|
||||
(IS_CLIENT_VM == false)
|
||||
&& Objects.equals(OS_ARCH, "amd64")
|
||||
&& HotspotVMOptions.get("UseFMA").map(Boolean::valueOf).orElse(false);
|
||||
|
||||
private static boolean is64Bit() {
|
||||
final String datamodel = getSysProp("sun.arch.data.model");
|
||||
if (datamodel != null) {
|
||||
return datamodel.contains("64");
|
||||
} else {
|
||||
return (OS_ARCH != null && OS_ARCH.contains("64"));
|
||||
}
|
||||
}
|
||||
|
||||
private static String getSysProp(String property) {
|
||||
try {
|
||||
datamodel = System.getProperty("sun.arch.data.model");
|
||||
if (datamodel != null) {
|
||||
is64Bit = datamodel.contains("64");
|
||||
}
|
||||
return doPrivileged(() -> System.getProperty(property));
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
SecurityException ex) {
|
||||
SecurityException se) {
|
||||
logSecurityWarning(property);
|
||||
return null;
|
||||
}
|
||||
if (datamodel == null) {
|
||||
if (OS_ARCH != null && OS_ARCH.contains("64")) {
|
||||
is64Bit = true;
|
||||
} else {
|
||||
is64Bit = false;
|
||||
}
|
||||
}
|
||||
|
||||
private static String getSysProp(String property, String def) {
|
||||
try {
|
||||
return doPrivileged(() -> System.getProperty(property, def));
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
SecurityException se) {
|
||||
logSecurityWarning(property);
|
||||
return def;
|
||||
}
|
||||
JRE_IS_64BIT = is64Bit;
|
||||
}
|
||||
|
||||
private static void logSecurityWarning(String property) {
|
||||
var log = Logger.getLogger(Constants.class.getName());
|
||||
log.warning("SecurityManager prevented access to system property: " + property);
|
||||
}
|
||||
|
||||
// Runs the given action through AccessController.doPrivileged and returns its result.
// Extracted to a method to be able to apply the SuppressForbidden annotation
// (AccessController is deprecated for removal, hence the "removal" warning is suppressed as well).
|
||||
@SuppressWarnings("removal")
|
||||
@SuppressForbidden(reason = "security manager")
|
||||
private static <T> T doPrivileged(PrivilegedAction<T> action) {
|
||||
return AccessController.doPrivileged(action);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
 * Accessor to get Hotspot VM Options (if available).
 *
 * <p>The Hotspot diagnostic MX bean is looked up once, via reflection, when this class is
 * initialized. If that lookup fails, a warning is logged and every call to {@link #get(String)}
 * returns an empty optional.
 */
final class HotspotVMOptions {
  private HotspotVMOptions() {} // can't construct

  private static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
  private static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";

  /** True if the Java VM is based on Hotspot and has the Hotspot MX bean readable by Lucene */
  public static final boolean IS_HOTSPOT;

  /** Maps an option name to its value; always yields empty when no bean could be obtained. */
  private static final Function<String, Optional<String>> ACCESSOR;

  static {
    Function<String, Optional<String>> beanLookup = null;
    try {
      beanLookup = createBeanLookup();
    } catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
      logUnavailable();
    }
    if (beanLookup == null) {
      // no bean (or the reflective lookup failed): nothing can be read
      IS_HOTSPOT = false;
      ACCESSOR = name -> Optional.empty();
    } else {
      IS_HOTSPOT = true;
      ACCESSOR = beanLookup;
    }
  }

  /**
   * Returns an optional with the value of a Hotspot VM option. If the VM option does not exist or
   * is not readable, returns an empty optional.
   *
   * @param name the name of the VM option, must not be {@code null}
   * @throws NullPointerException if {@code name} is {@code null}
   */
  public static Optional<String> get(String name) {
    Objects.requireNonNull(name, "name");
    return ACCESSOR.apply(name);
  }

  /**
   * Reflectively obtains the Hotspot diagnostic MX bean and builds the option lookup around it.
   *
   * @return the lookup function, or {@code null} if the management factory returned no bean
   * @throws ReflectiveOperationException if a required class or method cannot be found or invoked
   */
  private static Function<String, Optional<String>> createBeanLookup()
      throws ReflectiveOperationException {
    final Class<?> beanType = Class.forName(HOTSPOT_BEAN_CLASS);
    // we use reflection for this, because the management factory is not part
    // of java.base module:
    final Method beanFactory =
        Class.forName(MANAGEMENT_FACTORY_CLASS).getMethod("getPlatformMXBean", Class.class);
    final Object diagnosticBean = beanFactory.invoke(null, beanType);
    if (diagnosticBean == null) {
      return null;
    }
    final Method optionGetter = beanType.getMethod("getVMOption", String.class);
    final Method valueGetter = optionGetter.getReturnType().getMethod("getValue");
    return name -> {
      try {
        final Object option = optionGetter.invoke(diagnosticBean, name);
        return Optional.of(valueGetter.invoke(option).toString());
      } catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
        // unknown or unreadable option
        return Optional.empty();
      }
    };
  }

  /**
   * Logs why the Hotspot bean could not be used: either the 'jdk.management' module is not
   * readable from this class's module, or the bean lookup failed for another reason.
   */
  private static void logUnavailable() {
    final Logger log = Logger.getLogger(HotspotVMOptions.class.getName());
    final Module self = HotspotVMOptions.class.getModule();
    final ModuleLayer layer = self.getLayer();
    // classpath / unnamed module has no layer, so we need to check:
    final boolean managementUnreadable =
        layer != null
            && layer.findModule("jdk.management").map(self::canRead).orElse(false) == false;
    if (managementUnreadable) {
      log.warning(
          "Lucene cannot access JVM internals to optimize algorithms or calculate object sizes, unless the 'jdk.management' Java module "
              + "is readable [please add 'jdk.management' to modular application either by command line or its module descriptor].");
    } else {
      log.warning(
          "Lucene cannot optimize algorithms or calculate object sizes for JVMs that are not based on Hotspot or a compatible implementation.");
    }
  }
}
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.util;
|
|||
|
||||
import java.lang.reflect.Array;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.security.AccessControlException;
|
||||
import java.security.AccessController;
|
||||
|
@ -30,7 +29,6 @@ import java.util.Collections;
|
|||
import java.util.IdentityHashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Logger;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -112,64 +110,16 @@ public final class RamUsageEstimator {
|
|||
/** For testing only */
|
||||
static final boolean JVM_IS_HOTSPOT_64BIT;
|
||||
|
||||
static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
|
||||
static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";
|
||||
|
||||
/** Initialize constants and try to collect information about the JVM internals. */
|
||||
static {
|
||||
if (Constants.JRE_IS_64BIT) {
|
||||
if (Constants.JRE_IS_64BIT && HotspotVMOptions.IS_HOTSPOT) {
|
||||
// Try to get compressed oops and object alignment (the default seems to be 8 on Hotspot);
|
||||
// (this only works on 64 bit, on 32 bits the alignment and reference size is fixed):
|
||||
boolean compressedOops = false;
|
||||
int objectAlignment = 8;
|
||||
boolean isHotspot = false;
|
||||
try {
|
||||
final Class<?> beanClazz = Class.forName(HOTSPOT_BEAN_CLASS);
|
||||
// we use reflection for this, because the management factory is not part
|
||||
// of Java 8's compact profile:
|
||||
final Object hotSpotBean =
|
||||
Class.forName(MANAGEMENT_FACTORY_CLASS)
|
||||
.getMethod("getPlatformMXBean", Class.class)
|
||||
.invoke(null, beanClazz);
|
||||
if (hotSpotBean != null) {
|
||||
isHotspot = true;
|
||||
final Method getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
|
||||
try {
|
||||
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "UseCompressedOops");
|
||||
compressedOops =
|
||||
Boolean.parseBoolean(
|
||||
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
|
||||
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
|
||||
isHotspot = false;
|
||||
}
|
||||
try {
|
||||
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "ObjectAlignmentInBytes");
|
||||
objectAlignment =
|
||||
Integer.parseInt(
|
||||
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
|
||||
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
|
||||
isHotspot = false;
|
||||
}
|
||||
}
|
||||
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
|
||||
isHotspot = false;
|
||||
final Logger log = Logger.getLogger(RamUsageEstimator.class.getName());
|
||||
final Module module = RamUsageEstimator.class.getModule();
|
||||
final ModuleLayer layer = module.getLayer();
|
||||
// classpath / unnamed module has no layer, so we need to check:
|
||||
if (layer != null
|
||||
&& layer.findModule("jdk.management").map(module::canRead).orElse(false) == false) {
|
||||
log.warning(
|
||||
"Lucene cannot correctly calculate object sizes on 64bit JVMs, unless the 'jdk.management' Java module "
|
||||
+ "is readable [please add 'jdk.management' to modular application either by command line or its module descriptor]");
|
||||
} else {
|
||||
log.warning(
|
||||
"Lucene cannot correctly calculate object sizes on 64bit JVMs that are not based on Hotspot or a compatible implementation.");
|
||||
}
|
||||
}
|
||||
JVM_IS_HOTSPOT_64BIT = isHotspot;
|
||||
COMPRESSED_REFS_ENABLED = compressedOops;
|
||||
NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
|
||||
JVM_IS_HOTSPOT_64BIT = true;
|
||||
COMPRESSED_REFS_ENABLED =
|
||||
HotspotVMOptions.get("UseCompressedOops").map(Boolean::valueOf).orElse(false);
|
||||
NUM_BYTES_OBJECT_ALIGNMENT =
|
||||
HotspotVMOptions.get("ObjectAlignmentInBytes").map(Integer::valueOf).orElse(8);
|
||||
// reference size is 4, if we have compressed oops:
|
||||
NUM_BYTES_OBJECT_REF = COMPRESSED_REFS_ENABLED ? 4 : 8;
|
||||
// "best guess" based on reference size:
|
||||
|
|
|
@ -1128,6 +1128,10 @@ public class RegExp {
|
|||
if (start != pos) m = Integer.parseInt(originalString.substring(start, pos));
|
||||
} else m = n;
|
||||
if (!match('}')) throw new IllegalArgumentException("expected '}' at position " + pos);
|
||||
if (m != -1 && n > m) {
|
||||
throw new IllegalArgumentException(
|
||||
"invalid repetition range(out of order): " + n + ".." + m);
|
||||
}
|
||||
if (m == -1) e = makeRepeat(flags, e, n);
|
||||
else e = makeRepeat(flags, e, n, m);
|
||||
}
|
||||
|
|
|
@ -270,10 +270,6 @@ public class FSTCompiler<T> {
|
|||
return directAddressingMaxOversizingFactor;
|
||||
}
|
||||
|
||||
public long getTermCount() {
|
||||
return frontier[0].inputCount;
|
||||
}
|
||||
|
||||
public long getNodeCount() {
|
||||
// 1+ in order to count the -1 implicit final node
|
||||
return 1 + nodeCount;
|
||||
|
@ -749,7 +745,6 @@ public class FSTCompiler<T> {
|
|||
// format cannot represent the empty input since
|
||||
// 'finalness' is stored on the incoming arc, not on
|
||||
// the node
|
||||
frontier[0].inputCount++;
|
||||
frontier[0].isFinal = true;
|
||||
fst.setEmptyOutput(output);
|
||||
return;
|
||||
|
@ -760,9 +755,6 @@ public class FSTCompiler<T> {
|
|||
int pos2 = input.offset;
|
||||
final int pos1Stop = Math.min(lastInput.length(), input.length);
|
||||
while (true) {
|
||||
frontier[pos1].inputCount++;
|
||||
// System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" +
|
||||
// frontier[pos1]);
|
||||
if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
|
||||
break;
|
||||
}
|
||||
|
@ -786,7 +778,6 @@ public class FSTCompiler<T> {
|
|||
// init tail states for current input
|
||||
for (int idx = prefixLenPlus1; idx <= input.length; idx++) {
|
||||
frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]);
|
||||
frontier[idx].inputCount++;
|
||||
}
|
||||
|
||||
final UnCompiledNode<T> lastNode = frontier[input.length];
|
||||
|
@ -835,8 +826,6 @@ public class FSTCompiler<T> {
|
|||
|
||||
// save last input
|
||||
lastInput.copyInts(input);
|
||||
|
||||
// System.out.println(" count[0]=" + frontier[0].inputCount);
|
||||
}
|
||||
|
||||
private boolean validOutput(T output) {
|
||||
|
@ -906,10 +895,6 @@ public class FSTCompiler<T> {
|
|||
T output;
|
||||
boolean isFinal;
|
||||
|
||||
// TODO: remove this tracking? we used to use it for confusingly pruning NodeHash, but
|
||||
// we switched to LRU by RAM usage instead:
|
||||
long inputCount;
|
||||
|
||||
/** This node's depth, starting from the automaton root. */
|
||||
final int depth;
|
||||
|
||||
|
@ -935,7 +920,6 @@ public class FSTCompiler<T> {
|
|||
numArcs = 0;
|
||||
isFinal = false;
|
||||
output = owner.NO_OUTPUT;
|
||||
inputCount = 0;
|
||||
|
||||
// We don't clear the depth here because it never changes
|
||||
// for nodes on the frontier (even when reused).
|
||||
|
|
|
@ -77,41 +77,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
VectorizationProvider.TESTS_FORCE_INTEGER_VECTORS || (isAMD64withoutAVX2 == false);
|
||||
}
|
||||
|
||||
private static final String MANAGEMENT_FACTORY_CLASS = "java.lang.management.ManagementFactory";
|
||||
private static final String HOTSPOT_BEAN_CLASS = "com.sun.management.HotSpotDiagnosticMXBean";
|
||||
|
||||
// best effort to see if FMA is fast (this is architecture-independent option)
|
||||
private static boolean hasFastFMA() {
|
||||
// on ARM cpus, FMA works fine but is a slight slowdown: don't use it.
|
||||
if (Constants.OS_ARCH.equals("amd64") == false) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
final Class<?> beanClazz = Class.forName(HOTSPOT_BEAN_CLASS);
|
||||
// we use reflection for this, because the management factory is not part
|
||||
// of Java 8's compact profile:
|
||||
final Object hotSpotBean =
|
||||
Class.forName(MANAGEMENT_FACTORY_CLASS)
|
||||
.getMethod("getPlatformMXBean", Class.class)
|
||||
.invoke(null, beanClazz);
|
||||
if (hotSpotBean != null) {
|
||||
final var getVMOptionMethod = beanClazz.getMethod("getVMOption", String.class);
|
||||
final Object vmOption = getVMOptionMethod.invoke(hotSpotBean, "UseFMA");
|
||||
return Boolean.parseBoolean(
|
||||
vmOption.getClass().getMethod("getValue").invoke(vmOption).toString());
|
||||
}
|
||||
return false;
|
||||
} catch (@SuppressWarnings("unused") ReflectiveOperationException | RuntimeException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// true if we know FMA is supported, to deliver less error
|
||||
static final boolean HAS_FAST_FMA = hasFastFMA();
|
||||
|
||||
// the way FMA should work! if available use it, otherwise fall back to mul/add
|
||||
private static FloatVector fma(FloatVector a, FloatVector b, FloatVector c) {
|
||||
if (HAS_FAST_FMA) {
|
||||
if (Constants.HAS_FAST_FMA) {
|
||||
return a.fma(b, c);
|
||||
} else {
|
||||
return a.mul(b).add(c);
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.security.PrivilegedAction;
|
|||
import java.util.Locale;
|
||||
import java.util.logging.Logger;
|
||||
import jdk.incubator.vector.FloatVector;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.SuppressForbidden;
|
||||
|
||||
/** A vectorization provider that leverages the Panama Vector API. */
|
||||
|
@ -62,7 +63,7 @@ final class PanamaVectorizationProvider extends VectorizationProvider {
|
|||
Locale.ENGLISH,
|
||||
"Java vector incubator API enabled; uses preferredBitSize=%d%s%s",
|
||||
PanamaVectorUtilSupport.VECTOR_BITSIZE,
|
||||
PanamaVectorUtilSupport.HAS_FAST_FMA ? "; FMA enabled" : "",
|
||||
Constants.HAS_FAST_FMA ? "; FMA enabled" : "",
|
||||
PanamaVectorUtilSupport.HAS_FAST_INTEGER_VECTORS
|
||||
? ""
|
||||
: "; floating-point vectors only"));
|
||||
|
|
|
@ -459,7 +459,8 @@ public class TestDeletionPolicy extends LuceneTestCase {
|
|||
dir,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setIndexDeletionPolicy(policy)
|
||||
.setIndexCommit(lastCommit));
|
||||
.setIndexCommit(lastCommit)
|
||||
.setMergePolicy(newLogMergePolicy(10)));
|
||||
assertEquals(10, writer.getDocStats().numDocs);
|
||||
|
||||
// Should undo our rollback:
|
||||
|
@ -476,12 +477,13 @@ public class TestDeletionPolicy extends LuceneTestCase {
|
|||
dir,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setIndexDeletionPolicy(policy)
|
||||
.setIndexCommit(lastCommit));
|
||||
.setIndexCommit(lastCommit)
|
||||
.setMergePolicy(newLogMergePolicy(10)));
|
||||
assertEquals(10, writer.getDocStats().numDocs);
|
||||
// Commits the rollback:
|
||||
writer.close();
|
||||
|
||||
// Now 8 because we made another commit
|
||||
// Now 7 because we made another commit
|
||||
assertEquals(7, DirectoryReader.listCommits(dir).size());
|
||||
|
||||
r = DirectoryReader.open(dir);
|
||||
|
@ -507,7 +509,10 @@ public class TestDeletionPolicy extends LuceneTestCase {
|
|||
// but this time keeping only the last commit:
|
||||
writer =
|
||||
new IndexWriter(
|
||||
dir, newIndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(lastCommit));
|
||||
dir,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setIndexCommit(lastCommit)
|
||||
.setMergePolicy(newLogMergePolicy(10)));
|
||||
assertEquals(10, writer.getDocStats().numDocs);
|
||||
|
||||
// Reader still sees fully merged index, because writer
|
||||
|
|
|
@ -2395,11 +2395,12 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
writer.addDocument(doc);
|
||||
assertTrue(writer.hasUncommittedChanges());
|
||||
|
||||
// Must commit, waitForMerges, commit again, to be
|
||||
// certain that hasUncommittedChanges returns false:
|
||||
writer.commit();
|
||||
writer.waitForMerges();
|
||||
writer.commit();
|
||||
// Must commit and wait for merges as long as the commit triggers merges to be certain that
|
||||
// hasUncommittedChanges returns false
|
||||
do {
|
||||
writer.waitForMerges();
|
||||
writer.commit();
|
||||
} while (writer.hasPendingMerges());
|
||||
assertFalse(writer.hasUncommittedChanges());
|
||||
writer.deleteDocuments(new Term("id", "xyz"));
|
||||
assertTrue(writer.hasUncommittedChanges());
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public class TestTermsEnumIndex extends LuceneTestCase {
|
||||
|
||||
public void testPrefix8ToComparableUnsignedLong() {
|
||||
byte[] b = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
|
||||
assertEquals(0L, TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 1, 0)));
|
||||
assertEquals(4L << 56, TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 1)));
|
||||
assertEquals(
|
||||
(4L << 56) | (5L << 48),
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 2)));
|
||||
assertEquals(
|
||||
(4L << 56) | (5L << 48) | (6L << 40),
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 3)));
|
||||
assertEquals(
|
||||
(4L << 56) | (5L << 48) | (6L << 40) | (7L << 32),
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 4)));
|
||||
assertEquals(
|
||||
(4L << 56) | (5L << 48) | (6L << 40) | (7L << 32) | (8L << 24),
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 5)));
|
||||
assertEquals(
|
||||
(4L << 56) | (5L << 48) | (6L << 40) | (7L << 32) | (8L << 24) | (9L << 16),
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 6)));
|
||||
assertEquals(
|
||||
(4L << 56) | (5L << 48) | (6L << 40) | (7L << 32) | (8L << 24) | (9L << 16) | (10L << 8),
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 7)));
|
||||
assertEquals(
|
||||
(4L << 56)
|
||||
| (5L << 48)
|
||||
| (6L << 40)
|
||||
| (7L << 32)
|
||||
| (8L << 24)
|
||||
| (9L << 16)
|
||||
| (10L << 8)
|
||||
| 11L,
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 8)));
|
||||
assertEquals(
|
||||
(4L << 56)
|
||||
| (5L << 48)
|
||||
| (6L << 40)
|
||||
| (7L << 32)
|
||||
| (8L << 24)
|
||||
| (9L << 16)
|
||||
| (10L << 8)
|
||||
| 11L,
|
||||
TermsEnumIndex.prefix8ToComparableUnsignedLong(new BytesRef(b, 3, 9)));
|
||||
}
|
||||
}
|
|
@ -86,6 +86,17 @@ public class TestRegExp extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testParseIllegalRepeatExp() {
|
||||
// out of order
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new RegExp("a{99,11}");
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("out of order"));
|
||||
}
|
||||
|
||||
static String randomDocValue(int minLength) {
|
||||
String charPalette = "AAAaaaBbbCccc123456 \t";
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
|
|
@ -56,7 +56,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
for (int iter = 0; iter < 1; iter++) {
|
||||
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B
|
||||
{
|
||||
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
|
||||
System.out.println("\nTEST: ~2.2B nodes; output=NO_OUTPUTS");
|
||||
Outputs<Object> outputs = NoOutputs.getSingleton();
|
||||
Object NO_OUTPUT = outputs.getNoOutput();
|
||||
final FSTCompiler<Object> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
|
|
@ -568,7 +568,6 @@ public class TestFSTs extends LuceneTestCase {
|
|||
System.out.println(
|
||||
((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms");
|
||||
|
||||
assert fstCompiler.getTermCount() == ord;
|
||||
FST<T> fst = fstCompiler.compile();
|
||||
long tEnd = System.nanoTime();
|
||||
System.out.println(
|
||||
|
|
|
@ -38,8 +38,8 @@ public final class URLLabel extends JLabel {
|
|||
super(text);
|
||||
|
||||
try {
|
||||
this.link = (URI.create(text)).toURL();
|
||||
} catch (MalformedURLException e) {
|
||||
this.link = (new URI(text)).toURL();
|
||||
} catch (URISyntaxException | MalformedURLException e) {
|
||||
throw new LukeException(e.getMessage(), e);
|
||||
}
|
||||
|
||||
|
|
|
@ -476,7 +476,12 @@ public abstract class LuceneTestCase extends Assert {
|
|||
* of iterations to scale your tests (for nightly builds).
|
||||
*/
|
||||
public static final int RANDOM_MULTIPLIER =
|
||||
systemPropertyAsInt("tests.multiplier", TEST_NIGHTLY ? 2 : 1);
|
||||
systemPropertyAsInt("tests.multiplier", defaultRandomMultiplier());
|
||||
|
||||
/** Compute the default value of the random multiplier (based on {@link #TEST_NIGHTLY}). */
|
||||
static int defaultRandomMultiplier() {
|
||||
return TEST_NIGHTLY ? 2 : 1;
|
||||
}
|
||||
|
||||
/** Leave temporary files on disk, even on successful runs. */
|
||||
public static final boolean LEAVE_TEMPORARY;
|
||||
|
|
|
@ -189,7 +189,8 @@ public final class RunListenerPrintReproduceInfo extends RunListener {
|
|||
addVmOpt(b, "tests.seed", RandomizedContext.current().getRunnerSeedAsString());
|
||||
|
||||
// Test groups and multipliers.
|
||||
if (RANDOM_MULTIPLIER > 1) addVmOpt(b, "tests.multiplier", RANDOM_MULTIPLIER);
|
||||
if (RANDOM_MULTIPLIER != LuceneTestCase.defaultRandomMultiplier())
|
||||
addVmOpt(b, "tests.multiplier", RANDOM_MULTIPLIER);
|
||||
if (TEST_NIGHTLY) addVmOpt(b, SYSPROP_NIGHTLY, TEST_NIGHTLY);
|
||||
if (TEST_WEEKLY) addVmOpt(b, SYSPROP_WEEKLY, TEST_WEEKLY);
|
||||
if (TEST_MONSTER) addVmOpt(b, SYSPROP_MONSTER, TEST_MONSTER);
|
||||
|
|
Loading…
Reference in New Issue