Merge branch 'main' into check_liveDoc_one_time

This commit is contained in:
zhouhui 2024-06-20 10:03:12 +08:00
commit 036275df66
171 changed files with 8912 additions and 4743 deletions

4
.gitattributes vendored
View File

@ -1,6 +1,6 @@
# Ignore all differences in line endings for the lock file. # Ignore all differences in line endings for the lock file.
versions.lock text eol=lf versions.lock text eol=lf
versions.props text eol=lf versions.toml text eol=lf
# Gradle files are always in LF. # Gradle files are always in LF.
*.gradle text eol=lf *.gradle text eol=lf

View File

@ -58,7 +58,7 @@ In case your contribution fixes a bug, please create a new test case that fails
### IDE support ### IDE support
- *IntelliJ* - IntelliJ idea can import and build gradle-based projects out of the box. - *IntelliJ* - IntelliJ idea can import and build gradle-based projects out of the box. It will default to running tests by calling the gradle wrapper, and while this works, it can be a bit slow. If instead you configure IntelliJ to use its own built-in test runner by (in 2024 version) navigating to settings for Build Execution & Deployment/Build Tools/Gradle (under File/Settings menu on some platforms) and selecting "Build and Run using: IntelliJ IDEA" and "Run Tests using: IntelliJ IDEA", then some tests will run faster. However some other tests will not run using this configuration.
- *Eclipse* - Basic support ([help/IDEs.txt](https://github.com/apache/lucene/blob/main/help/IDEs.txt#L7)). - *Eclipse* - Basic support ([help/IDEs.txt](https://github.com/apache/lucene/blob/main/help/IDEs.txt#L7)).
- *Netbeans* - Not tested. - *Netbeans* - Not tested.

View File

@ -31,8 +31,8 @@ comprehensive documentation, visit:
- Latest Releases: <https://lucene.apache.org/core/documentation.html> - Latest Releases: <https://lucene.apache.org/core/documentation.html>
- Nightly: <https://ci-builds.apache.org/job/Lucene/job/Lucene-Artifacts-main/javadoc/> - Nightly: <https://ci-builds.apache.org/job/Lucene/job/Lucene-Artifacts-main/javadoc/>
- New contributors should start by reading [Contributing Guide](./CONTRIBUTING.md)
- Build System Documentation: [help/](./help/) - Build System Documentation: [help/](./help/)
- Developer Documentation: [dev-docs/](./dev-docs/)
- Migration Guide: [lucene/MIGRATE.md](./lucene/MIGRATE.md) - Migration Guide: [lucene/MIGRATE.md](./lucene/MIGRATE.md)
## Building ## Building
@ -45,8 +45,6 @@ comprehensive documentation, visit:
We'll assume that you know how to get and set up the JDK - if you don't, then we suggest starting at https://jdk.java.net/ and learning more about Java, before returning to this README. We'll assume that you know how to get and set up the JDK - if you don't, then we suggest starting at https://jdk.java.net/ and learning more about Java, before returning to this README.
See [Contributing Guide](./CONTRIBUTING.md) for details.
## Contributing ## Contributing
Bug fixes, improvements and new features are always welcome! Bug fixes, improvements and new features are always welcome!
@ -54,6 +52,8 @@ Please review the [Contributing to Lucene
Guide](./CONTRIBUTING.md) for information on Guide](./CONTRIBUTING.md) for information on
contributing. contributing.
- Additional Developer Documentation: [dev-docs/](./dev-docs/)
## Discussion and Support ## Discussion and Support
- [Users Mailing List](https://lucene.apache.org/core/discussion.html#java-user-list-java-userluceneapacheorg) - [Users Mailing List](https://lucene.apache.org/core/discussion.html#java-user-list-java-userluceneapacheorg)

View File

@ -15,30 +15,50 @@
* limitations under the License. * limitations under the License.
*/ */
plugins {
id "java-gradle-plugin"
alias(deps.plugins.spotless) apply false
alias(deps.plugins.forbiddenapis) apply false
}
repositories { repositories {
mavenCentral() mavenCentral()
} }
ext { group = "org.apache"
// Minimum Java version required to compile buildSrc.
minJavaVersion = JavaVersion.VERSION_21
}
// Make sure the build environment is consistent. // Make sure the build environment is consistent.
apply from: file('../gradle/validation/check-environment.gradle') apply from: file('../../gradle/conventions.gradle')
apply from: file('../../gradle/validation/check-environment.gradle')
// Load common buildSrc and script deps. // Add spotless/ tidy.
apply from: file("scriptDepVersions.gradle") tasks.register("checkJdkInternalsExportedToGradle") {}
apply from: file('../../gradle/validation/spotless.gradle')
apply from: file('../../gradle/validation/forbidden-apis.gradle')
java {
sourceCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
targetCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
}
gradlePlugin {
automatedPublishing = false
plugins {
buildInfra {
id = 'lucene.build-infra'
implementationClass = 'org.apache.lucene.gradle.buildinfra.BuildInfraPlugin'
}
}
}
dependencies { dependencies {
implementation gradleApi() implementation gradleApi()
implementation localGroovy() implementation localGroovy()
implementation deps.commons.codec
implementation "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
} }
if (!rootProject.hasJavaFlightRecorder) { if (!hasJavaFlightRecorder) {
logger.warn('Module jdk.jfr is not available; skipping compilation of Java Flight Recorder support.') logger.warn('Module jdk.jfr is not available; skipping compilation of Java Flight Recorder support.')
tasks.named('compileJava').configure { tasks.named('compileJava').configure {
exclude('**/ProfileResults.java') exclude('**/ProfileResults.java')

View File

@ -15,18 +15,12 @@
* limitations under the License. * limitations under the License.
*/ */
plugins { rootProject.name = 'build-infra'
id 'java-library'
} dependencyResolutionManagement {
versionCatalogs {
version = "1.0.0-SNAPSHOT" deps {
group = "org.apache.lucene.tools" from(files('../../versions.toml'))
description = 'Doclet-based javadoc validation' }
}
sourceCompatibility = JavaVersion.VERSION_21
targetCompatibility = JavaVersion.VERSION_21
tasks.withType(JavaCompile) {
options.compilerArgs += ["--release", targetCompatibility.toString()]
options.encoding = "UTF-8"
} }

View File

@ -27,6 +27,11 @@
package org.apache.lucene.gradle; package org.apache.lucene.gradle;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Locale;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.gradle.api.DefaultTask; import org.gradle.api.DefaultTask;
import org.gradle.api.GradleException; import org.gradle.api.GradleException;
@ -39,16 +44,10 @@ import org.gradle.api.tasks.TaskAction;
import org.gradle.work.Incremental; import org.gradle.work.Incremental;
import org.gradle.work.InputChanges; import org.gradle.work.InputChanges;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Locale;
public class Checksum extends DefaultTask { public class Checksum extends DefaultTask {
private FileCollection files; private FileCollection files;
private File outputDir; private File outputDir;
private Algorithm algorithm; private Algorithm algorithm = Algorithm.SHA512;
public enum Algorithm { public enum Algorithm {
MD5(new DigestUtils(DigestUtils.getMd5Digest())), MD5(new DigestUtils(DigestUtils.getMd5Digest())),
@ -69,7 +68,6 @@ public class Checksum extends DefaultTask {
public Checksum() { public Checksum() {
outputDir = new File(getProject().getBuildDir(), "checksums"); outputDir = new File(getProject().getBuildDir(), "checksums");
algorithm = Algorithm.SHA256;
} }
@InputFiles @InputFiles
@ -190,6 +188,8 @@ public class Checksum extends DefaultTask {
private FileCollection filesFor(final Algorithm algo) { private FileCollection filesFor(final Algorithm algo) {
return getProject() return getProject()
.fileTree(getOutputDir(), files -> files.include("**/*." + algo.toString().toLowerCase(Locale.ROOT))); .fileTree(
getOutputDir(),
files -> files.include("**/*." + algo.toString().toLowerCase(Locale.ROOT)));
} }
} }

View File

@ -0,0 +1,288 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.gradle.api.internal.tasks.testing.logging.FullExceptionFormatter;
import org.gradle.api.internal.tasks.testing.logging.TestExceptionFormatter;
import org.gradle.api.logging.Logger;
import org.gradle.api.logging.Logging;
import org.gradle.api.tasks.testing.TestDescriptor;
import org.gradle.api.tasks.testing.TestListener;
import org.gradle.api.tasks.testing.TestOutputEvent;
import org.gradle.api.tasks.testing.TestOutputListener;
import org.gradle.api.tasks.testing.TestResult;
import org.gradle.api.tasks.testing.logging.TestLogging;
/**
 * An error reporting listener that queues test output streams and displays them on failure.
 *
 * <p>Output events and formatted failure stack traces are collected per suite. When a suite
 * finishes with a {@code FAILURE} result, the collected output is written to a file under {@code
 * outputsDir} and, unless verbose mode is enabled, echoed to standard output. In verbose mode the
 * output is instead tee'd to standard output as it is written.
 *
 * <p>Heavily inspired by Elasticsearch's ErrorReportingTestListener (ASL 2.0 licensed).
 */
public class ErrorReportingTestListener implements TestOutputListener, TestListener {
  private static final Logger LOGGER = Logging.getLogger(ErrorReportingTestListener.class);

  /** Suites that wrote more than this many units of output (as reported by the buffer) get a warning. */
  private static final long OUTPUT_WARN_THRESHOLD = 1024 * 1024 * 10;

  /** Matches every run of characters that is not allowed in an output log file name. */
  private static final Pattern SANITIZE = Pattern.compile("[^a-zA-Z .\\-_0-9]+");

  private final TestExceptionFormatter formatter;
  private final Map<TestKey, OutputHandler> outputHandlers = new ConcurrentHashMap<>();
  private final Path spillDir;
  private final Path outputsDir;
  private final boolean verboseMode;

  /**
   * @param testLogging logging configuration handed to the exception formatter
   * @param spillDir directory in which temporary spill files for buffered output are created
   * @param outputsDir directory that receives the saved output of failed suites
   * @param verboseMode if true, output is tee'd to standard output while it is being written and
   *     is not echoed again when a suite fails
   */
  public ErrorReportingTestListener(
      TestLogging testLogging, Path spillDir, Path outputsDir, boolean verboseMode) {
    this.formatter = new FullExceptionFormatter(testLogging);
    this.spillDir = spillDir;
    this.outputsDir = outputsDir;
    this.verboseMode = verboseMode;
  }

  /** Appends the output event to the handler of the suite the descriptor belongs to. */
  @Override
  public void onOutput(TestDescriptor testDescriptor, TestOutputEvent outputEvent) {
    handlerFor(testDescriptor).write(outputEvent);
  }

  @Override
  public void beforeSuite(TestDescriptor suite) {
    // noop.
  }

  @Override
  public void beforeTest(TestDescriptor testDescriptor) {
    // Noop.
  }

  /**
   * Reports the output collected for a finished suite and releases its handler.
   *
   * <p>Suites without a parent, or whose name starts with {@code "Gradle"}, are ignored
   * (presumably Gradle's own synthetic wrapper suites; verify against the Gradle version in use).
   *
   * @throws UncheckedIOException if saving or echoing the output fails
   */
  @Override
  public void afterSuite(final TestDescriptor suite, TestResult result) {
    if (suite.getParent() == null || suite.getName().startsWith("Gradle")) {
      return;
    }

    TestKey key = TestKey.of(suite);
    try {
      OutputHandler outputHandler = outputHandlers.get(key);
      if (outputHandler != null) {
        long length = outputHandler.length();
        if (length > OUTPUT_WARN_THRESHOLD) {
          LOGGER.warn(
              String.format(
                  Locale.ROOT,
                  "WARNING: Test %s wrote %,d bytes of output.",
                  suite.getName(),
                  length));
        }
      }

      // If the test suite failed, report output.
      boolean failed = Objects.equals(result.getResultType(), TestResult.ResultType.FAILURE);
      if (failed) {
        Files.createDirectories(outputsDir);
        Path outputLog = outputsDir.resolve(getOutputLogName(suite));

        // Save the output of a failing test to disk. The file is created even when no output
        // was recorded for the suite.
        try (Writer w = Files.newBufferedWriter(outputLog, StandardCharsets.UTF_8)) {
          if (outputHandler != null) {
            outputHandler.copyTo(w);
          }
        }

        // In verbose mode the output has already been tee'd to stdout while it was written.
        if (!verboseMode) {
          echoToStdOut(suite, outputLog);
        }
      }
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    } finally {
      // Always drop and close the handler, whether or not reporting succeeded.
      OutputHandler handler = outputHandlers.remove(key);
      if (handler != null) {
        try {
          handler.close();
        } catch (IOException e) {
          LOGGER.error("Failed to close output handler for: " + key, e);
        }
      }
    }
  }

  /** Prints a header and the contents of the saved output log to standard output. */
  private void echoToStdOut(TestDescriptor suite, Path outputLog) throws IOException {
    // Synchronized on the listener so that echoes made through this listener do not interleave.
    synchronized (this) {
      System.out.println();
      System.out.println(
          suite.getClassName()
              + " > test suite's output saved to "
              + outputLog
              + ", copied below:");
      try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
        char[] buf = new char[1024];
        int len;
        while ((len = reader.read(buf)) >= 0) {
          System.out.print(new String(buf, 0, len));
        }
        System.out.println();
      }
    }
  }

  /**
   * Returns the file name used to save the output of the given suite: {@code
   * OUTPUT-<suite name>.txt} with every run of characters outside {@code [a-zA-Z .\-_0-9]}
   * replaced by a single underscore.
   */
  public static String getOutputLogName(TestDescriptor suite) {
    return SANITIZE.matcher("OUTPUT-" + suite.getName() + ".txt").replaceAll("_");
  }

  @Override
  public void afterTest(TestDescriptor testDescriptor, TestResult result) {
    // Include test failure exception stacktrace(s) in test output log.
    if (result.getResultType() == TestResult.ResultType.FAILURE
        && !result.getExceptions().isEmpty()) {
      String message = formatter.format(testDescriptor, result.getExceptions());
      handlerFor(testDescriptor).write(message);
    }
  }

  /** Returns the handler for the descriptor's suite, creating it on first use. */
  private OutputHandler handlerFor(TestDescriptor descriptor) {
    // Attach output of leaves (individual tests) to their parent.
    if (!descriptor.isComposite()) {
      descriptor = descriptor.getParent();
    }
    return outputHandlers.computeIfAbsent(TestKey.of(descriptor), (key) -> new OutputHandler());
  }

  /**
   * Map key identifying a test descriptor by its class name, its name and the string form of its
   * parent.
   */
  public static class TestKey {
    private final String key;

    private TestKey(String key) {
      this.key = key;
    }

    /** Builds the key {@code <className>::<name>::<parent or "-">} for the given descriptor. */
    public static TestKey of(TestDescriptor d) {
      StringBuilder key = new StringBuilder();
      key.append(d.getClassName());
      key.append("::");
      key.append(d.getName());
      key.append("::");
      key.append(d.getParent() == null ? "-" : d.getParent().toString());
      return new TestKey(key.toString());
    }

    @Override
    public boolean equals(Object o) {
      return o != null && o.getClass() == this.getClass() && Objects.equals(((TestKey) o).key, key);
    }

    @Override
    public int hashCode() {
      return key.hashCode();
    }

    @Override
    public String toString() {
      return key;
    }
  }

  /**
   * Collects the output of one suite. Three prefixed writers (internal messages, stdout, stderr)
   * share a single spill buffer so that the saved log shows which stream each line came from.
   */
  private class OutputHandler implements Closeable {
    // Max single-line buffer before automatic wrap occurs.
    private static final int MAX_LINE_WIDTH = 1024 * 4;

    private final SpillWriter buffer;

    // internal stream.
    private final PrefixedWriter sint;
    // stdout
    private final PrefixedWriter sout;
    // stderr
    private final PrefixedWriter serr;

    // last used stream (so that we can flush it properly and prefixes are not screwed up).
    private PrefixedWriter last;

    public OutputHandler() {
      buffer =
          new SpillWriter(
              () -> {
                try {
                  return Files.createTempFile(spillDir, "spill-", ".tmp");
                } catch (IOException e) {
                  throw new UncheckedIOException(e);
                }
              });

      // In verbose mode everything written to the buffer is also echoed to stdout.
      Writer sink = buffer;
      if (verboseMode) {
        sink = new StdOutTeeWriter(buffer);
      }

      sint = new PrefixedWriter(" > ", sink, MAX_LINE_WIDTH);
      sout = new PrefixedWriter(" 1> ", sink, MAX_LINE_WIDTH);
      serr = new PrefixedWriter(" 2> ", sink, MAX_LINE_WIDTH);
      last = sint;
    }

    /** Writes the event's message to the stdout or stderr stream, depending on its destination. */
    public void write(TestOutputEvent event) {
      write(
          (event.getDestination() == TestOutputEvent.Destination.StdOut ? sout : serr),
          event.getMessage());
    }

    /** Writes a message to the internal stream. */
    public void write(String message) {
      write(sint, message);
    }

    /** Returns the length reported by the underlying spill buffer. */
    public long length() throws IOException {
      return buffer.length();
    }

    private void write(PrefixedWriter out, String message) {
      try {
        // When switching streams, terminate the previous stream's pending line first so that
        // lines from different streams are never merged.
        if (out != last) {
          last.completeLine();
          last = out;
        }
        out.write(message);
      } catch (IOException e) {
        throw new UncheckedIOException("Unable to write to test output.", e);
      }
    }

    /** Flushes pending lines and copies everything collected so far to the given writer. */
    public void copyTo(Writer out) throws IOException {
      flush();
      buffer.copyTo(out);
    }

    /**
     * Completes any partially written line on all three streams, then flushes the buffer. The
     * internal stream is included so that a trailing message without a line terminator (for
     * example a formatted failure) is not left behind in its line buffer.
     */
    public void flush() throws IOException {
      sint.completeLine();
      sout.completeLine();
      serr.completeLine();
      buffer.flush();
    }

    @Override
    public void close() throws IOException {
      buffer.close();
    }
  }
}

View File

@ -67,6 +67,6 @@ public class GradlePropertiesGenerator {
fileContent = fileContent.replace(entry.getKey(), String.valueOf(entry.getValue())); fileContent = fileContent.replace(entry.getKey(), String.valueOf(entry.getValue()));
} }
Files.writeString( Files.writeString(
destination, fileContent, StandardCharsets.UTF_8, StandardOpenOption.CREATE_NEW); destination, fileContent, StandardCharsets.UTF_8, StandardOpenOption.CREATE_NEW);
} }
} }

View File

@ -20,12 +20,13 @@ import java.io.IOException;
import java.io.Writer; import java.io.Writer;
/** /**
* Prefixes every new line with a given string, synchronizing multiple streams to emit consistent lines. * Prefixes every new line with a given string, synchronizing multiple streams to emit consistent
* lines.
*/ */
public class PrefixedWriter extends Writer { public class PrefixedWriter extends Writer {
Writer sink; Writer sink;
private final static char LF = '\n'; private static final char LF = '\n';
private final String prefix; private final String prefix;
private final StringBuilder lineBuffer = new StringBuilder(); private final StringBuilder lineBuffer = new StringBuilder();
private final int maxLineLength; private final int maxLineLength;
@ -45,7 +46,7 @@ public class PrefixedWriter extends Writer {
sink.write(LF); sink.write(LF);
lineBuffer.setLength(0); lineBuffer.setLength(0);
if (c != LF) { if (c != LF) {
lineBuffer.append((char) c); lineBuffer.append((char) c);
} }
} else { } else {
@ -70,9 +71,7 @@ public class PrefixedWriter extends Writer {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
/** /** Complete the current line (emit LF if not at the start of the line already). */
* Complete the current line (emit LF if not at the start of the line already).
*/
public void completeLine() throws IOException { public void completeLine() throws IOException {
if (lineBuffer.length() > 0) { if (lineBuffer.length() > 0) {
write(LF); write(LF);

View File

@ -20,13 +20,12 @@ package org.apache.lucene.gradle;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.AbstractMap.SimpleEntry; import java.util.AbstractMap.SimpleEntry;
import java.util.Arrays;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import jdk.jfr.consumer.RecordedClass; import jdk.jfr.consumer.RecordedClass;
import jdk.jfr.consumer.RecordedEvent; import jdk.jfr.consumer.RecordedEvent;
import jdk.jfr.consumer.RecordedFrame; import jdk.jfr.consumer.RecordedFrame;
@ -36,15 +35,12 @@ import jdk.jfr.consumer.RecordedThread;
import jdk.jfr.consumer.RecordingFile; import jdk.jfr.consumer.RecordingFile;
/** /**
* Processes an array of recording files (from tests), and prints a simple histogram. * Processes an array of recording files (from tests), and prints a simple histogram. Inspired by
* Inspired by the JFR example code. * the JFR example code. Whole stacks are deduplicated (with the default stacksize being 1): you can
* Whole stacks are deduplicated (with the default stacksize being 1): you can drill deeper * drill deeper by adjusting the parameters.
* by adjusting the parameters.
*/ */
public class ProfileResults { public class ProfileResults {
/** /** Formats a frame to a formatted line. This is deduplicated on! */
* Formats a frame to a formatted line. This is deduplicated on!
*/
static String frameToString(RecordedFrame frame, boolean lineNumbers) { static String frameToString(RecordedFrame frame, boolean lineNumbers) {
StringBuilder builder = new StringBuilder(); StringBuilder builder = new StringBuilder();
RecordedMethod method = frame.getMethod(); RecordedMethod method = frame.getMethod();
@ -84,29 +80,32 @@ public class ProfileResults {
/** /**
* Driver method, for testing standalone. * Driver method, for testing standalone.
*
* <pre> * <pre>
* java -Dtests.profile.count=5 buildSrc/src/main/java/org/apache/lucene/gradle/ProfileResults.java \ * java -Dtests.profile.count=5 buildSrc/src/main/java/org/apache/lucene/gradle/ProfileResults.java \
* ./lucene/core/build/tmp/tests-cwd/somefile.jfr ... * ./lucene/core/build/tmp/tests-cwd/somefile.jfr ...
* </pre> * </pre>
*/ */
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
printReport(Arrays.asList(args), printReport(
System.getProperty(MODE_KEY, MODE_DEFAULT), Arrays.asList(args),
Integer.parseInt(System.getProperty(STACKSIZE_KEY, STACKSIZE_DEFAULT)), System.getProperty(MODE_KEY, MODE_DEFAULT),
Integer.parseInt(System.getProperty(COUNT_KEY, COUNT_DEFAULT)), Integer.parseInt(System.getProperty(STACKSIZE_KEY, STACKSIZE_DEFAULT)),
Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT))); Integer.parseInt(System.getProperty(COUNT_KEY, COUNT_DEFAULT)),
Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)));
} }
/** true if we care about this event */ /** true if we care about this event */
static boolean isInteresting(String mode, RecordedEvent event) { static boolean isInteresting(String mode, RecordedEvent event) {
String name = event.getEventType().getName(); String name = event.getEventType().getName();
switch(mode) { switch (mode) {
case "cpu": case "cpu":
return (name.equals("jdk.ExecutionSample") || name.equals("jdk.NativeMethodSample")) && return (name.equals("jdk.ExecutionSample") || name.equals("jdk.NativeMethodSample"))
!isGradlePollThread(event.getThread("sampledThread")); && !isGradlePollThread(event.getThread("sampledThread"));
case "heap": case "heap":
return (name.equals("jdk.ObjectAllocationInNewTLAB") || name.equals("jdk.ObjectAllocationOutsideTLAB")) && return (name.equals("jdk.ObjectAllocationInNewTLAB")
!isGradlePollThread(event.getThread("eventThread")); || name.equals("jdk.ObjectAllocationOutsideTLAB"))
&& !isGradlePollThread(event.getThread("eventThread"));
default: default:
throw new UnsupportedOperationException(event.toString()); throw new UnsupportedOperationException(event.toString());
} }
@ -119,7 +118,7 @@ public class ProfileResults {
/** value we accumulate for this event */ /** value we accumulate for this event */
static long getValue(RecordedEvent event) { static long getValue(RecordedEvent event) {
switch(event.getEventType().getName()) { switch (event.getEventType().getName()) {
case "jdk.ObjectAllocationInNewTLAB": case "jdk.ObjectAllocationInNewTLAB":
return event.getLong("tlabSize"); return event.getLong("tlabSize");
case "jdk.ObjectAllocationOutsideTLAB": case "jdk.ObjectAllocationOutsideTLAB":
@ -133,10 +132,10 @@ public class ProfileResults {
} }
} }
/** format a value, if its huge, we show millions */ /** format a value, if it's huge, we show millions */
static String formatValue(long value) { static String formatValue(long value) {
if (value > 1_000_000) { if (value > 1_000_000) {
return String.format("%dM", value / 1_000_000); return String.format(Locale.ROOT, "%dM", value / 1_000_000);
} else { } else {
return Long.toString(value); return Long.toString(value);
} }
@ -144,15 +143,17 @@ public class ProfileResults {
/** fixed width used for printing the different columns */ /** fixed width used for printing the different columns */
private static final int COLUMN_SIZE = 14; private static final int COLUMN_SIZE = 14;
private static final String COLUMN_PAD = "%-" + COLUMN_SIZE + "s"; private static final String COLUMN_PAD = "%-" + COLUMN_SIZE + "s";
private static String pad(String input) { private static String pad(String input) {
return String.format(Locale.ROOT, COLUMN_PAD, input); return String.format(Locale.ROOT, COLUMN_PAD, input);
} }
/** /** Process all the JFR files passed in args and print a merged summary. */
* Process all the JFR files passed in args and print a merged summary. public static void printReport(
*/ List<String> files, String mode, int stacksize, int count, boolean lineNumbers)
public static void printReport(List<String> files, String mode, int stacksize, int count, boolean lineNumbers) throws IOException { throws IOException {
if (!"cpu".equals(mode) && !"heap".equals(mode)) { if (!"cpu".equals(mode) && !"heap".equals(mode)) {
throw new IllegalArgumentException("tests.profile.mode must be one of (cpu,heap)"); throw new IllegalArgumentException("tests.profile.mode must be one of (cpu,heap)");
} }
@ -178,14 +179,13 @@ public class ProfileResults {
StringBuilder stack = new StringBuilder(); StringBuilder stack = new StringBuilder();
for (int i = 0; i < Math.min(stacksize, trace.getFrames().size()); i++) { for (int i = 0; i < Math.min(stacksize, trace.getFrames().size()); i++) {
if (stack.length() > 0) { if (stack.length() > 0) {
stack.append("\n") stack.append("\n").append(framePadding).append(" at ");
.append(framePadding)
.append(" at ");
} }
stack.append(frameToString(trace.getFrames().get(i), lineNumbers)); stack.append(frameToString(trace.getFrames().get(i), lineNumbers));
} }
String line = stack.toString(); String line = stack.toString();
SimpleEntry<String,Long> entry = histogram.computeIfAbsent(line, u -> new SimpleEntry<String, Long>(line, 0L)); SimpleEntry<String, Long> entry =
histogram.computeIfAbsent(line, u -> new SimpleEntry<String, Long>(line, 0L));
long value = getValue(event); long value = getValue(event);
entry.setValue(entry.getValue() + value); entry.setValue(entry.getValue() + value);
totalEvents++; totalEvents++;
@ -195,12 +195,20 @@ public class ProfileResults {
} }
} }
// print summary from histogram // print summary from histogram
System.out.printf(Locale.ROOT, "PROFILE SUMMARY from %d events (total: %s)\n", totalEvents, formatValue(sumValues)); System.out.printf(
Locale.ROOT,
"PROFILE SUMMARY from %d events (total: %s)\n",
totalEvents,
formatValue(sumValues));
System.out.printf(Locale.ROOT, " tests.profile.mode=%s\n", mode); System.out.printf(Locale.ROOT, " tests.profile.mode=%s\n", mode);
System.out.printf(Locale.ROOT, " tests.profile.count=%d\n", count); System.out.printf(Locale.ROOT, " tests.profile.count=%d\n", count);
System.out.printf(Locale.ROOT, " tests.profile.stacksize=%d\n", stacksize); System.out.printf(Locale.ROOT, " tests.profile.stacksize=%d\n", stacksize);
System.out.printf(Locale.ROOT, " tests.profile.linenumbers=%b\n", lineNumbers); System.out.printf(Locale.ROOT, " tests.profile.linenumbers=%b\n", lineNumbers);
System.out.printf(Locale.ROOT, "%s%sSTACK\n", pad("PERCENT"), pad(mode.toUpperCase(Locale.ROOT) + " SAMPLES")); System.out.printf(
Locale.ROOT,
"%s%sSTACK\n",
pad("PERCENT"),
pad(mode.toUpperCase(Locale.ROOT) + " SAMPLES"));
List<SimpleEntry<String, Long>> entries = new ArrayList<>(histogram.values()); List<SimpleEntry<String, Long>> entries = new ArrayList<>(histogram.values());
entries.sort((u, v) -> v.getValue().compareTo(u.getValue())); entries.sort((u, v) -> v.getValue().compareTo(u.getValue()));
int seen = 0; int seen = 0;
@ -208,8 +216,10 @@ public class ProfileResults {
if (seen++ == count) { if (seen++ == count) {
break; break;
} }
String percent = String.format("%2.2f%%", 100 * (c.getValue() / (float) sumValues)); String percent =
System.out.printf(Locale.ROOT, "%s%s%s\n", pad(percent), pad(formatValue(c.getValue())), c.getKey()); String.format(Locale.ROOT, "%2.2f%%", 100 * (c.getValue() / (float) sumValues));
System.out.printf(
Locale.ROOT, "%s%s%s\n", pad(percent), pad(formatValue(c.getValue())), c.getKey());
} }
} }
} }

View File

@ -26,7 +26,7 @@ import java.nio.file.Path;
import java.util.function.Supplier; import java.util.function.Supplier;
public class SpillWriter extends Writer { public class SpillWriter extends Writer {
private final static int MAX_BUFFERED = 2 * 1024; private static final int MAX_BUFFERED = 2 * 1024;
private final StringWriter buffer = new StringWriter(MAX_BUFFERED); private final StringWriter buffer = new StringWriter(MAX_BUFFERED);
private final Supplier<Path> spillPathSupplier; private final Supplier<Path> spillPathSupplier;

View File

@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Writer;
/**
 * A {@link Writer} that forwards everything written to it to a delegate writer and additionally
 * echoes it to the {@link System#out} stream that was current when this writer was constructed.
 *
 * <p>Closing this writer closes the delegate only; the echoed stream is left open.
 */
class StdOutTeeWriter extends Writer {
  private final Writer delegate;
  // Captured once at construction time.
  private final PrintStream out = System.out;

  public StdOutTeeWriter(Writer delegate) {
    this.delegate = delegate;
  }

  @Override
  public void write(int c) throws IOException {
    delegate.write(c);
    // PrintStream.write(int) emits a single raw byte; print the value as a character so that it
    // is encoded by the stream instead of being truncated to its low 8 bits.
    out.print((char) c);
  }

  @Override
  public void write(char[] cbuf) throws IOException {
    delegate.write(cbuf);
    out.print(cbuf);
  }

  @Override
  public void write(String str) throws IOException {
    delegate.write(str);
    out.print(str);
  }

  @Override
  public void write(String str, int off, int len) throws IOException {
    delegate.write(str, off, len);
    // PrintStream.append takes (start, end) indices, not (offset, length).
    out.append(str, off, off + len);
  }

  @Override
  public Writer append(CharSequence csq) throws IOException {
    delegate.append(csq);
    out.append(csq);
    return this;
  }

  @Override
  public Writer append(CharSequence csq, int start, int end) throws IOException {
    delegate.append(csq, start, end);
    out.append(csq, start, end);
    return this;
  }

  @Override
  public Writer append(char c) throws IOException {
    delegate.append(c);
    out.append(c);
    return this;
  }

  @Override
  public void write(char[] cbuf, int off, int len) throws IOException {
    delegate.write(cbuf, off, len);
    out.print(new String(cbuf, off, len));
  }

  @Override
  public void flush() throws IOException {
    delegate.flush();
    out.flush();
  }

  @Override
  public void close() throws IOException {
    delegate.close();
    // Don't close the actual output.
  }
}

View File

@ -16,12 +16,18 @@
*/ */
package org.apache.lucene.gradle; package org.apache.lucene.gradle;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URI; import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@ -31,12 +37,10 @@ import java.security.NoSuchAlgorithmException;
import java.util.Locale; import java.util.Locale;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
/** /**
* Standalone class that can be used to download a gradle-wrapper.jar * Standalone class that can be used to download a gradle-wrapper.jar
* <p> *
* Has no dependencies outside of standard java libraries * <p>Has no dependencies outside of standard java libraries
*/ */
public class WrapperDownloader { public class WrapperDownloader {
public static void main(String[] args) { public static void main(String[] args) {
@ -62,13 +66,15 @@ public class WrapperDownloader {
} }
public void run(Path destination) throws IOException, NoSuchAlgorithmException { public void run(Path destination) throws IOException, NoSuchAlgorithmException {
Path checksumPath = destination.resolveSibling(destination.getFileName().toString() + ".sha256"); Path checksumPath =
destination.resolveSibling(destination.getFileName().toString() + ".sha256");
if (!Files.exists(checksumPath)) { if (!Files.exists(checksumPath)) {
throw new IOException("Checksum file not found: " + checksumPath); throw new IOException("Checksum file not found: " + checksumPath);
} }
String expectedChecksum = Files.readString(checksumPath, StandardCharsets.UTF_8).trim(); String expectedChecksum = Files.readString(checksumPath, StandardCharsets.UTF_8).trim();
Path versionPath = destination.resolveSibling(destination.getFileName().toString() + ".version"); Path versionPath =
destination.resolveSibling(destination.getFileName().toString() + ".version");
if (!Files.exists(versionPath)) { if (!Files.exists(versionPath)) {
throw new IOException("Wrapper version file not found: " + versionPath); throw new IOException("Wrapper version file not found: " + versionPath);
} }
@ -87,7 +93,12 @@ public class WrapperDownloader {
} }
} }
URL url = URI.create("https://raw.githubusercontent.com/gradle/gradle/v" + wrapperVersion + "/gradle/wrapper/gradle-wrapper.jar").toURL(); URL url =
URI.create(
"https://raw.githubusercontent.com/gradle/gradle/v"
+ wrapperVersion
+ "/gradle/wrapper/gradle-wrapper.jar")
.toURL();
System.err.println("Downloading gradle-wrapper.jar from " + url); System.err.println("Downloading gradle-wrapper.jar from " + url);
// Zero-copy save the jar to a temp file // Zero-copy save the jar to a temp file
@ -103,8 +114,9 @@ public class WrapperDownloader {
} catch (IOException e) { } catch (IOException e) {
if (retries-- > 0) { if (retries-- > 0) {
// Retry after a short delay // Retry after a short delay
System.err.println("Error connecting to server: " + e + ", will retry in " + retryDelay + " seconds."); System.err.println(
Thread.sleep(TimeUnit.SECONDS.toMillis(retryDelay)); "Error connecting to server: " + e + ", will retry in " + retryDelay + " seconds.");
sleep(TimeUnit.SECONDS.toMillis(retryDelay));
continue; continue;
} }
} }
@ -115,8 +127,13 @@ public class WrapperDownloader {
case HttpURLConnection.HTTP_BAD_GATEWAY: case HttpURLConnection.HTTP_BAD_GATEWAY:
if (retries-- > 0) { if (retries-- > 0) {
// Retry after a short delay. // Retry after a short delay.
System.err.println("Server returned HTTP " + connection.getResponseCode() + ", will retry in " + retryDelay + " seconds."); System.err.println(
Thread.sleep(TimeUnit.SECONDS.toMillis(retryDelay)); "Server returned HTTP "
+ connection.getResponseCode()
+ ", will retry in "
+ retryDelay
+ " seconds.");
sleep(TimeUnit.SECONDS.toMillis(retryDelay));
continue; continue;
} }
} }
@ -126,13 +143,15 @@ public class WrapperDownloader {
} }
try (InputStream is = connection.getInputStream(); try (InputStream is = connection.getInputStream();
OutputStream out = Files.newOutputStream(temp)){ OutputStream out = Files.newOutputStream(temp)) {
is.transferTo(out); is.transferTo(out);
} }
String checksum = checksum(digest, temp); String checksum = checksum(digest, temp);
if (!checksum.equalsIgnoreCase(expectedChecksum)) { if (!checksum.equalsIgnoreCase(expectedChecksum)) {
throw new IOException(String.format(Locale.ROOT, throw new IOException(
String.format(
Locale.ROOT,
"Checksum mismatch on downloaded gradle-wrapper.jar (was: %s, expected: %s).", "Checksum mismatch on downloaded gradle-wrapper.jar (was: %s, expected: %s).",
checksum, checksum,
expectedChecksum)); expectedChecksum));
@ -141,8 +160,12 @@ public class WrapperDownloader {
Files.move(temp, destination, REPLACE_EXISTING); Files.move(temp, destination, REPLACE_EXISTING);
temp = null; temp = null;
} catch (IOException | InterruptedException e) { } catch (IOException | InterruptedException e) {
throw new IOException("Could not download gradle-wrapper.jar (" + throw new IOException(
e.getClass().getSimpleName() + ": " + e.getMessage() + ")."); "Could not download gradle-wrapper.jar ("
+ e.getClass().getSimpleName()
+ ": "
+ e.getMessage()
+ ").");
} finally { } finally {
if (temp != null) { if (temp != null) {
Files.deleteIfExists(temp); Files.deleteIfExists(temp);
@ -150,6 +173,11 @@ public class WrapperDownloader {
} }
} }
@SuppressForbidden(reason = "Correct use of thread.sleep.")
private static void sleep(long millis) throws InterruptedException {
Thread.sleep(millis);
}
private String checksum(MessageDigest messageDigest, Path path) throws IOException { private String checksum(MessageDigest messageDigest, Path path) throws IOException {
try { try {
char[] hex = "0123456789abcdef".toCharArray(); char[] hex = "0123456789abcdef".toCharArray();
@ -160,7 +188,15 @@ public class WrapperDownloader {
} }
return sb.toString(); return sb.toString();
} catch (IOException e) { } catch (IOException e) {
throw new IOException("Could not compute digest of file: " + path + " (" + e.getMessage() + ")"); throw new IOException(
"Could not compute digest of file: " + path + " (" + e.getMessage() + ")");
} }
} }
@Retention(RetentionPolicy.CLASS)
@Target({ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE})
public @interface SuppressForbidden {
/** A reason for suppressing should always be given. */
String reason();
}
} }

View File

@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle.buildinfra;
import java.nio.file.Path;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.lucene.gradle.Checksum;
import org.apache.lucene.gradle.ErrorReportingTestListener;
import org.apache.lucene.gradle.datasets.ExtractReuters;
import org.gradle.api.Plugin;
import org.gradle.api.Project;
import org.gradle.api.tasks.testing.TestDescriptor;
import org.gradle.api.tasks.testing.logging.TestLogging;
/**
 * Gradle plugin that registers a single extension, named {@value BuildInfraExtension#NAME}, on the
 * project it is applied to.
 *
 * <p>NOTE(review): presumably the extension exists so that build scripts can reach these
 * build-infrastructure classes through one object instead of importing them directly - confirm
 * against the scripts that use it.
 */
public class BuildInfraPlugin implements Plugin<Project> {
/** Creates the {@link BuildInfraExtension} on the given project's extension container. */
@Override
public void apply(Project project) {
project.getExtensions().create(BuildInfraExtension.NAME, BuildInfraExtension.class);
}
/** Extension object whose methods forward to the underlying build-infrastructure classes. */
public static class BuildInfraExtension {
/** Name under which the extension is registered on the project. */
public static final String NAME = "buildinfra";
/**
 * Creates a new {@link ErrorReportingTestListener}, passing all four arguments through to its
 * constructor unchanged.
 */
public ErrorReportingTestListener newErrorReportingTestListener(
TestLogging testLogging, Path spillDir, Path outputsDir, boolean verboseMode) {
return new ErrorReportingTestListener(testLogging, spillDir, outputsDir, verboseMode);
}
/** Returns a new {@link DigestUtils} instance wrapping a SHA-1 digest. */
public DigestUtils sha1Digest() {
return new DigestUtils(DigestUtils.getSha1Digest());
}
/**
 * Invokes {@link ExtractReuters#main(String[])} with {@code reutersDir} and {@code outputDir}
 * as its two command-line arguments, in that order.
 *
 * @throws Exception whatever {@code ExtractReuters.main} throws
 */
public void extractReuters(String reutersDir, String outputDir) throws Exception {
ExtractReuters.main(new String[] {reutersDir, outputDir});
}
/** Returns {@link ErrorReportingTestListener#getOutputLogName} for the given suite. */
public String getOutputLogName(TestDescriptor suite) {
return ErrorReportingTestListener.getOutputLogName(suite);
}
/**
 * Returns the {@link Checksum} class object.
 *
 * <p>NOTE(review): presumably used by build scripts to register tasks of this type - confirm.
 */
public Class<?> checksumClass() {
return Checksum.class;
}
}
}

View File

@ -30,8 +30,7 @@ import java.util.regex.Pattern;
import java.util.stream.Stream; import java.util.stream.Stream;
/** /**
* Split the Reuters SGML documents into Simple Text files containing: * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
* Title, Date, Dateline, Body
*/ */
public class ExtractReuters { public class ExtractReuters {
private final Path reutersDir; private final Path reutersDir;
@ -67,7 +66,9 @@ public class ExtractReuters {
private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"}; private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};
private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"}; private static final String[] META_CHARS_SERIALIZATIONS = {
"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"
};
/** Override if you wish to change what is extracted */ /** Override if you wish to change what is extracted */
protected void extractFile(Path sgmFile) throws IOException { protected void extractFile(Path sgmFile) throws IOException {

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Standalone build script for the doclet-based javadoc validation tool.
plugins {
id 'java-library'
// Resolved through the `deps` version catalog but not applied here ("apply false").
// NOTE(review): presumably the validation scripts applied further down apply them - confirm.
alias(deps.plugins.spotless) apply false
alias(deps.plugins.forbiddenapis) apply false
}
repositories {
mavenCentral()
}
version = "1.0.0-SNAPSHOT"
group = "org.apache.lucene.tools"
description = 'Doclet-based javadoc validation'
// Make sure the build environment is consistent.
// These scripts are shared with the build located two directories up.
apply from: file('../../gradle/conventions.gradle')
apply from: file('../../gradle/validation/check-environment.gradle')
// Add spotless/ tidy.
// The task registered below is an empty placeholder with no actions.
// NOTE(review): presumably one of the shared scripts applied next refers to a task with this
// name, which would otherwise not exist in this standalone build - confirm.
tasks.register("checkJdkInternalsExportedToGradle") {}
apply from: file('../../gradle/validation/spotless.gradle')
apply from: file('../../gradle/validation/forbidden-apis.gradle')
// Source and target levels both come from the `minJava` version in the `deps` catalog.
java {
sourceCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
targetCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
}
// Pass an explicit --release flag matching the target level and compile sources as UTF-8.
tasks.withType(JavaCompile).configureEach {
options.compilerArgs += ["--release", java.targetCompatibility.toString()]
options.encoding = "UTF-8"
}

View File

@ -15,3 +15,10 @@
* limitations under the License. * limitations under the License.
*/ */
// Declare a version catalog named `deps`, loaded from the versions.toml file located two
// directories above this settings script.
dependencyResolutionManagement {
versionCatalogs {
deps {
from(files('../../versions.toml'))
}
}
}

View File

@ -16,6 +16,9 @@
*/ */
package org.apache.lucene.missingdoclet; package org.apache.lucene.missingdoclet;
import com.sun.source.doctree.DocCommentTree;
import com.sun.source.doctree.ParamTree;
import com.sun.source.util.DocTrees;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
@ -24,7 +27,6 @@ import java.util.Locale;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import javax.lang.model.element.Element; import javax.lang.model.element.Element;
import javax.lang.model.element.ElementKind; import javax.lang.model.element.ElementKind;
import javax.lang.model.element.ExecutableElement; import javax.lang.model.element.ExecutableElement;
@ -36,24 +38,19 @@ import javax.lang.model.util.ElementFilter;
import javax.lang.model.util.Elements; import javax.lang.model.util.Elements;
import javax.lang.model.util.Elements.Origin; import javax.lang.model.util.Elements.Origin;
import javax.tools.Diagnostic; import javax.tools.Diagnostic;
import com.sun.source.doctree.DocCommentTree;
import com.sun.source.doctree.ParamTree;
import com.sun.source.util.DocTrees;
import jdk.javadoc.doclet.Doclet; import jdk.javadoc.doclet.Doclet;
import jdk.javadoc.doclet.DocletEnvironment; import jdk.javadoc.doclet.DocletEnvironment;
import jdk.javadoc.doclet.Reporter; import jdk.javadoc.doclet.Reporter;
import jdk.javadoc.doclet.StandardDoclet; import jdk.javadoc.doclet.StandardDoclet;
/** /**
* Checks for missing javadocs, where missing also means "only whitespace" or "license header". * Checks for missing javadocs, where missing also means "only whitespace" or "license header". Has
* Has option --missing-level (package, class, method, parameter) so that we can improve over time. * option --missing-level (package, class, method, parameter) so that we can improve over time. Has
* Has option --missing-ignore to ignore individual elements (such as split packages). * option --missing-ignore to ignore individual elements (such as split packages). It isn't
* It isn't recursive, just ignores exactly the elements you tell it. * recursive, just ignores exactly the elements you tell it. This should be removed when packaging
* This should be removed when packaging is fixed to no longer be split across JARs. * is fixed to no longer be split across JARs. Has option --missing-method to apply "method" level
* Has option --missing-method to apply "method" level to selected packages (fix one at a time). * to selected packages (fix one at a time). Matches package names exactly: so you'll need to list
* Matches package names exactly: so you'll need to list subpackages separately. * subpackages separately.
*/ */
public class MissingDoclet extends StandardDoclet { public class MissingDoclet extends StandardDoclet {
// checks that modules and packages have documentation // checks that modules and packages have documentation
@ -71,120 +68,123 @@ public class MissingDoclet extends StandardDoclet {
Elements elementUtils; Elements elementUtils;
Set<String> ignored = Collections.emptySet(); Set<String> ignored = Collections.emptySet();
Set<String> methodPackages = Collections.emptySet(); Set<String> methodPackages = Collections.emptySet();
@Override @Override
public Set<Doclet.Option> getSupportedOptions() { public Set<Doclet.Option> getSupportedOptions() {
Set<Doclet.Option> options = new HashSet<>(super.getSupportedOptions()); Set<Doclet.Option> options = new HashSet<>(super.getSupportedOptions());
options.add(new Doclet.Option() { options.add(
@Override new Doclet.Option() {
public int getArgumentCount() { @Override
return 1; public int getArgumentCount() {
} return 1;
}
@Override @Override
public String getDescription() { public String getDescription() {
return "level to enforce for missing javadocs: [package, class, method, parameter]"; return "level to enforce for missing javadocs: [package, class, method, parameter]";
} }
@Override @Override
public Kind getKind() { public Kind getKind() {
return Option.Kind.STANDARD; return Option.Kind.STANDARD;
} }
@Override @Override
public List<String> getNames() { public List<String> getNames() {
return Collections.singletonList("--missing-level"); return Collections.singletonList("--missing-level");
} }
@Override @Override
public String getParameters() { public String getParameters() {
return "level"; return "level";
} }
@Override @Override
public boolean process(String option, List<String> arguments) { public boolean process(String option, List<String> arguments) {
switch (arguments.get(0)) { switch (arguments.get(0)) {
case "package": case "package":
level = PACKAGE; level = PACKAGE;
return true;
case "class":
level = CLASS;
return true;
case "method":
level = METHOD;
return true;
case "parameter":
level = PARAMETER;
return true;
default:
return false;
}
}
});
options.add(
new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of element names to ignore (e.g. as a workaround for split packages)";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-ignore");
}
@Override
public String getParameters() {
return "ignoredNames";
}
@Override
public boolean process(String option, List<String> arguments) {
ignored = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true; return true;
case "class": }
level = CLASS; });
options.add(
new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of packages to check at 'method' level";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-method");
}
@Override
public String getParameters() {
return "packages";
}
@Override
public boolean process(String option, List<String> arguments) {
methodPackages = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true; return true;
case "method": }
level = METHOD; });
return true;
case "parameter":
level = PARAMETER;
return true;
default:
return false;
}
}
});
options.add(new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of element names to ignore (e.g. as a workaround for split packages)";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-ignore");
}
@Override
public String getParameters() {
return "ignoredNames";
}
@Override
public boolean process(String option, List<String> arguments) {
ignored = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true;
}
});
options.add(new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of packages to check at 'method' level";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-method");
}
@Override
public String getParameters() {
return "packages";
}
@Override
public boolean process(String option, List<String> arguments) {
methodPackages = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true;
}
});
return options; return options;
} }
@ -205,10 +205,8 @@ public class MissingDoclet extends StandardDoclet {
return super.run(docEnv); return super.run(docEnv);
} }
/** /** Returns effective check level for this element */
* Returns effective check level for this element
*/
private int level(Element element) { private int level(Element element) {
String pkg = elementUtils.getPackageOf(element).getQualifiedName().toString(); String pkg = elementUtils.getPackageOf(element).getQualifiedName().toString();
if (methodPackages.contains(pkg)) { if (methodPackages.contains(pkg)) {
@ -217,24 +215,24 @@ public class MissingDoclet extends StandardDoclet {
return level; return level;
} }
} }
/** /**
* Check an individual element. * Check an individual element. This checks packages and types from the doctrees. It will
* This checks packages and types from the doctrees. * recursively check methods/fields from encountered types when the level is "method"
* It will recursively check methods/fields from encountered types when the level is "method"
*/ */
private void check(Element element) { private void check(Element element) {
switch(element.getKind()) { switch (element.getKind()) {
case MODULE: case MODULE:
// don't check the unnamed module, it won't have javadocs // don't check the unnamed module, it won't have javadocs
if (!((ModuleElement)element).isUnnamed()) { if (!((ModuleElement) element).isUnnamed()) {
checkComment(element); checkComment(element);
} }
break; break;
case PACKAGE: case PACKAGE:
checkComment(element); checkComment(element);
break; break;
// class-like elements, check them, then recursively check their children (fields and methods) // class-like elements, check them, then recursively check their children (fields and
// methods)
case CLASS: case CLASS:
case INTERFACE: case INTERFACE:
case ENUM: case ENUM:
@ -242,21 +240,24 @@ public class MissingDoclet extends StandardDoclet {
case ANNOTATION_TYPE: case ANNOTATION_TYPE:
if (level(element) >= CLASS) { if (level(element) >= CLASS) {
checkComment(element); checkComment(element);
if (element instanceof TypeElement te && element.getKind() == ElementKind.RECORD && level(element) >= METHOD) { if (element instanceof TypeElement te
&& element.getKind() == ElementKind.RECORD
&& level(element) >= METHOD) {
checkRecordParameters(te, docTrees.getDocCommentTree(element)); checkRecordParameters(te, docTrees.getDocCommentTree(element));
} }
for (var subElement : element.getEnclosedElements()) { for (var subElement : element.getEnclosedElements()) {
// don't recurse into enclosed types, otherwise we'll double-check since they are already in the included docTree // don't recurse into enclosed types, otherwise we'll double-check since they are
if (subElement.getKind() == ElementKind.METHOD || // already in the included docTree
subElement.getKind() == ElementKind.CONSTRUCTOR || if (subElement.getKind() == ElementKind.METHOD
subElement.getKind() == ElementKind.FIELD || || subElement.getKind() == ElementKind.CONSTRUCTOR
subElement.getKind() == ElementKind.ENUM_CONSTANT) { || subElement.getKind() == ElementKind.FIELD
|| subElement.getKind() == ElementKind.ENUM_CONSTANT) {
check(subElement); check(subElement);
} }
} }
} }
break; break;
// method-like elements, check them if we are configured to do so // method-like elements, check them if we are configured to do so
case METHOD: case METHOD:
case CONSTRUCTOR: case CONSTRUCTOR:
case FIELD: case FIELD:
@ -272,8 +273,8 @@ public class MissingDoclet extends StandardDoclet {
/** /**
* Return true if the method is synthetic enum (values/valueOf) or record accessor method. * Return true if the method is synthetic enum (values/valueOf) or record accessor method.
* According to the doctree documentation, the "included" set never includes synthetic/mandated elements. * According to the doctree documentation, the "included" set never includes synthetic/mandated
* UweSays: It should not happen but it happens! * elements. UweSays: It should not happen but it happens!
*/ */
private boolean isSyntheticMethod(Element element) { private boolean isSyntheticMethod(Element element) {
// exclude all not explicitly declared methods // exclude all not explicitly declared methods
@ -293,20 +294,23 @@ public class MissingDoclet extends StandardDoclet {
} }
return false; return false;
} }
/** /**
* Checks that an element doesn't have missing javadocs. * Checks that an element doesn't have missing javadocs. In addition to truly "missing", check
* In addition to truly "missing", check that comments aren't solely whitespace (generated by some IDEs), * that comments aren't solely whitespace (generated by some IDEs), that they aren't a license
* that they aren't a license header masquerading as a javadoc comment. * header masquerading as a javadoc comment.
*/ */
private void checkComment(Element element) { private void checkComment(Element element) {
// sanity check that the element is really "included", because we do some recursion into types // sanity check that the element is really "included", because we do some recursion into types
if (!docEnv.isIncluded(element)) { if (!docEnv.isIncluded(element)) {
return; return;
} }
// check that this element isn't on our ignore list. This is only used as a workaround for "split packages". // check that this element isn't on our ignore list. This is only used as a workaround for
// ignoring a package isn't recursive (on purpose), we still check all the classes, etc. inside it. // "split packages".
// we just need to cope with the fact package-info.java isn't there because it is split across multiple jars. // ignoring a package isn't recursive (on purpose), we still check all the classes, etc. inside
// it.
// we just need to cope with the fact package-info.java isn't there because it is split across
// multiple jars.
if (ignored.contains(element.toString())) { if (ignored.contains(element.toString())) {
return; return;
} }
@ -319,14 +323,17 @@ public class MissingDoclet extends StandardDoclet {
error(element, "javadocs are missing"); error(element, "javadocs are missing");
} }
} else { } else {
var normalized = tree.getFirstSentence().get(0).toString() var normalized =
.replace('\u00A0', ' ') tree.getFirstSentence()
.trim() .get(0)
.toLowerCase(Locale.ROOT); .toString()
.replace('\u00A0', ' ')
.trim()
.toLowerCase(Locale.ROOT);
if (normalized.isEmpty()) { if (normalized.isEmpty()) {
error(element, "blank javadoc comment"); error(element, "blank javadoc comment");
} else if (normalized.startsWith("licensed to the apache software foundation") || } else if (normalized.startsWith("licensed to the apache software foundation")
normalized.startsWith("copyright 2004 the apache software foundation")) { || normalized.startsWith("copyright 2004 the apache software foundation")) {
error(element, "comment is really a license"); error(element, "comment is really a license");
} }
} }
@ -336,13 +343,15 @@ public class MissingDoclet extends StandardDoclet {
} }
private boolean hasInheritedJavadocs(Element element) { private boolean hasInheritedJavadocs(Element element) {
boolean hasOverrides = element.getAnnotationMirrors().stream() boolean hasOverrides =
.anyMatch(ann -> ann.getAnnotationType().toString().equals(Override.class.getName())); element.getAnnotationMirrors().stream()
.anyMatch(ann -> ann.getAnnotationType().toString().equals(Override.class.getName()));
if (hasOverrides) { if (hasOverrides) {
// If an element has explicit @Overrides annotation, assume it does // If an element has explicit @Overrides annotation, assume it does
// have inherited javadocs somewhere. // have inherited javadocs somewhere.
// reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but @Override declared, skipping."); // reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but @Override declared,
// skipping.");
return true; return true;
} }
@ -359,7 +368,8 @@ public class MissingDoclet extends StandardDoclet {
// We could check supMethod for non-empty javadoc here. Don't know if this makes // We could check supMethod for non-empty javadoc here. Don't know if this makes
// sense though as all methods will be verified in the end so it'd fail on the // sense though as all methods will be verified in the end so it'd fail on the
// top of the hierarchy (if empty) anyway. // top of the hierarchy (if empty) anyway.
// reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but method overrides another, skipping."); // reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but method overrides
// another, skipping.");
return true; return true;
} }
} }
@ -369,15 +379,14 @@ public class MissingDoclet extends StandardDoclet {
return false; return false;
} }
/* Find types from which methods in type may inherit javadoc, in the proper order.*/ /* Find types from which methods in type may inherit javadoc, in the proper order.*/
private Stream<Element> superTypeForInheritDoc(Element type) { private Stream<Element> superTypeForInheritDoc(Element type) {
TypeElement clazz = (TypeElement) type; TypeElement clazz = (TypeElement) type;
List<Element> interfaces = clazz.getInterfaces() List<Element> interfaces =
.stream() clazz.getInterfaces().stream()
.filter(tm -> tm.getKind() == TypeKind.DECLARED) .filter(tm -> tm.getKind() == TypeKind.DECLARED)
.map(tm -> ((DeclaredType) tm).asElement()) .map(tm -> ((DeclaredType) tm).asElement())
.collect(Collectors.toList()); .collect(Collectors.toList());
Stream<Element> result = interfaces.stream(); Stream<Element> result = interfaces.stream();
result = Stream.concat(result, interfaces.stream().flatMap(this::superTypeForInheritDoc)); result = Stream.concat(result, interfaces.stream().flatMap(this::superTypeForInheritDoc));
@ -394,12 +403,12 @@ public class MissingDoclet extends StandardDoclet {
/** Returns all {@code @param} parameters we see in the javadocs of the element */ /** Returns all {@code @param} parameters we see in the javadocs of the element */
private Set<String> getDocParameters(DocCommentTree tree) { private Set<String> getDocParameters(DocCommentTree tree) {
return Stream.ofNullable(tree) return Stream.ofNullable(tree)
.flatMap(t -> t.getBlockTags().stream()) .flatMap(t -> t.getBlockTags().stream())
.filter(ParamTree.class::isInstance) .filter(ParamTree.class::isInstance)
.map(tag -> ((ParamTree)tag).getName().getName().toString()) .map(tag -> ((ParamTree) tag).getName().getName().toString())
.collect(Collectors.toSet()); .collect(Collectors.toSet());
} }
/** Checks there is a corresponding "param" tag for each method parameter */ /** Checks there is a corresponding "param" tag for each method parameter */
private void checkMethodParameters(ExecutableElement element, DocCommentTree tree) { private void checkMethodParameters(ExecutableElement element, DocCommentTree tree) {
// record each @param that we see // record each @param that we see
@ -412,7 +421,7 @@ public class MissingDoclet extends StandardDoclet {
} }
} }
} }
/** Checks there is a corresponding "param" tag for each record component */ /** Checks there is a corresponding "param" tag for each record component */
private void checkRecordParameters(TypeElement element, DocCommentTree tree) { private void checkRecordParameters(TypeElement element, DocCommentTree tree) {
// record each @param that we see // record each @param that we see
@ -425,7 +434,7 @@ public class MissingDoclet extends StandardDoclet {
} }
} }
} }
/** logs a new error for the particular element */ /** logs a new error for the particular element */
private void error(Element element, String message) { private void error(Element element, String message) {
var fullMessage = new StringBuilder(); var fullMessage = new StringBuilder();

View File

@ -20,13 +20,18 @@ import java.time.format.DateTimeFormatter
plugins { plugins {
id "base" id "base"
id "com.palantir.consistent-versions" version "2.11.0" id "lucene.build-infra"
id "org.owasp.dependencycheck" version "7.2.0"
id 'de.thetaphi.forbiddenapis' version '3.7' apply false alias(deps.plugins.dependencychecks)
id "de.undercouch.download" version "5.2.0" apply false alias(deps.plugins.spotless) apply false
id "net.ltgt.errorprone" version "3.1.0" apply false alias(deps.plugins.benmanes.versions)
id 'com.diffplug.spotless' version "6.5.2" apply false alias(deps.plugins.forbiddenapis) apply false
id 'org.barfuin.gradle.jacocolog' version "3.1.0" apply false alias(deps.plugins.versionCatalogUpdate) apply false
alias(deps.plugins.randomizedtesting) apply false
alias(deps.plugins.owasp.dependencycheck)
alias(deps.plugins.undercouch.download) apply false
alias(deps.plugins.errorprone) apply false
alias(deps.plugins.jacocolog) apply false
} }
apply from: file('gradle/globals.gradle') apply from: file('gradle/globals.gradle')
@ -73,7 +78,7 @@ ext {
} }
// Minimum Java version required to compile and run Lucene. // Minimum Java version required to compile and run Lucene.
minJavaVersion = JavaVersion.VERSION_21 minJavaVersion = JavaVersion.toVersion(deps.versions.minJava.get())
// snapshot build marker used in scripts. // snapshot build marker used in scripts.
snapshotBuild = version.contains("SNAPSHOT") snapshotBuild = version.contains("SNAPSHOT")
@ -98,17 +103,15 @@ configurations {
dependencies { dependencies {
// Use a newer groovy that doesn't have illegal reflective accesses. // Use a newer groovy that doesn't have illegal reflective accesses.
groovy "org.codehaus.groovy:groovy-all:3.0.21" groovy deps.groovy
} }
apply from: file('buildSrc/scriptDepVersions.gradle')
// Include smaller chunks configuring dedicated build areas. // Include smaller chunks configuring dedicated build areas.
// Some of these intersect or add additional functionality. // Some of these intersect or add additional functionality.
// The order of inclusion of these files shouldn't matter (but may // The order of inclusion of these files shouldn't matter (but may
// if the build file is incorrectly written and evaluates something // if the build file is incorrectly written and evaluates something
// eagerly). // eagerly).
apply from: file('gradle/conventions.gradle')
apply from: file('gradle/generation/local-settings.gradle') apply from: file('gradle/generation/local-settings.gradle')
// Make sure the build environment is consistent. // Make sure the build environment is consistent.
@ -140,15 +143,25 @@ apply from: file('gradle/validation/precommit.gradle')
apply from: file('gradle/validation/forbidden-apis.gradle') apply from: file('gradle/validation/forbidden-apis.gradle')
apply from: file('gradle/validation/jar-checks.gradle') apply from: file('gradle/validation/jar-checks.gradle')
apply from: file('gradle/validation/git-status.gradle') apply from: file('gradle/validation/git-status.gradle')
apply from: file('gradle/validation/versions-props-sorted.gradle')
apply from: file('gradle/validation/validate-source-patterns.gradle') apply from: file('gradle/validation/validate-source-patterns.gradle')
apply from: file('gradle/validation/rat-sources.gradle') apply from: file('gradle/validation/rat-sources.gradle')
apply from: file('gradle/validation/owasp-dependency-check.gradle') apply from: file('gradle/validation/owasp-dependency-check.gradle')
apply from: file('gradle/validation/ecj-lint.gradle') apply from: file('gradle/validation/ecj-lint.gradle')
apply from: file('gradle/validation/gradlew-scripts-tweaked.gradle') apply from: file('gradle/validation/gradlew-scripts-tweaked.gradle')
apply from: file('gradle/validation/dependencies.gradle')
apply from: file('gradle/validation/spotless.gradle') apply from: file('gradle/validation/spotless.gradle')
// Wire up included builds to some validation tasks.
rootProject.tasks.named("tidy").configure {
dependsOn gradle.includedBuilds*.task(":tidy")
}
rootProject.tasks.named("clean").configure {
dependsOn gradle.includedBuilds*.task(":clean")
}
rootProject.tasks.named("check").configure {
dependsOn gradle.includedBuilds*.task(":forbiddenApis")
}
// Source or data regeneration tasks // Source or data regeneration tasks
apply from: file('gradle/generation/regenerate.gradle') apply from: file('gradle/generation/regenerate.gradle')
apply from: file('gradle/generation/jflex.gradle') apply from: file('gradle/generation/jflex.gradle')

View File

@ -1,279 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.gradle.api.internal.tasks.testing.logging.FullExceptionFormatter;
import org.gradle.api.internal.tasks.testing.logging.TestExceptionFormatter;
import org.gradle.api.logging.Logger;
import org.gradle.api.logging.Logging;
import org.gradle.api.tasks.testing.TestDescriptor;
import org.gradle.api.tasks.testing.TestListener;
import org.gradle.api.tasks.testing.TestOutputEvent;
import org.gradle.api.tasks.testing.TestOutputListener;
import org.gradle.api.tasks.testing.TestResult;
import org.gradle.api.tasks.testing.logging.TestLogging;
/**
 * An error reporting listener that queues test output streams and displays them
 * on failure.
 * <p>
 * Output events and formatted failure stack traces are buffered per test suite in an
 * {@code OutputHandler}. When a suite finishes with a {@code FAILURE} result, the buffered
 * output is saved to a file under {@code outputsDir} and, unless verbose mode is on,
 * echoed to standard output. In verbose mode the output is already teed to standard
 * output as it is written, so it is not echoed a second time.
 * <p>
 * Heavily inspired by Elasticsearch's ErrorReportingTestListener (ASL 2.0 licensed).
 */
public class ErrorReportingTestListener implements TestOutputListener, TestListener {
  private static final Logger LOGGER = Logging.getLogger(ErrorReportingTestListener.class);

  /** A suite whose buffered output is larger than this (in bytes) triggers a warning. */
  private static final long LARGE_OUTPUT_WARNING_BYTES = 1024L * 1024 * 10;

  /** Matches every run of characters that is replaced by "_" in an output log file name. */
  private static final Pattern SANITIZE = Pattern.compile("[^a-zA-Z .\\-_0-9]+");

  private final TestExceptionFormatter formatter;
  private final Map<TestKey, OutputHandler> outputHandlers = new ConcurrentHashMap<>();
  private final Path spillDir;
  private final Path outputsDir;
  private final boolean verboseMode;

  /**
   * @param testLogging logging configuration handed to the exception formatter
   * @param spillDir directory in which temporary spill files for buffered output are created
   * @param outputsDir directory to which the output of failed suites is saved
   * @param verboseMode if true, output is also copied to standard output as it is written
   */
  public ErrorReportingTestListener(TestLogging testLogging, Path spillDir, Path outputsDir, boolean verboseMode) {
    this.formatter = new FullExceptionFormatter(testLogging);
    this.spillDir = spillDir;
    this.outputsDir = outputsDir;
    this.verboseMode = verboseMode;
  }

  /** Routes a single output event to the handler of the suite that owns the descriptor. */
  @Override
  public void onOutput(TestDescriptor testDescriptor, TestOutputEvent outputEvent) {
    handlerFor(testDescriptor).write(outputEvent);
  }

  @Override
  public void beforeSuite(TestDescriptor suite) {
    // noop.
  }

  @Override
  public void beforeTest(TestDescriptor testDescriptor) {
    // Noop.
  }

  /**
   * Reports the buffered output of a finished suite if it failed, then releases its handler.
   * Descriptors without a parent, or whose name starts with "Gradle", are ignored.
   *
   * @throws UncheckedIOException if the output cannot be measured, saved or echoed
   */
  @Override
  public void afterSuite(final TestDescriptor suite, TestResult result) {
    if (suite.getParent() == null || suite.getName().startsWith("Gradle")) {
      return;
    }

    TestKey key = TestKey.of(suite);
    try {
      // May be null: a suite that produced no output never had a handler created for it.
      OutputHandler outputHandler = outputHandlers.get(key);
      if (outputHandler != null) {
        long length = outputHandler.length();
        if (length > LARGE_OUTPUT_WARNING_BYTES) {
          LOGGER.warn(String.format(Locale.ROOT, "WARNING: Test %s wrote %,d bytes of output.",
              suite.getName(),
              length));
        }
      }

      // If the test suite failed, report output.
      boolean suiteFailed = Objects.equals(result.getResultType(), TestResult.ResultType.FAILURE);
      if (suiteFailed) {
        Files.createDirectories(outputsDir);
        Path outputLog = outputsDir.resolve(getOutputLogName(suite));

        // Save the output of a failing test to disk. The file is created even when
        // there is no handler (it is then empty).
        try (Writer w = Files.newBufferedWriter(outputLog, StandardCharsets.UTF_8)) {
          if (outputHandler != null) {
            outputHandler.copyTo(w);
          }
        }

        // In verbose mode everything has been teed to stdout already.
        if (!verboseMode) {
          // Serialize echoing on this listener so that dumps are not interleaved.
          synchronized (this) {
            System.out.println();
            System.out.println(suite.getClassName() + " > test suite's output saved to " + outputLog + ", copied below:");
            try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
              char[] buf = new char[1024];
              int len;
              while ((len = reader.read(buf)) >= 0) {
                System.out.print(new String(buf, 0, len));
              }
              System.out.println();
            }
          }
        }
      }
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    } finally {
      // Always drop and close the handler, whether or not reporting succeeded.
      OutputHandler handler = outputHandlers.remove(key);
      if (handler != null) {
        try {
          handler.close();
        } catch (IOException e) {
          LOGGER.error("Failed to close output handler for: " + key, e);
        }
      }
    }
  }

  /**
   * Returns the file name used to store a suite's output: {@code OUTPUT-<suite name>.txt}
   * with every run of characters other than ASCII letters, digits, space, '.', '-' and '_'
   * replaced by a single underscore.
   */
  public static String getOutputLogName(TestDescriptor suite) {
    return SANITIZE.matcher("OUTPUT-" + suite.getName() + ".txt").replaceAll("_");
  }

  @Override
  public void afterTest(TestDescriptor testDescriptor, TestResult result) {
    // Include test failure exception stacktrace(s) in test output log.
    if (result.getResultType() == TestResult.ResultType.FAILURE) {
      if (!result.getExceptions().isEmpty()) {
        String message = formatter.format(testDescriptor, result.getExceptions());
        handlerFor(testDescriptor).write(message);
      }
    }
  }

  /** Returns the handler for the given descriptor, creating it on first use. */
  private OutputHandler handlerFor(TestDescriptor descriptor) {
    // Attach output of leaves (individual tests) to their parent.
    if (!descriptor.isComposite()) {
      descriptor = descriptor.getParent();
    }
    return outputHandlers.computeIfAbsent(TestKey.of(descriptor), (key) -> new OutputHandler());
  }

  /**
   * Map key identifying a test descriptor by the string
   * {@code <class name>::<name>::<parent.toString() or "-">}.
   */
  public static class TestKey {
    private final String key;

    private TestKey(String key) {
      this.key = key;
    }

    /** Builds the key for the given descriptor. */
    public static TestKey of(TestDescriptor d) {
      StringBuilder key = new StringBuilder();
      key.append(d.getClassName());
      key.append("::");
      key.append(d.getName());
      key.append("::");
      key.append(d.getParent() == null ? "-" : d.getParent().toString());
      return new TestKey(key.toString());
    }

    @Override
    public boolean equals(Object o) {
      return o != null &&
          o.getClass() == this.getClass() &&
          Objects.equals(((TestKey) o).key, key);
    }

    @Override
    public int hashCode() {
      return key.hashCode();
    }

    @Override
    public String toString() {
      return key;
    }
  }

  /**
   * Buffers the output of one suite. Three prefixed writers (internal messages, stdout,
   * stderr) share a single spill buffer; in verbose mode everything written to that
   * buffer is also copied to standard output.
   */
  private class OutputHandler implements Closeable {
    // Max single-line buffer before automatic wrap occurs.
    private static final int MAX_LINE_WIDTH = 1024 * 4;

    private final SpillWriter buffer;

    // internal stream.
    private final PrefixedWriter sint;
    // stdout
    private final PrefixedWriter sout;
    // stderr
    private final PrefixedWriter serr;

    // last used stream (so that we can flush it properly and prefixes are not screwed up).
    private PrefixedWriter last;

    public OutputHandler() {
      // The supplier creates a temporary spill file inside spillDir.
      buffer = new SpillWriter(() -> {
        try {
          return Files.createTempFile(spillDir, "spill-", ".tmp");
        } catch (IOException e) {
          throw new UncheckedIOException(e);
        }
      });

      Writer sink = buffer;
      if (verboseMode) {
        sink = new StdOutTeeWriter(buffer);
      }

      sint = new PrefixedWriter(" > ", sink, MAX_LINE_WIDTH);
      sout = new PrefixedWriter(" 1> ", sink, MAX_LINE_WIDTH);
      serr = new PrefixedWriter(" 2> ", sink, MAX_LINE_WIDTH);
      last = sint;
    }

    /** Writes an output event to the stdout writer or, for any other destination, the stderr writer. */
    public void write(TestOutputEvent event) {
      write((event.getDestination() == TestOutputEvent.Destination.StdOut ? sout : serr), event.getMessage());
    }

    /** Writes a message to the internal (" > " prefixed) writer. */
    public void write(String message) {
      write(sint, message);
    }

    /** Returns the length reported by the underlying spill buffer. */
    public long length() throws IOException {
      return buffer.length();
    }

    private void write(PrefixedWriter out, String message) {
      try {
        // Switching streams: complete the previous stream's line first.
        if (out != last) {
          last.completeLine();
          last = out;
        }
        out.write(message);
      } catch (IOException e) {
        throw new UncheckedIOException("Unable to write to test output.", e);
      }
    }

    /** Flushes pending output and copies everything buffered so far to {@code out}. */
    public void copyTo(Writer out) throws IOException {
      flush();
      buffer.copyTo(out);
    }

    public void flush() throws IOException {
      // NOTE(review): only sout and serr are completed here, not sint - confirm that a
      // partial line pending on the internal writer cannot be lost.
      sout.completeLine();
      serr.completeLine();
      buffer.flush();
    }

    @Override
    public void close() throws IOException {
      buffer.close();
    }
  }
}

View File

@ -1,93 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Writer;
/**
 * A {@link Writer} that forwards everything to a delegate writer and also echoes the
 * same characters to the {@link System#out} stream that was current when this writer
 * was constructed.
 */
class StdOutTeeWriter extends Writer {
  private final Writer delegate;
  private final PrintStream out = System.out;

  /**
   * @param delegate the writer that receives a copy of everything written
   */
  public StdOutTeeWriter(Writer delegate) {
    this.delegate = delegate;
  }

  @Override
  public void write(int c) throws IOException {
    delegate.write(c);
    // PrintStream.write(int) emits a single raw byte; print the character instead so
    // that it goes through the stream's character encoding.
    out.print((char) c);
  }

  @Override
  public void write(char[] cbuf) throws IOException {
    delegate.write(cbuf);
    out.print(cbuf);
  }

  @Override
  public void write(String str) throws IOException {
    delegate.write(str);
    out.print(str);
  }

  @Override
  public void write(String str, int off, int len) throws IOException {
    delegate.write(str, off, len);
    // PrintStream.append takes (start, end), not (offset, length).
    out.append(str, off, off + len);
  }

  @Override
  public Writer append(CharSequence csq) throws IOException {
    delegate.append(csq);
    out.append(csq);
    return this;
  }

  @Override
  public Writer append(CharSequence csq, int start, int end) throws IOException {
    delegate.append(csq, start, end);
    out.append(csq, start, end);
    return this;
  }

  @Override
  public Writer append(char c) throws IOException {
    delegate.append(c);
    out.append(c);
    return this;
  }

  @Override
  public void write(char[] cbuf, int off, int len) throws IOException {
    delegate.write(cbuf, off, len);
    out.print(new String(cbuf, off, len));
  }

  @Override
  public void flush() throws IOException {
    delegate.flush();
    out.flush();
  }

  @Override
  public void close() throws IOException {
    delegate.close();
    // Don't close the actual output.
  }
}

View File

@ -40,6 +40,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'cfs': 'index', 'cfs': 'index',
'nocfs': 'index', 'nocfs': 'index',
'sorted': 'sorted', 'sorted': 'sorted',
'int8_hnsw': 'int8_hnsw',
'moreterms': 'moreterms', 'moreterms': 'moreterms',
'dvupdates': 'dvupdates', 'dvupdates': 'dvupdates',
'emptyIndex': 'empty' 'emptyIndex': 'empty'
@ -60,6 +61,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'cfs': 'testCreateCFS', 'cfs': 'testCreateCFS',
'nocfs': 'testCreateNoCFS', 'nocfs': 'testCreateNoCFS',
'sorted': 'testCreateSortedIndex', 'sorted': 'testCreateSortedIndex',
'int8_hnsw': 'testCreateInt8HNSWIndices',
'moreterms': 'testCreateMoreTermsIndex', 'moreterms': 'testCreateMoreTermsIndex',
'dvupdates': 'testCreateIndexWithDocValuesUpdates', 'dvupdates': 'testCreateIndexWithDocValuesUpdates',
'emptyIndex': 'testCreateEmptyIndex' 'emptyIndex': 'testCreateEmptyIndex'
@ -204,6 +206,7 @@ def main():
current_version = scriptutil.Version.parse(scriptutil.find_current_version()) current_version = scriptutil.Version.parse(scriptutil.find_current_version())
create_and_add_index(source, 'cfs', c.version, current_version, c.temp_dir) create_and_add_index(source, 'cfs', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'nocfs', c.version, current_version, c.temp_dir) create_and_add_index(source, 'nocfs', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'int8_hnsw', c.version, current_version, c.temp_dir)
should_make_sorted = current_version.is_back_compat_with(c.version) \ should_make_sorted = current_version.is_back_compat_with(c.version) \
and (c.version.major > 6 or (c.version.major == 6 and c.version.minor >= 2)) and (c.version.major > 6 or (c.version.major == 6 and c.version.minor >= 2))
if should_make_sorted: if should_make_sorted:

View File

@ -582,8 +582,8 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
'luke', 'memory', 'misc', 'monitor', 'queries', 'queryparser', 'replicator', 'luke', 'memory', 'misc', 'monitor', 'queries', 'queryparser', 'replicator',
'sandbox', 'spatial-extras', 'spatial-test-fixtures', 'spatial3d', 'suggest', 'test-framework', 'licenses'] 'sandbox', 'spatial-extras', 'spatial-test-fixtures', 'spatial3d', 'suggest', 'test-framework', 'licenses']
if isSrc: if isSrc:
expected_src_root_files = ['build.gradle', 'buildSrc', 'CONTRIBUTING.md', 'dev-docs', 'dev-tools', 'gradle', 'gradlew', expected_src_root_files = ['build.gradle', 'build-tools', 'CONTRIBUTING.md', 'dev-docs', 'dev-tools', 'gradle', 'gradlew',
'gradlew.bat', 'help', 'lucene', 'settings.gradle', 'versions.lock', 'versions.props'] 'gradlew.bat', 'help', 'lucene', 'settings.gradle', 'versions.lock', 'versions.toml']
expected_src_lucene_files = ['build.gradle', 'documentation', 'distribution', 'dev-docs'] expected_src_lucene_files = ['build.gradle', 'documentation', 'distribution', 'dev-docs']
is_in_list(in_root_folder, expected_src_root_files) is_in_list(in_root_folder, expected_src_root_files)
is_in_list(in_lucene_folder, expected_folders) is_in_list(in_lucene_folder, expected_folders)

View File

@ -15,19 +15,19 @@
* limitations under the License. * limitations under the License.
*/ */
// Declare script dependency versions outside of palantir's configure(allprojects) {
// version unification control. These are not our main dependencies tasks.register("tidy").configure {
// but are reused in buildSrc and across applied scripts. description "Applies formatters and cleanups to sources."
group "verification"
ext { }
scriptDepVersions = [
"apache-rat": "0.14",
"asm": "9.7",
"commons-codec": "1.13",
"ecj": "3.36.0",
"flexmark": "0.61.24",
"javacc": "7.0.12",
"jflex": "1.8.2",
"jgit": "5.13.1.202206130422-r",
]
} }
// Locate script-relative resource folder. This is context-sensitive so pass
// the right buildscript (top-level).
configure(rootProject) {
ext {
scriptResources = { buildscript ->
return file(buildscript.sourceFile.absolutePath.replaceAll('.gradle$', ""))
}
}
}

View File

@ -1,5 +1,3 @@
import org.apache.lucene.gradle.datasets.ExtractReuters
import java.nio.file.Files import java.nio.file.Files
/* /*
@ -25,7 +23,7 @@ buildscript {
} }
dependencies { dependencies {
classpath "com.github.luben:zstd-jni:1.5.5-11" classpath deps.zstd
} }
} }
@ -40,7 +38,7 @@ def unzstd(java.nio.file.Path src, java.nio.file.Path dst) {
// TODO: not sure whether this should live in benchmarks, but for now let it be. // TODO: not sure whether this should live in benchmarks, but for now let it be.
configure(project(":lucene:benchmark")) { configure(project(":lucene:benchmark")) {
apply plugin: "java" apply plugin: "java"
apply plugin: "de.undercouch.download" apply plugin: deps.plugins.undercouch.download.get().pluginId
ext { ext {
dataDir = file("work") dataDir = file("work")
@ -164,7 +162,7 @@ configure(project(":lucene:benchmark")) {
logger.lifecycle("Extracting ${ext.name} into ${ext.dst}...") logger.lifecycle("Extracting ${ext.name} into ${ext.dst}...")
ext.dst.deleteDir() ext.dst.deleteDir()
ExtractReuters.main(untarPath.toString(), ext.dst.toString()) buildinfra.extractReuters(untarPath.toString(), ext.dst.toString())
} }
} }

View File

@ -34,11 +34,11 @@ buildscript {
} }
dependencies { dependencies {
classpath "com.vladsch.flexmark:flexmark:${scriptDepVersions['flexmark']}" classpath deps.flexmark.core
classpath "com.vladsch.flexmark:flexmark-ext-abbreviation:${scriptDepVersions['flexmark']}" classpath deps.flexmark.ext.abbreviation
classpath "com.vladsch.flexmark:flexmark-ext-attributes:${scriptDepVersions['flexmark']}" classpath deps.flexmark.ext.attributes
classpath "com.vladsch.flexmark:flexmark-ext-autolink:${scriptDepVersions['flexmark']}" classpath deps.flexmark.ext.autolink
classpath "com.vladsch.flexmark:flexmark-ext-tables:${scriptDepVersions['flexmark']}" classpath deps.flexmark.ext.tables
} }
} }

View File

@ -23,7 +23,7 @@ configure(project(":lucene:expressions")) {
} }
dependencies { dependencies {
antlr "org.antlr:antlr4" antlr deps.antlr.core
} }
task generateAntlrInternal() { task generateAntlrInternal() {

View File

@ -35,42 +35,44 @@ configure(project(":lucene:core")) {
} }
dependencies { dependencies {
apiextractor "org.ow2.asm:asm:${scriptDepVersions['asm']}" apiextractor deps.asm.core
} }
mrjarJavaVersions.each { jdkVersion -> plugins.withType(JavaPlugin) {
def task = tasks.create(name: "generateJdkApiJar${jdkVersion}", type: JavaExec) { mrjarJavaVersions.each { jdkVersion ->
description "Regenerate the API-only JAR file with public Panama Foreign & Vector API from JDK ${jdkVersion}" def task = tasks.create(name: "generateJdkApiJar${jdkVersion}", type: JavaExec) {
group "generation" description "Regenerate the API-only JAR file with public Panama Foreign & Vector API from JDK ${jdkVersion}"
group "generation"
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(jdkVersion) javaLauncher = javaToolchains.launcherFor {
} languageVersion = JavaLanguageVersion.of(jdkVersion)
onlyIf {
try {
javaLauncher.get()
return true
} catch (Exception e) {
logger.warn('Launcher for Java {} is not available; skipping regeneration of Panama Foreign & Vector API JAR.', jdkVersion)
logger.warn('Error: {}', e.cause?.message)
logger.warn("Please make sure to point env 'JAVA{}_HOME' to exactly JDK version {} or enable Gradle toolchain auto-download.", jdkVersion, jdkVersion)
return false
} }
onlyIf {
try {
javaLauncher.get()
return true
} catch (Exception e) {
logger.warn('Launcher for Java {} is not available; skipping regeneration of Panama Foreign & Vector API JAR.', jdkVersion)
logger.warn('Error: {}', e.cause?.message)
logger.warn("Please make sure to point env 'JAVA{}_HOME' to exactly JDK version {} or enable Gradle toolchain auto-download.", jdkVersion, jdkVersion)
return false
}
}
classpath = configurations.apiextractor
mainClass = file("${resources}/ExtractJdkApis.java") as String
systemProperties = [
'user.timezone': 'UTC',
'file.encoding': 'UTF-8',
]
args = [
jdkVersion,
apijars.file("jdk${jdkVersion}.apijar"),
]
} }
classpath = configurations.apiextractor regenerate.dependsOn task
mainClass = file("${resources}/ExtractJdkApis.java") as String
systemProperties = [
'user.timezone': 'UTC',
'file.encoding': 'UTF-8',
]
args = [
jdkVersion,
apijars.file("jdk${jdkVersion}.apijar"),
]
} }
regenerate.dependsOn task
} }
} }

View File

@ -33,18 +33,11 @@ def resources = scriptResources(buildscript)
// Configure different icu4j dependencies. // Configure different icu4j dependencies.
configure(rootProject) { configure(rootProject) {
configurations { configurations {
// icu_xyz
icu_current icu_current
} }
dependencies { dependencies {
// icu_xyz "com.ibm.icu:icu4j:xyz" icu_current deps.icu4j
icu_current 'com.ibm.icu:icu4j'
}
// Exclude explicit ICU configs from palantir's version unification.
versionRecommendations {
// excludeConfigurations "icu_xyz"
} }
} }

View File

@ -26,7 +26,7 @@ configure(rootProject) {
} }
dependencies { dependencies {
javacc "net.java.dev.javacc:javacc:${scriptDepVersions['javacc']}" javacc deps.javacc
} }
task javacc() { task javacc() {

View File

@ -25,7 +25,7 @@ configure(rootProject) {
} }
dependencies { dependencies {
jflex "de.jflex:jflex:${scriptDepVersions['jflex']}" jflex deps.jflex
} }
} }

View File

@ -30,7 +30,7 @@ def recompileDictionary(project, dictionaryName, Closure closure) {
} }
configure(project(":lucene:analysis:kuromoji")) { configure(project(":lucene:analysis:kuromoji")) {
apply plugin: "de.undercouch.download" apply plugin: deps.plugins.undercouch.download.get().pluginId
plugins.withType(JavaPlugin) { plugins.withType(JavaPlugin) {
ext { ext {

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
apply plugin: "de.undercouch.download" apply plugin: deps.plugins.undercouch.download.get().pluginId
def resources = scriptResources(buildscript) def resources = scriptResources(buildscript)

View File

@ -30,7 +30,7 @@ def recompileDictionary(project, dictionaryName, Closure closure) {
} }
configure(project(":lucene:analysis:nori")) { configure(project(":lucene:analysis:nori")) {
apply plugin: "de.undercouch.download" apply plugin: deps.plugins.undercouch.download.get().pluginId
plugins.withType(JavaPlugin) { plugins.withType(JavaPlugin) {
ext { ext {

View File

@ -1,7 +1,5 @@
import groovy.json.JsonOutput import groovy.json.JsonOutput
import groovy.json.JsonSlurper import groovy.json.JsonSlurper
import org.apache.commons.codec.digest.DigestUtils
import java.util.function.Function import java.util.function.Function
/* /*
@ -58,7 +56,7 @@ def computeChecksummedEntries = { Task sourceTask ->
allFiles.files.forEach { file -> allFiles.files.forEach { file ->
allEntries.put( allEntries.put(
sourceTask.project.rootDir.relativePath(file), sourceTask.project.rootDir.relativePath(file),
file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--") file.exists() ? buildinfra.sha1Digest().digestAsHex(file).trim() : "--")
} }
return allEntries return allEntries

View File

@ -19,7 +19,7 @@ import org.apache.tools.ant.taskdefs.condition.Os
def resources = scriptResources(buildscript) def resources = scriptResources(buildscript)
apply plugin: "de.undercouch.download" apply plugin: deps.plugins.undercouch.download.get().pluginId
configure(project(":lucene:analysis:common")) { configure(project(":lucene:analysis:common")) {
ext { ext {

View File

@ -27,7 +27,7 @@ allprojects {
// Artifacts will have names after full gradle project path // Artifacts will have names after full gradle project path
// so :solr:core will have solr-core.jar, etc. // so :solr:core will have solr-core.jar, etc.
project.archivesBaseName = project.path.replaceAll("^:", "").replace(':', '-') project.base.archivesName = project.path.replaceAll("^:", "").replace(':', '-')
ext { ext {
// Utility method to support passing overrides via -P or -D. // Utility method to support passing overrides via -P or -D.
@ -59,12 +59,6 @@ allprojects {
return propertyOrDefault(propName, envOrDefault(envName, defValue)); return propertyOrDefault(propName, envOrDefault(envName, defValue));
} }
// Locate script-relative resource folder. This is context-sensitive so pass
// the right buildscript (top-level).
scriptResources = { buildscript ->
return file(buildscript.sourceFile.absolutePath.replaceAll('.gradle$', ""))
}
// Utility function similar to project.exec but not emitting // Utility function similar to project.exec but not emitting
// any output unless an error code is returned from the executed command. // any output unless an error code is returned from the executed command.
quietExec = { closure -> quietExec = { closure ->

View File

@ -20,7 +20,11 @@ allprojects {
tasks.withType(AbstractArchiveTask).configureEach { task -> tasks.withType(AbstractArchiveTask).configureEach { task ->
duplicatesStrategy = DuplicatesStrategy.FAIL duplicatesStrategy = DuplicatesStrategy.FAIL
reproducibleFileOrder = true reproducibleFileOrder = true
dirMode = 0755 dirPermissions {
fileMode = 0644 it.unix(0755)
}
filePermissions {
it.unix(0644)
}
} }
} }

View File

@ -22,48 +22,49 @@ import org.gradle.plugins.ide.eclipse.model.ClasspathEntry
def resources = scriptResources(buildscript) def resources = scriptResources(buildscript)
configure(rootProject) { configure(rootProject) {
apply plugin: "eclipse" plugins.withType(JavaPlugin) {
apply plugin: "eclipse"
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", rootProject.minJavaVersion) def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", rootProject.minJavaVersion)
def relativize = { other -> rootProject.rootDir.relativePath(other).toString() } def relativize = { other -> rootProject.rootDir.relativePath(other).toString() }
eclipse { eclipse {
project { project {
name = "Apache Lucene ${version}" name = "Apache Lucene ${version}"
} }
classpath { classpath {
defaultOutputDir = file('build/eclipse') defaultOutputDir = file('build/eclipse')
file { file {
beforeMerged { classpath -> classpath.entries.removeAll { it.kind == "src" } } beforeMerged { classpath -> classpath.entries.removeAll { it.kind == "src" } }
whenMerged { classpath -> whenMerged { classpath ->
def projects = allprojects.findAll { prj -> def projects = allprojects.findAll { prj ->
return prj.plugins.hasPlugin(JavaPlugin) return prj.plugins.hasPlugin(JavaPlugin)
}
Set<String> sourceSetNames = ['main', 'test', "main${eclipseJavaVersion}" as String, "test${eclipseJavaVersion}" as String, 'tools'] as Set
Set<String> sources = []
Set<File> jars = []
projects.each { prj ->
prj.sourceSets.each { sourceSet ->
if (sourceSetNames.contains(sourceSet.name)) {
sources += sourceSet.java.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
sources += sourceSet.resources.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
}
} }
// This is hacky - we take the resolved compile classpath and just Set<String> sourceSetNames = ['main', 'test', "main${eclipseJavaVersion}" as String, "test${eclipseJavaVersion}" as String, 'tools'] as Set
// include JAR files from there. We should probably make it smarter Set<String> sources = []
// by looking at real dependencies. But then: this Eclipse configuration Set<File> jars = []
// doesn't really separate sources anyway so why bother. projects.each { prj ->
jars += prj.configurations.compileClasspath.resolve() prj.sourceSets.each { sourceSet ->
jars += prj.configurations.testCompileClasspath.resolve() if (sourceSetNames.contains(sourceSet.name)) {
} sources += sourceSet.java.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
sources += sourceSet.resources.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
}
}
classpath.entries += sources.sort().collect { name -> // This is hacky - we take the resolved compile classpath and just
def sourceFolder = new SourceFolder(name, "build/eclipse/" + name) // include JAR files from there. We should probably make it smarter
// by looking at real dependencies. But then: this Eclipse configuration
// doesn't really separate sources anyway so why bother.
jars += prj.configurations.compileClasspath.resolve()
jars += prj.configurations.testCompileClasspath.resolve()
}
classpath.entries += sources.sort().collect { name ->
def sourceFolder = new SourceFolder(name, "build/eclipse/" + name)
sourceFolder.setExcludes(["module-info.java"]) sourceFolder.setExcludes(["module-info.java"])
return sourceFolder return sourceFolder
} }
@ -81,36 +82,38 @@ configure(rootProject) {
} }
} }
task luceneEclipseJdt(type: Sync) { task luceneEclipseJdt(type: Sync) {
def errorMode = project.propertyOrDefault('eclipse.errors','warning'); def errorMode = project.propertyOrDefault('eclipse.errors' ,'warning');
def ecjLintFile = rootProject.file('gradle/validation/ecj-lint/ecj.javadocs.prefs'); def ecjLintFile = rootProject.file('gradle/validation/ecj-lint/ecj.javadocs.prefs');
description = 'Generates the Eclipse JDT settings file.' description = 'Generates the Eclipse JDT settings file.'
inputs.file(ecjLintFile) inputs.file(ecjLintFile)
inputs.property('errorMode', errorMode) inputs.property('errorMode', errorMode)
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String) inputs.property('eclipseJavaVersion', eclipseJavaVersion as String)
from rootProject.file("${resources}/dot.settings") from rootProject.file("${resources}/dot.settings")
into rootProject.file(".settings") into rootProject.file(".settings")
filter(ReplaceTokens, tokens: [ filter(ReplaceTokens, tokens: [
'ecj-lint-config': ecjLintFile.getText('UTF-8').replaceAll(/=error\b/, '=' + errorMode) 'ecj-lint-config': ecjLintFile.getText('UTF-8').replaceAll(/=error\b/, '=' + errorMode)
]) ])
filteringCharset = 'UTF-8' filteringCharset = 'UTF-8'
doLast { doLast {
logger.lifecycle('Eclipse config for Java {} written with ECJ errors configured as {}. Change by passing -Peclipse.errors=ignore/warning/error.', eclipseJavaVersion, errorMode) logger.lifecycle('Eclipse config for Java {} written with ECJ errors configured as {}. Change by passing -Peclipse.errors=ignore/warning/error.', eclipseJavaVersion, errorMode)
logger.lifecycle('To edit classes of MR-JARs for a specific Java version, use e.g., -Peclipse.javaVersion=19') logger.lifecycle('To edit classes of MR-JARs for a specific Java version, use e.g., -Peclipse.javaVersion=19')
}
}
eclipseJdt {
enabled = false
dependsOn 'luceneEclipse'
}
eclipseClasspath {
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String
)
} }
}
eclipseJdt {
enabled = false
dependsOn 'luceneEclipseJdt'
}
eclipseClasspath {
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String)
} }
} }
@ -131,6 +134,6 @@ public class LibEntry implements ClasspathEntry {
node.appendNode("classpathentry", Map.of( node.appendNode("classpathentry", Map.of(
"kind", "lib", "kind", "lib",
"path", path "path", path
)); ))
} }
} }

View File

@ -49,7 +49,7 @@ configure(rootProject.ext.mavenProjects) { Project project ->
// This moves pom metadata configuration after all the scripts of all projects // This moves pom metadata configuration after all the scripts of all projects
// have been evaluated. This is required because we set artifact groups // have been evaluated. This is required because we set artifact groups
// and archivesBaseName in other scripts and some of the properties below don't // and archivesName in other scripts and some of the properties below don't
// accept lazy property providers (so everything must be in its final form). // accept lazy property providers (so everything must be in its final form).
gradle.projectsEvaluated { gradle.projectsEvaluated {
publishing { publishing {
@ -57,22 +57,10 @@ configure(rootProject.ext.mavenProjects) { Project project ->
configure(publication) { configure(publication) {
from components.java from components.java
groupId = project.group groupId = project.group
artifactId = project.archivesBaseName artifactId = project.base.archivesName.get()
artifact sourcesJar artifact sourcesJar
artifact javadocJar artifact javadocJar
// LUCENE-9561:
// Remove dependencyManagement section created by a combination of
// Palantir and the publishing plugin.
//
// https://github.com/palantir/gradle-consistent-versions/issues/550
pom({
withXml {
def dm = asNode().dependencyManagement
if (dm) dm.replaceNode {}
}
})
} }
} }
} }

View File

@ -104,3 +104,6 @@ org.gradle.java.installations.auto-download=true
# Set these to enable automatic JVM location discovery. # Set these to enable automatic JVM location discovery.
org.gradle.java.installations.fromEnv=JAVA21_HOME,JAVA22_HOME,RUNTIME_JAVA_HOME org.gradle.java.installations.fromEnv=JAVA21_HOME,JAVA22_HOME,RUNTIME_JAVA_HOME
#org.gradle.java.installations.paths=(custom paths) #org.gradle.java.installations.paths=(custom paths)
# Opt out of gradle enterprise build scan plugin entirely.
# gradle.ge=false

View File

@ -18,7 +18,6 @@
import org.apache.tools.ant.taskdefs.condition.Os import org.apache.tools.ant.taskdefs.condition.Os
import org.apache.tools.ant.types.Commandline import org.apache.tools.ant.types.Commandline
import org.gradle.api.tasks.testing.logging.* import org.gradle.api.tasks.testing.logging.*
import org.apache.lucene.gradle.ErrorReportingTestListener
def resources = scriptResources(buildscript) def resources = scriptResources(buildscript)
def verboseModeHookInstalled = false def verboseModeHookInstalled = false
@ -133,7 +132,12 @@ allprojects {
jvmArgs '--add-modules', 'jdk.incubator.vector' jvmArgs '--add-modules', 'jdk.incubator.vector'
} }
jvmArgs '--enable-native-access=' + (project.path == ':lucene:core' ? 'ALL-UNNAMED' : 'org.apache.lucene.core') jvmArgs '--enable-native-access=' + (project.path in [
':lucene:core',
':lucene:codecs',
":lucene:distribution.tests",
":lucene:test-framework"
] ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties") def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
def tempDir = layout.projectDirectory.dir(testsTmpDir.toString()) def tempDir = layout.projectDirectory.dir(testsTmpDir.toString())
@ -196,7 +200,7 @@ allprojects {
} }
def spillDir = getTemporaryDir().toPath() def spillDir = getTemporaryDir().toPath()
def listener = new ErrorReportingTestListener(test.testLogging, spillDir, testOutputsDir.toPath(), verboseMode) def listener = buildinfra.newErrorReportingTestListener(test.testLogging, spillDir, testOutputsDir.toPath(), verboseMode)
addTestOutputListener(listener) addTestOutputListener(listener)
addTestListener(listener) addTestListener(listener)

View File

@ -15,8 +15,6 @@
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.gradle.ErrorReportingTestListener
// Display all failed tests at the end of the build. // Display all failed tests at the end of the build.
def failedTests = [] def failedTests = []
@ -28,7 +26,7 @@ allprojects {
failedTests << [ failedTests << [
"name": "${desc.className}.${desc.name}", "name": "${desc.className}.${desc.name}",
"project": "${test.project.path}", "project": "${test.project.path}",
"output": file("${task.testOutputsDir}/${ErrorReportingTestListener.getOutputLogName(desc.parent)}"), "output": file("${task.testOutputsDir}/${buildinfra.getOutputLogName(desc.parent)}"),
"reproduce": "gradlew ${project.path}:test --tests \"${desc.className}.${desc.name}\" ${task.project.testOptionsForReproduceLine}" "reproduce": "gradlew ${project.path}:test --tests \"${desc.className}.${desc.name}\" ${task.project.testOptionsForReproduceLine}"
] ]
} }
@ -39,7 +37,7 @@ allprojects {
failedTests << [ failedTests << [
"name": "${desc.name}", "name": "${desc.name}",
"project": "${test.project.path}", "project": "${test.project.path}",
"output": file("${task.testOutputsDir}/${ErrorReportingTestListener.getOutputLogName(desc)}"), "output": file("${task.testOutputsDir}/${buildinfra.getOutputLogName(desc)}"),
"reproduce": "gradlew ${project.path}:test --tests \"${desc.name}\" ${task.project.testOptionsForReproduceLine}" "reproduce": "gradlew ${project.path}:test --tests \"${desc.name}\" ${task.project.testOptionsForReproduceLine}"
] ]
} }

View File

@ -30,7 +30,7 @@ buildscript {
} }
dependencies { dependencies {
classpath 'com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.7.2' classpath deps.randomizedtesting.runner
} }
} }
@ -126,10 +126,10 @@ allprojects {
secManagerExclusions secManagerExclusions
} }
dependencies { dependencies {
secManagerExclusions ( "com.carrotsearch.randomizedtesting:randomizedtesting-runner", { secManagerExclusions ( deps.randomizedtesting.runner, {
exclude group: "junit" exclude group: "junit"
}) })
secManagerExclusions ( "junit:junit", { secManagerExclusions ( deps.junit, {
exclude group: "org.hamcrest" exclude group: "org.hamcrest"
}) })
} }

View File

@ -22,7 +22,7 @@ import org.gradle.util.GradleVersion
configure(rootProject) { configure(rootProject) {
ext { ext {
expectedGradleVersion = '8.8' expectedGradleVersion = deps.versions.minGradle.get()
hasJavaFlightRecorder = ModuleLayer.boot().findModule('jdk.jfr').map(this.class.module::canRead).orElse(false) hasJavaFlightRecorder = ModuleLayer.boot().findModule('jdk.jfr').map(this.class.module::canRead).orElse(false)
} }
@ -32,6 +32,7 @@ configure(rootProject) {
} }
def currentJavaVersion = JavaVersion.current() def currentJavaVersion = JavaVersion.current()
def minJavaVersion = JavaVersion.toVersion(deps.versions.minJava.get())
if (currentJavaVersion < minJavaVersion) { if (currentJavaVersion < minJavaVersion) {
throw new GradleException("At least Java ${minJavaVersion} is required, you are running Java ${currentJavaVersion} " throw new GradleException("At least Java ${minJavaVersion} is required, you are running Java ${currentJavaVersion} "
+ "[${System.getProperty('java.vm.name')} ${System.getProperty('java.vm.version')}]") + "[${System.getProperty('java.vm.name')} ${System.getProperty('java.vm.version')}]")

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Configure sanity check for conflicting dependencies across certain configurations.
//
// For every project this applies the dependency-checks plugin (its id is looked up in
// the "deps" version catalog), sorts the classpath configurations into a "main" group and
// a "test" group and registers both groups with the plugin's dependencyVersionChecks
// extension.
allprojects {
// The plugin id is not hard-coded; it is read from the catalog's plugins section.
apply plugin: deps.plugins.dependencychecks.get().pluginId
// Configurations that make up the main (production) classpath, selected by name.
def mainConfigurations = project.configurations.matching {
it.name in [
"compileClasspath",
"runtimeClasspath"
]
}
// Configurations used for tests; annotationProcessor is grouped here as well.
def testConfigurations = project.configurations.matching {
it.name in [
"annotationProcessor",
"testCompileClasspath",
"testRuntimeClasspath"
]
}
dependencyVersionChecks {
// NOTE(review): presumably emitted as the header comment of the generated lock file
// (versions.lock) - confirm against the plugin documentation.
lockFileComment = "An inventory of resolved dependency versions. Do not edit this file directly."
// Two named groups, each including one of the configuration sets selected above.
configurationGroups {
main_dependencies {
include mainConfigurations
}
test_dependencies {
include testConfigurations
}
}
}
dependencies {
constraints {
// Placeholder hook: the closure body is intentionally empty, so no constraints
// are added to the main configurations right now.
mainConfigurations.configureEach { Configuration conf ->
// no resolutions for conflicting dependencies at the moment.
}
}
}
}
// Configure version catalog cleanups plugin.
//
// Applied to the root project only: registers the "deps" catalog (versions.toml) with the
// version-catalog-update plugin, hooks catalog formatting into "tidy" and exposes an
// "updateDeps" convenience task.
configure(rootProject) {
// The plugin id is read from the "deps" version catalog rather than hard-coded.
apply plugin: deps.plugins.versionCatalogUpdate.get().pluginId
versionCatalogUpdate {
// Keep catalog entries sorted by key.
sortByKey = true
versionCatalogs {
// Catalog named "deps", backed by versions.toml resolved against the root project.
deps {
catalogFile = file("versions.toml")
}
}
}
// Matched by name (not referenced directly) so this works whether or not a "tidy" task
// has been created by the time this script is evaluated.
tasks.matching { it.name == "tidy" }.configureEach {
it.dependsOn(":versionCatalogFormatDeps")
}
// NOTE(review): "interactive" is a property of the plugin's update task; presumably it
// makes the update go through a review step instead of rewriting the catalog
// directly - confirm against the plugin documentation.
tasks.matching {
it.path in [
":versionCatalogUpdateDeps"
]
}.configureEach {
it.interactive = true
}
// Short alias: "updateDeps" only depends on the plugin's update task for the deps catalog.
tasks.register("updateDeps", {
dependsOn ":versionCatalogUpdateDeps"
})
}

View File

@ -23,7 +23,7 @@ configure(rootProject) {
} }
dependencies { dependencies {
ecjDeps "org.eclipse.jdt:ecj:${scriptDepVersions['ecj']}" ecjDeps deps.ecj
} }
} }

View File

@ -37,24 +37,25 @@ if (skipReason) {
allprojects { prj -> allprojects { prj ->
plugins.withType(JavaPlugin) { plugins.withType(JavaPlugin) {
// LUCENE-9650: Errorprone on master/gradle does not work when running as plugin // LUCENE-9650: Errorprone does not work when running as a plugin inside a forked Javac process.
// inside a forked Javac process. Javac running inside Gradle works, because we have // Javac running inside Gradle works, because we have additional module system opens in place.
// additional module system opens in place.
// This is a hack to keep the dependency (so that palantir's version check doesn't complain)
// but don't include the plugin (which fails on JDK16+).
if (skipReason) { if (skipReason) {
tasks.withType(JavaCompile) { task -> task.dependsOn ":errorProneSkipped" } tasks.withType(JavaCompile) { task -> task.dependsOn ":errorProneSkipped" }
// Error prone plugin adds error prone to test classpath. We need to add it here too (manually) so that
// versions.lock is consistent with or without error prone.
configurations { configurations {
errorprone errorprone
} }
dependencies { dependencies {
errorprone("com.google.errorprone:error_prone_core") errorprone deps.errorprone
} }
configurations.annotationProcessor.extendsFrom(configurations.errorprone)
} else { } else {
prj.apply plugin: 'net.ltgt.errorprone' prj.apply plugin: deps.plugins.errorprone.get().pluginId
dependencies { dependencies {
errorprone("com.google.errorprone:error_prone_core") errorprone deps.errorprone
} }
tasks.withType(JavaCompile) { task -> tasks.withType(JavaCompile) { task ->

View File

@ -1,4 +1,4 @@
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
@ -57,7 +57,7 @@ allprojects { prj ->
} }
// Configure defaults for sourceSets.main // Configure defaults for sourceSets.main
tasks.matching { it.name ==~ /forbiddenApisMain\d*/ }.all { tasks.matching { it.name ==~ /forbiddenApisMain\d*/ }.configureEach {
bundledSignatures += [ bundledSignatures += [
'jdk-unsafe', 'jdk-unsafe',
'jdk-deprecated', 'jdk-deprecated',
@ -76,12 +76,12 @@ allprojects { prj ->
// Configure defaults for the MR-JAR feature sourceSets by setting java version and ignore missing classes // Configure defaults for the MR-JAR feature sourceSets by setting java version and ignore missing classes
// TODO: // TODO:
// - Get hold of warning messages, see https://github.com/policeman-tools/forbidden-apis/issues/207 // - Get hold of warning messages, see https://github.com/policeman-tools/forbidden-apis/issues/207
tasks.matching { it.name ==~ /forbiddenApisMain\d+/ }.all { tasks.matching { it.name ==~ /forbiddenApisMain\d+/ }.configureEach {
failOnMissingClasses = false failOnMissingClasses = false
} }
// Configure defaults for sourceSets.test // Configure defaults for sourceSets.test
tasks.matching { it.name in ["forbiddenApisTest", "forbiddenApisTestFixtures"] }.all { tasks.matching { it.name in ["forbiddenApisTest", "forbiddenApisTestFixtures"] }.configureEach {
bundledSignatures += [ bundledSignatures += [
'jdk-unsafe', 'jdk-unsafe',
'jdk-deprecated', 'jdk-deprecated',
@ -105,7 +105,7 @@ allprojects { prj ->
} }
// Configure defaults for sourceSets.tools (if present). // Configure defaults for sourceSets.tools (if present).
tasks.matching { it.name == "forbiddenApisTools" }.all { tasks.matching { it.name == "forbiddenApisTools" }.configureEach {
bundledSignatures += [ bundledSignatures += [
'jdk-unsafe', 'jdk-unsafe',
'jdk-deprecated', 'jdk-deprecated',
@ -129,12 +129,24 @@ allprojects { prj ->
// //
// This is the simplest workaround possible: just point at all the rule files and indicate // This is the simplest workaround possible: just point at all the rule files and indicate
// them as inputs. This way if a rule is modified, checks will be reapplied. // them as inputs. This way if a rule is modified, checks will be reapplied.
configure(tasks.matching { it.name.startsWith("forbiddenApis") }) { task -> tasks.matching { it.name.startsWith("forbiddenApis") }.configureEach { task ->
task.inputs.dir(file(resources)) task.inputs.dir(file(resources))
} }
// Disable sysout signatures for these projects. // Disable sysout signatures for these projects.
if (prj.path in [ if (prj.name in ["missing-doclet", "build-infra"]) {
forbiddenApisMain.bundledSignatures -= [
'jdk-non-portable',
'jdk-system-out'
]
forbiddenApisMain.exclude("**/Checksum*")
forbiddenApisMain.suppressAnnotations += [
"**.*SuppressForbidden"
]
}
if (prj.name in ["missing-doclet"] || prj.path in [
":lucene:demo", ":lucene:demo",
":lucene:benchmark", ":lucene:benchmark",
":lucene:test-framework" ":lucene:test-framework"

View File

@ -33,7 +33,7 @@ buildscript {
} }
dependencies { dependencies {
classpath "org.eclipse.jgit:org.eclipse.jgit:${scriptDepVersions['jgit']}" classpath deps.jgit
} }
} }

View File

@ -20,8 +20,6 @@
// 2) notice file // 2) notice file
// 3) checksum validation/ generation. // 3) checksum validation/ generation.
import org.apache.commons.codec.digest.DigestUtils
// This should be false only for debugging. // This should be false only for debugging.
def failOnError = true def failOnError = true
@ -136,7 +134,7 @@ subprojects {
jarName : file.toPath().getFileName().toString(), jarName : file.toPath().getFileName().toString(),
path : file, path : file,
module : resolvedArtifact.moduleVersion, module : resolvedArtifact.moduleVersion,
checksum : provider { new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() }, checksum : provider { buildinfra.sha1Digest().digestAsHex(file).trim() },
// We keep track of the files referenced by this dependency (sha, license, notice, etc.) // We keep track of the files referenced by this dependency (sha, license, notice, etc.)
// so that we can determine unused dangling files later on. // so that we can determine unused dangling files later on.
referencedFiles: [] referencedFiles: []

View File

@ -23,8 +23,7 @@ configure(rootProject) {
description = "All precommit checks" description = "All precommit checks"
// Root-level validation tasks. // Root-level validation tasks.
dependsOn ":verifyLocks" dependsOn ":checkLocks"
dependsOn ":versionsPropsAreSorted"
dependsOn ":checkWorkingCopyClean" dependsOn ":checkWorkingCopyClean"
} }

View File

@ -18,22 +18,23 @@
import groovy.xml.NamespaceBuilder import groovy.xml.NamespaceBuilder
// Configure rat dependencies for use in the custom task. // Configure rat dependencies for use in the custom task.
configure(rootProject) {
// Configure the rat validation task and all scanned directories.
allprojects {
configurations { configurations {
ratDeps ratDeps
} }
dependencies { dependencies {
ratDeps "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}" ratDeps deps.rat
} }
}
// Configure the rat validation task and all scanned directories. tasks.register("rat", RatTask).configure {
allprojects {
task("rat", type: RatTask) {
group = 'Verification' group = 'Verification'
description = 'Runs Apache Rat checks.' description = 'Runs Apache Rat checks.'
dependsOn configurations.ratDeps
def defaultScanFileTree = project.fileTree(projectDir, { def defaultScanFileTree = project.fileTree(projectDir, {
// Don't check under the project's build folder. // Don't check under the project's build folder.
exclude project.buildDir.name exclude project.buildDir.name
@ -78,10 +79,10 @@ allprojects {
// Exclude github stuff (templates, workflows). // Exclude github stuff (templates, workflows).
exclude ".github" exclude ".github"
// The root project also includes patterns for the boostrap (buildSrc) and composite // The root project also includes patterns for the included composite
// projects. Include their sources in the scan. // projects. Include their sources in the scan.
include "buildSrc/src/**" include "build-tools/build-infra/src/**"
include "dev-tools/missing-doclet/src/**" include "build-tools/missing-doclet/src/**"
// do not let RAT attempt to scan a python venv, it gets lost and confused... // do not let RAT attempt to scan a python venv, it gets lost and confused...
exclude "dev-tools/aws-jmh/build/**" exclude "dev-tools/aws-jmh/build/**"
@ -142,7 +143,7 @@ class RatTask extends DefaultTask {
def generateReport(File reportFile) { def generateReport(File reportFile) {
// Set up ant rat task. // Set up ant rat task.
def ratClasspath = project.rootProject.configurations.ratDeps.asPath def ratClasspath = project.configurations.ratDeps.asPath
ant.setLifecycleLogLevel(AntBuilder.AntMessagePriority.ERROR) ant.setLifecycleLogLevel(AntBuilder.AntMessagePriority.ERROR)
ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', classpath: ratClasspath) ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', classpath: ratClasspath)

View File

@ -20,9 +20,9 @@
* spotless and Google Java Format. * spotless and Google Java Format.
*/ */
def resources = scriptResources(buildscript) // def resources = scriptResources(buildscript)
configure(project(":lucene").subprojects) { prj -> configure(allprojects) { prj ->
plugins.withType(JavaPlugin) { plugins.withType(JavaPlugin) {
prj.apply plugin: 'com.diffplug.spotless' prj.apply plugin: 'com.diffplug.spotless'
@ -36,7 +36,7 @@ configure(project(":lucene").subprojects) { prj ->
lineEndings 'UNIX' lineEndings 'UNIX'
endWithNewline() endWithNewline()
googleJavaFormat('1.18.1') googleJavaFormat(deps.versions.googleJavaFormat.get())
// Apply to all Java sources // Apply to all Java sources
target "src/**/*.java" target "src/**/*.java"
@ -100,23 +100,19 @@ configure(project(":lucene").subprojects) { prj ->
// Emit a custom message about how to fix formatting errors. // Emit a custom message about how to fix formatting errors.
tasks.matching { task -> task.name == "spotlessJavaCheck" }.configureEach { tasks.matching { task -> task.name == "spotlessJavaCheck" }.configureEach {
runToFixMessage.set("\nIMPORTANT: run the top-level './gradlew tidy' to format code automatically (see help/formatting.txt for more info).") it.runToFixMessage.set("\nIMPORTANT: run the top-level './gradlew tidy' to format code automatically (see help/formatting.txt for more info).")
} }
// Add an alias to 'spotlessApply' simply called 'tidy' and wire up // Hook up spotless to tidy and check tasks.
// spotlessCheck to convention's check. tasks.matching { it.name == "tidy" }.configureEach { v ->
task tidy() { v.dependsOn tasks.matching { it.name == "spotlessApply" }
description "Applies formatters and cleanups to sources."
group "verification"
} }
tasks.matching { task -> task.name == "spotlessApply" }.configureEach { v -> tasks.matching { it.name == "check" }.configureEach { v ->
tidy.dependsOn v v.dependsOn tasks.matching { it.name == "spotlessCheck" }
v.dependsOn ":checkJdkInternalsExportedToGradle"
} }
tasks.matching { task -> task.name == "spotlessCheck" }.configureEach { v -> tasks.matching { task -> task.name in ["spotlessApply", "spotlessCheck"] }.configureEach { v ->
check.dependsOn v
v.dependsOn ":checkJdkInternalsExportedToGradle" v.dependsOn ":checkJdkInternalsExportedToGradle"
} }
} }

View File

@ -33,7 +33,7 @@ buildscript {
} }
dependencies { dependencies {
classpath "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}" classpath deps.rat
} }
} }

View File

@ -1,34 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This ensures 'versions.props' file is sorted lexicographically.
configure(rootProject) {
task versionsPropsAreSorted() {
doFirst {
def versionsProps = file('versions.props')
def lines = versionsProps.readLines("UTF-8")
def sorted = lines.toSorted()
if (!Objects.equals(lines, sorted)) {
def sortedFile = file("${buildDir}/versions.props")
sortedFile.write(sorted.join("\n"), "UTF-8")
throw new GradleException("${versionsProps} file is not sorted lexicographically. I wrote a sorted file to ${sortedFile} - please review and commit.")
}
}
}
}

4
gradlew vendored
View File

@ -158,7 +158,7 @@ fi
GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar" GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar"
if [ ! -e "$GRADLE_WRAPPER_JAR" ]; then if [ ! -e "$GRADLE_WRAPPER_JAR" ]; then
"$JAVACMD" $JAVA_OPTS "$APP_HOME/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR" "$JAVACMD" $JAVA_OPTS "$APP_HOME/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR"
WRAPPER_STATUS=$? WRAPPER_STATUS=$?
if [ "$WRAPPER_STATUS" -eq 1 ]; then if [ "$WRAPPER_STATUS" -eq 1 ]; then
echo "ERROR: Something went wrong. Make sure you're using Java version of exactly 21." echo "ERROR: Something went wrong. Make sure you're using Java version of exactly 21."
@ -173,7 +173,7 @@ CLASSPATH=$GRADLE_WRAPPER_JAR
# START OF LUCENE CUSTOMIZATION # START OF LUCENE CUSTOMIZATION
# Generate gradle.properties if they don't exist # Generate gradle.properties if they don't exist
if [ ! -e "$APP_HOME/gradle.properties" ]; then if [ ! -e "$APP_HOME/gradle.properties" ]; then
"$JAVACMD" $JAVA_OPTS "$APP_HOME/buildSrc/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "$APP_HOME/gradle/template.gradle.properties" "$APP_HOME/gradle.properties" "$JAVACMD" $JAVA_OPTS "$APP_HOME/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "$APP_HOME/gradle/template.gradle.properties" "$APP_HOME/gradle.properties"
GENERATOR_STATUS=$? GENERATOR_STATUS=$?
if [ "$GENERATOR_STATUS" -ne 0 ]; then if [ "$GENERATOR_STATUS" -ne 0 ]; then
exit $GENERATOR_STATUS exit $GENERATOR_STATUS

4
gradlew.bat vendored
View File

@ -76,7 +76,7 @@ goto fail
@rem LUCENE-9266: verify and download the gradle wrapper jar if we don't have one. @rem LUCENE-9266: verify and download the gradle wrapper jar if we don't have one.
set GRADLE_WRAPPER_JAR=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar set GRADLE_WRAPPER_JAR=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
IF NOT EXIST "%GRADLE_WRAPPER_JAR%" ( IF NOT EXIST "%GRADLE_WRAPPER_JAR%" (
"%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "%GRADLE_WRAPPER_JAR%" "%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "%GRADLE_WRAPPER_JAR%"
IF %ERRORLEVEL% EQU 1 goto failWithJvmMessage IF %ERRORLEVEL% EQU 1 goto failWithJvmMessage
IF %ERRORLEVEL% NEQ 0 goto fail IF %ERRORLEVEL% NEQ 0 goto fail
) )
@ -89,7 +89,7 @@ set CLASSPATH=%GRADLE_WRAPPER_JAR%
IF NOT EXIST "%APP_HOME%\gradle.properties" ( IF NOT EXIST "%APP_HOME%\gradle.properties" (
@rem local expansion is needed to check ERRORLEVEL inside control blocks. @rem local expansion is needed to check ERRORLEVEL inside control blocks.
setlocal enableDelayedExpansion setlocal enableDelayedExpansion
"%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/buildSrc/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "%APP_HOME%\gradle\template.gradle.properties" "%APP_HOME%\gradle.properties" "%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "%APP_HOME%\gradle\template.gradle.properties" "%APP_HOME%\gradle.properties"
IF %ERRORLEVEL% NEQ 0 goto fail IF %ERRORLEVEL% NEQ 0 goto fail
endlocal endlocal
) )

View File

@ -7,81 +7,79 @@ and each configuration can have dependencies attached to it.
There are some standard conventions so, for example, the Java plugin There are some standard conventions so, for example, the Java plugin
adds standard configurations such as "api", "implementation", adds standard configurations such as "api", "implementation",
"testImplementation" and others. These configurations can also inherit "testImplementation" and others. These configurations can also inherit
from each other; more about this typic can be found here: from each other; more about this topic can be found here:
https://docs.gradle.org/current/userguide/dependency_management_for_java_projects.html#dependency_management_for_java_projects https://docs.gradle.org/current/userguide/dependency_management_for_java_projects.html#dependency_management_for_java_projects
https://docs.gradle.org/current/userguide/java_library_plugin.html#sec:java_library_separation https://docs.gradle.org/current/userguide/java_library_plugin.html#sec:java_library_separation
https://docs.gradle.org/current/userguide/java_plugin.html#sec:java_plugin_and_dependency_management https://docs.gradle.org/current/userguide/java_plugin.html#sec:java_plugin_and_dependency_management
Lucene typically uses three configurations and attach project Lucene uses the following configurations and attaches project dependencies
dependencies to them: to them:
api - makes a dependency available for main classes, tests and any moduleApi - makes the dependency available to main classes, tests and any
other modules importing the project (exportable dependency), other modules importing the project (exportable dependency),
implementation - makes a dependency available for main classes, tests moduleImplementation - makes the dependency available to main classes, tests
but will *not* export the dependency for other modules (so their but will *not* export the dependency to other modules (so their
compilation classpath won't contain it). compilation classpath won't contain it).
testImplementation - makes a dependency only available for test classes. moduleTestImplementation - makes the dependency available for test classes only.
The "module" prefix is used to distinguish configurations which apply
to modular builds, compared to the regular classpath-configurations defined
by gradle's java module. Some Lucene modules may define regular classpath
entries to bypass the limitations of the module system (or gradle's).
Adding a library dependency Adding a library dependency
--------------------------- ---------------------------
Lucene dependencies and their versions are managed globally using version
catalogs (in versions.toml) [https://docs.gradle.org/current/userguide/platforms.html].
Let's say we wish to add a dependency on library "foo.bar:baz" in Let's say we wish to add a dependency on library "foo.bar:baz" in
version 1.2 to :lucene:core. Let's assume this library is only version 1.2 to :lucene:core. Let's assume this library is only
used internally by the project. The :lucene:core project is configured used internally by the project. The :lucene:core project is configured
by lucene/core/build.gradle and we would add (or modify) the dependency by lucene/core/build.gradle, so we add (or modify) the dependency
block as follows: block as follows:
dependencies { dependencies {
implementation "foo.bar:baz" moduleImplementation deps.baz
} }
The "implementation" here is a named configuration; we don't need to declare The "moduleImplementation" here is a named configuration explained in the
it because it is declared for us by the java-library plugin. section above. The "deps.baz" refers to the version catalog named "deps",
in which the dependency "baz" should be declared. If this is the first
reference to this library, then we have to add it to "versions.toml" catalog:
the version goes under the "versions" and module coordinates
under the "libraries" section:
In "normal" gradle the version of the dependency would be present [versions]
directly inside the declaration but we use a plugin baz = "1.2"
(palantir-consistent-versions) to manage all dependency versions ...
from the top-level (so that conflicts can be resolved globally). [libraries]
baz = { module = "foo.bar:baz", version.ref = "baz" }
If this is the first time "foo.bar:baz" is added to the project, we'd have The version defined in the "versions" section is the preferred version of the library
to add its version to "versions.props" file at the top level of the we wish to use. Finally, run tidy to sort all entries in versions.toml:
checkout:
foo.bar:baz=1.2 gradlew tidy
and then regenerate the "versions.lock" file using the following Gradle will try to consolidate different versions across different
command: configurations to make sure they're compatible and may complain if it encounters
conflicting versions in the dependency tree. We want all dependencies to be consistent,
so we use an additional build plugin to ensure no accidental version changes
occur. Whenever we add or remove dependencies, we have to follow-up with lock file
regeneration:
gradlew --write-locks gradlew writeLocks
git diff versions.*
IMPORTANT: The versions.lock file will contain the actual version IMPORTANT: The versions.lock file will contain a list of actual library versions
of the dependency picked based on other project dependencies and and configurations they occurred in.
their transitive dependencies. This selected version may be
different from what each of these actually requires (the highest
version number will be typically selected). To see which dependencies
require which version of the library use:
gradlew why --hash=... Once a new dependency is added it always makes sense to regenerate the lock file
and look at which dependencies have changed (and why).
where the hash code comes from versions.lock file. For example, at
the time of writing, jackson-databind has the following entry:
com.fasterxml.jackson.core:jackson-databind:2.10.0 (3 constraints: 931a7796)
and "gradlew why --hash=931a7796" prints:
com.fasterxml.jackson.core:jackson-databind:2.10.0
projects -> 2.10.0
net.thisptr:jackson-jq -> 2.7.0
org.carrot2:carrot2-mini -> 2.9.9.3
Once the dependency is added it always makes sense to see the
tree of all module dependencies and maybe exclude transitive
dependencies of foo.bar:baz that we won't need.
Inspecting current dependencies Inspecting current dependencies
@ -98,12 +96,12 @@ in just the "publicly visible" and "classpath-visible" configurations.
The publicly visible project dependencies (classes shared by other The publicly visible project dependencies (classes shared by other
modules importing our module) can be displayed with: modules importing our module) can be displayed with:
gradlew -p lucene\analysis\icu dependencies --configuration api gradlew -p lucene\analysis\icu dependencies --configuration moduleApi
And the "private" set of dependencies (real classpath) can be dumped And the "private" set of dependencies (real classpath) can be dumped
with: with:
gradlew -p lucene\analysis\icu dependencies --configuration runtimeClasspath gradlew -p lucene\analysis\icu dependencies --configuration moduleRuntimePath
Excluding a transitive dependency Excluding a transitive dependency
@ -115,7 +113,7 @@ crucial for the functioning of "foo.bar:baz". We can exclude it
by adding an exclusion block to the original declaration: by adding an exclusion block to the original declaration:
dependencies { dependencies {
implementation("foo.bar:baz", { implementation(deps.baz, {
exclude group: "foo.bar", module: "irrelevant" exclude group: "foo.bar", module: "irrelevant"
}) })
} }

View File

@ -2,7 +2,7 @@ Code formatting
=============== ===============
Starting with (LUCENE-9564) Java code is enforced to comply with Starting with (LUCENE-9564) Java code is enforced to comply with
google-java-format conventions. In theory you shouldn't worry about google-java-format conventions. In theory, you shouldn't worry about
what the convention actually looks like - write the code in any way what the convention actually looks like - write the code in any way
you like and then run: you like and then run:
@ -13,7 +13,7 @@ your code so that it complies with the convention and passes gradle
'check' task. 'check' task.
IMPORTANT: There is *no* way to mark sections of the code as excluded IMPORTANT: There is *no* way to mark sections of the code as excluded
from formatting. This is by design and cannot be altered. In vast from formatting. This is by design and cannot be altered. In the vast
majority of cases the formatter will do a great job of cleaning up the majority of cases the formatter will do a great job of cleaning up the
code. Occasionally you may want to rewrite the code (introduce a local code. Occasionally you may want to rewrite the code (introduce a local
variable or reshape code paths) so that it's easier to read after variable or reshape code paths) so that it's easier to read after

View File

@ -54,7 +54,7 @@ Signing can be enabled by adding the "-Psign" option, for example:
gradlew assembleRelease mavenToApacheReleases -Psign gradlew assembleRelease mavenToApacheReleases -Psign
By default gradle uses a Java-based implementation of PGP for signing, which requieres By default, gradle uses a Java-based implementation of PGP for signing, which requires
several "signing.*" properties via either ~/.gradle/gradle.properties or command-line options: several "signing.*" properties via either ~/.gradle/gradle.properties or command-line options:
https://docs.gradle.org/current/userguide/signing_plugin.html#sec:signatory_credentials https://docs.gradle.org/current/userguide/signing_plugin.html#sec:signatory_credentials
@ -92,9 +92,9 @@ signing.gnupg.passphrase=... # Provide your passphrase to
If in doubt, consult gradle's signing plugin documentation: If in doubt, consult gradle's signing plugin documentation:
https://docs.gradle.org/current/userguide/signing_plugin.html#sec:using_gpg_agent https://docs.gradle.org/current/userguide/signing_plugin.html#sec:using_gpg_agent
"signing.gnupg.passphrase" is not recomended because there is no advantage to using an external GPG process if you use it. If you "signing.gnupg.passphrase" is not recommended because there is no advantage to using an external GPG process if you use it.
are comfortable giving gradle your passphrase, then there is no reason to use an external GPG process via '-PuseGpg'. Just use the If you are comfortable giving gradle your passphrase, then there is no reason to use an external GPG process via '-PuseGpg'.
"signing.*" options described previously to let gradle deal with your key directly. Just use the "signing.*" options described previously to let gradle deal with your key directly.
Because of how Gradle's signing plugin invokes GPG, using an external GPG process *only* works if your GPG configuration uses a Because of how Gradle's signing plugin invokes GPG, using an external GPG process *only* works if your GPG configuration uses a
GPG agent (required by gpg2) and if the "pinentry" for your GPG agent does not require access to the tty to prompt you for a password. GPG agent (required by gpg2) and if the "pinentry" for your GPG agent does not require access to the tty to prompt you for a password.

View File

@ -129,6 +129,10 @@ New Features
* GITHUB#13233: Add RomanianNormalizationFilter (Trey Jones, Robert Muir) * GITHUB#13233: Add RomanianNormalizationFilter (Trey Jones, Robert Muir)
* GITHUB#13449: Sparse index: optional skip list on top of doc values which is exposed via the
DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether
to create a "skip index" for doc values. (Ignacio Vera)
Improvements Improvements
--------------------- ---------------------
@ -204,6 +208,10 @@ Changes in Backwards Compatibility Policy
Other Other
--------------------- ---------------------
* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one Hashmap saving
memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes
were cleared. (Ignacio Vera)
* LUCENE-10376: Roll up the loop in VInt/VLong in DataInput. (Guo Feng) * LUCENE-10376: Roll up the loop in VInt/VLong in DataInput. (Guo Feng)
* LUCENE-10253: The @BadApple annotation has been removed from the test * LUCENE-10253: The @BadApple annotation has been removed from the test
@ -239,7 +247,10 @@ Other
API Changes API Changes
--------------------- ---------------------
(No changes)
* GITHUB#13281: Mark COSINE VectorSimilarityFunction as deprecated. (Pulkit Gupta)
* GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov)
New Features New Features
--------------------- ---------------------
@ -262,15 +273,19 @@ Optimizations
* GITHUB#13454: MultiTermQuery returns null ScoreSupplier in cases where * GITHUB#13454: MultiTermQuery returns null ScoreSupplier in cases where
no query terms are present in the index segment (Mayya Sharipova) no query terms are present in the index segment (Mayya Sharipova)
* GITHUB#13431: Replace TreeMap and use compiled Patterns in Japanese UserDictionary. (Bruno Roustant)
* GITHUB#12941: Don't preserve auxiliary buffer contents in LSBRadixSorter if it grows. (Stefan Vodita)
Bug Fixes Bug Fixes
--------------------- ---------------------
(No changes)
* GITHUB#13463: Address bug in MultiLeafKnnCollector causing #minCompetitiveSimilarity to stay artificially low in
some corner cases. (Greg Miller)
Other Other
--------------------- --------------------
* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one Hashmap saving (No changes)
memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes
were cleared. (Ignacio Vera)
======================== Lucene 9.11.0 ======================= ======================== Lucene 9.11.0 =======================

View File

@ -23,7 +23,7 @@ dependencies {
moduleApi project(':lucene:core') moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common') moduleApi project(':lucene:analysis:common')
moduleApi 'com.ibm.icu:icu4j' moduleApi deps.icu4j
moduleTestImplementation project(':lucene:test-framework') moduleTestImplementation project(':lucene:test-framework')
} }

View File

@ -23,8 +23,7 @@ import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.regex.Pattern;
import java.util.TreeMap;
import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.analysis.morph.Dictionary;
import org.apache.lucene.analysis.util.CSVUtil; import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
@ -37,6 +36,10 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
public static final String INTERNAL_SEPARATOR = "\u0000"; public static final String INTERNAL_SEPARATOR = "\u0000";
private static final Pattern LINE_COMMENT = Pattern.compile("^#.*$");
private static final Pattern WHITESPACE = Pattern.compile("\\s");
private static final Pattern SPACES = Pattern.compile(" +");
// phrase text -> phrase ID // phrase text -> phrase ID
private final TokenInfoFST fst; private final TokenInfoFST fst;
@ -51,16 +54,16 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
public static UserDictionary open(Reader reader) throws IOException { public static UserDictionary open(Reader reader) throws IOException {
BufferedReader br = new BufferedReader(reader); BufferedReader br = new BufferedReader(reader);
String line = null; String line;
List<String[]> featureEntries = new ArrayList<>(); List<String[]> featureEntries = new ArrayList<>();
// text, segmentation, readings, POS // text, segmentation, readings, POS
while ((line = br.readLine()) != null) { while ((line = br.readLine()) != null) {
// Remove comments // Remove comments
line = line.replaceAll("^#.*$", ""); line = LINE_COMMENT.matcher(line).replaceAll("");
// Skip empty lines or comment lines // Skip empty lines or comment lines
if (line.trim().length() == 0) { if (line.trim().isEmpty()) {
continue; continue;
} }
String[] values = CSVUtil.parse(line); String[] values = CSVUtil.parse(line);
@ -99,10 +102,10 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
long ord = 0; long ord = 0;
for (String[] values : featureEntries) { for (String[] values : featureEntries) {
String surface = values[0].replaceAll("\\s", ""); String surface = WHITESPACE.matcher(values[0]).replaceAll("");
String concatenatedSegment = values[1].replaceAll("\\s", ""); String concatenatedSegment = WHITESPACE.matcher(values[1]).replaceAll("");
String[] segmentation = values[1].replaceAll(" *", " ").split(" "); String[] segmentation = SPACES.split(values[1]);
String[] readings = values[2].replaceAll(" *", " ").split(" "); String[] readings = SPACES.split(values[2]);
String pos = values[3]; String pos = values[3];
if (segmentation.length != readings.length) { if (segmentation.length != readings.length) {
@ -141,7 +144,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
scratch.growNoCopy(token.length()); scratch.growNoCopy(token.length());
scratch.setLength(token.length()); scratch.setLength(token.length());
for (int i = 0; i < token.length(); i++) { for (int i = 0; i < token.length(); i++) {
scratch.setIntAt(i, (int) token.charAt(i)); scratch.setIntAt(i, token.charAt(i));
} }
fstCompiler.add(scratch.get(), ord); fstCompiler.add(scratch.get(), ord);
segmentations.add(wordIdAndLength); segmentations.add(wordIdAndLength);
@ -151,7 +154,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
new TokenInfoFST( new TokenInfoFST(
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), false); FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), false);
this.morphAtts = new UserMorphData(data.toArray(new String[0])); this.morphAtts = new UserMorphData(data.toArray(new String[0]));
this.segmentations = segmentations.toArray(new int[segmentations.size()][]); this.segmentations = segmentations.toArray(new int[0][]);
} }
@Override @Override
@ -168,33 +171,53 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
* @return array of {wordId, position, length} * @return array of {wordId, position, length}
*/ */
public int[][] lookup(char[] chars, int off, int len) throws IOException { public int[][] lookup(char[] chars, int off, int len) throws IOException {
// TODO: can we avoid this treemap/toIndexArray? List<Match> matches = null;
TreeMap<Integer, int[]> result = new TreeMap<>(); // index, [length, length...] int numResults = 0;
boolean found = false; // true if we found any results
final FST.BytesReader fstReader = fst.getBytesReader(); final FST.BytesReader fstReader = fst.getBytesReader();
final int end = off + len;
FST.Arc<Long> arc = new FST.Arc<>(); FST.Arc<Long> arc = new FST.Arc<>();
int end = off + len;
for (int startOffset = off; startOffset < end; startOffset++) { for (int startOffset = off; startOffset < end; startOffset++) {
int[] wordIdAndLength = null;
arc = fst.getFirstArc(arc); arc = fst.getFirstArc(arc);
int output = 0; int output = 0;
int remaining = end - startOffset; for (int i = 0, remaining = end - startOffset; i < remaining; i++) {
for (int i = 0; i < remaining; i++) {
int ch = chars[startOffset + i]; int ch = chars[startOffset + i];
if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) { if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
break; // continue to next position break; // continue to next position
} }
output += arc.output().intValue(); output += arc.output().intValue();
if (arc.isFinal()) { if (arc.isFinal()) {
final int finalOutput = output + arc.nextFinalOutput().intValue(); int finalOutput = output + arc.nextFinalOutput().intValue();
result.put(startOffset - off, segmentations[finalOutput]); wordIdAndLength = segmentations[finalOutput];
found = true;
} }
} }
if (wordIdAndLength != null) {
if (matches == null) {
matches = new ArrayList<>();
}
matches.add(new Match(startOffset - off, wordIdAndLength));
numResults += wordIdAndLength.length - 1;
}
} }
if (numResults == 0) {
return found ? toIndexArray(result) : EMPTY_RESULT; return EMPTY_RESULT;
}
int[][] result = new int[numResults][];
int index = 0;
for (int i = 0; i < matches.size(); i++) {
Match match = matches.get(i);
int[] wordIdAndLength = match.wordIdAndLength;
int wordId = wordIdAndLength[0];
// convert length to index
int position = match.position;
for (int j = 1; j < wordIdAndLength.length; j++) { // first entry is wordId offset
// add a {wordId, index, length} token to the results
int[] token = {wordId + j - 1, position, wordIdAndLength[j]};
result[index++] = token;
position += wordIdAndLength[j];
}
}
return result;
} }
public TokenInfoFST getFST() { public TokenInfoFST getFST() {
@ -203,28 +226,9 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
private static final int[][] EMPTY_RESULT = new int[0][]; private static final int[][] EMPTY_RESULT = new int[0][];
/**
* Convert Map of index and wordIdAndLength to array of {wordId, index, length}
*
* @return array of {wordId, index, length}
*/
private int[][] toIndexArray(Map<Integer, int[]> input) {
ArrayList<int[]> result = new ArrayList<>();
for (Map.Entry<Integer, int[]> entry : input.entrySet()) {
int[] wordIdAndLength = entry.getValue();
int wordId = wordIdAndLength[0];
// convert length to index
int current = entry.getKey();
for (int j = 1; j < wordIdAndLength.length; j++) { // first entry is wordId offset
int[] token = {wordId + j - 1, current, wordIdAndLength[j]};
result.add(token);
current += wordIdAndLength[j];
}
}
return result.toArray(new int[result.size()][]);
}
public int[] lookupSegmentation(int phraseID) { public int[] lookupSegmentation(int phraseID) {
return segmentations[phraseID]; return segmentations[phraseID];
} }
private record Match(int position, int[] wordIdAndLength) {}
} }

View File

@ -22,10 +22,10 @@ description = 'Analyzer for dictionary stemming, built-in Polish dictionary'
dependencies { dependencies {
moduleApi project(':lucene:core') moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common') moduleApi project(':lucene:analysis:common')
moduleApi 'org.carrot2:morfologik-stemming' moduleApi deps.morfologik.stemming
moduleImplementation 'org.carrot2:morfologik-polish' moduleImplementation deps.morfologik.polish
moduleImplementation 'ua.net.nlp:morfologik-ukrainian-search' moduleImplementation deps.morfologik.ukrainian
moduleTestImplementation project(':lucene:test-framework') moduleTestImplementation project(':lucene:test-framework')
} }

View File

@ -22,7 +22,7 @@ description = 'OpenNLP Library Integration'
dependencies { dependencies {
moduleApi project(':lucene:core') moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common') moduleApi project(':lucene:analysis:common')
moduleApi 'org.apache.opennlp:opennlp-tools' moduleApi deps.opennlp.tools
moduleTestImplementation project(':lucene:test-framework') moduleTestImplementation project(':lucene:test-framework')
} }

View File

@ -23,7 +23,7 @@ dependencies {
moduleApi project(':lucene:core') moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common') moduleApi project(':lucene:analysis:common')
moduleApi 'commons-codec:commons-codec' moduleApi deps.commons.codec
moduleTestImplementation project(':lucene:test-framework') moduleTestImplementation project(':lucene:test-framework')
} }

View File

@ -209,6 +209,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
storePayloads, storePayloads,
indexOptions, indexOptions,
docValuesType, docValuesType,
false,
dvGen, dvGen,
attributes, attributes,
pointDataDimensionCount, pointDataDimensionCount,

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.ImpactsEnum;
@ -1677,6 +1678,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
} }
} }
/**
 * Always returns {@code null}, regardless of the field passed in: this producer does not hand
 * out a {@link DocValuesSkipper}.
 *
 * @param field the field a skipper is requested for (ignored)
 * @return {@code null}
 */
@Override
public DocValuesSkipper getSkipper(FieldInfo field) {
return null;
}
@Override @Override
public void checkIntegrity() throws IOException { public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(data); CodecUtil.checksumEntireFile(data);

View File

@ -186,6 +186,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
storePayloads, storePayloads,
indexOptions, indexOptions,
docValuesType, docValuesType,
false,
dvGen, dvGen,
attributes, attributes,
pointDataDimensionCount, pointDataDimensionCount,

View File

@ -27,4 +27,9 @@ public class TestLucene60FieldInfosFormat extends BaseFieldInfoFormatTestCase {
protected Codec getCodec() { protected Codec getCodec() {
return new Lucene84RWCodec(); return new Lucene84RWCodec();
} }
/**
 * Always answers {@code false}.
 *
 * <p>NOTE(review): presumably consulted by the base test case so that doc-values skip-index
 * checks are not exercised for the codec returned by {@code getCodec()} - confirm against the
 * base class.
 *
 * @return {@code false}
 */
@Override
protected boolean supportDocValuesSkipIndex() {
return false;
}
} }

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.backward_codecs.lucene80; package org.apache.lucene.backward_codecs.lucene80;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -59,18 +60,114 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.codecs.asserting.AssertingCodec; import org.apache.lucene.tests.codecs.asserting.AssertingCodec;
import org.apache.lucene.tests.index.BaseCompressingDocValuesFormatTestCase; import org.apache.lucene.tests.index.LegacyBaseDocValuesFormatTestCase;
import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.packed.PackedInts;
/** Tests Lucene80DocValuesFormat */ /** Tests Lucene80DocValuesFormat */
public abstract class BaseLucene80DocValuesFormatTestCase public abstract class BaseLucene80DocValuesFormatTestCase
extends BaseCompressingDocValuesFormatTestCase { extends LegacyBaseDocValuesFormatTestCase {
/**
 * Sums {@link Directory#fileLength(String)} over every file name that {@link
 * Directory#listAll()} reports for the given directory.
 *
 * @param d the directory to measure
 * @return the total of all reported file lengths
 * @throws IOException if listing the directory or reading a file length fails
 */
private static long dirSize(Directory d) throws IOException {
  final String[] fileNames = d.listAll();
  long totalLength = 0;
  for (int i = 0; i < fileNames.length; i++) {
    totalLength += d.fileLength(fileNames[i]);
  }
  return totalLength;
}
/**
 * Indexes 300 documents whose single numeric doc value is drawn from a pool of at most 256
 * distinct longs, force-merges, then adds 20 more documents reusing values from that pool and
 * force-merges again. The directory must grow by less than 8 bytes per added document, i.e. the
 * repeated values must not be stored as raw longs.
 *
 * @throws IOException if indexing or measuring the directory fails
 */
public void testUniqueValuesCompression() throws IOException {
  // Resources are closed in reverse order: the writer first, then the directory it writes to.
  // The writer used to be left open, leaking it whenever the test finished or failed.
  try (final Directory dir = new ByteBuffersDirectory();
      final IndexWriter iwriter =
          new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
    final int uniqueValueCount = TestUtil.nextInt(random(), 1, 256);
    final List<Long> values = new ArrayList<>();

    // A single document/field instance is reused; only the field value changes per add.
    final Document doc = new Document();
    final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.add(dvf);
    for (int i = 0; i < 300; ++i) {
      final long value;
      if (values.size() < uniqueValueCount) {
        // Still filling the pool of distinct values.
        value = random().nextLong();
        values.add(value);
      } else {
        value = RandomPicks.randomFrom(random(), values);
      }
      dvf.setLongValue(value);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size1 = dirSize(dir);
    for (int i = 0; i < 20; ++i) {
      dvf.setLongValue(RandomPicks.randomFrom(random(), values));
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size2 = dirSize(dir);
    // make sure the new longs did not cost 8 bytes each
    assertTrue(size2 < size1 + 8 * 20);
  }
}
/**
 * Indexes 300 documents whose numeric doc value is {@code base + k * day} for a random {@code k}
 * below 1000, force-merges, then adds 50 more such documents and force-merges again. The
 * directory must grow by less than what plain bit-packing of day-sized values would need for
 * the 50 new documents.
 *
 * @throws IOException if indexing or measuring the directory fails
 */
public void testDateCompression() throws IOException {
  // Resources are closed in reverse order: the writer first, then the directory it writes to.
  // The writer used to be left open, leaking it whenever the test finished or failed.
  try (final Directory dir = new ByteBuffersDirectory();
      final IndexWriter iwriter =
          new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
    final long base = 13; // prime
    final long day = 1000L * 60 * 60 * 24;

    // A single document/field instance is reused; only the field value changes per add.
    final Document doc = new Document();
    final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.add(dvf);
    for (int i = 0; i < 300; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size1 = dirSize(dir);
    for (int i = 0; i < 50; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size2 = dirSize(dir);
    // make sure the new longs cost less than if they had only been packed
    assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
  }
}
/**
 * Indexes 20000 documents whose numeric doc value is {@code i & 1023}, force-merges, then adds
 * one document holding {@link Long#MAX_VALUE} and force-merges again. The directory must grow by
 * less than the cost of widening every existing value from 10 to 63 bits, i.e. the single
 * outlier must not raise the bits-per-value of all other values.
 *
 * @throws IOException if indexing or measuring the directory fails
 */
public void testSingleBigValueCompression() throws IOException {
  // Resources are closed in reverse order: the writer first, then the directory it writes to.
  // The writer used to be left open, leaking it whenever the test finished or failed.
  try (final Directory dir = new ByteBuffersDirectory();
      final IndexWriter iwriter =
          new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
    // A single document/field instance is reused; only the field value changes per add.
    final Document doc = new Document();
    final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.add(dvf);
    for (int i = 0; i < 20000; ++i) {
      dvf.setLongValue(i & 1023);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size1 = dirSize(dir);
    dvf.setLongValue(Long.MAX_VALUE);
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);
    final long size2 = dirSize(dir);
    // make sure the new value did not grow the bpv for every other value
    assertTrue(size2 < size1 + (20000 * (63 - 10)) / 8);
  }
}
// TODO: these big methods can easily blow up some of the other ram-hungry codecs... // TODO: these big methods can easily blow up some of the other ram-hungry codecs...
// for now just keep them here, as we want to test this for this format. // for now just keep them here, as we want to test this for this format.

View File

@ -52,11 +52,6 @@ class Lucene99RWHnswScalarQuantizationVectorsFormat
null); null);
} }
@Override
public int getMaxDimensions(String fieldName) {
return 1024;
}
static class Lucene99RWScalarQuantizedFormat extends Lucene99ScalarQuantizedVectorsFormat { static class Lucene99RWScalarQuantizedFormat extends Lucene99ScalarQuantizedVectorsFormat {
private static final FlatVectorsFormat rawVectorFormat = private static final FlatVectorsFormat rawVectorFormat =
new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()); new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer());

View File

@ -25,8 +25,8 @@ dependencies {
moduleImplementation project(':lucene:core') moduleImplementation project(':lucene:core')
moduleImplementation project(':lucene:expressions') moduleImplementation project(':lucene:expressions')
moduleImplementation "org.openjdk.jmh:jmh-core:1.37" moduleImplementation deps.jmh.core
annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:1.37" annotationProcessor deps.jmh.annprocess
} }

View File

@ -31,17 +31,17 @@ dependencies {
moduleImplementation project(':lucene:spatial-extras') moduleImplementation project(':lucene:spatial-extras')
moduleImplementation project(':lucene:queryparser') moduleImplementation project(':lucene:queryparser')
moduleImplementation "org.apache.commons:commons-compress" moduleImplementation deps.commons.compress
moduleImplementation "com.ibm.icu:icu4j" moduleImplementation deps.icu4j
moduleImplementation "org.locationtech.spatial4j:spatial4j" moduleImplementation deps.spatial4j
moduleImplementation ("net.sourceforge.nekohtml:nekohtml", { moduleImplementation(deps.nekohtml, {
exclude module: "xml-apis" exclude module: "xml-apis"
// LUCENE-10337: Exclude xercesImpl from module path because it has split packages with the JDK (!) // LUCENE-10337: Exclude xercesImpl from module path because it has split packages with the JDK (!)
exclude module: "xercesImpl" exclude module: "xercesImpl"
}) })
// LUCENE-10337: Include xercesImpl on regular classpath where it won't cause conflicts. // LUCENE-10337: Include xercesImpl on regular classpath where it won't cause conflicts.
implementation ("xerces:xercesImpl", { implementation (deps.xerces, {
exclude module: "xml-apis" exclude module: "xml-apis"
}) })

View File

@ -16,13 +16,16 @@
*/ */
package org.apache.lucene.codecs.simpletext; package org.apache.lucene.codecs.simpletext;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.DOCCOUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORIGIN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
@ -40,6 +43,7 @@ import java.util.function.IntFunction;
import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
@ -59,12 +63,15 @@ import org.apache.lucene.util.StringHelper;
class SimpleTextDocValuesReader extends DocValuesProducer { class SimpleTextDocValuesReader extends DocValuesProducer {
static class OneField { static class OneField {
int docCount;
long dataStartFilePointer; long dataStartFilePointer;
String pattern; String pattern;
String ordPattern; String ordPattern;
int maxLength; int maxLength;
boolean fixedLength; boolean fixedLength;
long origin;
long minValue; long minValue;
long maxValue;
long numValues; long numValues;
} }
@ -99,17 +106,34 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE)); DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE));
assert dvType != DocValuesType.NONE; assert dvType != DocValuesType.NONE;
if (dvType == DocValuesType.NUMERIC) {
if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
readLine(); readLine();
assert startsWith(MINVALUE) assert startsWith(MINVALUE)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext; : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.minValue = Long.parseLong(stripPrefix(MINVALUE)); field.minValue = Long.parseLong(stripPrefix(MINVALUE));
readLine(); readLine();
assert startsWith(MAXVALUE)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.maxValue = Long.parseLong(stripPrefix(MAXVALUE));
}
readLine();
assert startsWith(DOCCOUNT)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.docCount = Integer.parseInt(stripPrefix(DOCCOUNT));
if (dvType == DocValuesType.NUMERIC) {
readLine();
assert startsWith(ORIGIN)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.origin = Long.parseLong(stripPrefix(ORIGIN));
readLine();
assert startsWith(PATTERN); assert startsWith(PATTERN);
field.pattern = stripPrefix(PATTERN); field.pattern = stripPrefix(PATTERN);
field.dataStartFilePointer = data.getFilePointer(); field.dataStartFilePointer = data.getFilePointer();
data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc); data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc);
} else if (dvType == DocValuesType.BINARY) { } else if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED_NUMERIC) {
readLine(); readLine();
assert startsWith(MAXLENGTH); assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH)); field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
@ -225,7 +249,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
throw new CorruptIndexException("failed to parse BigDecimal value", in, pe); throw new CorruptIndexException("failed to parse BigDecimal value", in, pe);
} }
SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue(); return BigInteger.valueOf(field.origin).add(bd.toBigIntegerExact()).longValue();
} catch (IOException ioe) { } catch (IOException ioe) {
throw new RuntimeException(ioe); throw new RuntimeException(ioe);
} }
@ -824,4 +848,82 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
} }
} }
} }
/**
 * Returns a single-level {@link DocValuesSkipper} for the given field, backed by the per-field
 * statistics parsed from the SimpleText data file.
 *
 * <p>For NUMERIC and SORTED_NUMERIC fields the value bounds are the stored min/max values; for
 * every other type they are {@code 0} and {@code numValues - 1}. Doc ID bounds are coarse: once
 * advanced, the skipper reports {@code 0} and {@code maxDoc} unless the target is at or beyond
 * {@code maxDoc} or the field has no documents.
 */
@Override
public DocValuesSkipper getSkipper(FieldInfo fieldInfo) {
  final DocValuesType type = fieldInfo.getDocValuesType();
  final boolean hasNumericBounds =
      type == DocValuesType.NUMERIC || type == DocValuesType.SORTED_NUMERIC;
  final OneField entry = fields.get(fieldInfo.name);

  // SegmentCoreReaders has already validated that this field exists.
  assert entry != null;

  return new DocValuesSkipper() {
    // Last target passed to advance(); -1 until the first call.
    int current = -1;

    @Override
    public void advance(int target) {
      current = target;
    }

    @Override
    public int numLevels() {
      return 1;
    }

    @Override
    public int minDocID(int level) {
      if (current == -1) {
        return -1;
      }
      final boolean pastEnd = current >= maxDoc || entry.docCount == 0;
      return pastEnd ? DocIdSetIterator.NO_MORE_DOCS : 0;
    }

    @Override
    public int maxDocID(int level) {
      if (current == -1) {
        return -1;
      }
      final boolean pastEnd = current >= maxDoc || entry.docCount == 0;
      return pastEnd ? DocIdSetIterator.NO_MORE_DOCS : maxDoc;
    }

    @Override
    public long minValue(int level) {
      return minValue();
    }

    @Override
    public long maxValue(int level) {
      return maxValue();
    }

    @Override
    public int docCount(int level) {
      return docCount();
    }

    @Override
    public long minValue() {
      if (hasNumericBounds) {
        return entry.minValue;
      }
      return 0;
    }

    @Override
    public long maxValue() {
      if (hasNumericBounds) {
        return entry.maxValue;
      }
      return entry.numValues - 1;
    }

    @Override
    public int docCount() {
      return entry.docCount;
    }
  };
}
} }

View File

@ -46,8 +46,13 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
static final BytesRef END = new BytesRef("END"); static final BytesRef END = new BytesRef("END");
static final BytesRef FIELD = new BytesRef("field "); static final BytesRef FIELD = new BytesRef("field ");
static final BytesRef TYPE = new BytesRef(" type "); static final BytesRef TYPE = new BytesRef(" type ");
static final BytesRef DOCCOUNT = new BytesRef(" doccount ");
// used for numerics // used for numerics
static final BytesRef MINVALUE = new BytesRef(" minvalue "); static final BytesRef ORIGIN = new BytesRef(" origin "); // for deltas
// Label for the absolute minimum value of a numeric field (used by the skipper); spelled to
// match the " maxvalue " label written right after it.
static final BytesRef MINVALUE = new BytesRef(" minvalue ");
static final BytesRef MAXVALUE = new BytesRef(" maxvalue ");
static final BytesRef PATTERN = new BytesRef(" pattern "); static final BytesRef PATTERN = new BytesRef(" pattern ");
// used for bytes // used for bytes
static final BytesRef LENGTH = new BytesRef("length "); static final BytesRef LENGTH = new BytesRef("length ");
@ -97,13 +102,27 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
maxValue = Math.max(maxValue, v); maxValue = Math.max(maxValue, v);
numValues++; numValues++;
} }
// write absolute min and max for skipper
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, MAXVALUE);
SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(numValues), scratch);
SimpleTextUtil.writeNewline(data);
if (numValues != numDocs) { if (numValues != numDocs) {
minValue = Math.min(minValue, 0); minValue = Math.min(minValue, 0);
maxValue = Math.max(maxValue, 0); maxValue = Math.max(maxValue, 0);
} }
// write our minimum value to the .dat, all entries are deltas from that // write our minimum value to the .dat, all entries are deltas from that
SimpleTextUtil.write(data, MINVALUE); SimpleTextUtil.write(data, ORIGIN);
SimpleTextUtil.write(data, Long.toString(minValue), scratch); SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data); SimpleTextUtil.writeNewline(data);
@ -161,6 +180,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
assert fieldSeen(field.name); assert fieldSeen(field.name);
assert field.getDocValuesType() == DocValuesType.BINARY; assert field.getDocValuesType() == DocValuesType.BINARY;
writeFieldEntry(field, DocValuesType.BINARY);
doAddBinaryField(field, valuesProducer); doAddBinaryField(field, valuesProducer);
} }
@ -168,10 +188,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
throws IOException { throws IOException {
int maxLength = 0; int maxLength = 0;
BinaryDocValues values = valuesProducer.getBinary(field); BinaryDocValues values = valuesProducer.getBinary(field);
int docCount = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
maxLength = Math.max(maxLength, values.binaryValue().toString().length()); maxLength = Math.max(maxLength, values.binaryValue().toString().length());
} }
writeFieldEntry(field, DocValuesType.BINARY);
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
// write maxLength // write maxLength
SimpleTextUtil.write(data, MAXLENGTH); SimpleTextUtil.write(data, MAXLENGTH);
@ -232,6 +257,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert field.getDocValuesType() == DocValuesType.SORTED; assert field.getDocValuesType() == DocValuesType.SORTED;
writeFieldEntry(field, DocValuesType.SORTED); writeFieldEntry(field, DocValuesType.SORTED);
int docCount = 0;
SortedDocValues values = valuesProducer.getSorted(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
}
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
int valueCount = 0; int valueCount = 0;
int maxLength = -1; int maxLength = -1;
TermsEnum terms = valuesProducer.getSorted(field).termsEnum(); TermsEnum terms = valuesProducer.getSorted(field).termsEnum();
@ -301,7 +335,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert valuesSeen == valueCount; assert valuesSeen == valueCount;
SortedDocValues values = valuesProducer.getSorted(field); values = valuesProducer.getSorted(field);
for (int i = 0; i < numDocs; ++i) { for (int i = 0; i < numDocs; ++i) {
if (values.docID() < i) { if (values.docID() < i) {
values.nextDoc(); values.nextDoc();
@ -321,6 +355,28 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
throws IOException { throws IOException {
assert fieldSeen(field.name); assert fieldSeen(field.name);
assert field.getDocValuesType() == DocValuesType.SORTED_NUMERIC; assert field.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
writeFieldEntry(field, DocValuesType.SORTED_NUMERIC);
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0; i < values.docValueCount(); ++i) {
long v = values.nextValue();
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
}
// write absolute min and max for skipper
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, MAXVALUE);
SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
SimpleTextUtil.writeNewline(data);
doAddBinaryField( doAddBinaryField(
field, field,
new EmptyDocValuesProducer() { new EmptyDocValuesProducer() {
@ -395,6 +451,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert field.getDocValuesType() == DocValuesType.SORTED_SET; assert field.getDocValuesType() == DocValuesType.SORTED_SET;
writeFieldEntry(field, DocValuesType.SORTED_SET); writeFieldEntry(field, DocValuesType.SORTED_SET);
int docCount = 0;
SortedSetDocValues values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
}
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
long valueCount = 0; long valueCount = 0;
int maxLength = 0; int maxLength = 0;
TermsEnum terms = valuesProducer.getSortedSet(field).termsEnum(); TermsEnum terms = valuesProducer.getSortedSet(field).termsEnum();
@ -430,7 +495,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
// length // length
int maxOrdListLength = 0; int maxOrdListLength = 0;
StringBuilder sb2 = new StringBuilder(); StringBuilder sb2 = new StringBuilder();
SortedSetDocValues values = valuesProducer.getSortedSet(field); values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
sb2.setLength(0); sb2.setLength(0);
for (int i = 0; i < values.docValueCount(); i++) { for (int i = 0; i < values.docValueCount(); i++) {

View File

@ -60,6 +60,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
static final BytesRef PAYLOADS = new BytesRef(" payloads "); static final BytesRef PAYLOADS = new BytesRef(" payloads ");
static final BytesRef NORMS = new BytesRef(" norms "); static final BytesRef NORMS = new BytesRef(" norms ");
static final BytesRef DOCVALUES = new BytesRef(" doc values "); static final BytesRef DOCVALUES = new BytesRef(" doc values ");
static final BytesRef DOCVALUES_SKIP_INDEX = new BytesRef(" doc values skip index");
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen "); static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options "); static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
static final BytesRef NUM_ATTS = new BytesRef(" attributes "); static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
@ -122,6 +123,11 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
String dvType = readString(DOCVALUES.length, scratch); String dvType = readString(DOCVALUES.length, scratch);
final DocValuesType docValuesType = docValuesType(dvType); final DocValuesType docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
boolean docValueSkipper =
Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
SimpleTextUtil.readLine(input, scratch); SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN); assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch)); final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
@ -184,6 +190,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
storePayloads, storePayloads,
indexOptions, indexOptions,
docValuesType, docValuesType,
docValueSkipper,
dvGen, dvGen,
Collections.unmodifiableMap(atts), Collections.unmodifiableMap(atts),
dimensionalCount, dimensionalCount,
@ -276,6 +283,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch); SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out); SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN); SimpleTextUtil.write(out, DOCVALUES_GEN);
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch); SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
SimpleTextUtil.writeNewline(out); SimpleTextUtil.writeNewline(out);

View File

@ -37,6 +37,12 @@ import org.apache.lucene.util.BytesRef;
public class TestSimpleTextDocValuesFormat extends BaseDocValuesFormatTestCase { public class TestSimpleTextDocValuesFormat extends BaseDocValuesFormatTestCase {
private final Codec codec = new SimpleTextCodec(); private final Codec codec = new SimpleTextCodec();
/**
 * Tells the base test case that this format's skipper does not report exact doc ID bounds.
 */
@Override
protected boolean skipperHasAccurateDocBounds() {
// The SimpleText skipper reports coarse, segment-wide doc bounds (minDocID = 0 and a maxDocID
// derived from maxDoc) instead of the first/last document that actually has a value.
return false;
}
@Override @Override
protected Codec getCodec() { protected Codec getCodec() {
return codec; return codec;

View File

@ -111,6 +111,7 @@ public class TestBlockWriter extends LuceneTestCase {
true, true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE, DocValuesType.NONE,
false,
-1, -1,
Collections.emptyMap(), Collections.emptyMap(),
0, 0,

View File

@ -198,6 +198,7 @@ public class TestSTBlockReader extends LuceneTestCase {
true, true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE, DocValuesType.NONE,
false,
-1, -1,
Collections.emptyMap(), Collections.emptyMap(),
0, 0,

View File

@ -76,7 +76,8 @@ module org.apache.lucene.core {
org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat, org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat,
org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat,
org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat;
provides org.apache.lucene.codecs.PostingsFormat with provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat; org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
provides org.apache.lucene.index.SortFieldProvider with provides org.apache.lucene.index.SortFieldProvider with

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
@ -73,6 +74,13 @@ public abstract class DocValuesProducer implements Closeable {
*/ */
public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException; public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;
/**
* Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread. The return value is undefined if {@link
* FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
*/
public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;
/** /**
* Checks consistency of this producer * Checks consistency of this producer
* *

View File

@ -18,6 +18,7 @@
package org.apache.lucene.codecs.hnsw; package org.apache.lucene.codecs.hnsw;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
@ -27,14 +28,23 @@ import org.apache.lucene.index.SegmentWriteState;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public abstract class FlatVectorsFormat { public abstract class FlatVectorsFormat extends KnnVectorsFormat {
/** Sole constructor */ /** Sole constructor */
protected FlatVectorsFormat() {} protected FlatVectorsFormat(String name) {
super(name);
}
/** Returns a {@link FlatVectorsWriter} to write the vectors to the index. */ /** Returns a {@link FlatVectorsWriter} to write the vectors to the index. */
@Override
public abstract FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException; public abstract FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException;
/** Returns a {@link KnnVectorsReader} to read the vectors from the index. */ /** Returns a {@link KnnVectorsReader} to read the vectors from the index. */
@Override
public abstract FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException; public abstract FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException;
/**
 * Returns a fixed limit of 1024 for every field; {@code fieldName} is not consulted.
 */
@Override
public int getMaxDimensions(String fieldName) {
return 1024;
}
} }

View File

@ -17,12 +17,11 @@
package org.apache.lucene.codecs.hnsw; package org.apache.lucene.codecs.hnsw;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorer;
/** /**
@ -39,7 +38,7 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public abstract class FlatVectorsReader implements Closeable, Accountable { public abstract class FlatVectorsReader extends KnnVectorsReader implements Accountable {
/** Scorer for flat vectors */ /** Scorer for flat vectors */
protected final FlatVectorsScorer vectorScorer; protected final FlatVectorsScorer vectorScorer;
@ -56,6 +55,18 @@ public abstract class FlatVectorsReader implements Closeable, Accountable {
return vectorScorer; return vectorScorer;
} }
/**
 * Intentionally a no-op for float vectors: nothing is collected into {@code knnCollector}, so
 * callers get no search results from this reader.
 */
@Override
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs)
throws IOException {
// don't scan stored field data. If we didn't index it, produce no search results
}
/**
 * Intentionally a no-op for byte vectors: nothing is collected into {@code knnCollector}, so
 * callers get no search results from this reader.
 */
@Override
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs)
throws IOException {
// don't scan stored field data. If we didn't index it, produce no search results
}
/** /**
* Returns a {@link RandomVectorScorer} for the given field and target vector. * Returns a {@link RandomVectorScorer} for the given field and target vector.
* *
@ -77,28 +88,4 @@ public abstract class FlatVectorsReader implements Closeable, Accountable {
*/ */
public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] target) public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] target)
throws IOException; throws IOException;
/**
* Checks consistency of this reader.
*
* <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
* against large data files.
*
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
/**
* Returns the {@link FloatVectorValues} for the given {@code field}. The behavior is undefined if
* the given field doesn't have KNN vectors enabled on its {@link FieldInfo}. The return value is
* never {@code null}.
*/
public abstract FloatVectorValues getFloatVectorValues(String field) throws IOException;
/**
* Returns the {@link ByteVectorValues} for the given {@code field}. The behavior is undefined if
* the given field doesn't have KNN vectors enabled on its {@link FieldInfo}. The return value is
* never {@code null}.
*/
public abstract ByteVectorValues getByteVectorValues(String field) throws IOException;
} }

View File

@ -17,14 +17,11 @@
package org.apache.lucene.codecs.hnsw; package org.apache.lucene.codecs.hnsw;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnFieldVectorsWriter;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
/** /**
@ -32,7 +29,7 @@ import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public abstract class FlatVectorsWriter implements Accountable, Closeable { public abstract class FlatVectorsWriter extends KnnVectorsWriter {
/** Scorer for flat vectors */ /** Scorer for flat vectors */
protected final FlatVectorsScorer vectorsScorer; protected final FlatVectorsScorer vectorsScorer;
@ -60,6 +57,11 @@ public abstract class FlatVectorsWriter implements Accountable, Closeable {
public abstract FlatFieldVectorsWriter<?> addField( public abstract FlatFieldVectorsWriter<?> addField(
FieldInfo fieldInfo, KnnFieldVectorsWriter<?> indexWriter) throws IOException; FieldInfo fieldInfo, KnnFieldVectorsWriter<?> indexWriter) throws IOException;
/**
 * Adds a field without an accompanying index writer by delegating to {@link
 * #addField(FieldInfo, KnnFieldVectorsWriter)} with a {@code null} {@code indexWriter}.
 */
@Override
public FlatFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
return addField(fieldInfo, null);
}
/** /**
* Write the field for merging, providing a scorer over the newly merged flat vectors. This way * Write the field for merging, providing a scorer over the newly merged flat vectors. This way
* any additional merging logic can be implemented by the user of this class. * any additional merging logic can be implemented by the user of this class.
@ -72,15 +74,4 @@ public abstract class FlatVectorsWriter implements Accountable, Closeable {
*/ */
public abstract CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( public abstract CloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
FieldInfo fieldInfo, MergeState mergeState) throws IOException; FieldInfo fieldInfo, MergeState mergeState) throws IOException;
/** Write field for merging */
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
IOUtils.close(mergeOneFieldToIndex(fieldInfo, mergeState));
}
/** Called once at the end before close */
public abstract void finish() throws IOException;
/** Flush all buffered data on disk * */
public abstract void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException;
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene90;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT; import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE; import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_SIZE;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
@ -43,6 +44,7 @@ import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput; import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -129,16 +131,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
throws IOException { throws IOException {
meta.writeInt(field.number); meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.NUMERIC); meta.writeByte(Lucene90DocValuesFormat.NUMERIC);
DocValuesProducer producer =
writeValues(
field,
new EmptyDocValuesProducer() { new EmptyDocValuesProducer() {
@Override @Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return DocValues.singleton(valuesProducer.getNumeric(field)); return DocValues.singleton(valuesProducer.getNumeric(field));
} }
}, };
false); if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, producer);
}
writeValues(field, producer, false);
} }
private static class MinMaxTracker { private static class MinMaxTracker {
@ -183,6 +186,84 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
} }
} }
/**
 * Collects the statistics of one skip-index interval: the first and last doc ID seen, the number
 * of documents, and the smallest and largest value.
 */
private static class SkipAccumulator {
  int minDocID;
  int maxDocID;
  int docCount;
  long minValue;
  long maxValue;

  /** Starts a new interval whose first document is {@code docID}. */
  SkipAccumulator(int docID) {
    this.minDocID = docID;
    this.docCount = 0;
    // Start from the extremes so that the first accumulated value replaces both bounds.
    this.minValue = Long.MAX_VALUE;
    this.maxValue = Long.MIN_VALUE;
  }

  /** Widens the value range of this interval to include {@code value}. */
  void accumulate(long value) {
    if (value < minValue) {
      minValue = value;
    }
    if (value > maxValue) {
      maxValue = value;
    }
  }

  /** Records {@code docID} as the latest document of this interval. */
  void nextDoc(int docID) {
    docCount++;
    maxDocID = docID;
  }

  /**
   * Serializes this interval as: max doc ID, min doc ID, max value, min value, doc count.
   *
   * @throws IOException if writing to {@code output} fails
   */
  void writeTo(DataOutput output) throws IOException {
    output.writeInt(maxDocID);
    output.writeInt(minDocID);
    output.writeLong(maxValue);
    output.writeLong(minValue);
    output.writeInt(docCount);
  }
}
/**
 * Writes the skip index of {@code field}. One {@link SkipAccumulator} entry is written to
 * {@code data} for every run of up to {@code SKIP_INDEX_INTERVAL_SIZE} documents, then the start
 * offset, length and field-wide statistics (max value, min value, doc count, last doc ID) are
 * recorded in {@code meta}.
 *
 * @throws IOException if reading the values or writing the index fails
 */
private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
    throws IOException {
  assert field.hasDocValuesSkipIndex();
  // TODO: revisit the on-disk compression once we introduce levels
  final long startPointer = data.getFilePointer();
  final SortedNumericDocValues docValues = valuesProducer.getSortedNumeric(field);

  long overallMax = Long.MIN_VALUE;
  long overallMin = Long.MAX_VALUE;
  int overallDocCount = 0;
  int lastDocId = -1;

  SkipAccumulator interval = null;
  int docsInInterval = 0;
  int doc = docValues.nextDoc();
  while (doc != DocIdSetIterator.NO_MORE_DOCS) {
    if (docsInInterval == 0) {
      // First document of a fresh interval.
      interval = new SkipAccumulator(doc);
    }
    interval.nextDoc(doc);
    final int valueCount = docValues.docValueCount();
    for (int i = 0; i < valueCount; ++i) {
      interval.accumulate(docValues.nextValue());
    }
    docsInInterval++;
    if (docsInInterval == SKIP_INDEX_INTERVAL_SIZE) {
      // Interval is full: fold it into the field-wide statistics and flush it.
      overallMax = Math.max(overallMax, interval.maxValue);
      overallMin = Math.min(overallMin, interval.minValue);
      overallDocCount += interval.docCount;
      lastDocId = interval.maxDocID;
      interval.writeTo(data);
      docsInInterval = 0;
    }
    doc = docValues.nextDoc();
  }

  if (docsInInterval > 0) {
    // Flush the trailing, partially filled interval.
    overallMax = Math.max(overallMax, interval.maxValue);
    overallMin = Math.min(overallMin, interval.minValue);
    overallDocCount += interval.docCount;
    lastDocId = interval.maxDocID;
    interval.writeTo(data);
  }

  final long length = data.getFilePointer() - startPointer;
  meta.writeLong(startPointer); // record the start in meta
  meta.writeLong(length); // record the length
  meta.writeLong(overallMax);
  meta.writeLong(overallMin);
  meta.writeInt(overallDocCount);
  meta.writeInt(lastDocId);
}
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
throws IOException { throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
@ -489,13 +570,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number); meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.SORTED); meta.writeByte(Lucene90DocValuesFormat.SORTED);
doAddSortedField(field, valuesProducer); doAddSortedField(field, valuesProducer, false);
} }
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) private void doAddSortedField(
throws IOException { FieldInfo field, DocValuesProducer valuesProducer, boolean addTypeByte) throws IOException {
writeValues( DocValuesProducer producer =
field,
new EmptyDocValuesProducer() { new EmptyDocValuesProducer() {
@Override @Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
@ -534,8 +614,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}; };
return DocValues.singleton(sortedOrds); return DocValues.singleton(sortedOrds);
} }
}, };
true); if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, producer);
}
if (addTypeByte) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
}
writeValues(field, producer, true);
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
} }
@ -702,6 +788,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void doAddSortedNumericField( private void doAddSortedNumericField(
FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException { FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, valuesProducer);
}
if (ords) {
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
}
long[] stats = writeValues(field, valuesProducer, ords); long[] stats = writeValues(field, valuesProducer, ords);
int numDocsWithField = Math.toIntExact(stats[0]); int numDocsWithField = Math.toIntExact(stats[0]);
long numValues = stats[1]; long numValues = stats[1];
@ -753,7 +845,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
meta.writeByte(Lucene90DocValuesFormat.SORTED_SET); meta.writeByte(Lucene90DocValuesFormat.SORTED_SET);
if (isSingleValued(valuesProducer.getSortedSet(field))) { if (isSingleValued(valuesProducer.getSortedSet(field))) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
doAddSortedField( doAddSortedField(
field, field,
new EmptyDocValuesProducer() { new EmptyDocValuesProducer() {
@ -762,10 +854,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
return SortedSetSelector.wrap( return SortedSetSelector.wrap(
valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN); valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
} }
}); },
true);
return; return;
} }
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
doAddSortedNumericField( doAddSortedNumericField(
field, field,

View File

@ -181,4 +181,7 @@ public final class Lucene90DocValuesFormat extends DocValuesFormat {
static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10; static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10;
static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT; static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1; static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;
static final int SKIP_INDEX_INTERVAL_SHIFT = 12;
static final int SKIP_INDEX_INTERVAL_SIZE = 1 << SKIP_INDEX_INTERVAL_SHIFT;
} }

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.ImpactsEnum;
@ -39,6 +40,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
@ -59,6 +61,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
private final Map<String, SortedEntry> sorted; private final Map<String, SortedEntry> sorted;
private final Map<String, SortedSetEntry> sortedSets; private final Map<String, SortedSetEntry> sortedSets;
private final Map<String, SortedNumericEntry> sortedNumerics; private final Map<String, SortedNumericEntry> sortedNumerics;
private final Map<String, DocValuesSkipperEntry> skippers;
private final IndexInput data; private final IndexInput data;
private final int maxDoc; private final int maxDoc;
private int version = -1; private int version = -1;
@ -80,6 +83,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
sorted = new HashMap<>(); sorted = new HashMap<>();
sortedSets = new HashMap<>(); sortedSets = new HashMap<>();
sortedNumerics = new HashMap<>(); sortedNumerics = new HashMap<>();
skippers = new HashMap<>();
merging = false; merging = false;
// read in the entries from the metadata file. // read in the entries from the metadata file.
@ -147,6 +151,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
Map<String, SortedEntry> sorted, Map<String, SortedEntry> sorted,
Map<String, SortedSetEntry> sortedSets, Map<String, SortedSetEntry> sortedSets,
Map<String, SortedNumericEntry> sortedNumerics, Map<String, SortedNumericEntry> sortedNumerics,
Map<String, DocValuesSkipperEntry> skippers,
IndexInput data, IndexInput data,
int maxDoc, int maxDoc,
int version, int version,
@ -156,6 +161,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
this.sorted = sorted; this.sorted = sorted;
this.sortedSets = sortedSets; this.sortedSets = sortedSets;
this.sortedNumerics = sortedNumerics; this.sortedNumerics = sortedNumerics;
this.skippers = skippers;
this.data = data.clone(); this.data = data.clone();
this.maxDoc = maxDoc; this.maxDoc = maxDoc;
this.version = version; this.version = version;
@ -165,7 +171,16 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override @Override
public DocValuesProducer getMergeInstance() { public DocValuesProducer getMergeInstance() {
return new Lucene90DocValuesProducer( return new Lucene90DocValuesProducer(
numerics, binaries, sorted, sortedSets, sortedNumerics, data, maxDoc, version, true); numerics,
binaries,
sorted,
sortedSets,
sortedNumerics,
skippers,
data,
maxDoc,
version,
true);
} }
private void readFields(IndexInput meta, FieldInfos infos) throws IOException { private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
@ -175,6 +190,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
} }
byte type = meta.readByte(); byte type = meta.readByte();
if (info.hasDocValuesSkipIndex()) {
skippers.put(info.name, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) { if (type == Lucene90DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta)); numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene90DocValuesFormat.BINARY) { } else if (type == Lucene90DocValuesFormat.BINARY) {
@ -197,6 +215,17 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
return entry; return entry;
} }
/**
 * Reads the skip-index metadata of a single field from {@code meta}.
 *
 * <p>The values are consumed in the order they appear in the stream: two longs locating the skip
 * data (later used to slice the data file), the maximum value, the minimum value, the number of
 * documents and the largest doc ID.
 *
 * @param meta metadata input positioned at the start of the skip-index entry
 * @return the decoded entry
 * @throws IOException if reading from {@code meta} fails
 */
private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IOException {
  final long sliceStart = meta.readLong();
  final long sliceLength = meta.readLong();
  // The maximum is stored before the minimum, while the entry takes (min, max): keep the read
  // order as-is and swap only when building the record.
  final long globalMax = meta.readLong();
  final long globalMin = meta.readLong();
  final int totalDocs = meta.readInt();
  final int lastDocId = meta.readInt();
  return new DocValuesSkipperEntry(
      sliceStart, sliceLength, globalMin, globalMax, totalDocs, lastDocId);
}
private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException { private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException {
entry.docsWithFieldOffset = meta.readLong(); entry.docsWithFieldOffset = meta.readLong();
entry.docsWithFieldLength = meta.readLong(); entry.docsWithFieldLength = meta.readLong();
@ -326,6 +355,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
data.close(); data.close();
} }
/**
 * Per-field skip-index metadata.
 *
 * <p>{@code offset} and {@code length} locate the field's skip data inside the data file (they are
 * passed to {@code data.slice} when a skipper is created). {@code minValue}, {@code maxValue} and
 * {@code docCount} are what the skipper reports from its no-argument accessors, and {@code
 * maxDocId} is the doc ID past which the skipper reports itself as exhausted.
 */
private record DocValuesSkipperEntry(
long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {}
private static class NumericEntry { private static class NumericEntry {
long[] table; long[] table;
int blockShift; int blockShift;
@ -1749,4 +1781,88 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
return mul * values.get(index & mask) + delta; return mul * values.get(index & mask) + delta;
} }
} }
/**
 * Returns a single-level {@link DocValuesSkipper} that reads the skip data of {@code field}
 * sequentially from a slice of the data file.
 */
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
  // NOTE(review): the entry is dereferenced without a null check; callers presumably only ask for
  // fields that were written with a skip index -- confirm.
  final DocValuesSkipperEntry entry = skippers.get(field.name);
  final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
  // Only the first page is prefetched explicitly; later pages are expected to be covered by
  // read-ahead.
  if (input.length() > 0) {
    input.prefetch(0, 1);
  }

  return new DocValuesSkipper() {
    // State of the block the skipper is currently positioned on.
    long blockMinValue, blockMaxValue;
    int blockDocCount;
    int blockMinDocID = -1;
    int blockMaxDocID = -1;

    @Override
    public void advance(int target) throws IOException {
      if (target > entry.maxDocId) {
        // Nothing at or after target: mark the skipper as exhausted.
        blockMinDocID = DocIdSetIterator.NO_MORE_DOCS;
        blockMaxDocID = DocIdSetIterator.NO_MORE_DOCS;
        return;
      }
      // Every block starts with its max doc ID. Blocks ending before target are skipped by
      // jumping over the rest of their entry: min doc ID, max value, min value, doc count.
      blockMaxDocID = input.readInt();
      while (blockMaxDocID < target) {
        input.skipBytes(Integer.BYTES + Long.BYTES + Long.BYTES + Integer.BYTES);
        blockMaxDocID = input.readInt();
      }
      blockMinDocID = input.readInt();
      blockMaxValue = input.readLong();
      blockMinValue = input.readLong();
      blockDocCount = input.readInt();
    }

    @Override
    public int numLevels() {
      return 1;
    }

    // The level argument is ignored below since there is only one level.

    @Override
    public int minDocID(int level) {
      return blockMinDocID;
    }

    @Override
    public int maxDocID(int level) {
      return blockMaxDocID;
    }

    @Override
    public long minValue(int level) {
      return blockMinValue;
    }

    @Override
    public long maxValue(int level) {
      return blockMaxValue;
    }

    @Override
    public int docCount(int level) {
      return blockDocCount;
    }

    // The no-argument accessors report the field-wide values read from the metadata.

    @Override
    public long minValue() {
      return entry.minValue;
    }

    @Override
    public long maxValue() {
      return entry.maxValue;
    }

    @Override
    public int docCount() {
      return entry.docCount;
    }
  };
}
} }

View File

@ -163,8 +163,10 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0; boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
boolean isParentField = boolean isParentField =
format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false; format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
boolean hasDocValuesSkipIndex =
format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;
if ((bits & 0xE0) != 0) { if ((bits & 0xC0) != 0) {
throw new CorruptIndexException( throw new CorruptIndexException(
"unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input); "unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input);
} }
@ -173,6 +175,13 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
"parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"", "parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"",
input); input);
} }
if (format < FORMAT_DOCVALUE_SKIPPER && (bits & DOCVALUES_SKIPPER) != 0) {
throw new CorruptIndexException(
"doc values skipper bit is set but shouldn't \""
+ Integer.toBinaryString(bits)
+ "\"",
input);
}
final IndexOptions indexOptions = getIndexOptions(input, input.readByte()); final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
@ -208,6 +217,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
storePayloads, storePayloads,
indexOptions, indexOptions,
docValuesType, docValuesType,
hasDocValuesSkipIndex,
dvGen, dvGen,
attributes, attributes,
pointDataDimensionCount, pointDataDimensionCount,
@ -394,6 +404,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
if (fi.hasPayloads()) bits |= STORE_PAYLOADS; if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD; if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
if (fi.isParentField()) bits |= PARENT_FIELD_FIELD; if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
output.writeByte(bits); output.writeByte(bits);
output.writeByte(indexOptionsByte(fi.getIndexOptions())); output.writeByte(indexOptionsByte(fi.getIndexOptions()));
@ -423,7 +434,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
static final int FORMAT_START = 0; static final int FORMAT_START = 0;
// this doesn't actually change the file format but uses up one more bit an existing bit pattern // this doesn't actually change the file format but uses up one more bit an existing bit pattern
static final int FORMAT_PARENT_FIELD = 1; static final int FORMAT_PARENT_FIELD = 1;
static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD; static final int FORMAT_DOCVALUE_SKIPPER = 2;
static final int FORMAT_CURRENT = FORMAT_DOCVALUE_SKIPPER;
// Field flags // Field flags
static final byte STORE_TERMVECTOR = 0x1; static final byte STORE_TERMVECTOR = 0x1;
@ -431,4 +443,5 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
static final byte STORE_PAYLOADS = 0x4; static final byte STORE_PAYLOADS = 0x4;
static final byte SOFT_DELETES_FIELD = 0x8; static final byte SOFT_DELETES_FIELD = 0x8;
static final byte PARENT_FIELD_FIELD = 0x10; static final byte PARENT_FIELD_FIELD = 0x10;
static final byte DOCVALUES_SKIPPER = 0x20;
} }

View File

@ -67,6 +67,7 @@ import org.apache.lucene.store.IndexOutput;
*/ */
public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat { public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat {
static final String NAME = "Lucene99FlatVectorsFormat";
static final String META_CODEC_NAME = "Lucene99FlatVectorsFormatMeta"; static final String META_CODEC_NAME = "Lucene99FlatVectorsFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene99FlatVectorsFormatData"; static final String VECTOR_DATA_CODEC_NAME = "Lucene99FlatVectorsFormatData";
static final String META_EXTENSION = "vemf"; static final String META_EXTENSION = "vemf";
@ -80,6 +81,7 @@ public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat {
/** Constructs a format */ /** Constructs a format */
public Lucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) { public Lucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) {
super(NAME);
this.vectorsScorer = vectorsScorer; this.vectorsScorer = vectorsScorer;
} }

View File

@ -119,6 +119,11 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
return newField; return newField;
} }
// Convenience overload: delegates to the two-argument addField, passing null as the second
// argument. NOTE(review): what a null second argument means is not visible here -- confirm
// against the two-argument overload.
@Override
public FlatFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
return addField(fieldInfo, null);
}
@Override @Override
public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
for (FieldWriter<?> field : fields) { for (FieldWriter<?> field : fields) {

View File

@ -89,6 +89,7 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
*/ */
public Lucene99ScalarQuantizedVectorsFormat( public Lucene99ScalarQuantizedVectorsFormat(
Float confidenceInterval, int bits, boolean compress) { Float confidenceInterval, int bits, boolean compress) {
super(NAME);
if (confidenceInterval != null if (confidenceInterval != null
&& confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL && confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL
&& (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL && (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL

View File

@ -28,6 +28,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MergeState;
@ -346,6 +347,12 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
return producer == null ? null : producer.getSortedSet(field); return producer == null ? null : producer.getSortedSet(field);
} }
/** Delegates to the producer registered for the field, or returns null when there is none. */
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
  final DocValuesProducer delegate = fields.get(field.name);
  if (delegate == null) {
    return null;
  }
  return delegate.getSkipper(field);
}
@Override @Override
public void close() throws IOException { public void close() throws IOException {
IOUtils.close(formats.values()); IOUtils.close(formats.values());

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
/**
 * Decorates the {@link TwoPhaseIterator} of a doc-values range query with a {@link
 * DocValuesSkipper}, so that whole blocks of doc IDs can be accepted or rejected from their
 * min/max values without checking every document.
 */
final class DocValuesRangeIterator extends TwoPhaseIterator {

  enum Match {
    /** None of the documents in the range match */
    NO,
    /** Document values need to be checked to verify matches */
    MAYBE,
    /** All documents in the range that have a value match */
    IF_DOC_HAS_VALUE,
    /** All docs in the range match */
    YES;
  }

  private final Approximation approximation;
  private final TwoPhaseIterator innerTwoPhase;

  DocValuesRangeIterator(
      TwoPhaseIterator twoPhase, DocValuesSkipper skipper, long lowerValue, long upperValue) {
    super(new Approximation(twoPhase.approximation(), skipper, lowerValue, upperValue));
    this.approximation = (Approximation) approximation();
    this.innerTwoPhase = twoPhase;
  }

  /** Approximation that consults the skipper before falling back to the wrapped iterator. */
  static class Approximation extends DocIdSetIterator {

    private final DocIdSetIterator innerApproximation;
    private final DocValuesSkipper skipper;
    private final long lowerValue;
    private final long upperValue;

    private int doc = -1;

    // `match` is the decision that holds for every doc ID from the current one up to and
    // including `upTo`.
    Match match = Match.MAYBE;
    int upTo = -1;

    Approximation(
        DocIdSetIterator innerApproximation,
        DocValuesSkipper skipper,
        long lowerValue,
        long upperValue) {
      this.innerApproximation = innerApproximation;
      this.skipper = skipper;
      this.lowerValue = lowerValue;
      this.upperValue = upperValue;
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(docID() + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      while (true) {
        if (target > upTo) {
          // The current decision no longer covers target: move the skipper forward.
          skipper.advance(target);
          // The skipper may land on a block that starts after target, when target has no value
          // and sits between two blocks.
          target = Math.max(target, skipper.minDocID(0));
          if (target == NO_MORE_DOCS) {
            doc = NO_MORE_DOCS;
            return doc;
          }
          upTo = skipper.maxDocID(0);
          match = match(0);

          // A definite decision may also hold on higher levels, i.e. on a wider doc ID range.
          for (int level = 1;
              match != Match.MAYBE && level < skipper.numLevels() && match == match(level);
              level++) {
            upTo = skipper.maxDocID(level);
          }
        }

        switch (match) {
          case YES -> {
            doc = target;
            return doc;
          }
          case MAYBE, IF_DOC_HAS_VALUE -> {
            if (target > innerApproximation.docID()) {
              target = innerApproximation.advance(target);
            }
            if (target <= upTo) {
              doc = target;
              return doc;
            }
            // target moved past upTo; returning it would break the `doc <= upTo` invariant, so
            // loop again to advance the skipper first.
          }
          case NO -> {
            if (upTo == NO_MORE_DOCS) {
              doc = NO_MORE_DOCS;
              return doc;
            }
            target = upTo + 1;
          }
          default -> throw new AssertionError("Unknown enum constant: " + match);
        }
      }
    }

    @Override
    public long cost() {
      return innerApproximation.cost();
    }

    /** Compares the value range of the skipper's block at {@code level} with the query range. */
    private Match match(int level) {
      final long blockMin = skipper.minValue(level);
      final long blockMax = skipper.maxValue(level);
      if (blockMin > upperValue || blockMax < lowerValue) {
        // The block lies entirely outside of the query range.
        return Match.NO;
      }
      if (blockMin < lowerValue || blockMax > upperValue) {
        // The block only partially overlaps the query range.
        return Match.MAYBE;
      }
      // The block lies entirely inside the query range. It is a plain YES only when it is dense,
      // i.e. it holds as many docs as there are doc IDs in its span.
      final int docsWithValue = skipper.docCount(level);
      final int docIdSpan = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
      if (docsWithValue == docIdSpan) {
        return Match.YES;
      }
      return Match.IF_DOC_HAS_VALUE;
    }
  }

  @Override
  public final boolean matches() throws IOException {
    return switch (approximation.match) {
      case YES, IF_DOC_HAS_VALUE -> true;
      case MAYBE -> innerTwoPhase.matches();
      case NO -> throw new IllegalStateException("Unpositioned approximation");
    };
  }

  @Override
  public float matchCost() {
    return innerTwoPhase.matchCost();
  }
}

View File

@ -22,6 +22,7 @@ import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; // javadocs import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues; import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorEncoding;
@ -40,6 +41,7 @@ public class FieldType implements IndexableFieldType {
private IndexOptions indexOptions = IndexOptions.NONE; private IndexOptions indexOptions = IndexOptions.NONE;
private boolean frozen; private boolean frozen;
private DocValuesType docValuesType = DocValuesType.NONE; private DocValuesType docValuesType = DocValuesType.NONE;
private boolean docValuesSkipIndex;
private int dimensionCount; private int dimensionCount;
private int indexDimensionCount; private int indexDimensionCount;
private int dimensionNumBytes; private int dimensionNumBytes;
@ -59,6 +61,7 @@ public class FieldType implements IndexableFieldType {
this.omitNorms = ref.omitNorms(); this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions(); this.indexOptions = ref.indexOptions();
this.docValuesType = ref.docValuesType(); this.docValuesType = ref.docValuesType();
this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
this.dimensionCount = ref.pointDimensionCount(); this.dimensionCount = ref.pointDimensionCount();
this.indexDimensionCount = ref.pointIndexDimensionCount(); this.indexDimensionCount = ref.pointIndexDimensionCount();
this.dimensionNumBytes = ref.pointNumBytes(); this.dimensionNumBytes = ref.pointNumBytes();
@ -504,6 +507,22 @@ public class FieldType implements IndexableFieldType {
docValuesType = type; docValuesType = type;
} }
// Returns the docValuesSkipIndex flag of this type. The field has no initializer, so the flag is
// false unless it was copied from another type or set through setDocValuesSkipIndex.
@Override
public boolean hasDocValuesSkipIndex() {
return docValuesSkipIndex;
}
/**
 * Set whether to enable a skip index for doc values on this field. This is typically useful on
 * fields that are part of the {@link IndexWriterConfig#setIndexSort index sort}, or that
 * correlate with fields that are part of the index sort, so that values can be expected to be
 * clustered in the doc ID space.
 *
 * @param docValuesSkipIndex {@code true} to enable a doc-values skip index, {@code false} to
 *     disable it
 */
public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
// NOTE(review): presumably rejects the change once this type has been frozen -- confirm
// against checkIfFrozen().
checkIfFrozen();
this.docValuesSkipIndex = docValuesSkipIndex;
}
@Override @Override
public int hashCode() { public int hashCode() {
final int prime = 31; final int prime = 31;
@ -512,6 +531,7 @@ public class FieldType implements IndexableFieldType {
result = prime * result + indexDimensionCount; result = prime * result + indexDimensionCount;
result = prime * result + dimensionNumBytes; result = prime * result + dimensionNumBytes;
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode()); result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + Boolean.hashCode(docValuesSkipIndex);
result = prime * result + indexOptions.hashCode(); result = prime * result + indexOptions.hashCode();
result = prime * result + (omitNorms ? 1231 : 1237); result = prime * result + (omitNorms ? 1231 : 1237);
result = prime * result + (storeTermVectorOffsets ? 1231 : 1237); result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);
@ -533,6 +553,7 @@ public class FieldType implements IndexableFieldType {
if (indexDimensionCount != other.indexDimensionCount) return false; if (indexDimensionCount != other.indexDimensionCount) return false;
if (dimensionNumBytes != other.dimensionNumBytes) return false; if (dimensionNumBytes != other.dimensionNumBytes) return false;
if (docValuesType != other.docValuesType) return false; if (docValuesType != other.docValuesType) return false;
if (docValuesSkipIndex != other.docValuesSkipIndex) return false;
if (indexOptions != other.indexOptions) return false; if (indexOptions != other.indexOptions) return false;
if (omitNorms != other.omitNorms) return false; if (omitNorms != other.omitNorms) return false;
if (storeTermVectorOffsets != other.storeTermVectorOffsets) return false; if (storeTermVectorOffsets != other.storeTermVectorOffsets) return false;

View File

@ -35,9 +35,27 @@ public class NumericDocValuesField extends Field {
/** Type for numeric DocValues. */ /** Type for numeric DocValues. */
public static final FieldType TYPE = new FieldType(); public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static { static {
TYPE.setDocValuesType(DocValuesType.NUMERIC); TYPE.setDocValuesType(DocValuesType.NUMERIC);
TYPE.freeze(); TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
 * Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
 * creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
 *
 * @param name field name
 * @param value 64-bit long value
 * @return a field that uses the skip-index-enabled field type
 * @throws IllegalArgumentException if the field name is null
 */
public static NumericDocValuesField indexedField(String name, long value) {
return new NumericDocValuesField(name, value, INDEXED_TYPE);
}
/** /**
@ -60,7 +78,11 @@ public class NumericDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null * @throws IllegalArgumentException if the field name is null
*/ */
public NumericDocValuesField(String name, Long value) { public NumericDocValuesField(String name, Long value) {
super(name, TYPE); this(name, value, TYPE);
}
private NumericDocValuesField(String name, Long value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value; fieldsData = value;
} }

View File

@ -41,9 +41,27 @@ public class SortedDocValuesField extends Field {
/** Type for sorted bytes DocValues */ /** Type for sorted bytes DocValues */
public static final FieldType TYPE = new FieldType(); public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static { static {
TYPE.setDocValuesType(DocValuesType.SORTED); TYPE.setDocValuesType(DocValuesType.SORTED);
TYPE.freeze(); TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
 * Creates a new {@link SortedDocValuesField} with the specified binary content that also creates
 * a {@link FieldType#hasDocValuesSkipIndex() skip index}.
 *
 * @param name field name
 * @param bytes binary content
 * @return a field that uses the skip-index-enabled field type
 * @throws IllegalArgumentException if the field name is null
 */
public static SortedDocValuesField indexedField(String name, BytesRef bytes) {
return new SortedDocValuesField(name, bytes, INDEXED_TYPE);
}
/** /**
@ -54,7 +72,11 @@ public class SortedDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null * @throws IllegalArgumentException if the field name is null
*/ */
public SortedDocValuesField(String name, BytesRef bytes) { public SortedDocValuesField(String name, BytesRef bytes) {
super(name, TYPE); this(name, bytes, TYPE);
}
private SortedDocValuesField(String name, BytesRef bytes, FieldType fieldType) {
super(name, fieldType);
fieldsData = bytes; fieldsData = bytes;
} }

View File

@ -43,9 +43,27 @@ public class SortedNumericDocValuesField extends Field {
/** Type for sorted numeric DocValues. */ /** Type for sorted numeric DocValues. */
public static final FieldType TYPE = new FieldType(); public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static { static {
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze(); TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
 * Creates a new {@link SortedNumericDocValuesField} with the specified 64-bit long value that
 * also creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
 *
 * @param name field name
 * @param value 64-bit long value
 * @return a field that uses the skip-index-enabled field type
 * @throws IllegalArgumentException if the field name is null
 */
public static SortedNumericDocValuesField indexedField(String name, long value) {
return new SortedNumericDocValuesField(name, value, INDEXED_TYPE);
}
/** /**
@ -56,8 +74,12 @@ public class SortedNumericDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null * @throws IllegalArgumentException if the field name is null
*/ */
public SortedNumericDocValuesField(String name, long value) { public SortedNumericDocValuesField(String name, long value) {
super(name, TYPE); this(name, Long.valueOf(value), TYPE);
fieldsData = Long.valueOf(value); }
private SortedNumericDocValuesField(String name, Long value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
} }
/** /**

View File

@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException; import java.io.IOException;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
@ -109,9 +110,17 @@ final class SortedNumericDocValuesRangeQuery extends Query {
if (context.reader().getFieldInfos().fieldInfo(field) == null) { if (context.reader().getFieldInfos().fieldInfo(field) == null) {
return null; return null;
} }
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
if (skipper != null) {
if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
return null;
}
}
SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field); SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
final NumericDocValues singleton = DocValues.unwrapSingleton(values); final NumericDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator; TwoPhaseIterator iterator;
if (singleton != null) { if (singleton != null) {
iterator = iterator =
new TwoPhaseIterator(singleton) { new TwoPhaseIterator(singleton) {
@ -149,6 +158,9 @@ final class SortedNumericDocValuesRangeQuery extends Query {
} }
}; };
} }
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue);
}
final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator); final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
return new DefaultScorerSupplier(scorer); return new DefaultScorerSupplier(scorer);
} }

Some files were not shown because too many files have changed in this diff Show More