Merge branch 'main' into check_liveDoc_one_time

zhouhui 2024-06-20 10:03:12 +08:00
commit 036275df66
171 changed files with 8912 additions and 4743 deletions

.gitattributes

@ -1,6 +1,6 @@
# Ignore all differences in line endings for the lock file.
versions.lock text eol=lf
versions.props text eol=lf
versions.lock text eol=lf
versions.toml text eol=lf
# Gradle files are always in LF.
*.gradle text eol=lf


@ -58,7 +58,7 @@ In case your contribution fixes a bug, please create a new test case that fails
### IDE support
- *IntelliJ* - IntelliJ idea can import and build gradle-based projects out of the box.
- *IntelliJ* - IntelliJ IDEA can import and build gradle-based projects out of the box. It will default to running tests by calling the gradle wrapper, and while this works, it can be a bit slow. If instead you configure IntelliJ to use its own built-in test runner by (in the 2024 version) navigating to the settings for Build, Execution, Deployment/Build Tools/Gradle (under the File/Settings menu on some platforms) and selecting "Build and Run using: IntelliJ IDEA" and "Run Tests using: IntelliJ IDEA", then some tests will run faster. However, some other tests will not run under this configuration.
- *Eclipse* - Basic support ([help/IDEs.txt](https://github.com/apache/lucene/blob/main/help/IDEs.txt#L7)).
- *Netbeans* - Not tested.


@ -31,8 +31,8 @@ comprehensive documentation, visit:
- Latest Releases: <https://lucene.apache.org/core/documentation.html>
- Nightly: <https://ci-builds.apache.org/job/Lucene/job/Lucene-Artifacts-main/javadoc/>
- New contributors should start by reading [Contributing Guide](./CONTRIBUTING.md)
- Build System Documentation: [help/](./help/)
- Developer Documentation: [dev-docs/](./dev-docs/)
- Migration Guide: [lucene/MIGRATE.md](./lucene/MIGRATE.md)
## Building
@ -45,8 +45,6 @@ comprehensive documentation, visit:
We'll assume that you know how to get and set up the JDK - if you don't, then we suggest starting at https://jdk.java.net/ and learning more about Java, before returning to this README.
See [Contributing Guide](./CONTRIBUTING.md) for details.
## Contributing
Bug fixes, improvements and new features are always welcome!
@ -54,6 +52,8 @@ Please review the [Contributing to Lucene
Guide](./CONTRIBUTING.md) for information on
contributing.
- Additional Developer Documentation: [dev-docs/](./dev-docs/)
## Discussion and Support
- [Users Mailing List](https://lucene.apache.org/core/discussion.html#java-user-list-java-userluceneapacheorg)


@ -15,30 +15,50 @@
* limitations under the License.
*/
plugins {
id "java-gradle-plugin"
alias(deps.plugins.spotless) apply false
alias(deps.plugins.forbiddenapis) apply false
}
repositories {
mavenCentral()
}
ext {
// Minimum Java version required to compile buildSrc.
minJavaVersion = JavaVersion.VERSION_21
}
group = "org.apache"
// Make sure the build environment is consistent.
apply from: file('../gradle/validation/check-environment.gradle')
apply from: file('../../gradle/conventions.gradle')
apply from: file('../../gradle/validation/check-environment.gradle')
// Load common buildSrc and script deps.
apply from: file("scriptDepVersions.gradle")
// Add spotless/ tidy.
tasks.register("checkJdkInternalsExportedToGradle") {}
apply from: file('../../gradle/validation/spotless.gradle')
apply from: file('../../gradle/validation/forbidden-apis.gradle')
java {
sourceCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
targetCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
}
gradlePlugin {
automatedPublishing = false
plugins {
buildInfra {
id = 'lucene.build-infra'
implementationClass = 'org.apache.lucene.gradle.buildinfra.BuildInfraPlugin'
}
}
}
dependencies {
implementation gradleApi()
implementation localGroovy()
implementation "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
implementation deps.commons.codec
}
if (!rootProject.hasJavaFlightRecorder) {
if (!hasJavaFlightRecorder) {
logger.warn('Module jdk.jfr is not available; skipping compilation of Java Flight Recorder support.')
tasks.named('compileJava').configure {
exclude('**/ProfileResults.java')
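The gradlePlugin block in this hunk registers the build-infra plugin under the id `lucene.build-infra`. Purely as an illustrative sketch (the directory path below is an assumption; the real wiring lives in this commit's own settings and build scripts), an included-build plugin like this is typically consumed as follows:

```groovy
// settings.gradle (sketch): make the included build's plugins resolvable.
pluginManagement {
    includeBuild("build-tools/build-infra")   // assumed path of the included build
}

// build.gradle (sketch): apply the plugin registered above by its id.
plugins {
    id "lucene.build-infra"
}
```

The root build.gradle diff further down applies exactly this plugin id.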


@ -15,18 +15,12 @@
* limitations under the License.
*/
plugins {
id 'java-library'
}
version = "1.0.0-SNAPSHOT"
group = "org.apache.lucene.tools"
description = 'Doclet-based javadoc validation'
sourceCompatibility = JavaVersion.VERSION_21
targetCompatibility = JavaVersion.VERSION_21
tasks.withType(JavaCompile) {
options.compilerArgs += ["--release", targetCompatibility.toString()]
options.encoding = "UTF-8"
rootProject.name = 'build-infra'
dependencyResolutionManagement {
versionCatalogs {
deps {
from(files('../../versions.toml'))
}
}
}


@ -27,6 +27,11 @@
package org.apache.lucene.gradle;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Locale;
import org.apache.commons.codec.digest.DigestUtils;
import org.gradle.api.DefaultTask;
import org.gradle.api.GradleException;
@ -39,16 +44,10 @@ import org.gradle.api.tasks.TaskAction;
import org.gradle.work.Incremental;
import org.gradle.work.InputChanges;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Locale;
public class Checksum extends DefaultTask {
private FileCollection files;
private File outputDir;
private Algorithm algorithm;
private Algorithm algorithm = Algorithm.SHA512;
public enum Algorithm {
MD5(new DigestUtils(DigestUtils.getMd5Digest())),
@ -69,7 +68,6 @@ public class Checksum extends DefaultTask {
public Checksum() {
outputDir = new File(getProject().getBuildDir(), "checksums");
algorithm = Algorithm.SHA256;
}
@InputFiles
@ -190,6 +188,8 @@ public class Checksum extends DefaultTask {
private FileCollection filesFor(final Algorithm algo) {
return getProject()
.fileTree(getOutputDir(), files -> files.include("**/*." + algo.toString().toLowerCase(Locale.ROOT)));
.fileTree(
getOutputDir(),
files -> files.include("**/*." + algo.toString().toLowerCase(Locale.ROOT)));
}
}
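For orientation, a minimal sketch of how a build script might register this Checksum task type. The property assignments assume the class exposes setters matching the fields shown above (files, outputDir, algorithm), which is not visible in this hunk:

```groovy
// Sketch: compute checksums for built jars using the task type above.
tasks.register("jarChecksums", org.apache.lucene.gradle.Checksum) {
    files = fileTree(dir: "build/libs", include: "*.jar")   // assumed setter for the 'files' input
    outputDir = file("build/checksums")                     // assumed setter for the output directory
    // After this change the algorithm defaults to SHA-512; override explicitly if needed:
    // algorithm = org.apache.lucene.gradle.Checksum.Algorithm.SHA256
}
```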


@ -0,0 +1,288 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.gradle.api.internal.tasks.testing.logging.FullExceptionFormatter;
import org.gradle.api.internal.tasks.testing.logging.TestExceptionFormatter;
import org.gradle.api.logging.Logger;
import org.gradle.api.logging.Logging;
import org.gradle.api.tasks.testing.TestDescriptor;
import org.gradle.api.tasks.testing.TestListener;
import org.gradle.api.tasks.testing.TestOutputEvent;
import org.gradle.api.tasks.testing.TestOutputListener;
import org.gradle.api.tasks.testing.TestResult;
import org.gradle.api.tasks.testing.logging.TestLogging;
/**
* An error reporting listener that queues test output streams and displays them on failure.
*
* <p>Heavily inspired by Elasticsearch's ErrorReportingTestListener (ASL 2.0 licensed).
*/
public class ErrorReportingTestListener implements TestOutputListener, TestListener {
private static final Logger LOGGER = Logging.getLogger(ErrorReportingTestListener.class);
private final TestExceptionFormatter formatter;
private final Map<TestKey, OutputHandler> outputHandlers = new ConcurrentHashMap<>();
private final Path spillDir;
private final Path outputsDir;
private final boolean verboseMode;
public ErrorReportingTestListener(
TestLogging testLogging, Path spillDir, Path outputsDir, boolean verboseMode) {
this.formatter = new FullExceptionFormatter(testLogging);
this.spillDir = spillDir;
this.outputsDir = outputsDir;
this.verboseMode = verboseMode;
}
@Override
public void onOutput(TestDescriptor testDescriptor, TestOutputEvent outputEvent) {
handlerFor(testDescriptor).write(outputEvent);
}
@Override
public void beforeSuite(TestDescriptor suite) {
// noop.
}
@Override
public void beforeTest(TestDescriptor testDescriptor) {
// Noop.
}
@Override
public void afterSuite(final TestDescriptor suite, TestResult result) {
if (suite.getParent() == null || suite.getName().startsWith("Gradle")) {
return;
}
TestKey key = TestKey.of(suite);
try {
OutputHandler outputHandler = outputHandlers.get(key);
if (outputHandler != null) {
long length = outputHandler.length();
if (length > 1024 * 1024 * 10) {
LOGGER.warn(
String.format(
Locale.ROOT,
"WARNING: Test %s wrote %,d bytes of output.",
suite.getName(),
length));
}
}
boolean echoOutput = Objects.equals(result.getResultType(), TestResult.ResultType.FAILURE);
boolean dumpOutput = echoOutput;
// If the test suite failed, report output.
if (dumpOutput || echoOutput) {
Files.createDirectories(outputsDir);
Path outputLog = outputsDir.resolve(getOutputLogName(suite));
// Save the output of a failing test to disk.
try (Writer w = Files.newBufferedWriter(outputLog, StandardCharsets.UTF_8)) {
if (outputHandler != null) {
outputHandler.copyTo(w);
}
}
if (echoOutput && !verboseMode) {
synchronized (this) {
System.out.println();
System.out.println(
suite.getClassName()
+ " > test suite's output saved to "
+ outputLog
+ ", copied below:");
try (BufferedReader reader =
Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
char[] buf = new char[1024];
int len;
while ((len = reader.read(buf)) >= 0) {
System.out.print(new String(buf, 0, len));
}
System.out.println();
}
}
}
}
} catch (IOException e) {
throw new UncheckedIOException(e);
} finally {
OutputHandler handler = outputHandlers.remove(key);
if (handler != null) {
try {
handler.close();
} catch (IOException e) {
LOGGER.error("Failed to close output handler for: " + key, e);
}
}
}
}
private static Pattern SANITIZE = Pattern.compile("[^a-zA-Z .\\-_0-9]+");
public static String getOutputLogName(TestDescriptor suite) {
return SANITIZE.matcher("OUTPUT-" + suite.getName() + ".txt").replaceAll("_");
}
@Override
public void afterTest(TestDescriptor testDescriptor, TestResult result) {
// Include test failure exception stacktrace(s) in test output log.
if (result.getResultType() == TestResult.ResultType.FAILURE) {
if (result.getExceptions().size() > 0) {
String message = formatter.format(testDescriptor, result.getExceptions());
handlerFor(testDescriptor).write(message);
}
}
}
private OutputHandler handlerFor(TestDescriptor descriptor) {
// Attach output of leaves (individual tests) to their parent.
if (!descriptor.isComposite()) {
descriptor = descriptor.getParent();
}
return outputHandlers.computeIfAbsent(TestKey.of(descriptor), (key) -> new OutputHandler());
}
public static class TestKey {
private final String key;
private TestKey(String key) {
this.key = key;
}
public static TestKey of(TestDescriptor d) {
StringBuilder key = new StringBuilder();
key.append(d.getClassName());
key.append("::");
key.append(d.getName());
key.append("::");
key.append(d.getParent() == null ? "-" : d.getParent().toString());
return new TestKey(key.toString());
}
@Override
public boolean equals(Object o) {
return o != null && o.getClass() == this.getClass() && Objects.equals(((TestKey) o).key, key);
}
@Override
public int hashCode() {
return key.hashCode();
}
@Override
public String toString() {
return key;
}
}
private class OutputHandler implements Closeable {
// Max single-line buffer before automatic wrap occurs.
private static final int MAX_LINE_WIDTH = 1024 * 4;
private final SpillWriter buffer;
// internal stream.
private final PrefixedWriter sint;
// stdout
private final PrefixedWriter sout;
// stderr
private final PrefixedWriter serr;
// last used stream (so that we can flush it properly and prefixes are not screwed up).
private PrefixedWriter last;
public OutputHandler() {
buffer =
new SpillWriter(
() -> {
try {
return Files.createTempFile(spillDir, "spill-", ".tmp");
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
Writer sink = buffer;
if (verboseMode) {
sink = new StdOutTeeWriter(buffer);
}
sint = new PrefixedWriter(" > ", sink, MAX_LINE_WIDTH);
sout = new PrefixedWriter(" 1> ", sink, MAX_LINE_WIDTH);
serr = new PrefixedWriter(" 2> ", sink, MAX_LINE_WIDTH);
last = sint;
}
public void write(TestOutputEvent event) {
write(
(event.getDestination() == TestOutputEvent.Destination.StdOut ? sout : serr),
event.getMessage());
}
public void write(String message) {
write(sint, message);
}
public long length() throws IOException {
return buffer.length();
}
private void write(PrefixedWriter out, String message) {
try {
if (out != last) {
last.completeLine();
last = out;
}
out.write(message);
} catch (IOException e) {
throw new UncheckedIOException("Unable to write to test output.", e);
}
}
public void copyTo(Writer out) throws IOException {
flush();
buffer.copyTo(out);
}
public void flush() throws IOException {
sout.completeLine();
serr.completeLine();
buffer.flush();
}
@Override
public void close() throws IOException {
buffer.close();
}
}
}


@ -67,6 +67,6 @@ public class GradlePropertiesGenerator {
fileContent = fileContent.replace(entry.getKey(), String.valueOf(entry.getValue()));
}
Files.writeString(
destination, fileContent, StandardCharsets.UTF_8, StandardOpenOption.CREATE_NEW);
destination, fileContent, StandardCharsets.UTF_8, StandardOpenOption.CREATE_NEW);
}
}


@ -20,12 +20,13 @@ import java.io.IOException;
import java.io.Writer;
/**
* Prefixes every new line with a given string, synchronizing multiple streams to emit consistent lines.
* Prefixes every new line with a given string, synchronizing multiple streams to emit consistent
* lines.
*/
public class PrefixedWriter extends Writer {
Writer sink;
private final static char LF = '\n';
private static final char LF = '\n';
private final String prefix;
private final StringBuilder lineBuffer = new StringBuilder();
private final int maxLineLength;
@ -45,7 +46,7 @@ public class PrefixedWriter extends Writer {
sink.write(LF);
lineBuffer.setLength(0);
if (c != LF) {
if (c != LF) {
lineBuffer.append((char) c);
}
} else {
@ -70,9 +71,7 @@ public class PrefixedWriter extends Writer {
throw new UnsupportedOperationException();
}
/**
* Complete the current line (emit LF if not at the start of the line already).
*/
/** Complete the current line (emit LF if not at the start of the line already). */
public void completeLine() throws IOException {
if (lineBuffer.length() > 0) {
write(LF);


@ -20,13 +20,12 @@ package org.apache.lucene.gradle;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.AbstractMap.SimpleEntry;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import jdk.jfr.consumer.RecordedClass;
import jdk.jfr.consumer.RecordedEvent;
import jdk.jfr.consumer.RecordedFrame;
@ -36,15 +35,12 @@ import jdk.jfr.consumer.RecordedThread;
import jdk.jfr.consumer.RecordingFile;
/**
* Processes an array of recording files (from tests), and prints a simple histogram.
* Inspired by the JFR example code.
* Whole stacks are deduplicated (with the default stacksize being 1): you can drill deeper
* by adjusting the parameters.
* Processes an array of recording files (from tests), and prints a simple histogram. Inspired by
* the JFR example code. Whole stacks are deduplicated (with the default stacksize being 1): you can
* drill deeper by adjusting the parameters.
*/
public class ProfileResults {
/**
* Formats a frame to a formatted line. This is deduplicated on!
*/
/** Formats a frame to a formatted line. This is deduplicated on! */
static String frameToString(RecordedFrame frame, boolean lineNumbers) {
StringBuilder builder = new StringBuilder();
RecordedMethod method = frame.getMethod();
@ -84,29 +80,32 @@ public class ProfileResults {
/**
* Driver method, for testing standalone.
*
* <pre>
* java -Dtests.profile.count=5 buildSrc/src/main/java/org/apache/lucene/gradle/ProfileResults.java \
* ./lucene/core/build/tmp/tests-cwd/somefile.jfr ...
* </pre>
*/
public static void main(String[] args) throws IOException {
printReport(Arrays.asList(args),
System.getProperty(MODE_KEY, MODE_DEFAULT),
Integer.parseInt(System.getProperty(STACKSIZE_KEY, STACKSIZE_DEFAULT)),
Integer.parseInt(System.getProperty(COUNT_KEY, COUNT_DEFAULT)),
Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)));
printReport(
Arrays.asList(args),
System.getProperty(MODE_KEY, MODE_DEFAULT),
Integer.parseInt(System.getProperty(STACKSIZE_KEY, STACKSIZE_DEFAULT)),
Integer.parseInt(System.getProperty(COUNT_KEY, COUNT_DEFAULT)),
Boolean.parseBoolean(System.getProperty(LINENUMBERS_KEY, LINENUMBERS_DEFAULT)));
}
/** true if we care about this event */
static boolean isInteresting(String mode, RecordedEvent event) {
String name = event.getEventType().getName();
switch(mode) {
switch (mode) {
case "cpu":
return (name.equals("jdk.ExecutionSample") || name.equals("jdk.NativeMethodSample")) &&
!isGradlePollThread(event.getThread("sampledThread"));
return (name.equals("jdk.ExecutionSample") || name.equals("jdk.NativeMethodSample"))
&& !isGradlePollThread(event.getThread("sampledThread"));
case "heap":
return (name.equals("jdk.ObjectAllocationInNewTLAB") || name.equals("jdk.ObjectAllocationOutsideTLAB")) &&
!isGradlePollThread(event.getThread("eventThread"));
return (name.equals("jdk.ObjectAllocationInNewTLAB")
|| name.equals("jdk.ObjectAllocationOutsideTLAB"))
&& !isGradlePollThread(event.getThread("eventThread"));
default:
throw new UnsupportedOperationException(event.toString());
}
@ -119,7 +118,7 @@ public class ProfileResults {
/** value we accumulate for this event */
static long getValue(RecordedEvent event) {
switch(event.getEventType().getName()) {
switch (event.getEventType().getName()) {
case "jdk.ObjectAllocationInNewTLAB":
return event.getLong("tlabSize");
case "jdk.ObjectAllocationOutsideTLAB":
@ -133,10 +132,10 @@ public class ProfileResults {
}
}
/** format a value, if its huge, we show millions */
/** format a value, if it's huge, we show millions */
static String formatValue(long value) {
if (value > 1_000_000) {
return String.format("%dM", value / 1_000_000);
return String.format(Locale.ROOT, "%dM", value / 1_000_000);
} else {
return Long.toString(value);
}
@ -144,15 +143,17 @@ public class ProfileResults {
/** fixed width used for printing the different columns */
private static final int COLUMN_SIZE = 14;
private static final String COLUMN_PAD = "%-" + COLUMN_SIZE + "s";
private static String pad(String input) {
return String.format(Locale.ROOT, COLUMN_PAD, input);
}
/**
* Process all the JFR files passed in args and print a merged summary.
*/
public static void printReport(List<String> files, String mode, int stacksize, int count, boolean lineNumbers) throws IOException {
/** Process all the JFR files passed in args and print a merged summary. */
public static void printReport(
List<String> files, String mode, int stacksize, int count, boolean lineNumbers)
throws IOException {
if (!"cpu".equals(mode) && !"heap".equals(mode)) {
throw new IllegalArgumentException("tests.profile.mode must be one of (cpu,heap)");
}
@ -178,14 +179,13 @@ public class ProfileResults {
StringBuilder stack = new StringBuilder();
for (int i = 0; i < Math.min(stacksize, trace.getFrames().size()); i++) {
if (stack.length() > 0) {
stack.append("\n")
.append(framePadding)
.append(" at ");
stack.append("\n").append(framePadding).append(" at ");
}
stack.append(frameToString(trace.getFrames().get(i), lineNumbers));
}
String line = stack.toString();
SimpleEntry<String,Long> entry = histogram.computeIfAbsent(line, u -> new SimpleEntry<String, Long>(line, 0L));
SimpleEntry<String, Long> entry =
histogram.computeIfAbsent(line, u -> new SimpleEntry<String, Long>(line, 0L));
long value = getValue(event);
entry.setValue(entry.getValue() + value);
totalEvents++;
@ -195,12 +195,20 @@ public class ProfileResults {
}
}
// print summary from histogram
System.out.printf(Locale.ROOT, "PROFILE SUMMARY from %d events (total: %s)\n", totalEvents, formatValue(sumValues));
System.out.printf(
Locale.ROOT,
"PROFILE SUMMARY from %d events (total: %s)\n",
totalEvents,
formatValue(sumValues));
System.out.printf(Locale.ROOT, " tests.profile.mode=%s\n", mode);
System.out.printf(Locale.ROOT, " tests.profile.count=%d\n", count);
System.out.printf(Locale.ROOT, " tests.profile.stacksize=%d\n", stacksize);
System.out.printf(Locale.ROOT, " tests.profile.linenumbers=%b\n", lineNumbers);
System.out.printf(Locale.ROOT, "%s%sSTACK\n", pad("PERCENT"), pad(mode.toUpperCase(Locale.ROOT) + " SAMPLES"));
System.out.printf(
Locale.ROOT,
"%s%sSTACK\n",
pad("PERCENT"),
pad(mode.toUpperCase(Locale.ROOT) + " SAMPLES"));
List<SimpleEntry<String, Long>> entries = new ArrayList<>(histogram.values());
entries.sort((u, v) -> v.getValue().compareTo(u.getValue()));
int seen = 0;
@ -208,8 +216,10 @@ public class ProfileResults {
if (seen++ == count) {
break;
}
String percent = String.format("%2.2f%%", 100 * (c.getValue() / (float) sumValues));
System.out.printf(Locale.ROOT, "%s%s%s\n", pad(percent), pad(formatValue(c.getValue())), c.getKey());
String percent =
String.format(Locale.ROOT, "%2.2f%%", 100 * (c.getValue() / (float) sumValues));
System.out.printf(
Locale.ROOT, "%s%s%s\n", pad(percent), pad(formatValue(c.getValue())), c.getKey());
}
}
}


@ -26,7 +26,7 @@ import java.nio.file.Path;
import java.util.function.Supplier;
public class SpillWriter extends Writer {
private final static int MAX_BUFFERED = 2 * 1024;
private static final int MAX_BUFFERED = 2 * 1024;
private final StringWriter buffer = new StringWriter(MAX_BUFFERED);
private final Supplier<Path> spillPathSupplier;


@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Writer;
class StdOutTeeWriter extends Writer {
private final Writer delegate;
private final PrintStream out = System.out;
public StdOutTeeWriter(Writer delegate) {
this.delegate = delegate;
}
@Override
public void write(int c) throws IOException {
delegate.write(c);
out.write(c);
}
@Override
public void write(char[] cbuf) throws IOException {
delegate.write(cbuf);
out.print(cbuf);
}
@Override
public void write(String str) throws IOException {
delegate.write(str);
out.print(str);
}
@Override
public void write(String str, int off, int len) throws IOException {
delegate.write(str, off, len);
out.append(str, off, len);
}
@Override
public Writer append(CharSequence csq) throws IOException {
delegate.append(csq);
out.append(csq);
return this;
}
@Override
public Writer append(CharSequence csq, int start, int end) throws IOException {
delegate.append(csq, start, end);
out.append(csq, start, end);
return this;
}
@Override
public Writer append(char c) throws IOException {
delegate.append(c);
out.append(c);
return this;
}
@Override
public void write(char[] cbuf, int off, int len) throws IOException {
delegate.write(cbuf, off, len);
out.print(new String(cbuf, off, len));
}
@Override
public void flush() throws IOException {
delegate.flush();
out.flush();
}
@Override
public void close() throws IOException {
delegate.close();
// Don't close the actual output.
}
}


@ -16,12 +16,18 @@
*/
package org.apache.lucene.gradle;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@ -31,12 +37,10 @@ import java.security.NoSuchAlgorithmException;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
/**
* Standalone class that can be used to download a gradle-wrapper.jar
* <p>
* Has no dependencies outside of standard java libraries
*
* <p>Has no dependencies outside of standard java libraries
*/
public class WrapperDownloader {
public static void main(String[] args) {
@ -62,13 +66,15 @@ public class WrapperDownloader {
}
public void run(Path destination) throws IOException, NoSuchAlgorithmException {
Path checksumPath = destination.resolveSibling(destination.getFileName().toString() + ".sha256");
Path checksumPath =
destination.resolveSibling(destination.getFileName().toString() + ".sha256");
if (!Files.exists(checksumPath)) {
throw new IOException("Checksum file not found: " + checksumPath);
}
String expectedChecksum = Files.readString(checksumPath, StandardCharsets.UTF_8).trim();
Path versionPath = destination.resolveSibling(destination.getFileName().toString() + ".version");
Path versionPath =
destination.resolveSibling(destination.getFileName().toString() + ".version");
if (!Files.exists(versionPath)) {
throw new IOException("Wrapper version file not found: " + versionPath);
}
@ -87,7 +93,12 @@ public class WrapperDownloader {
}
}
URL url = URI.create("https://raw.githubusercontent.com/gradle/gradle/v" + wrapperVersion + "/gradle/wrapper/gradle-wrapper.jar").toURL();
URL url =
URI.create(
"https://raw.githubusercontent.com/gradle/gradle/v"
+ wrapperVersion
+ "/gradle/wrapper/gradle-wrapper.jar")
.toURL();
System.err.println("Downloading gradle-wrapper.jar from " + url);
// Zero-copy save the jar to a temp file
@ -103,8 +114,9 @@ public class WrapperDownloader {
} catch (IOException e) {
if (retries-- > 0) {
// Retry after a short delay
System.err.println("Error connecting to server: " + e + ", will retry in " + retryDelay + " seconds.");
Thread.sleep(TimeUnit.SECONDS.toMillis(retryDelay));
System.err.println(
"Error connecting to server: " + e + ", will retry in " + retryDelay + " seconds.");
sleep(TimeUnit.SECONDS.toMillis(retryDelay));
continue;
}
}
@ -115,8 +127,13 @@ public class WrapperDownloader {
case HttpURLConnection.HTTP_BAD_GATEWAY:
if (retries-- > 0) {
// Retry after a short delay.
System.err.println("Server returned HTTP " + connection.getResponseCode() + ", will retry in " + retryDelay + " seconds.");
Thread.sleep(TimeUnit.SECONDS.toMillis(retryDelay));
System.err.println(
"Server returned HTTP "
+ connection.getResponseCode()
+ ", will retry in "
+ retryDelay
+ " seconds.");
sleep(TimeUnit.SECONDS.toMillis(retryDelay));
continue;
}
}
@ -126,13 +143,15 @@ public class WrapperDownloader {
}
try (InputStream is = connection.getInputStream();
OutputStream out = Files.newOutputStream(temp)){
OutputStream out = Files.newOutputStream(temp)) {
is.transferTo(out);
}
String checksum = checksum(digest, temp);
if (!checksum.equalsIgnoreCase(expectedChecksum)) {
throw new IOException(String.format(Locale.ROOT,
throw new IOException(
String.format(
Locale.ROOT,
"Checksum mismatch on downloaded gradle-wrapper.jar (was: %s, expected: %s).",
checksum,
expectedChecksum));
@ -141,8 +160,12 @@ public class WrapperDownloader {
Files.move(temp, destination, REPLACE_EXISTING);
temp = null;
} catch (IOException | InterruptedException e) {
throw new IOException("Could not download gradle-wrapper.jar (" +
e.getClass().getSimpleName() + ": " + e.getMessage() + ").");
throw new IOException(
"Could not download gradle-wrapper.jar ("
+ e.getClass().getSimpleName()
+ ": "
+ e.getMessage()
+ ").");
} finally {
if (temp != null) {
Files.deleteIfExists(temp);
@ -150,6 +173,11 @@ public class WrapperDownloader {
}
}
@SuppressForbidden(reason = "Correct use of thread.sleep.")
private static void sleep(long millis) throws InterruptedException {
Thread.sleep(millis);
}
private String checksum(MessageDigest messageDigest, Path path) throws IOException {
try {
char[] hex = "0123456789abcdef".toCharArray();
@ -160,7 +188,15 @@ public class WrapperDownloader {
}
return sb.toString();
} catch (IOException e) {
throw new IOException("Could not compute digest of file: " + path + " (" + e.getMessage() + ")");
throw new IOException(
"Could not compute digest of file: " + path + " (" + e.getMessage() + ")");
}
}
@Retention(RetentionPolicy.CLASS)
@Target({ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE})
public @interface SuppressForbidden {
/** A reason for suppressing should always be given. */
String reason();
}
}


@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle.buildinfra;
import java.nio.file.Path;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.lucene.gradle.Checksum;
import org.apache.lucene.gradle.ErrorReportingTestListener;
import org.apache.lucene.gradle.datasets.ExtractReuters;
import org.gradle.api.Plugin;
import org.gradle.api.Project;
import org.gradle.api.tasks.testing.TestDescriptor;
import org.gradle.api.tasks.testing.logging.TestLogging;
public class BuildInfraPlugin implements Plugin<Project> {
@Override
public void apply(Project project) {
project.getExtensions().create(BuildInfraExtension.NAME, BuildInfraExtension.class);
}
public static class BuildInfraExtension {
public static final String NAME = "buildinfra";
public ErrorReportingTestListener newErrorReportingTestListener(
TestLogging testLogging, Path spillDir, Path outputsDir, boolean verboseMode) {
return new ErrorReportingTestListener(testLogging, spillDir, outputsDir, verboseMode);
}
public DigestUtils sha1Digest() {
return new DigestUtils(DigestUtils.getSha1Digest());
}
public void extractReuters(String reutersDir, String outputDir) throws Exception {
ExtractReuters.main(new String[] {reutersDir, outputDir});
}
public String getOutputLogName(TestDescriptor suite) {
return ErrorReportingTestListener.getOutputLogName(suite);
}
public Class<?> checksumClass() {
return Checksum.class;
}
}
}
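The BuildInfraExtension above is what build scripts see as the `buildinfra` object. The following is only a sketch of how a Test task could be wired to the error-reporting listener through it; the spill/output directories and the verbose flag are made-up values, not the configuration this commit uses:

```groovy
// Sketch: attach the error-reporting listener to every Test task via the extension.
tasks.withType(Test).configureEach { task ->
    def spillDir = task.temporaryDir.toPath()   // assumption: reuse the task's temp dir for spill files
    def outputsDir = layout.buildDirectory.dir("test-outputs").get().asFile.toPath()
    def listener = buildinfra.newErrorReportingTestListener(
        task.testLogging, spillDir, outputsDir, /* verboseMode= */ false)
    task.addTestOutputListener(listener)   // routes test stdout/stderr into the handler
    task.addTestListener(listener)         // reports failing suites and copies their saved output
}
```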


@ -30,8 +30,7 @@ import java.util.regex.Pattern;
import java.util.stream.Stream;
/**
* Split the Reuters SGML documents into Simple Text files containing:
* Title, Date, Dateline, Body
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
*/
public class ExtractReuters {
private final Path reutersDir;
@ -67,7 +66,9 @@ public class ExtractReuters {
private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};
private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
private static final String[] META_CHARS_SERIALIZATIONS = {
"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"
};
/** Override if you wish to change what is extracted */
protected void extractFile(Path sgmFile) throws IOException {


@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
plugins {
id 'java-library'
alias(deps.plugins.spotless) apply false
alias(deps.plugins.forbiddenapis) apply false
}
repositories {
mavenCentral()
}
version = "1.0.0-SNAPSHOT"
group = "org.apache.lucene.tools"
description = 'Doclet-based javadoc validation'
// Make sure the build environment is consistent.
apply from: file('../../gradle/conventions.gradle')
apply from: file('../../gradle/validation/check-environment.gradle')
// Add spotless/ tidy.
tasks.register("checkJdkInternalsExportedToGradle") {}
apply from: file('../../gradle/validation/spotless.gradle')
apply from: file('../../gradle/validation/forbidden-apis.gradle')
java {
sourceCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
targetCompatibility = JavaVersion.toVersion(deps.versions.minJava.get())
}
tasks.withType(JavaCompile).configureEach {
options.compilerArgs += ["--release", java.targetCompatibility.toString()]
options.encoding = "UTF-8"
}


@ -15,3 +15,10 @@
* limitations under the License.
*/
dependencyResolutionManagement {
versionCatalogs {
deps {
from(files('../../versions.toml'))
}
}
}


@ -16,6 +16,9 @@
*/
package org.apache.lucene.missingdoclet;
import com.sun.source.doctree.DocCommentTree;
import com.sun.source.doctree.ParamTree;
import com.sun.source.util.DocTrees;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
@ -24,7 +27,6 @@ import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.lang.model.element.Element;
import javax.lang.model.element.ElementKind;
import javax.lang.model.element.ExecutableElement;
@ -36,24 +38,19 @@ import javax.lang.model.util.ElementFilter;
import javax.lang.model.util.Elements;
import javax.lang.model.util.Elements.Origin;
import javax.tools.Diagnostic;
import com.sun.source.doctree.DocCommentTree;
import com.sun.source.doctree.ParamTree;
import com.sun.source.util.DocTrees;
import jdk.javadoc.doclet.Doclet;
import jdk.javadoc.doclet.DocletEnvironment;
import jdk.javadoc.doclet.Reporter;
import jdk.javadoc.doclet.StandardDoclet;
/**
* Checks for missing javadocs, where missing also means "only whitespace" or "license header".
* Has option --missing-level (package, class, method, parameter) so that we can improve over time.
* Has option --missing-ignore to ignore individual elements (such as split packages).
* It isn't recursive, just ignores exactly the elements you tell it.
* This should be removed when packaging is fixed to no longer be split across JARs.
* Has option --missing-method to apply "method" level to selected packages (fix one at a time).
* Matches package names exactly: so you'll need to list subpackages separately.
* Checks for missing javadocs, where missing also means "only whitespace" or "license header". Has
* option --missing-level (package, class, method, parameter) so that we can improve over time. Has
* option --missing-ignore to ignore individual elements (such as split packages). It isn't
* recursive, just ignores exactly the elements you tell it. This should be removed when packaging
* is fixed to no longer be split across JARs. Has option --missing-method to apply "method" level
* to selected packages (fix one at a time). Matches package names exactly: so you'll need to list
* subpackages separately.
*/
public class MissingDoclet extends StandardDoclet {
// checks that modules and packages have documentation
@ -71,120 +68,123 @@ public class MissingDoclet extends StandardDoclet {
Elements elementUtils;
Set<String> ignored = Collections.emptySet();
Set<String> methodPackages = Collections.emptySet();
@Override
public Set<Doclet.Option> getSupportedOptions() {
Set<Doclet.Option> options = new HashSet<>(super.getSupportedOptions());
options.add(new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
options.add(
new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "level to enforce for missing javadocs: [package, class, method, parameter]";
}
@Override
public String getDescription() {
return "level to enforce for missing javadocs: [package, class, method, parameter]";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-level");
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-level");
}
@Override
public String getParameters() {
return "level";
}
@Override
public String getParameters() {
return "level";
}
@Override
public boolean process(String option, List<String> arguments) {
switch (arguments.get(0)) {
case "package":
level = PACKAGE;
@Override
public boolean process(String option, List<String> arguments) {
switch (arguments.get(0)) {
case "package":
level = PACKAGE;
return true;
case "class":
level = CLASS;
return true;
case "method":
level = METHOD;
return true;
case "parameter":
level = PARAMETER;
return true;
default:
return false;
}
}
});
options.add(
new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of element names to ignore (e.g. as a workaround for split packages)";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-ignore");
}
@Override
public String getParameters() {
return "ignoredNames";
}
@Override
public boolean process(String option, List<String> arguments) {
ignored = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true;
case "class":
level = CLASS;
}
});
options.add(
new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of packages to check at 'method' level";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-method");
}
@Override
public String getParameters() {
return "packages";
}
@Override
public boolean process(String option, List<String> arguments) {
methodPackages = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true;
case "method":
level = METHOD;
return true;
case "parameter":
level = PARAMETER;
return true;
default:
return false;
}
}
});
options.add(new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of element names to ignore (e.g. as a workaround for split packages)";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-ignore");
}
@Override
public String getParameters() {
return "ignoredNames";
}
@Override
public boolean process(String option, List<String> arguments) {
ignored = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true;
}
});
options.add(new Doclet.Option() {
@Override
public int getArgumentCount() {
return 1;
}
@Override
public String getDescription() {
return "comma separated list of packages to check at 'method' level";
}
@Override
public Kind getKind() {
return Option.Kind.STANDARD;
}
@Override
public List<String> getNames() {
return Collections.singletonList("--missing-method");
}
@Override
public String getParameters() {
return "packages";
}
@Override
public boolean process(String option, List<String> arguments) {
methodPackages = new HashSet<>(Arrays.asList(arguments.get(0).split(",")));
return true;
}
});
}
});
return options;
}
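The class comment further up lists the doclet's options (--missing-level, --missing-ignore, --missing-method). As a rough illustration only, and not the wiring used by this commit, a Gradle Javadoc task could pass them roughly like this; the `missingDoclet` configuration and the ignored package name are hypothetical:

```groovy
// Sketch: run javadoc with MissingDoclet enforcing the "method" level.
tasks.named("javadoc", Javadoc).configure {
    options.doclet = "org.apache.lucene.missingdoclet.MissingDoclet"
    options.docletpath = configurations.missingDoclet.files.toList()   // hypothetical configuration holding the doclet jar
    // addStringOption() prepends a single '-', so "-missing-level" renders as "--missing-level".
    options.addStringOption("-missing-level", "method")
    options.addStringOption("-missing-ignore", "org.example.splitpkg")
}
```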
@ -205,10 +205,8 @@ public class MissingDoclet extends StandardDoclet {
return super.run(docEnv);
}
/**
* Returns effective check level for this element
*/
/** Returns effective check level for this element */
private int level(Element element) {
String pkg = elementUtils.getPackageOf(element).getQualifiedName().toString();
if (methodPackages.contains(pkg)) {
@ -217,24 +215,24 @@ public class MissingDoclet extends StandardDoclet {
return level;
}
}
/**
* Check an individual element.
* This checks packages and types from the doctrees.
* It will recursively check methods/fields from encountered types when the level is "method"
/**
* Check an individual element. This checks packages and types from the doctrees. It will
* recursively check methods/fields from encountered types when the level is "method"
*/
private void check(Element element) {
switch(element.getKind()) {
switch (element.getKind()) {
case MODULE:
// don't check the unnamed module, it won't have javadocs
if (!((ModuleElement)element).isUnnamed()) {
if (!((ModuleElement) element).isUnnamed()) {
checkComment(element);
}
break;
case PACKAGE:
checkComment(element);
break;
// class-like elements, check them, then recursively check their children (fields and methods)
// class-like elements, check them, then recursively check their children (fields and
// methods)
case CLASS:
case INTERFACE:
case ENUM:
@ -242,21 +240,24 @@ public class MissingDoclet extends StandardDoclet {
case ANNOTATION_TYPE:
if (level(element) >= CLASS) {
checkComment(element);
if (element instanceof TypeElement te && element.getKind() == ElementKind.RECORD && level(element) >= METHOD) {
if (element instanceof TypeElement te
&& element.getKind() == ElementKind.RECORD
&& level(element) >= METHOD) {
checkRecordParameters(te, docTrees.getDocCommentTree(element));
}
for (var subElement : element.getEnclosedElements()) {
// don't recurse into enclosed types, otherwise we'll double-check since they are already in the included docTree
if (subElement.getKind() == ElementKind.METHOD ||
subElement.getKind() == ElementKind.CONSTRUCTOR ||
subElement.getKind() == ElementKind.FIELD ||
subElement.getKind() == ElementKind.ENUM_CONSTANT) {
// don't recurse into enclosed types, otherwise we'll double-check since they are
// already in the included docTree
if (subElement.getKind() == ElementKind.METHOD
|| subElement.getKind() == ElementKind.CONSTRUCTOR
|| subElement.getKind() == ElementKind.FIELD
|| subElement.getKind() == ElementKind.ENUM_CONSTANT) {
check(subElement);
}
}
}
break;
// method-like elements, check them if we are configured to do so
// method-like elements, check them if we are configured to do so
case METHOD:
case CONSTRUCTOR:
case FIELD:
@ -272,8 +273,8 @@ public class MissingDoclet extends StandardDoclet {
/**
* Return true if the method is synthetic enum (values/valueOf) or record accessor method.
* According to the doctree documentation, the "included" set never includes synthetic/mandated elements.
* UweSays: It should not happen but it happens!
* According to the doctree documentation, the "included" set never includes synthetic/mandated
* elements. UweSays: It should not happen but it happens!
*/
private boolean isSyntheticMethod(Element element) {
// exclude all not explicitly declared methods
@ -293,20 +294,23 @@ public class MissingDoclet extends StandardDoclet {
}
return false;
}
/**
* Checks that an element doesn't have missing javadocs.
* In addition to truly "missing", check that comments aren't solely whitespace (generated by some IDEs),
* that they aren't a license header masquerading as a javadoc comment.
* Checks that an element doesn't have missing javadocs. In addition to truly "missing", check
* that comments aren't solely whitespace (generated by some IDEs), that they aren't a license
* header masquerading as a javadoc comment.
*/
private void checkComment(Element element) {
// sanity check that the element is really "included", because we do some recursion into types
if (!docEnv.isIncluded(element)) {
return;
}
// check that this element isn't on our ignore list. This is only used as a workaround for "split packages".
// ignoring a package isn't recursive (on purpose), we still check all the classes, etc. inside it.
// we just need to cope with the fact package-info.java isn't there because it is split across multiple jars.
// check that this element isn't on our ignore list. This is only used as a workaround for
// "split packages".
// ignoring a package isn't recursive (on purpose), we still check all the classes, etc. inside
// it.
// we just need to cope with the fact package-info.java isn't there because it is split across
// multiple jars.
if (ignored.contains(element.toString())) {
return;
}
@ -319,14 +323,17 @@ public class MissingDoclet extends StandardDoclet {
error(element, "javadocs are missing");
}
} else {
var normalized = tree.getFirstSentence().get(0).toString()
.replace('\u00A0', ' ')
.trim()
.toLowerCase(Locale.ROOT);
var normalized =
tree.getFirstSentence()
.get(0)
.toString()
.replace('\u00A0', ' ')
.trim()
.toLowerCase(Locale.ROOT);
if (normalized.isEmpty()) {
error(element, "blank javadoc comment");
} else if (normalized.startsWith("licensed to the apache software foundation") ||
normalized.startsWith("copyright 2004 the apache software foundation")) {
} else if (normalized.startsWith("licensed to the apache software foundation")
|| normalized.startsWith("copyright 2004 the apache software foundation")) {
error(element, "comment is really a license");
}
}
@ -336,13 +343,15 @@ public class MissingDoclet extends StandardDoclet {
}
private boolean hasInheritedJavadocs(Element element) {
boolean hasOverrides = element.getAnnotationMirrors().stream()
.anyMatch(ann -> ann.getAnnotationType().toString().equals(Override.class.getName()));
boolean hasOverrides =
element.getAnnotationMirrors().stream()
.anyMatch(ann -> ann.getAnnotationType().toString().equals(Override.class.getName()));
if (hasOverrides) {
// If an element has explicit @Overrides annotation, assume it does
// have inherited javadocs somewhere.
// reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but @Override declared, skipping.");
// reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but @Override declared,
// skipping.");
return true;
}
@ -359,7 +368,8 @@ public class MissingDoclet extends StandardDoclet {
// We could check supMethod for non-empty javadoc here. Don't know if this makes
// sense though as all methods will be verified in the end so it'd fail on the
// top of the hierarchy (if empty) anyway.
// reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but method overrides another, skipping.");
// reporter.print(Diagnostic.Kind.NOTE, element, "javadoc empty but method overrides
// another, skipping.");
return true;
}
}
@ -369,15 +379,14 @@ public class MissingDoclet extends StandardDoclet {
return false;
}
/* Find types from which methods in type may inherit javadoc, in the proper order.*/
private Stream<Element> superTypeForInheritDoc(Element type) {
TypeElement clazz = (TypeElement) type;
List<Element> interfaces = clazz.getInterfaces()
.stream()
.filter(tm -> tm.getKind() == TypeKind.DECLARED)
.map(tm -> ((DeclaredType) tm).asElement())
.collect(Collectors.toList());
List<Element> interfaces =
clazz.getInterfaces().stream()
.filter(tm -> tm.getKind() == TypeKind.DECLARED)
.map(tm -> ((DeclaredType) tm).asElement())
.collect(Collectors.toList());
Stream<Element> result = interfaces.stream();
result = Stream.concat(result, interfaces.stream().flatMap(this::superTypeForInheritDoc));
@ -394,12 +403,12 @@ public class MissingDoclet extends StandardDoclet {
/** Returns all {@code @param} parameters we see in the javadocs of the element */
private Set<String> getDocParameters(DocCommentTree tree) {
return Stream.ofNullable(tree)
.flatMap(t -> t.getBlockTags().stream())
.filter(ParamTree.class::isInstance)
.map(tag -> ((ParamTree)tag).getName().getName().toString())
.collect(Collectors.toSet());
.flatMap(t -> t.getBlockTags().stream())
.filter(ParamTree.class::isInstance)
.map(tag -> ((ParamTree) tag).getName().getName().toString())
.collect(Collectors.toSet());
}
/** Checks there is a corresponding "param" tag for each method parameter */
private void checkMethodParameters(ExecutableElement element, DocCommentTree tree) {
// record each @param that we see
@ -412,7 +421,7 @@ public class MissingDoclet extends StandardDoclet {
}
}
}
/** Checks there is a corresponding "param" tag for each record component */
private void checkRecordParameters(TypeElement element, DocCommentTree tree) {
// record each @param that we see
@ -425,7 +434,7 @@ public class MissingDoclet extends StandardDoclet {
}
}
}
/** logs a new error for the particular element */
private void error(Element element, String message) {
var fullMessage = new StringBuilder();


@ -20,13 +20,18 @@ import java.time.format.DateTimeFormatter
plugins {
id "base"
id "com.palantir.consistent-versions" version "2.11.0"
id "org.owasp.dependencycheck" version "7.2.0"
id 'de.thetaphi.forbiddenapis' version '3.7' apply false
id "de.undercouch.download" version "5.2.0" apply false
id "net.ltgt.errorprone" version "3.1.0" apply false
id 'com.diffplug.spotless' version "6.5.2" apply false
id 'org.barfuin.gradle.jacocolog' version "3.1.0" apply false
id "lucene.build-infra"
alias(deps.plugins.dependencychecks)
alias(deps.plugins.spotless) apply false
alias(deps.plugins.benmanes.versions)
alias(deps.plugins.forbiddenapis) apply false
alias(deps.plugins.versionCatalogUpdate) apply false
alias(deps.plugins.randomizedtesting) apply false
alias(deps.plugins.owasp.dependencycheck)
alias(deps.plugins.undercouch.download) apply false
alias(deps.plugins.errorprone) apply false
alias(deps.plugins.jacocolog) apply false
}
apply from: file('gradle/globals.gradle')
@ -73,7 +78,7 @@ ext {
}
// Minimum Java version required to compile and run Lucene.
minJavaVersion = JavaVersion.VERSION_21
minJavaVersion = JavaVersion.toVersion(deps.versions.minJava.get())
// snapshot build marker used in scripts.
snapshotBuild = version.contains("SNAPSHOT")
@ -98,17 +103,15 @@ configurations {
dependencies {
// Use a newer groovy that doesn't have illegal reflective accesses.
groovy "org.codehaus.groovy:groovy-all:3.0.21"
groovy deps.groovy
}
apply from: file('buildSrc/scriptDepVersions.gradle')
// Include smaller chunks configuring dedicated build areas.
// Some of these intersect or add additional functionality.
// The order of inclusion of these files shouldn't matter (but may
// if the build file is incorrectly written and evaluates something
// eagerly).
apply from: file('gradle/conventions.gradle')
apply from: file('gradle/generation/local-settings.gradle')
// Make sure the build environment is consistent.
@ -140,15 +143,25 @@ apply from: file('gradle/validation/precommit.gradle')
apply from: file('gradle/validation/forbidden-apis.gradle')
apply from: file('gradle/validation/jar-checks.gradle')
apply from: file('gradle/validation/git-status.gradle')
apply from: file('gradle/validation/versions-props-sorted.gradle')
apply from: file('gradle/validation/validate-source-patterns.gradle')
apply from: file('gradle/validation/rat-sources.gradle')
apply from: file('gradle/validation/owasp-dependency-check.gradle')
apply from: file('gradle/validation/ecj-lint.gradle')
apply from: file('gradle/validation/gradlew-scripts-tweaked.gradle')
apply from: file('gradle/validation/dependencies.gradle')
apply from: file('gradle/validation/spotless.gradle')
// Wire up included builds to some validation tasks.
rootProject.tasks.named("tidy").configure {
dependsOn gradle.includedBuilds*.task(":tidy")
}
rootProject.tasks.named("clean").configure {
dependsOn gradle.includedBuilds*.task(":clean")
}
rootProject.tasks.named("check").configure {
dependsOn gradle.includedBuilds*.task(":forbiddenApis")
}
// Source or data regeneration tasks
apply from: file('gradle/generation/regenerate.gradle')
apply from: file('gradle/generation/jflex.gradle')


@ -1,279 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.gradle.api.internal.tasks.testing.logging.FullExceptionFormatter;
import org.gradle.api.internal.tasks.testing.logging.TestExceptionFormatter;
import org.gradle.api.logging.Logger;
import org.gradle.api.logging.Logging;
import org.gradle.api.tasks.testing.TestDescriptor;
import org.gradle.api.tasks.testing.TestListener;
import org.gradle.api.tasks.testing.TestOutputEvent;
import org.gradle.api.tasks.testing.TestOutputListener;
import org.gradle.api.tasks.testing.TestResult;
import org.gradle.api.tasks.testing.logging.TestLogging;
/**
* An error reporting listener that queues test output streams and displays them
* on failure.
* <p>
* Heavily inspired by Elasticsearch's ErrorReportingTestListener (ASL 2.0 licensed).
*/
public class ErrorReportingTestListener implements TestOutputListener, TestListener {
private static final Logger LOGGER = Logging.getLogger(ErrorReportingTestListener.class);
private final TestExceptionFormatter formatter;
private final Map<TestKey, OutputHandler> outputHandlers = new ConcurrentHashMap<>();
private final Path spillDir;
private final Path outputsDir;
private final boolean verboseMode;
public ErrorReportingTestListener(TestLogging testLogging, Path spillDir, Path outputsDir, boolean verboseMode) {
this.formatter = new FullExceptionFormatter(testLogging);
this.spillDir = spillDir;
this.outputsDir = outputsDir;
this.verboseMode = verboseMode;
}
@Override
public void onOutput(TestDescriptor testDescriptor, TestOutputEvent outputEvent) {
handlerFor(testDescriptor).write(outputEvent);
}
@Override
public void beforeSuite(TestDescriptor suite) {
// noop.
}
@Override
public void beforeTest(TestDescriptor testDescriptor) {
// Noop.
}
@Override
public void afterSuite(final TestDescriptor suite, TestResult result) {
if (suite.getParent() == null || suite.getName().startsWith("Gradle")) {
return;
}
TestKey key = TestKey.of(suite);
try {
OutputHandler outputHandler = outputHandlers.get(key);
if (outputHandler != null) {
long length = outputHandler.length();
if (length > 1024 * 1024 * 10) {
LOGGER.warn(String.format(Locale.ROOT, "WARNING: Test %s wrote %,d bytes of output.",
suite.getName(),
length));
}
}
boolean echoOutput = Objects.equals(result.getResultType(), TestResult.ResultType.FAILURE);
boolean dumpOutput = echoOutput;
// If the test suite failed, report output.
if (dumpOutput || echoOutput) {
Files.createDirectories(outputsDir);
Path outputLog = outputsDir.resolve(getOutputLogName(suite));
// Save the output of a failing test to disk.
try (Writer w = Files.newBufferedWriter(outputLog, StandardCharsets.UTF_8)) {
if (outputHandler != null) {
outputHandler.copyTo(w);
}
}
if (echoOutput && !verboseMode) {
synchronized (this) {
System.out.println();
System.out.println(suite.getClassName() + " > test suite's output saved to " + outputLog + ", copied below:");
try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
char[] buf = new char[1024];
int len;
while ((len = reader.read(buf)) >= 0) {
System.out.print(new String(buf, 0, len));
}
System.out.println();
}
}
}
}
} catch (IOException e) {
throw new UncheckedIOException(e);
} finally {
OutputHandler handler = outputHandlers.remove(key);
if (handler != null) {
try {
handler.close();
} catch (IOException e) {
LOGGER.error("Failed to close output handler for: " + key, e);
}
}
}
}
private static Pattern SANITIZE = Pattern.compile("[^a-zA-Z .\\-_0-9]+");
public static String getOutputLogName(TestDescriptor suite) {
return SANITIZE.matcher("OUTPUT-" + suite.getName() + ".txt").replaceAll("_");
}
@Override
public void afterTest(TestDescriptor testDescriptor, TestResult result) {
// Include test failure exception stacktrace(s) in test output log.
if (result.getResultType() == TestResult.ResultType.FAILURE) {
if (result.getExceptions().size() > 0) {
String message = formatter.format(testDescriptor, result.getExceptions());
handlerFor(testDescriptor).write(message);
}
}
}
private OutputHandler handlerFor(TestDescriptor descriptor) {
// Attach output of leaves (individual tests) to their parent.
if (!descriptor.isComposite()) {
descriptor = descriptor.getParent();
}
return outputHandlers.computeIfAbsent(TestKey.of(descriptor), (key) -> new OutputHandler());
}
public static class TestKey {
private final String key;
private TestKey(String key) {
this.key = key;
}
public static TestKey of(TestDescriptor d) {
StringBuilder key = new StringBuilder();
key.append(d.getClassName());
key.append("::");
key.append(d.getName());
key.append("::");
key.append(d.getParent() == null ? "-" : d.getParent().toString());
return new TestKey(key.toString());
}
@Override
public boolean equals(Object o) {
return o != null &&
o.getClass() == this.getClass() &&
Objects.equals(((TestKey) o).key, key);
}
@Override
public int hashCode() {
return key.hashCode();
}
@Override
public String toString() {
return key;
}
}
private class OutputHandler implements Closeable {
// Max single-line buffer before automatic wrap occurs.
private static final int MAX_LINE_WIDTH = 1024 * 4;
private final SpillWriter buffer;
// internal stream.
private final PrefixedWriter sint;
// stdout
private final PrefixedWriter sout;
// stderr
private final PrefixedWriter serr;
// last used stream (so that we can flush it properly and prefixes are not screwed up).
private PrefixedWriter last;
public OutputHandler() {
buffer = new SpillWriter(() -> {
try {
return Files.createTempFile(spillDir, "spill-", ".tmp");
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
Writer sink = buffer;
if (verboseMode) {
sink = new StdOutTeeWriter(buffer);
}
sint = new PrefixedWriter(" > ", sink, MAX_LINE_WIDTH);
sout = new PrefixedWriter(" 1> ", sink, MAX_LINE_WIDTH);
serr = new PrefixedWriter(" 2> ", sink, MAX_LINE_WIDTH);
last = sint;
}
public void write(TestOutputEvent event) {
write((event.getDestination() == TestOutputEvent.Destination.StdOut ? sout : serr), event.getMessage());
}
public void write(String message) {
write(sint, message);
}
public long length() throws IOException {
return buffer.length();
}
private void write(PrefixedWriter out, String message) {
try {
if (out != last) {
last.completeLine();
last = out;
}
out.write(message);
} catch (IOException e) {
throw new UncheckedIOException("Unable to write to test output.", e);
}
}
public void copyTo(Writer out) throws IOException {
flush();
buffer.copyTo(out);
}
public void flush() throws IOException {
sout.completeLine();
serr.completeLine();
buffer.flush();
}
@Override
public void close() throws IOException {
buffer.close();
}
}
}
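For context, a hedged sketch (mirroring how the test-defaults script later in this change wires
up the buildinfra-provided replacement) of how such a listener is attached to a Gradle Test task:

   // inside a Test task configuration block (Groovy DSL)
   def spillDir = getTemporaryDir().toPath()
   def listener = new ErrorReportingTestListener(test.testLogging, spillDir, testOutputsDir.toPath(), verboseMode)
   addTestOutputListener(listener)   // capture per-suite stdout/stderr
   addTestListener(listener)         // dump the saved output when a suite fails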

View File

@ -1,93 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.gradle;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Writer;
class StdOutTeeWriter extends Writer {
private final Writer delegate;
private final PrintStream out = System.out;
public StdOutTeeWriter(Writer delegate) {
this.delegate = delegate;
}
@Override
public void write(int c) throws IOException {
delegate.write(c);
out.write(c);
}
@Override
public void write(char[] cbuf) throws IOException {
delegate.write(cbuf);
out.print(cbuf);
}
@Override
public void write(String str) throws IOException {
delegate.write(str);
out.print(str);
}
@Override
public void write(String str, int off, int len) throws IOException {
delegate.write(str, off, len);
out.append(str, off, len);
}
@Override
public Writer append(CharSequence csq) throws IOException {
delegate.append(csq);
out.append(csq);
return this;
}
@Override
public Writer append(CharSequence csq, int start, int end) throws IOException {
delegate.append(csq, start, end);
out.append(csq, start, end);
return this;
}
@Override
public Writer append(char c) throws IOException {
delegate.append(c);
out.append(c);
return this;
}
@Override
public void write(char[] cbuf, int off, int len) throws IOException {
delegate.write(cbuf, off, len);
out.print(new String(cbuf, off, len));
}
@Override
public void flush() throws IOException {
delegate.flush();
out.flush();
}
@Override
public void close() throws IOException {
delegate.close();
// Don't close the actual output.
}
}

View File

@ -40,6 +40,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'cfs': 'index',
'nocfs': 'index',
'sorted': 'sorted',
'int8_hnsw': 'int8_hnsw',
'moreterms': 'moreterms',
'dvupdates': 'dvupdates',
'emptyIndex': 'empty'
@ -60,6 +61,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'cfs': 'testCreateCFS',
'nocfs': 'testCreateNoCFS',
'sorted': 'testCreateSortedIndex',
'int8_hnsw': 'testCreateInt8HNSWIndices',
'moreterms': 'testCreateMoreTermsIndex',
'dvupdates': 'testCreateIndexWithDocValuesUpdates',
'emptyIndex': 'testCreateEmptyIndex'
@ -204,6 +206,7 @@ def main():
current_version = scriptutil.Version.parse(scriptutil.find_current_version())
create_and_add_index(source, 'cfs', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'nocfs', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'int8_hnsw', c.version, current_version, c.temp_dir)
should_make_sorted = current_version.is_back_compat_with(c.version) \
and (c.version.major > 6 or (c.version.major == 6 and c.version.minor >= 2))
if should_make_sorted:

View File

@ -582,8 +582,8 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
'luke', 'memory', 'misc', 'monitor', 'queries', 'queryparser', 'replicator',
'sandbox', 'spatial-extras', 'spatial-test-fixtures', 'spatial3d', 'suggest', 'test-framework', 'licenses']
if isSrc:
expected_src_root_files = ['build.gradle', 'buildSrc', 'CONTRIBUTING.md', 'dev-docs', 'dev-tools', 'gradle', 'gradlew',
'gradlew.bat', 'help', 'lucene', 'settings.gradle', 'versions.lock', 'versions.props']
expected_src_root_files = ['build.gradle', 'build-tools', 'CONTRIBUTING.md', 'dev-docs', 'dev-tools', 'gradle', 'gradlew',
'gradlew.bat', 'help', 'lucene', 'settings.gradle', 'versions.lock', 'versions.toml']
expected_src_lucene_files = ['build.gradle', 'documentation', 'distribution', 'dev-docs']
is_in_list(in_root_folder, expected_src_root_files)
is_in_list(in_lucene_folder, expected_folders)

View File

@ -15,19 +15,19 @@
* limitations under the License.
*/
// Declare script dependency versions outside of palantir's
// version unification control. These are not our main dependencies
// but are reused in buildSrc and across applied scripts.
ext {
scriptDepVersions = [
"apache-rat": "0.14",
"asm": "9.7",
"commons-codec": "1.13",
"ecj": "3.36.0",
"flexmark": "0.61.24",
"javacc": "7.0.12",
"jflex": "1.8.2",
"jgit": "5.13.1.202206130422-r",
]
configure(allprojects) {
tasks.register("tidy").configure {
description "Applies formatters and cleanups to sources."
group "verification"
}
}
// Locate script-relative resource folder. This is context-sensitive so pass
// the right buildscript (top-level).
configure(rootProject) {
ext {
scriptResources = { buildscript ->
return file(buildscript.sourceFile.absolutePath.replaceAll('.gradle$', ""))
}
}
}

View File

@ -1,5 +1,3 @@
import org.apache.lucene.gradle.datasets.ExtractReuters
import java.nio.file.Files
/*
@ -25,7 +23,7 @@ buildscript {
}
dependencies {
classpath "com.github.luben:zstd-jni:1.5.5-11"
classpath deps.zstd
}
}
@ -40,7 +38,7 @@ def unzstd(java.nio.file.Path src, java.nio.file.Path dst) {
// TODO: not sure whether this should live in benchmarks, but for now let it be.
configure(project(":lucene:benchmark")) {
apply plugin: "java"
apply plugin: "de.undercouch.download"
apply plugin: deps.plugins.undercouch.download.get().pluginId
ext {
dataDir = file("work")
@ -164,7 +162,7 @@ configure(project(":lucene:benchmark")) {
logger.lifecycle("Extracting ${ext.name} into ${ext.dst}...")
ext.dst.deleteDir()
ExtractReuters.main(untarPath.toString(), ext.dst.toString())
buildinfra.extractReuters(untarPath.toString(), ext.dst.toString())
}
}

View File

@ -34,11 +34,11 @@ buildscript {
}
dependencies {
classpath "com.vladsch.flexmark:flexmark:${scriptDepVersions['flexmark']}"
classpath "com.vladsch.flexmark:flexmark-ext-abbreviation:${scriptDepVersions['flexmark']}"
classpath "com.vladsch.flexmark:flexmark-ext-attributes:${scriptDepVersions['flexmark']}"
classpath "com.vladsch.flexmark:flexmark-ext-autolink:${scriptDepVersions['flexmark']}"
classpath "com.vladsch.flexmark:flexmark-ext-tables:${scriptDepVersions['flexmark']}"
classpath deps.flexmark.core
classpath deps.flexmark.ext.abbreviation
classpath deps.flexmark.ext.attributes
classpath deps.flexmark.ext.autolink
classpath deps.flexmark.ext.tables
}
}

View File

@ -23,7 +23,7 @@ configure(project(":lucene:expressions")) {
}
dependencies {
antlr "org.antlr:antlr4"
antlr deps.antlr.core
}
task generateAntlrInternal() {

View File

@ -35,42 +35,44 @@ configure(project(":lucene:core")) {
}
dependencies {
apiextractor "org.ow2.asm:asm:${scriptDepVersions['asm']}"
apiextractor deps.asm.core
}
mrjarJavaVersions.each { jdkVersion ->
def task = tasks.create(name: "generateJdkApiJar${jdkVersion}", type: JavaExec) {
description "Regenerate the API-only JAR file with public Panama Foreign & Vector API from JDK ${jdkVersion}"
group "generation"
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(jdkVersion)
}
onlyIf {
try {
javaLauncher.get()
return true
} catch (Exception e) {
logger.warn('Launcher for Java {} is not available; skipping regeneration of Panama Foreign & Vector API JAR.', jdkVersion)
logger.warn('Error: {}', e.cause?.message)
logger.warn("Please make sure to point env 'JAVA{}_HOME' to exactly JDK version {} or enable Gradle toolchain auto-download.", jdkVersion, jdkVersion)
return false
plugins.withType(JavaPlugin) {
mrjarJavaVersions.each { jdkVersion ->
def task = tasks.create(name: "generateJdkApiJar${jdkVersion}", type: JavaExec) {
description "Regenerate the API-only JAR file with public Panama Foreign & Vector API from JDK ${jdkVersion}"
group "generation"
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(jdkVersion)
}
onlyIf {
try {
javaLauncher.get()
return true
} catch (Exception e) {
logger.warn('Launcher for Java {} is not available; skipping regeneration of Panama Foreign & Vector API JAR.', jdkVersion)
logger.warn('Error: {}', e.cause?.message)
logger.warn("Please make sure to point env 'JAVA{}_HOME' to exactly JDK version {} or enable Gradle toolchain auto-download.", jdkVersion, jdkVersion)
return false
}
}
classpath = configurations.apiextractor
mainClass = file("${resources}/ExtractJdkApis.java") as String
systemProperties = [
'user.timezone': 'UTC',
'file.encoding': 'UTF-8',
]
args = [
jdkVersion,
apijars.file("jdk${jdkVersion}.apijar"),
]
}
classpath = configurations.apiextractor
mainClass = file("${resources}/ExtractJdkApis.java") as String
systemProperties = [
'user.timezone': 'UTC',
'file.encoding': 'UTF-8',
]
args = [
jdkVersion,
apijars.file("jdk${jdkVersion}.apijar"),
]
regenerate.dependsOn task
}
regenerate.dependsOn task
}
}

View File

@ -33,18 +33,11 @@ def resources = scriptResources(buildscript)
// Configure different icu4j dependencies.
configure(rootProject) {
configurations {
// icu_xyz
icu_current
}
dependencies {
// icu_xyz "com.ibm.icu:icu4j:xyz"
icu_current 'com.ibm.icu:icu4j'
}
// Exclude explicit ICU configs from palantir's version unification.
versionRecommendations {
// excludeConfigurations "icu_xyz"
icu_current deps.icu4j
}
}

View File

@ -26,7 +26,7 @@ configure(rootProject) {
}
dependencies {
javacc "net.java.dev.javacc:javacc:${scriptDepVersions['javacc']}"
javacc deps.javacc
}
task javacc() {

View File

@ -25,7 +25,7 @@ configure(rootProject) {
}
dependencies {
jflex "de.jflex:jflex:${scriptDepVersions['jflex']}"
jflex deps.jflex
}
}

View File

@ -30,7 +30,7 @@ def recompileDictionary(project, dictionaryName, Closure closure) {
}
configure(project(":lucene:analysis:kuromoji")) {
apply plugin: "de.undercouch.download"
apply plugin: deps.plugins.undercouch.download.get().pluginId
plugins.withType(JavaPlugin) {
ext {

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
apply plugin: "de.undercouch.download"
apply plugin: deps.plugins.undercouch.download.get().pluginId
def resources = scriptResources(buildscript)

View File

@ -30,7 +30,7 @@ def recompileDictionary(project, dictionaryName, Closure closure) {
}
configure(project(":lucene:analysis:nori")) {
apply plugin: "de.undercouch.download"
apply plugin: deps.plugins.undercouch.download.get().pluginId
plugins.withType(JavaPlugin) {
ext {

View File

@ -1,7 +1,5 @@
import groovy.json.JsonOutput
import groovy.json.JsonSlurper
import org.apache.commons.codec.digest.DigestUtils
import java.util.function.Function
/*
@ -58,7 +56,7 @@ def computeChecksummedEntries = { Task sourceTask ->
allFiles.files.forEach { file ->
allEntries.put(
sourceTask.project.rootDir.relativePath(file),
file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--")
file.exists() ? buildinfra.sha1Digest().digestAsHex(file).trim() : "--")
}
return allEntries

View File

@ -19,7 +19,7 @@ import org.apache.tools.ant.taskdefs.condition.Os
def resources = scriptResources(buildscript)
apply plugin: "de.undercouch.download"
apply plugin: deps.plugins.undercouch.download.get().pluginId
configure(project(":lucene:analysis:common")) {
ext {

View File

@ -27,7 +27,7 @@ allprojects {
// Artifacts will have names after full gradle project path
// so :solr:core will have solr-core.jar, etc.
project.archivesBaseName = project.path.replaceAll("^:", "").replace(':', '-')
project.base.archivesName = project.path.replaceAll("^:", "").replace(':', '-')
ext {
// Utility method to support passing overrides via -P or -D.
@ -59,12 +59,6 @@ allprojects {
return propertyOrDefault(propName, envOrDefault(envName, defValue));
}
// Locate script-relative resource folder. This is context-sensitive so pass
// the right buildscript (top-level).
scriptResources = { buildscript ->
return file(buildscript.sourceFile.absolutePath.replaceAll('.gradle$', ""))
}
// Utility function similar to project.exec but not emitting
// any output unless an error code is returned from the executed command.
quietExec = { closure ->

View File

@ -20,7 +20,11 @@ allprojects {
tasks.withType(AbstractArchiveTask).configureEach { task ->
duplicatesStrategy = DuplicatesStrategy.FAIL
reproducibleFileOrder = true
dirMode = 0755
fileMode = 0644
dirPermissions {
it.unix(0755)
}
filePermissions {
it.unix(0644)
}
}
}

View File

@ -22,48 +22,49 @@ import org.gradle.plugins.ide.eclipse.model.ClasspathEntry
def resources = scriptResources(buildscript)
configure(rootProject) {
apply plugin: "eclipse"
plugins.withType(JavaPlugin) {
apply plugin: "eclipse"
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", rootProject.minJavaVersion)
def relativize = { other -> rootProject.rootDir.relativePath(other).toString() }
def eclipseJavaVersion = propertyOrDefault("eclipse.javaVersion", rootProject.minJavaVersion)
def relativize = { other -> rootProject.rootDir.relativePath(other).toString() }
eclipse {
project {
name = "Apache Lucene ${version}"
}
eclipse {
project {
name = "Apache Lucene ${version}"
}
classpath {
defaultOutputDir = file('build/eclipse')
classpath {
defaultOutputDir = file('build/eclipse')
file {
beforeMerged { classpath -> classpath.entries.removeAll { it.kind == "src" } }
file {
beforeMerged { classpath -> classpath.entries.removeAll { it.kind == "src" } }
whenMerged { classpath ->
def projects = allprojects.findAll { prj ->
return prj.plugins.hasPlugin(JavaPlugin)
}
Set<String> sourceSetNames = ['main', 'test', "main${eclipseJavaVersion}" as String, "test${eclipseJavaVersion}" as String, 'tools'] as Set
Set<String> sources = []
Set<File> jars = []
projects.each { prj ->
prj.sourceSets.each { sourceSet ->
if (sourceSetNames.contains(sourceSet.name)) {
sources += sourceSet.java.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
sources += sourceSet.resources.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
}
whenMerged { classpath ->
def projects = allprojects.findAll { prj ->
return prj.plugins.hasPlugin(JavaPlugin)
}
// This is hacky - we take the resolved compile classpath and just
// include JAR files from there. We should probably make it smarter
// by looking at real dependencies. But then: this Eclipse configuration
// doesn't really separate sources anyway so why bother.
jars += prj.configurations.compileClasspath.resolve()
jars += prj.configurations.testCompileClasspath.resolve()
}
Set<String> sourceSetNames = ['main', 'test', "main${eclipseJavaVersion}" as String, "test${eclipseJavaVersion}" as String, 'tools'] as Set
Set<String> sources = []
Set<File> jars = []
projects.each { prj ->
prj.sourceSets.each { sourceSet ->
if (sourceSetNames.contains(sourceSet.name)) {
sources += sourceSet.java.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
sources += sourceSet.resources.srcDirs.findAll { dir -> dir.exists() }.collect { dir -> relativize(dir) }
}
}
classpath.entries += sources.sort().collect { name ->
def sourceFolder = new SourceFolder(name, "build/eclipse/" + name)
// This is hacky - we take the resolved compile classpath and just
// include JAR files from there. We should probably make it smarter
// by looking at real dependencies. But then: this Eclipse configuration
// doesn't really separate sources anyway so why bother.
jars += prj.configurations.compileClasspath.resolve()
jars += prj.configurations.testCompileClasspath.resolve()
}
classpath.entries += sources.sort().collect { name ->
def sourceFolder = new SourceFolder(name, "build/eclipse/" + name)
sourceFolder.setExcludes(["module-info.java"])
return sourceFolder
}
@ -81,36 +82,38 @@ configure(rootProject) {
}
}
task luceneEclipseJdt(type: Sync) {
def errorMode = project.propertyOrDefault('eclipse.errors','warning');
def ecjLintFile = rootProject.file('gradle/validation/ecj-lint/ecj.javadocs.prefs');
description = 'Generates the Eclipse JDT settings file.'
inputs.file(ecjLintFile)
inputs.property('errorMode', errorMode)
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String)
from rootProject.file("${resources}/dot.settings")
into rootProject.file(".settings")
filter(ReplaceTokens, tokens: [
'ecj-lint-config': ecjLintFile.getText('UTF-8').replaceAll(/=error\b/, '=' + errorMode)
])
filteringCharset = 'UTF-8'
doLast {
logger.lifecycle('Eclipse config for Java {} written with ECJ errors configured as {}. Change by passing -Peclipse.errors=ignore/warning/error.', eclipseJavaVersion, errorMode)
logger.lifecycle('To edit classes of MR-JARs for a specific Java version, use e.g., -Peclipse.javaVersion=19')
task luceneEclipseJdt(type: Sync) {
def errorMode = project.propertyOrDefault('eclipse.errors' ,'warning');
def ecjLintFile = rootProject.file('gradle/validation/ecj-lint/ecj.javadocs.prefs');
description = 'Generates the Eclipse JDT settings file.'
inputs.file(ecjLintFile)
inputs.property('errorMode', errorMode)
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String)
from rootProject.file("${resources}/dot.settings")
into rootProject.file(".settings")
filter(ReplaceTokens, tokens: [
'ecj-lint-config': ecjLintFile.getText('UTF-8').replaceAll(/=error\b/, '=' + errorMode)
])
filteringCharset = 'UTF-8'
doLast {
logger.lifecycle('Eclipse config for Java {} written with ECJ errors configured as {}. Change by passing -Peclipse.errors=ignore/warning/error.', eclipseJavaVersion, errorMode)
logger.lifecycle('To edit classes of MR-JARs for a specific Java version, use e.g., -Peclipse.javaVersion=19')
}
}
eclipseJdt {
enabled = false
dependsOn 'luceneEclipse'
}
eclipseClasspath {
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String
)
}
}
eclipseJdt {
enabled = false
dependsOn 'luceneEclipseJdt'
}
eclipseClasspath {
inputs.property('eclipseJavaVersion', eclipseJavaVersion as String)
}
}
@ -131,6 +134,6 @@ public class LibEntry implements ClasspathEntry {
node.appendNode("classpathentry", Map.of(
"kind", "lib",
"path", path
));
))
}
}

View File

@ -49,7 +49,7 @@ configure(rootProject.ext.mavenProjects) { Project project ->
// This moves pom metadata configuration after all the scripts of all projects
// have been evaluated. This is required because we set artifact groups
// and archivesBaseName in other scripts and some of the properties below don't
// and archivesName in other scripts and some of the properties below don't
// accept lazy property providers (so everything must be in its final form).
gradle.projectsEvaluated {
publishing {
@ -57,22 +57,10 @@ configure(rootProject.ext.mavenProjects) { Project project ->
configure(publication) {
from components.java
groupId = project.group
artifactId = project.archivesBaseName
artifactId = project.base.archivesName.get()
artifact sourcesJar
artifact javadocJar
// LUCENE-9561:
// Remove dependencyManagement section created by a combination of
// Palantir and the publishing plugin.
//
// https://github.com/palantir/gradle-consistent-versions/issues/550
pom({
withXml {
def dm = asNode().dependencyManagement
if (dm) dm.replaceNode {}
}
})
}
}
}

View File

@ -104,3 +104,6 @@ org.gradle.java.installations.auto-download=true
# Set these to enable automatic JVM location discovery.
org.gradle.java.installations.fromEnv=JAVA21_HOME,JAVA22_HOME,RUNTIME_JAVA_HOME
#org.gradle.java.installations.paths=(custom paths)
# Opt out of gradle enterprise build scan plugin entirely.
# gradle.ge=false

View File

@ -18,7 +18,6 @@
import org.apache.tools.ant.taskdefs.condition.Os
import org.apache.tools.ant.types.Commandline
import org.gradle.api.tasks.testing.logging.*
import org.apache.lucene.gradle.ErrorReportingTestListener
def resources = scriptResources(buildscript)
def verboseModeHookInstalled = false
@ -133,7 +132,12 @@ allprojects {
jvmArgs '--add-modules', 'jdk.incubator.vector'
}
jvmArgs '--enable-native-access=' + (project.path == ':lucene:core' ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
jvmArgs '--enable-native-access=' + (project.path in [
':lucene:core',
':lucene:codecs',
":lucene:distribution.tests",
":lucene:test-framework"
] ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
def tempDir = layout.projectDirectory.dir(testsTmpDir.toString())
@ -196,7 +200,7 @@ allprojects {
}
def spillDir = getTemporaryDir().toPath()
def listener = new ErrorReportingTestListener(test.testLogging, spillDir, testOutputsDir.toPath(), verboseMode)
def listener = buildinfra.newErrorReportingTestListener(test.testLogging, spillDir, testOutputsDir.toPath(), verboseMode)
addTestOutputListener(listener)
addTestListener(listener)

View File

@ -15,8 +15,6 @@
* limitations under the License.
*/
import org.apache.lucene.gradle.ErrorReportingTestListener
// Display all failed tests at the end of the build.
def failedTests = []
@ -28,7 +26,7 @@ allprojects {
failedTests << [
"name": "${desc.className}.${desc.name}",
"project": "${test.project.path}",
"output": file("${task.testOutputsDir}/${ErrorReportingTestListener.getOutputLogName(desc.parent)}"),
"output": file("${task.testOutputsDir}/${buildinfra.getOutputLogName(desc.parent)}"),
"reproduce": "gradlew ${project.path}:test --tests \"${desc.className}.${desc.name}\" ${task.project.testOptionsForReproduceLine}"
]
}
@ -39,7 +37,7 @@ allprojects {
failedTests << [
"name": "${desc.name}",
"project": "${test.project.path}",
"output": file("${task.testOutputsDir}/${ErrorReportingTestListener.getOutputLogName(desc)}"),
"output": file("${task.testOutputsDir}/${buildinfra.getOutputLogName(desc)}"),
"reproduce": "gradlew ${project.path}:test --tests \"${desc.name}\" ${task.project.testOptionsForReproduceLine}"
]
}

View File

@ -30,7 +30,7 @@ buildscript {
}
dependencies {
classpath 'com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.7.2'
classpath deps.randomizedtesting.runner
}
}
@ -126,10 +126,10 @@ allprojects {
secManagerExclusions
}
dependencies {
secManagerExclusions ( "com.carrotsearch.randomizedtesting:randomizedtesting-runner", {
secManagerExclusions ( deps.randomizedtesting.runner, {
exclude group: "junit"
})
secManagerExclusions ( "junit:junit", {
secManagerExclusions ( deps.junit, {
exclude group: "org.hamcrest"
})
}

View File

@ -22,7 +22,7 @@ import org.gradle.util.GradleVersion
configure(rootProject) {
ext {
expectedGradleVersion = '8.8'
expectedGradleVersion = deps.versions.minGradle.get()
hasJavaFlightRecorder = ModuleLayer.boot().findModule('jdk.jfr').map(this.class.module::canRead).orElse(false)
}
@ -32,6 +32,7 @@ configure(rootProject) {
}
def currentJavaVersion = JavaVersion.current()
def minJavaVersion = JavaVersion.toVersion(deps.versions.minJava.get())
if (currentJavaVersion < minJavaVersion) {
throw new GradleException("At least Java ${minJavaVersion} is required, you are running Java ${currentJavaVersion} "
+ "[${System.getProperty('java.vm.name')} ${System.getProperty('java.vm.version')}]")

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Configure sanity check for conflicting dependencies across certain configurations
allprojects {
apply plugin: deps.plugins.dependencychecks.get().pluginId
def mainConfigurations = project.configurations.matching {
it.name in [
"compileClasspath",
"runtimeClasspath"
]
}
def testConfigurations = project.configurations.matching {
it.name in [
"annotationProcessor",
"testCompileClasspath",
"testRuntimeClasspath"
]
}
dependencyVersionChecks {
lockFileComment = "An inventory of resolved dependency versions. Do not edit this file directly."
configurationGroups {
main_dependencies {
include mainConfigurations
}
test_dependencies {
include testConfigurations
}
}
}
dependencies {
constraints {
mainConfigurations.configureEach { Configuration conf ->
// no resolutions for conflicting dependencies at the moment.
}
}
}
}
// Configure version catalog cleanups plugin.
configure(rootProject) {
apply plugin: deps.plugins.versionCatalogUpdate.get().pluginId
versionCatalogUpdate {
sortByKey = true
versionCatalogs {
deps {
catalogFile = file("versions.toml")
}
}
}
tasks.matching { it.name == "tidy" }.configureEach {
it.dependsOn(":versionCatalogFormatDeps")
}
tasks.matching {
it.path in [
":versionCatalogUpdateDeps"
]
}.configureEach {
it.interactive = true
}
tasks.register("updateDeps", {
dependsOn ":versionCatalogUpdateDeps"
})
}
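// A usage sketch (not part of the original script): with these checks in place, the lock file
// and version catalog are maintained with tasks referenced elsewhere in this build:
//
//   gradlew writeLocks    // regenerate versions.lock after adding or removing dependencies
//   gradlew updateDeps    // interactively update versions.toml (runs versionCatalogUpdateDeps)
//   gradlew checkLocks    // verify the lock file is consistent (wired into precommit)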

View File

@ -23,7 +23,7 @@ configure(rootProject) {
}
dependencies {
ecjDeps "org.eclipse.jdt:ecj:${scriptDepVersions['ecj']}"
ecjDeps deps.ecj
}
}

View File

@ -37,24 +37,25 @@ if (skipReason) {
allprojects { prj ->
plugins.withType(JavaPlugin) {
// LUCENE-9650: Errorprone on master/gradle does not work when running as plugin
// inside a forked Javac process. Javac running inside Gradle works, because we have
// additional module system opens in place.
// This is a hack to keep the dependency (so that palantir's version check doesn't complain)
// but don't include the plugin (which fails on JDK16+).
// LUCENE-9650: Errorprone does not work when running as a plugin inside a forked Javac process.
// Javac running inside Gradle works, because we have additional module system opens in place.
if (skipReason) {
tasks.withType(JavaCompile) { task -> task.dependsOn ":errorProneSkipped" }
// Error prone plugin adds error prone to test classpath. We need to add it here too (manually) so that
// versions.lock is consistent with or without error prone.
configurations {
errorprone
}
dependencies {
errorprone("com.google.errorprone:error_prone_core")
errorprone deps.errorprone
}
configurations.annotationProcessor.extendsFrom(configurations.errorprone)
} else {
prj.apply plugin: 'net.ltgt.errorprone'
prj.apply plugin: deps.plugins.errorprone.get().pluginId
dependencies {
errorprone("com.google.errorprone:error_prone_core")
errorprone deps.errorprone
}
tasks.withType(JavaCompile) { task ->

View File

@ -1,4 +1,4 @@
/*
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -57,7 +57,7 @@ allprojects { prj ->
}
// Configure defaults for sourceSets.main
tasks.matching { it.name ==~ /forbiddenApisMain\d*/ }.all {
tasks.matching { it.name ==~ /forbiddenApisMain\d*/ }.configureEach {
bundledSignatures += [
'jdk-unsafe',
'jdk-deprecated',
@ -76,12 +76,12 @@ allprojects { prj ->
// Configure defaults for the MR-JAR feature sourceSets by setting java version and ignore missing classes
// TODO:
// - Get hold of warning messages, see https://github.com/policeman-tools/forbidden-apis/issues/207
tasks.matching { it.name ==~ /forbiddenApisMain\d+/ }.all {
tasks.matching { it.name ==~ /forbiddenApisMain\d+/ }.configureEach {
failOnMissingClasses = false
}
// Configure defaults for sourceSets.test
tasks.matching { it.name in ["forbiddenApisTest", "forbiddenApisTestFixtures"] }.all {
tasks.matching { it.name in ["forbiddenApisTest", "forbiddenApisTestFixtures"] }.configureEach {
bundledSignatures += [
'jdk-unsafe',
'jdk-deprecated',
@ -105,7 +105,7 @@ allprojects { prj ->
}
// Configure defaults for sourceSets.tools (if present).
tasks.matching { it.name == "forbiddenApisTools" }.all {
tasks.matching { it.name == "forbiddenApisTools" }.configureEach {
bundledSignatures += [
'jdk-unsafe',
'jdk-deprecated',
@ -129,12 +129,24 @@ allprojects { prj ->
//
// This is the simplest workaround possible: just point at all the rule files and indicate
// them as inputs. This way if a rule is modified, checks will be reapplied.
configure(tasks.matching { it.name.startsWith("forbiddenApis") }) { task ->
tasks.matching { it.name.startsWith("forbiddenApis") }.configureEach { task ->
task.inputs.dir(file(resources))
}
// Disable sysout signatures for these projects.
if (prj.path in [
if (prj.name in ["missing-doclet", "build-infra"]) {
forbiddenApisMain.bundledSignatures -= [
'jdk-non-portable',
'jdk-system-out'
]
forbiddenApisMain.exclude("**/Checksum*")
forbiddenApisMain.suppressAnnotations += [
"**.*SuppressForbidden"
]
}
if (prj.name in ["missing-doclet"] || prj.path in [
":lucene:demo",
":lucene:benchmark",
":lucene:test-framework"

View File

@ -33,7 +33,7 @@ buildscript {
}
dependencies {
classpath "org.eclipse.jgit:org.eclipse.jgit:${scriptDepVersions['jgit']}"
classpath deps.jgit
}
}

View File

@ -20,8 +20,6 @@
// 2) notice file
// 3) checksum validation/ generation.
import org.apache.commons.codec.digest.DigestUtils
// This should be false only for debugging.
def failOnError = true
@ -136,7 +134,7 @@ subprojects {
jarName : file.toPath().getFileName().toString(),
path : file,
module : resolvedArtifact.moduleVersion,
checksum : provider { new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() },
checksum : provider { buildinfra.sha1Digest().digestAsHex(file).trim() },
// We keep track of the files referenced by this dependency (sha, license, notice, etc.)
// so that we can determine unused dangling files later on.
referencedFiles: []

View File

@ -23,8 +23,7 @@ configure(rootProject) {
description = "All precommit checks"
// Root-level validation tasks.
dependsOn ":verifyLocks"
dependsOn ":versionsPropsAreSorted"
dependsOn ":checkLocks"
dependsOn ":checkWorkingCopyClean"
}

View File

@ -18,22 +18,23 @@
import groovy.xml.NamespaceBuilder
// Configure rat dependencies for use in the custom task.
configure(rootProject) {
// Configure the rat validation task and all scanned directories.
allprojects {
configurations {
ratDeps
}
dependencies {
ratDeps "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}"
ratDeps deps.rat
}
}
// Configure the rat validation task and all scanned directories.
allprojects {
task("rat", type: RatTask) {
tasks.register("rat", RatTask).configure {
group = 'Verification'
description = 'Runs Apache Rat checks.'
dependsOn configurations.ratDeps
def defaultScanFileTree = project.fileTree(projectDir, {
// Don't check under the project's build folder.
exclude project.buildDir.name
@ -78,10 +79,10 @@ allprojects {
// Exclude github stuff (templates, workflows).
exclude ".github"
// The root project also includes patterns for the boostrap (buildSrc) and composite
// The root project also includes patterns for the included composite
// projects. Include their sources in the scan.
include "buildSrc/src/**"
include "dev-tools/missing-doclet/src/**"
include "build-tools/build-infra/src/**"
include "build-tools/missing-doclet/src/**"
// do not let RAT attempt to scan a python venv, it gets lost and confused...
exclude "dev-tools/aws-jmh/build/**"
@ -142,7 +143,7 @@ class RatTask extends DefaultTask {
def generateReport(File reportFile) {
// Set up ant rat task.
def ratClasspath = project.rootProject.configurations.ratDeps.asPath
def ratClasspath = project.configurations.ratDeps.asPath
ant.setLifecycleLogLevel(AntBuilder.AntMessagePriority.ERROR)
ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', classpath: ratClasspath)

View File

@ -20,9 +20,9 @@
* spotless and Google Java Format.
*/
def resources = scriptResources(buildscript)
// def resources = scriptResources(buildscript)
configure(project(":lucene").subprojects) { prj ->
configure(allprojects) { prj ->
plugins.withType(JavaPlugin) {
prj.apply plugin: 'com.diffplug.spotless'
@ -36,7 +36,7 @@ configure(project(":lucene").subprojects) { prj ->
lineEndings 'UNIX'
endWithNewline()
googleJavaFormat('1.18.1')
googleJavaFormat(deps.versions.googleJavaFormat.get())
// Apply to all Java sources
target "src/**/*.java"
@ -100,23 +100,19 @@ configure(project(":lucene").subprojects) { prj ->
// Emit a custom message about how to fix formatting errors.
tasks.matching { task -> task.name == "spotlessJavaCheck" }.configureEach {
runToFixMessage.set("\nIMPORTANT: run the top-level './gradlew tidy' to format code automatically (see help/formatting.txt for more info).")
it.runToFixMessage.set("\nIMPORTANT: run the top-level './gradlew tidy' to format code automatically (see help/formatting.txt for more info).")
}
// Add an alias to 'spotlessApply' simply called 'tidy' and wire up
// spotlessCheck to convention's check.
task tidy() {
description "Applies formatters and cleanups to sources."
group "verification"
// Hook up spotless to tidy and check tasks.
tasks.matching { it.name == "tidy" }.configureEach { v ->
v.dependsOn tasks.matching { it.name == "spotlessApply" }
}
tasks.matching { task -> task.name == "spotlessApply" }.configureEach { v ->
tidy.dependsOn v
v.dependsOn ":checkJdkInternalsExportedToGradle"
tasks.matching { it.name == "check" }.configureEach { v ->
v.dependsOn tasks.matching { it.name == "spotlessCheck" }
}
tasks.matching { task -> task.name == "spotlessCheck" }.configureEach { v ->
check.dependsOn v
tasks.matching { task -> task.name in ["spotlessApply", "spotlessCheck"] }.configureEach { v ->
v.dependsOn ":checkJdkInternalsExportedToGradle"
}
}

View File

@ -33,7 +33,7 @@ buildscript {
}
dependencies {
classpath "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}"
classpath deps.rat
}
}

View File

@ -1,34 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This ensures 'versions.props' file is sorted lexicographically.
configure(rootProject) {
task versionsPropsAreSorted() {
doFirst {
def versionsProps = file('versions.props')
def lines = versionsProps.readLines("UTF-8")
def sorted = lines.toSorted()
if (!Objects.equals(lines, sorted)) {
def sortedFile = file("${buildDir}/versions.props")
sortedFile.write(sorted.join("\n"), "UTF-8")
throw new GradleException("${versionsProps} file is not sorted lexicographically. I wrote a sorted file to ${sortedFile} - please review and commit.")
}
}
}
}

4
gradlew vendored
View File

@ -158,7 +158,7 @@ fi
GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar"
if [ ! -e "$GRADLE_WRAPPER_JAR" ]; then
"$JAVACMD" $JAVA_OPTS "$APP_HOME/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR"
"$JAVACMD" $JAVA_OPTS "$APP_HOME/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR"
WRAPPER_STATUS=$?
if [ "$WRAPPER_STATUS" -eq 1 ]; then
echo "ERROR: Something went wrong. Make sure you're using Java version of exactly 21."
@ -173,7 +173,7 @@ CLASSPATH=$GRADLE_WRAPPER_JAR
# START OF LUCENE CUSTOMIZATION
# Generate gradle.properties if they don't exist
if [ ! -e "$APP_HOME/gradle.properties" ]; then
"$JAVACMD" $JAVA_OPTS "$APP_HOME/buildSrc/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "$APP_HOME/gradle/template.gradle.properties" "$APP_HOME/gradle.properties"
"$JAVACMD" $JAVA_OPTS "$APP_HOME/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "$APP_HOME/gradle/template.gradle.properties" "$APP_HOME/gradle.properties"
GENERATOR_STATUS=$?
if [ "$GENERATOR_STATUS" -ne 0 ]; then
exit $GENERATOR_STATUS

4
gradlew.bat vendored
View File

@ -76,7 +76,7 @@ goto fail
@rem LUCENE-9266: verify and download the gradle wrapper jar if we don't have one.
set GRADLE_WRAPPER_JAR=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
IF NOT EXIST "%GRADLE_WRAPPER_JAR%" (
"%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "%GRADLE_WRAPPER_JAR%"
"%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "%GRADLE_WRAPPER_JAR%"
IF %ERRORLEVEL% EQU 1 goto failWithJvmMessage
IF %ERRORLEVEL% NEQ 0 goto fail
)
@ -89,7 +89,7 @@ set CLASSPATH=%GRADLE_WRAPPER_JAR%
IF NOT EXIST "%APP_HOME%\gradle.properties" (
@rem local expansion is needed to check ERRORLEVEL inside control blocks.
setlocal enableDelayedExpansion
"%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/buildSrc/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "%APP_HOME%\gradle\template.gradle.properties" "%APP_HOME%\gradle.properties"
"%JAVA_EXE%" %JAVA_OPTS% "%APP_HOME%/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/GradlePropertiesGenerator.java" "%APP_HOME%\gradle\template.gradle.properties" "%APP_HOME%\gradle.properties"
IF %ERRORLEVEL% NEQ 0 goto fail
endlocal
)

View File

@ -7,81 +7,79 @@ and each configuration can have dependencies attached to it.
There are some standard conventions so, for example, the Java plugin
adds standard configurations such as "api", "implementation",
"testImplementation" and others. These configurations can also inherit
from each other; more about this typic can be found here:
from each other; more about this topic can be found here:
https://docs.gradle.org/current/userguide/dependency_management_for_java_projects.html#dependency_management_for_java_projects
https://docs.gradle.org/current/userguide/java_library_plugin.html#sec:java_library_separation
https://docs.gradle.org/current/userguide/java_plugin.html#sec:java_plugin_and_dependency_management
Lucene typically uses three configurations and attach project
dependencies to them:
Lucene uses the following configurations and attaches project dependencies
to them:
api - makes a dependency available for main classes, tests and any
moduleApi - makes the dependency available to main classes, tests and any
other modules importing the project (exportable dependency),
implementation - makes a dependency available for main classes, tests
but will *not* export the dependency for other modules (so their
moduleImplementation - makes the dependency available to main classes and tests
but will *not* export the dependency to other modules (so their
compilation classpath won't contain it).
testImplementation - makes a dependency only available for test classes.
moduleTestImplementation - makes the dependency available for test classes only.
The "module" prefix is used to distinguish configurations which apply
to modular builds, compared to the regular classpath-configurations defined
by gradle's java module. Some Lucene modules may define regular classpath
entries to bypass the limitations of the module system (or gradle's).
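For illustration, a hedged sketch of how these configurations might be combined in a module's
build.gradle (the specific dependencies below are examples only, not a real module's declarations):

   dependencies {
     moduleApi project(':lucene:core')                           // exported to consumers
     moduleImplementation deps.icu4j                             // internal library dependency
     moduleTestImplementation project(':lucene:test-framework')  // tests only
   }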
Adding a library dependency
---------------------------
Lucene dependencies and their versions are managed globally using version
catalogs (in versions.toml) [https://docs.gradle.org/current/userguide/platforms.html].
Let's say we wish to add a dependency on library "foo.bar:baz" in
version 1.2 to :lucene:core. Let's assume this library is only
used internally by the project. The :lucene:core project is configured
by lucene/core/build.gradle and we would add (or modify) the dependency
by lucene/core/build.gradle, so we add (or modify) the dependency
block as follows:
dependencies {
implementation "foo.bar:baz"
moduleImplementation deps.baz
}
The "implementation" here is a named configuration; we don't need to declare
it because it is declared for us by the java-library plugin.
The "moduleImplementation" here is a named configuration explained in the
section above. The "deps.baz" refers to the version catalog named "deps",
in which the dependency "baz" should be declared. If this is the first
reference to this library, then we have to add it to "versions.toml" catalog:
the version goes under the "versions" and module coordinates
under the "libraries" section:
In "normal" gradle the version of the dependency would be present
directly inside the declaration but we use a plugin
(palantir-consistent-versions) to manage all dependency versions
from the top-level (so that conflicts can be resolved globally).
[versions]
baz = "1.2"
...
[libraries]
baz = { module = "foo.bar:baz", version.ref = "baz" }
If this is the first time "foo.bar:baz" is added to the project, we'd have
to add its version to "versions.props" file at the top level of the
checkout:
The version defined in the "versions" section is the preferred version of the library
we wish to use. Finally, run tidy to sort all entries in versions.toml:
foo.bar:baz=1.2
gradlew tidy
and then regenerate the "versions.lock" file using the following
command:
Gradle will try to consolidate different versions across different
configurations to make sure they're compatible and may complain if it encounters
conflicting versions in the dependency tree. We want all dependencies to be consistent,
so we use an additional build plugin to ensure no accidental version changes
occur. Whenever we add or remove dependencies, we have to follow up with lock file
regeneration:
gradlew --write-locks
gradlew writeLocks
git diff versions.*
IMPORTANT: The versions.lock file will contain the actual version
of the dependency picked based on other project dependencies and
their transitive dependencies. This selected version may be
different from what each of these actually requires (the highest
version number will be typically selected). To see which dependencies
require which version of the library use:
IMPORTANT: The versions.lock file will contain a list of actual library versions
and configurations they occurred in.
gradlew why --hash=...
where the hash code comes from versions.lock file. For example, at
the time of writing, jackson-databind has the following entry:
com.fasterxml.jackson.core:jackson-databind:2.10.0 (3 constraints: 931a7796)
and "gradlew why --hash=931a7796" prints:
com.fasterxml.jackson.core:jackson-databind:2.10.0
projects -> 2.10.0
net.thisptr:jackson-jq -> 2.7.0
org.carrot2:carrot2-mini -> 2.9.9.3
Once the dependency is added it always makes sense to see the
tree of all module dependencies and maybe exclude transitive
dependencies of foo.bar:baz that we won't need.
Once a new dependency is added it always makes sense to regenerate the lock file
and look at which dependencies have changed (and why).
Inspecting current dependencies
@ -98,12 +96,12 @@ in just the "publicly visible" and "classpath-visible" configurations.
The publicly visible project dependencies (classes shared by other
modules importing our module) can be displayed with:
gradlew -p lucene\analysis\icu dependencies --configuration api
gradlew -p lucene\analysis\icu dependencies --configuration moduleApi
And the "private" set of dependencies (real classpath) can be dumped
with:
gradlew -p lucene\analysis\icu dependencies --configuration runtimeClasspath
gradlew -p lucene\analysis\icu dependencies --configuration moduleRuntimePath
Excluding a transitive dependency
@ -115,7 +113,7 @@ crucial for the functioning of "foo.bar:baz". We can exclude it
by adding an exclusion block to the original declaration:
dependencies {
implementation("foo.bar:baz", {
implementation(deps.baz, {
exclude group: "foo.bar", module: "irrelevant"
})
}

View File

@ -2,7 +2,7 @@ Code formatting
===============
Starting with (LUCENE-9564) Java code is enforced to comply with
google-java-format conventions. In theory you shouldn't worry about
google-java-format conventions. In theory, you shouldn't worry about
what the convention actually looks like - write the code in any way
you like and then run:
@ -13,7 +13,7 @@ your code so that it complies with the convention and passes gradle
'check' task.
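A hedged sketch of the typical local workflow (the task names are the ones referenced in this
help and in the build scripts earlier in this change):

   gradlew tidy     # apply google-java-format to all Java sources
   gradlew check    # spotlessCheck runs as part of 'check' and fails on unformatted code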
IMPORTANT: There is *no* way to mark sections of the code as excluded
from formatting. This is by design and cannot be altered. In vast
from formatting. This is by design and cannot be altered. In the vast
majority of cases the formatter will do a great job of cleaning up the
code. Occasionally you may want to rewrite the code (introduce a local
variable or reshape code paths) so that it's easier to read after

View File

@ -54,7 +54,7 @@ Signing can be enabled by adding the "-Psign" option, for example:
gradlew assembleRelease mavenToApacheReleases -Psign
By default gradle uses a Java-based implementation of PGP for signing, which requieres
By default, gradle uses a Java-based implementation of PGP for signing, which requires
several "signing.*" properties via either ~/.gradle/gradle.properties or command-line options:
https://docs.gradle.org/current/userguide/signing_plugin.html#sec:signatory_credentials
@ -92,9 +92,9 @@ signing.gnupg.passphrase=... # Provide your passphrase to
If in doubt, consult gradle's signing plugin documentation:
https://docs.gradle.org/current/userguide/signing_plugin.html#sec:using_gpg_agent
"signing.gnupg.passphrase" is not recomended because there is no advantage to using an external GPG process if you use it. If you
are comfortable giving gradle your passphrase, then there is no reason to use an external GPG process via '-PuseGpg'. Just use the
"signing.*" options described previuosly to let gradle deal with your key directly.
"signing.gnupg.passphrase" is not recommended because there is no advantage to using an external GPG process if you use it.
If you are comfortable giving gradle your passphrase, then there is no reason to use an external GPG process via '-PuseGpg'.
Just use the "signing.*" options described previuosly to let gradle deal with your key directly.
Because of how Gradle's signing plugin invokes GPG, using an external GPG process *only* works if your GPG configuration uses a
GPG agent (required by gpg2) and if the "pinentry" for your GPG agent does not require access to the tty to prompt you for a password.
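As a hedged sketch (assuming the standard property names of Gradle's signing plugin, which this
help refers to as the "signing.*" options), the Java-based signing path can be fed from
~/.gradle/gradle.properties like so:

   signing.keyId=0ABCDEF1                                 # short id of your PGP key (placeholder)
   signing.password=...                                   # passphrase for that key
   signing.secretKeyRingFile=/home/me/.gnupg/secring.gpg  # exported secret keyring (placeholder path)

   gradlew assembleRelease mavenToApacheReleases -Psign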

View File

@ -129,6 +129,10 @@ New Features
* GITHUB#13233: Add RomanianNormalizationFilter (Trey Jones, Robert Muir)
* GITHUB#13449: Sparse index: optional skip list on top of doc values which is exposed via the
DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether
to create a "skip index" for doc values. (Ignacio Vera)
Improvements
---------------------
@ -204,6 +208,10 @@ Changes in Backwards Compatibility Policy
Other
---------------------
* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one Hashmap saving
memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes
were cleared. (Ignacio Vera)
* LUCENE-10376: Roll up the loop in VInt/VLong in DataInput. (Guo Feng)
* LUCENE-10253: The @BadApple annotation has been removed from the test
@ -239,7 +247,10 @@ Other
API Changes
---------------------
(No changes)
* GITHUB#13281: Mark COSINE VectorSimilarityFunction as deprecated. (Pulkit Gupta)
* GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov)
New Features
---------------------
@ -262,15 +273,19 @@ Optimizations
* GITHUB#13454: MultiTermQuery returns null ScoreSupplier in cases where
no query terms are present in the index segment (Mayya Sharipova)
* GITHUB#13431: Replace TreeMap and use compiled Patterns in Japanese UserDictionary. (Bruno Roustant)
* GITHUB#12941: Don't preserve auxiliary buffer contents in LSBRadixSorter if it grows. (Stefan Vodita)
Bug Fixes
---------------------
(No changes)
* GITHUB#13463: Address bug in MultiLeafKnnCollector causing #minCompetitiveSimilarity to stay artificially low in
some corner cases. (Greg Miller)
Other
---------------------
* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one Hashmap saving
memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes
were cleared. (Ignacio Vera)
--------------------
(No changes)
======================== Lucene 9.11.0 =======================

View File

@ -23,7 +23,7 @@ dependencies {
moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common')
moduleApi 'com.ibm.icu:icu4j'
moduleApi deps.icu4j
moduleTestImplementation project(':lucene:test-framework')
}

View File

@ -23,8 +23,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.morph.Dictionary;
import org.apache.lucene.analysis.util.CSVUtil;
import org.apache.lucene.util.IntsRefBuilder;
@ -37,6 +36,10 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
public static final String INTERNAL_SEPARATOR = "\u0000";
private static final Pattern LINE_COMMENT = Pattern.compile("^#.*$");
private static final Pattern WHITESPACE = Pattern.compile("\\s");
private static final Pattern SPACES = Pattern.compile(" +");
// phrase text -> phrase ID
private final TokenInfoFST fst;
@ -51,16 +54,16 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
public static UserDictionary open(Reader reader) throws IOException {
BufferedReader br = new BufferedReader(reader);
String line = null;
String line;
List<String[]> featureEntries = new ArrayList<>();
// text, segmentation, readings, POS
while ((line = br.readLine()) != null) {
// Remove comments
line = line.replaceAll("^#.*$", "");
line = LINE_COMMENT.matcher(line).replaceAll("");
// Skip empty lines or comment lines
if (line.trim().length() == 0) {
if (line.trim().isEmpty()) {
continue;
}
String[] values = CSVUtil.parse(line);
@ -99,10 +102,10 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
long ord = 0;
for (String[] values : featureEntries) {
String surface = values[0].replaceAll("\\s", "");
String concatenatedSegment = values[1].replaceAll("\\s", "");
String[] segmentation = values[1].replaceAll(" *", " ").split(" ");
String[] readings = values[2].replaceAll(" *", " ").split(" ");
String surface = WHITESPACE.matcher(values[0]).replaceAll("");
String concatenatedSegment = WHITESPACE.matcher(values[1]).replaceAll("");
String[] segmentation = SPACES.split(values[1]);
String[] readings = SPACES.split(values[2]);
String pos = values[3];
if (segmentation.length != readings.length) {
@ -141,7 +144,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
scratch.growNoCopy(token.length());
scratch.setLength(token.length());
for (int i = 0; i < token.length(); i++) {
scratch.setIntAt(i, (int) token.charAt(i));
scratch.setIntAt(i, token.charAt(i));
}
fstCompiler.add(scratch.get(), ord);
segmentations.add(wordIdAndLength);
@ -151,7 +154,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
new TokenInfoFST(
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), false);
this.morphAtts = new UserMorphData(data.toArray(new String[0]));
this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
this.segmentations = segmentations.toArray(new int[0][]);
}
@Override
@ -168,33 +171,53 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
* @return array of {wordId, position, length}
*/
public int[][] lookup(char[] chars, int off, int len) throws IOException {
// TODO: can we avoid this treemap/toIndexArray?
TreeMap<Integer, int[]> result = new TreeMap<>(); // index, [length, length...]
boolean found = false; // true if we found any results
List<Match> matches = null;
int numResults = 0;
final FST.BytesReader fstReader = fst.getBytesReader();
final int end = off + len;
FST.Arc<Long> arc = new FST.Arc<>();
int end = off + len;
for (int startOffset = off; startOffset < end; startOffset++) {
int[] wordIdAndLength = null;
arc = fst.getFirstArc(arc);
int output = 0;
int remaining = end - startOffset;
for (int i = 0; i < remaining; i++) {
for (int i = 0, remaining = end - startOffset; i < remaining; i++) {
int ch = chars[startOffset + i];
if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
break; // continue to next position
}
output += arc.output().intValue();
if (arc.isFinal()) {
final int finalOutput = output + arc.nextFinalOutput().intValue();
result.put(startOffset - off, segmentations[finalOutput]);
found = true;
int finalOutput = output + arc.nextFinalOutput().intValue();
wordIdAndLength = segmentations[finalOutput];
}
}
if (wordIdAndLength != null) {
if (matches == null) {
matches = new ArrayList<>();
}
matches.add(new Match(startOffset - off, wordIdAndLength));
numResults += wordIdAndLength.length - 1;
}
}
return found ? toIndexArray(result) : EMPTY_RESULT;
if (numResults == 0) {
return EMPTY_RESULT;
}
int[][] result = new int[numResults][];
int index = 0;
for (int i = 0; i < matches.size(); i++) {
Match match = matches.get(i);
int[] wordIdAndLength = match.wordIdAndLength;
int wordId = wordIdAndLength[0];
// convert length to index
int position = match.position;
for (int j = 1; j < wordIdAndLength.length; j++) { // first entry is wordId offset
// add a {wordId, index, length} token to the results
int[] token = {wordId + j - 1, position, wordIdAndLength[j]};
result[index++] = token;
position += wordIdAndLength[j];
}
}
return result;
}
public TokenInfoFST getFST() {
@ -203,28 +226,9 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
private static final int[][] EMPTY_RESULT = new int[0][];
/**
* Convert Map of index and wordIdAndLength to array of {wordId, index, length}
*
* @return array of {wordId, index, length}
*/
private int[][] toIndexArray(Map<Integer, int[]> input) {
ArrayList<int[]> result = new ArrayList<>();
for (Map.Entry<Integer, int[]> entry : input.entrySet()) {
int[] wordIdAndLength = entry.getValue();
int wordId = wordIdAndLength[0];
// convert length to index
int current = entry.getKey();
for (int j = 1; j < wordIdAndLength.length; j++) { // first entry is wordId offset
int[] token = {wordId + j - 1, current, wordIdAndLength[j]};
result.add(token);
current += wordIdAndLength[j];
}
}
return result.toArray(new int[result.size()][]);
}
public int[] lookupSegmentation(int phraseID) {
return segmentations[phraseID];
}
private record Match(int position, int[] wordIdAndLength) {}
}
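
For orientation, a minimal sketch (not part of this change) of how the rewritten lookup is typically driven. The dictionary line follows the standard Kuromoji user-dictionary CSV layout (surface, segmentation, readings, part of speech); the entry and the printed output are illustrative assumptions:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.ja.dict.UserDictionary;

class UserDictionaryLookupDemo {
  static void demo() throws IOException {
    String entry = "関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞";
    UserDictionary dict = UserDictionary.open(new StringReader(entry));
    char[] text = "関西国際空港".toCharArray();
    // Each row is {wordId, position, length}; the entry above yields three rows,
    // one per segment (関西, 国際, 空港), each of length 2.
    for (int[] token : dict.lookup(text, 0, text.length)) {
      System.out.println("wordId=" + token[0] + " pos=" + token[1] + " len=" + token[2]);
    }
  }
}

Compared with the old TreeMap-based code, the rewrite gathers the per-offset matches into the small Match record and sizes the result array up front, so the intermediate map and the extra list copy disappear.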

View File

@ -22,10 +22,10 @@ description = 'Analyzer for dictionary stemming, built-in Polish dictionary'
dependencies {
moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common')
moduleApi 'org.carrot2:morfologik-stemming'
moduleApi deps.morfologik.stemming
moduleImplementation 'org.carrot2:morfologik-polish'
moduleImplementation 'ua.net.nlp:morfologik-ukrainian-search'
moduleImplementation deps.morfologik.polish
moduleImplementation deps.morfologik.ukrainian
moduleTestImplementation project(':lucene:test-framework')
}

View File

@ -22,7 +22,7 @@ description = 'OpenNLP Library Integration'
dependencies {
moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common')
moduleApi 'org.apache.opennlp:opennlp-tools'
moduleApi deps.opennlp.tools
moduleTestImplementation project(':lucene:test-framework')
}

View File

@ -23,7 +23,7 @@ dependencies {
moduleApi project(':lucene:core')
moduleApi project(':lucene:analysis:common')
moduleApi 'commons-codec:commons-codec'
moduleApi deps.commons.codec
moduleTestImplementation project(':lucene:test-framework')
}

View File

@ -209,6 +209,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
dvGen,
attributes,
pointDataDimensionCount,

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
@ -1677,6 +1678,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
}
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) {
return null;
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(data);

View File

@ -186,6 +186,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
dvGen,
attributes,
pointDataDimensionCount,

View File

@ -27,4 +27,9 @@ public class TestLucene60FieldInfosFormat extends BaseFieldInfoFormatTestCase {
protected Codec getCodec() {
return new Lucene84RWCodec();
}
@Override
protected boolean supportDocValuesSkipIndex() {
return false;
}
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene80;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -59,18 +60,114 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.codecs.asserting.AssertingCodec;
import org.apache.lucene.tests.index.BaseCompressingDocValuesFormatTestCase;
import org.apache.lucene.tests.index.LegacyBaseDocValuesFormatTestCase;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.packed.PackedInts;
/** Tests Lucene80DocValuesFormat */
public abstract class BaseLucene80DocValuesFormatTestCase
extends BaseCompressingDocValuesFormatTestCase {
extends LegacyBaseDocValuesFormatTestCase {
private static long dirSize(Directory d) throws IOException {
long size = 0;
for (String file : d.listAll()) {
size += d.fileLength(file);
}
return size;
}
public void testUniqueValuesCompression() throws IOException {
try (final Directory dir = new ByteBuffersDirectory()) {
final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
final IndexWriter iwriter = new IndexWriter(dir, iwc);
final int uniqueValueCount = TestUtil.nextInt(random(), 1, 256);
final List<Long> values = new ArrayList<>();
final Document doc = new Document();
final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
doc.add(dvf);
for (int i = 0; i < 300; ++i) {
final long value;
if (values.size() < uniqueValueCount) {
value = random().nextLong();
values.add(value);
} else {
value = RandomPicks.randomFrom(random(), values);
}
dvf.setLongValue(value);
iwriter.addDocument(doc);
}
iwriter.forceMerge(1);
final long size1 = dirSize(dir);
for (int i = 0; i < 20; ++i) {
dvf.setLongValue(RandomPicks.randomFrom(random(), values));
iwriter.addDocument(doc);
}
iwriter.forceMerge(1);
final long size2 = dirSize(dir);
// make sure the new longs did not cost 8 bytes each
assertTrue(size2 < size1 + 8 * 20);
}
}
public void testDateCompression() throws IOException {
try (final Directory dir = new ByteBuffersDirectory()) {
final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
final IndexWriter iwriter = new IndexWriter(dir, iwc);
final long base = 13; // prime
final long day = 1000L * 60 * 60 * 24;
final Document doc = new Document();
final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
doc.add(dvf);
for (int i = 0; i < 300; ++i) {
dvf.setLongValue(base + random().nextInt(1000) * day);
iwriter.addDocument(doc);
}
iwriter.forceMerge(1);
final long size1 = dirSize(dir);
for (int i = 0; i < 50; ++i) {
dvf.setLongValue(base + random().nextInt(1000) * day);
iwriter.addDocument(doc);
}
iwriter.forceMerge(1);
final long size2 = dirSize(dir);
// make sure the new longs cost less than if they had only been packed
assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
}
}
public void testSingleBigValueCompression() throws IOException {
try (final Directory dir = new ByteBuffersDirectory()) {
final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
final IndexWriter iwriter = new IndexWriter(dir, iwc);
final Document doc = new Document();
final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
doc.add(dvf);
for (int i = 0; i < 20000; ++i) {
dvf.setLongValue(i & 1023);
iwriter.addDocument(doc);
}
iwriter.forceMerge(1);
final long size1 = dirSize(dir);
dvf.setLongValue(Long.MAX_VALUE);
iwriter.addDocument(doc);
iwriter.forceMerge(1);
final long size2 = dirSize(dir);
// make sure the new value did not grow the bpv for every other value
assertTrue(size2 < size1 + (20000 * (63 - 10)) / 8);
}
}
// TODO: these big methods can easily blow up some of the other ram-hungry codecs...
// for now just keep them here, as we want to test this for this format.

View File

@ -52,11 +52,6 @@ class Lucene99RWHnswScalarQuantizationVectorsFormat
null);
}
@Override
public int getMaxDimensions(String fieldName) {
return 1024;
}
static class Lucene99RWScalarQuantizedFormat extends Lucene99ScalarQuantizedVectorsFormat {
private static final FlatVectorsFormat rawVectorFormat =
new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer());

View File

@ -25,8 +25,8 @@ dependencies {
moduleImplementation project(':lucene:core')
moduleImplementation project(':lucene:expressions')
moduleImplementation "org.openjdk.jmh:jmh-core:1.37"
annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:1.37"
moduleImplementation deps.jmh.core
annotationProcessor deps.jmh.annprocess
}

View File

@ -31,17 +31,17 @@ dependencies {
moduleImplementation project(':lucene:spatial-extras')
moduleImplementation project(':lucene:queryparser')
moduleImplementation "org.apache.commons:commons-compress"
moduleImplementation "com.ibm.icu:icu4j"
moduleImplementation "org.locationtech.spatial4j:spatial4j"
moduleImplementation ("net.sourceforge.nekohtml:nekohtml", {
moduleImplementation deps.commons.compress
moduleImplementation deps.icu4j
moduleImplementation deps.spatial4j
moduleImplementation(deps.nekohtml, {
exclude module: "xml-apis"
// LUCENE-10337: Exclude xercesImpl from module path because it has split packages with the JDK (!)
exclude module: "xercesImpl"
})
// LUCENE-10337: Include xercesImpl on regular classpath where it won't cause conflicts.
implementation ("xerces:xercesImpl", {
implementation (deps.xerces, {
exclude module: "xml-apis"
})

View File

@ -16,13 +16,16 @@
*/
package org.apache.lucene.codecs.simpletext;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.DOCCOUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORIGIN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
@ -40,6 +43,7 @@ import java.util.function.IntFunction;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@ -59,12 +63,15 @@ import org.apache.lucene.util.StringHelper;
class SimpleTextDocValuesReader extends DocValuesProducer {
static class OneField {
int docCount;
long dataStartFilePointer;
String pattern;
String ordPattern;
int maxLength;
boolean fixedLength;
long origin;
long minValue;
long maxValue;
long numValues;
}
@ -99,17 +106,34 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE));
assert dvType != DocValuesType.NONE;
if (dvType == DocValuesType.NUMERIC) {
if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
readLine();
assert startsWith(MINVALUE)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.minValue = Long.parseLong(stripPrefix(MINVALUE));
readLine();
assert startsWith(MAXVALUE)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.maxValue = Long.parseLong(stripPrefix(MAXVALUE));
}
readLine();
assert startsWith(DOCCOUNT)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.docCount = Integer.parseInt(stripPrefix(DOCCOUNT));
if (dvType == DocValuesType.NUMERIC) {
readLine();
assert startsWith(ORIGIN)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.origin = Long.parseLong(stripPrefix(ORIGIN));
readLine();
assert startsWith(PATTERN);
field.pattern = stripPrefix(PATTERN);
field.dataStartFilePointer = data.getFilePointer();
data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc);
} else if (dvType == DocValuesType.BINARY) {
} else if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED_NUMERIC) {
readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
@ -225,7 +249,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
throw new CorruptIndexException("failed to parse BigDecimal value", in, pe);
}
SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
return BigInteger.valueOf(field.origin).add(bd.toBigIntegerExact()).longValue();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
@ -824,4 +848,82 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
}
}
}
@Override
public DocValuesSkipper getSkipper(FieldInfo fieldInfo) {
final boolean numeric =
fieldInfo.getDocValuesType() == DocValuesType.NUMERIC
|| fieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
final OneField field = fields.get(fieldInfo.name);
// SegmentCoreReaders already verifies this field is
// valid:
assert field != null;
return new DocValuesSkipper() {
int doc = -1;
@Override
public int numLevels() {
return 1;
}
@Override
public long minValue(int level) {
return minValue();
}
@Override
public long maxValue(int level) {
return maxValue();
}
@Override
public int docCount(int level) {
return docCount();
}
@Override
public long minValue() {
return numeric ? field.minValue : 0;
}
@Override
public long maxValue() {
return numeric ? field.maxValue : field.numValues - 1;
}
@Override
public int docCount() {
return field.docCount;
}
@Override
public int minDocID(int level) {
if (doc == -1) {
return -1;
} else if (doc >= maxDoc || field.docCount == 0) {
return DocIdSetIterator.NO_MORE_DOCS;
} else {
return 0;
}
}
@Override
public int maxDocID(int level) {
if (doc == -1) {
return -1;
} else if (doc >= maxDoc || field.docCount == 0) {
return DocIdSetIterator.NO_MORE_DOCS;
} else {
return maxDoc;
}
}
@Override
public void advance(int target) {
doc = target;
}
};
}
}
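
To make this SimpleText skipper concrete: for a segment with maxDoc = 100 and a NUMERIC field whose 42 values span [3, 17] (numbers invented for illustration), any advance(target) with target below maxDoc leaves the skipper reporting a single level whose block covers essentially the whole segment — minDocID(0) = 0, maxDocID(0) at the end of the segment, minValue = 3, maxValue = 17, docCount = 42 — and only a target at or beyond maxDoc yields NO_MORE_DOCS. The skipper is therefore exact about values but deliberately coarse about doc IDs, which is why the SimpleText test case further down overrides skipperHasAccurateDocBounds() to return false.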

View File

@ -46,8 +46,13 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
static final BytesRef END = new BytesRef("END");
static final BytesRef FIELD = new BytesRef("field ");
static final BytesRef TYPE = new BytesRef(" type ");
static final BytesRef DOCCOUNT = new BytesRef(" doccount ");
// used for numerics
static final BytesRef MINVALUE = new BytesRef(" minvalue ");
static final BytesRef ORIGIN = new BytesRef(" origin "); // for deltas
static final BytesRef MINVALUE = new BytesRef(" minvalue ");
static final BytesRef MAXVALUE = new BytesRef(" maxvalue ");
static final BytesRef PATTERN = new BytesRef(" pattern ");
// used for bytes
static final BytesRef LENGTH = new BytesRef("length ");
@ -97,13 +102,27 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
maxValue = Math.max(maxValue, v);
numValues++;
}
// write absolute min and max for skipper
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, MAXVALUE);
SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(numValues), scratch);
SimpleTextUtil.writeNewline(data);
if (numValues != numDocs) {
minValue = Math.min(minValue, 0);
maxValue = Math.max(maxValue, 0);
}
// write our minimum value to the .dat, all entries are deltas from that
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, ORIGIN);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
@ -161,6 +180,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
assert fieldSeen(field.name);
assert field.getDocValuesType() == DocValuesType.BINARY;
writeFieldEntry(field, DocValuesType.BINARY);
doAddBinaryField(field, valuesProducer);
}
@ -168,10 +188,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
throws IOException {
int maxLength = 0;
BinaryDocValues values = valuesProducer.getBinary(field);
int docCount = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
maxLength = Math.max(maxLength, values.binaryValue().toString().length());
}
writeFieldEntry(field, DocValuesType.BINARY);
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
@ -232,6 +257,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert field.getDocValuesType() == DocValuesType.SORTED;
writeFieldEntry(field, DocValuesType.SORTED);
int docCount = 0;
SortedDocValues values = valuesProducer.getSorted(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
}
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
int valueCount = 0;
int maxLength = -1;
TermsEnum terms = valuesProducer.getSorted(field).termsEnum();
@ -301,7 +335,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert valuesSeen == valueCount;
SortedDocValues values = valuesProducer.getSorted(field);
values = valuesProducer.getSorted(field);
for (int i = 0; i < numDocs; ++i) {
if (values.docID() < i) {
values.nextDoc();
@ -321,6 +355,28 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
throws IOException {
assert fieldSeen(field.name);
assert field.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
writeFieldEntry(field, DocValuesType.SORTED_NUMERIC);
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0; i < values.docValueCount(); ++i) {
long v = values.nextValue();
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
}
// write absolute min and max for skipper
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, MAXVALUE);
SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
SimpleTextUtil.writeNewline(data);
doAddBinaryField(
field,
new EmptyDocValuesProducer() {
@ -395,6 +451,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert field.getDocValuesType() == DocValuesType.SORTED_SET;
writeFieldEntry(field, DocValuesType.SORTED_SET);
int docCount = 0;
SortedSetDocValues values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
}
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
long valueCount = 0;
int maxLength = 0;
TermsEnum terms = valuesProducer.getSortedSet(field).termsEnum();
@ -430,7 +495,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
// length
int maxOrdListLength = 0;
StringBuilder sb2 = new StringBuilder();
SortedSetDocValues values = valuesProducer.getSortedSet(field);
values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
sb2.setLength(0);
for (int i = 0; i < values.docValueCount(); i++) {
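
To visualise what the writer changes above produce, the per-field header for a numeric field in the SimpleText .dat file now reads roughly as follows (field name and values are invented, and the exact spacing follows the BytesRef prefixes declared at the top of the class):

field price
 type NUMERIC
 minvalue 3
 maxvalue 17
 doccount 42
 origin 3
 pattern 00
 ... per-document values follow ...

The new minvalue/maxvalue/doccount lines feed the skipper built in SimpleTextDocValuesReader, while origin takes over the old role of the per-field delta base for the encoded values.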

View File

@ -60,6 +60,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
static final BytesRef NORMS = new BytesRef(" norms ");
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
static final BytesRef DOCVALUES_SKIP_INDEX = new BytesRef(" doc values skip index");
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
@ -122,6 +123,11 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
String dvType = readString(DOCVALUES.length, scratch);
final DocValuesType docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
boolean docValueSkipper =
Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
@ -184,6 +190,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
docValueSkipper,
dvGen,
Collections.unmodifiableMap(atts),
dimensionalCount,
@ -276,6 +283,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN);
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
SimpleTextUtil.writeNewline(out);

View File

@ -37,6 +37,12 @@ import org.apache.lucene.util.BytesRef;
public class TestSimpleTextDocValuesFormat extends BaseDocValuesFormatTestCase {
private final Codec codec = new SimpleTextCodec();
@Override
protected boolean skipperHasAccurateDocBounds() {
// This format always returns minDocID = 0 and maxDocID = maxDoc - 1
return false;
}
@Override
protected Codec getCodec() {
return codec;

View File

@ -111,6 +111,7 @@ public class TestBlockWriter extends LuceneTestCase {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
-1,
Collections.emptyMap(),
0,

View File

@ -198,6 +198,7 @@ public class TestSTBlockReader extends LuceneTestCase {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
-1,
Collections.emptyMap(),
0,

View File

@ -76,7 +76,8 @@ module org.apache.lucene.core {
org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat,
org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat,
org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
provides org.apache.lucene.index.SortFieldProvider with

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
@ -73,6 +74,13 @@ public abstract class DocValuesProducer implements Closeable {
*/
public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;
/**
* Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread. The return value is undefined if {@link
* FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
*/
public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;
/**
* Checks consistency of this producer
*
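
As a hedged sketch of how a caller might consume this new hook (the helper below is not part of the patch, and lower/upper are assumed query bounds), the usual pattern is to advance the skipper to a target document and compare the block's value range against the query range before touching per-document values:

import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.search.DocIdSetIterator;

final class SkipperSketch {
  // Returns true if the block at or after targetDoc may contain a value in [lower, upper].
  static boolean blockMayMatch(DocValuesSkipper skipper, int targetDoc, long lower, long upper)
      throws IOException {
    skipper.advance(targetDoc);
    if (skipper.minDocID(0) == DocIdSetIterator.NO_MORE_DOCS) {
      return false; // past the last block
    }
    return skipper.maxValue(0) >= lower && skipper.minValue(0) <= upper;
  }
}

A real consumer (see DocValuesRangeIterator later in this commit) additionally compares docCount against the block's doc-ID span to tell "every document matches" apart from "every document that has a value matches".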

View File

@ -18,6 +18,7 @@
package org.apache.lucene.codecs.hnsw;
import java.io.IOException;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -27,14 +28,23 @@ import org.apache.lucene.index.SegmentWriteState;
*
* @lucene.experimental
*/
public abstract class FlatVectorsFormat {
public abstract class FlatVectorsFormat extends KnnVectorsFormat {
/** Sole constructor */
protected FlatVectorsFormat() {}
protected FlatVectorsFormat(String name) {
super(name);
}
/** Returns a {@link FlatVectorsWriter} to write the vectors to the index. */
@Override
public abstract FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException;
/** Returns a {@link KnnVectorsReader} to read the vectors from the index. */
@Override
public abstract FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException;
@Override
public int getMaxDimensions(String fieldName) {
return 1024;
}
}

View File

@ -17,12 +17,11 @@
package org.apache.lucene.codecs.hnsw;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
/**
@ -39,7 +38,7 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer;
*
* @lucene.experimental
*/
public abstract class FlatVectorsReader implements Closeable, Accountable {
public abstract class FlatVectorsReader extends KnnVectorsReader implements Accountable {
/** Scorer for flat vectors */
protected final FlatVectorsScorer vectorScorer;
@ -56,6 +55,18 @@ public abstract class FlatVectorsReader implements Closeable, Accountable {
return vectorScorer;
}
@Override
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs)
throws IOException {
// don't scan stored field data. If we didn't index it, produce no search results
}
@Override
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs)
throws IOException {
// don't scan stored field data. If we didn't index it, produce no search results
}
/**
* Returns a {@link RandomVectorScorer} for the given field and target vector.
*
@ -77,28 +88,4 @@ public abstract class FlatVectorsReader implements Closeable, Accountable {
*/
public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] target)
throws IOException;
/**
* Checks consistency of this reader.
*
* <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value
* against large data files.
*
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
/**
* Returns the {@link FloatVectorValues} for the given {@code field}. The behavior is undefined if
* the given field doesn't have KNN vectors enabled on its {@link FieldInfo}. The return value is
* never {@code null}.
*/
public abstract FloatVectorValues getFloatVectorValues(String field) throws IOException;
/**
* Returns the {@link ByteVectorValues} for the given {@code field}. The behavior is undefined if
* the given field doesn't have KNN vectors enabled on its {@link FieldInfo}. The return value is
* never {@code null}.
*/
public abstract ByteVectorValues getByteVectorValues(String field) throws IOException;
}

View File

@ -17,14 +17,11 @@
package org.apache.lucene.codecs.hnsw;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
/**
@ -32,7 +29,7 @@ import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
*
* @lucene.experimental
*/
public abstract class FlatVectorsWriter implements Accountable, Closeable {
public abstract class FlatVectorsWriter extends KnnVectorsWriter {
/** Scorer for flat vectors */
protected final FlatVectorsScorer vectorsScorer;
@ -60,6 +57,11 @@ public abstract class FlatVectorsWriter implements Accountable, Closeable {
public abstract FlatFieldVectorsWriter<?> addField(
FieldInfo fieldInfo, KnnFieldVectorsWriter<?> indexWriter) throws IOException;
@Override
public FlatFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
return addField(fieldInfo, null);
}
/**
* Write the field for merging, providing a scorer over the newly merged flat vectors. This way
* any additional merging logic can be implemented by the user of this class.
@ -72,15 +74,4 @@ public abstract class FlatVectorsWriter implements Accountable, Closeable {
*/
public abstract CloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
FieldInfo fieldInfo, MergeState mergeState) throws IOException;
/** Write field for merging */
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
IOUtils.close(mergeOneFieldToIndex(fieldInfo, mergeState));
}
/** Called once at the end before close */
public abstract void finish() throws IOException;
/** Flush all buffered data on disk * */
public abstract void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException;
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene90;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_SIZE;
import java.io.IOException;
import java.util.Arrays;
@ -43,6 +44,7 @@ import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -129,16 +131,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.NUMERIC);
writeValues(
field,
DocValuesProducer producer =
new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return DocValues.singleton(valuesProducer.getNumeric(field));
}
},
false);
};
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, producer);
}
writeValues(field, producer, false);
}
private static class MinMaxTracker {
@ -183,6 +186,84 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}
}
private static class SkipAccumulator {
int minDocID;
int maxDocID;
int docCount;
long minValue;
long maxValue;
SkipAccumulator(int docID) {
minDocID = docID;
minValue = Long.MAX_VALUE;
maxValue = Long.MIN_VALUE;
docCount = 0;
}
void accumulate(long value) {
minValue = Math.min(minValue, value);
maxValue = Math.max(maxValue, value);
}
void nextDoc(int docID) {
maxDocID = docID;
++docCount;
}
void writeTo(DataOutput output) throws IOException {
output.writeInt(maxDocID);
output.writeInt(minDocID);
output.writeLong(maxValue);
output.writeLong(minValue);
output.writeInt(docCount);
}
}
private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
assert field.hasDocValuesSkipIndex();
// TODO: Add disk compression once we introduce levels
long start = data.getFilePointer();
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
long globalMaxValue = Long.MIN_VALUE;
long globalMinValue = Long.MAX_VALUE;
int globalDocCount = 0;
int maxDocId = -1;
SkipAccumulator accumulator = null;
int counter = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
if (counter == 0) {
accumulator = new SkipAccumulator(doc);
}
accumulator.nextDoc(doc);
for (int i = 0, end = values.docValueCount(); i < end; ++i) {
accumulator.accumulate(values.nextValue());
}
if (++counter == SKIP_INDEX_INTERVAL_SIZE) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
maxDocId = accumulator.maxDocID;
accumulator.writeTo(data);
counter = 0;
}
}
if (counter > 0) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
maxDocId = accumulator.maxDocID;
accumulator.writeTo(data);
}
meta.writeLong(start); // record the start in meta
meta.writeLong(data.getFilePointer() - start); // record the length
meta.writeLong(globalMaxValue);
meta.writeLong(globalMinValue);
meta.writeInt(globalDocCount);
meta.writeInt(maxDocId);
}
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
@ -489,13 +570,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.SORTED);
doAddSortedField(field, valuesProducer);
doAddSortedField(field, valuesProducer, false);
}
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
writeValues(
field,
private void doAddSortedField(
FieldInfo field, DocValuesProducer valuesProducer, boolean addTypeByte) throws IOException {
DocValuesProducer producer =
new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
@ -534,8 +614,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
};
return DocValues.singleton(sortedOrds);
}
},
true);
};
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, producer);
}
if (addTypeByte) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
}
writeValues(field, producer, true);
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
@ -702,6 +788,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void doAddSortedNumericField(
FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, valuesProducer);
}
if (ords) {
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
}
long[] stats = writeValues(field, valuesProducer, ords);
int numDocsWithField = Math.toIntExact(stats[0]);
long numValues = stats[1];
@ -753,7 +845,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
meta.writeByte(Lucene90DocValuesFormat.SORTED_SET);
if (isSingleValued(valuesProducer.getSortedSet(field))) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
doAddSortedField(
field,
new EmptyDocValuesProducer() {
@ -762,10 +854,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
return SortedSetSelector.wrap(
valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
}
});
},
true);
return;
}
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
doAddSortedNumericField(
field,

View File

@ -181,4 +181,7 @@ public final class Lucene90DocValuesFormat extends DocValuesFormat {
static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10;
static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;
static final int SKIP_INDEX_INTERVAL_SHIFT = 12;
static final int SKIP_INDEX_INTERVAL_SIZE = 1 << SKIP_INDEX_INTERVAL_SHIFT;
}
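
A quick back-of-the-envelope on the on-disk cost of this skip index, derived from SkipAccumulator.writeTo in the consumer and the constants above: each serialized block is two ints for maxDocID/minDocID, two longs for the max/min value and one int for docCount, i.e. 4 + 4 + 8 + 8 + 4 = 28 bytes, and one block is written per SKIP_INDEX_INTERVAL_SIZE = 1 << 12 = 4096 documents that have a value. That works out to roughly 28 / 4096 ≈ 0.007 bytes per document in the data file, plus a fixed ~40 bytes of per-field metadata (start offset, length, global max/min, docCount, maxDocId) in the metadata file.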

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
@ -39,6 +40,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@ -59,6 +61,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
private final Map<String, SortedEntry> sorted;
private final Map<String, SortedSetEntry> sortedSets;
private final Map<String, SortedNumericEntry> sortedNumerics;
private final Map<String, DocValuesSkipperEntry> skippers;
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@ -80,6 +83,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
sorted = new HashMap<>();
sortedSets = new HashMap<>();
sortedNumerics = new HashMap<>();
skippers = new HashMap<>();
merging = false;
// read in the entries from the metadata file.
@ -147,6 +151,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
Map<String, SortedEntry> sorted,
Map<String, SortedSetEntry> sortedSets,
Map<String, SortedNumericEntry> sortedNumerics,
Map<String, DocValuesSkipperEntry> skippers,
IndexInput data,
int maxDoc,
int version,
@ -156,6 +161,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
this.sorted = sorted;
this.sortedSets = sortedSets;
this.sortedNumerics = sortedNumerics;
this.skippers = skippers;
this.data = data.clone();
this.maxDoc = maxDoc;
this.version = version;
@ -165,7 +171,16 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public DocValuesProducer getMergeInstance() {
return new Lucene90DocValuesProducer(
numerics, binaries, sorted, sortedSets, sortedNumerics, data, maxDoc, version, true);
numerics,
binaries,
sorted,
sortedSets,
sortedNumerics,
skippers,
data,
maxDoc,
version,
true);
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
@ -175,6 +190,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
byte type = meta.readByte();
if (info.hasDocValuesSkipIndex()) {
skippers.put(info.name, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene90DocValuesFormat.BINARY) {
@ -197,6 +215,17 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
return entry;
}
private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IOException {
long offset = meta.readLong();
long length = meta.readLong();
long maxValue = meta.readLong();
long minValue = meta.readLong();
int docCount = meta.readInt();
int maxDocID = meta.readInt();
return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID);
}
private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException {
entry.docsWithFieldOffset = meta.readLong();
entry.docsWithFieldLength = meta.readLong();
@ -326,6 +355,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
data.close();
}
private record DocValuesSkipperEntry(
long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {}
private static class NumericEntry {
long[] table;
int blockShift;
@ -1749,4 +1781,88 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
return mul * values.get(index & mask) + delta;
}
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
final DocValuesSkipperEntry entry = skippers.get(field.name);
final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
// Prefetch the first page of data. Following pages are expected to get prefetched through
// read-ahead.
if (input.length() > 0) {
input.prefetch(0, 1);
}
return new DocValuesSkipper() {
int minDocID = -1;
int maxDocID = -1;
long minValue, maxValue;
int docCount;
@Override
public void advance(int target) throws IOException {
if (target > entry.maxDocId) {
minDocID = DocIdSetIterator.NO_MORE_DOCS;
maxDocID = DocIdSetIterator.NO_MORE_DOCS;
} else {
while (true) {
maxDocID = input.readInt();
if (maxDocID >= target) {
minDocID = input.readInt();
maxValue = input.readLong();
minValue = input.readLong();
docCount = input.readInt();
break;
} else {
input.skipBytes(24);
}
}
}
}
@Override
public int numLevels() {
return 1;
}
@Override
public int minDocID(int level) {
return minDocID;
}
@Override
public int maxDocID(int level) {
return maxDocID;
}
@Override
public long minValue(int level) {
return minValue;
}
@Override
public long maxValue(int level) {
return maxValue;
}
@Override
public int docCount(int level) {
return docCount;
}
@Override
public long minValue() {
return entry.minValue;
}
@Override
public long maxValue() {
return entry.maxValue;
}
@Override
public int docCount() {
return entry.docCount;
}
};
}
}
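
To trace how this reader walks those 28-byte blocks, assume a dense numeric field (every document has exactly one value; an assumption made only for this example) in a segment of 10,000 documents, so the writer produced blocks for docs 0-4095, 4096-8191 and 8192-9999. Calling getSkipper(...).advance(5000) reads the first block's maxDocID (4095), sees it is below the target and skips the remaining 24 bytes of that entry; it then reads the second block's maxDocID (8191), which is at or beyond the target, so it loads minDocID = 4096, the block's min/max values and docCount = 4096, and stops with the skipper positioned on the block [4096, 8191]. Note that advance() always reads forward, so callers are expected to advance only past the current block, exactly as DocValuesRangeIterator below does.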

View File

@ -163,8 +163,10 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
boolean isParentField =
format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
boolean hasDocValuesSkipIndex =
format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;
if ((bits & 0xE0) != 0) {
if ((bits & 0xC0) != 0) {
throw new CorruptIndexException(
"unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input);
}
@ -173,6 +175,13 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
"parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"",
input);
}
if (format < FORMAT_DOCVALUE_SKIPPER && (bits & DOCVALUES_SKIPPER) != 0) {
throw new CorruptIndexException(
"doc values skipper bit is set but shouldn't \""
+ Integer.toBinaryString(bits)
+ "\"",
input);
}
final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
@ -208,6 +217,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
hasDocValuesSkipIndex,
dvGen,
attributes,
pointDataDimensionCount,
@ -394,6 +404,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
output.writeByte(bits);
output.writeByte(indexOptionsByte(fi.getIndexOptions()));
@ -423,7 +434,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
static final int FORMAT_START = 0;
// this doesn't actually change the file format but uses up one more bit an existing bit pattern
static final int FORMAT_PARENT_FIELD = 1;
static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD;
static final int FORMAT_DOCVALUE_SKIPPER = 2;
static final int FORMAT_CURRENT = FORMAT_DOCVALUE_SKIPPER;
// Field flags
static final byte STORE_TERMVECTOR = 0x1;
@ -431,4 +443,5 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
static final byte STORE_PAYLOADS = 0x4;
static final byte SOFT_DELETES_FIELD = 0x8;
static final byte PARENT_FIELD_FIELD = 0x10;
static final byte DOCVALUES_SKIPPER = 0x20;
}
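
For reference, the flag byte now uses six bits — 0x01, 0x02, 0x04, 0x08, 0x10 for the existing flags and the new 0x20 for the doc-values skipper — leaving only 0x40 and 0x80 unused, which is why the unused-bits assertion tightened from (bits & 0xE0) to (bits & 0xC0). As a worked example, a field that stores payloads and carries a doc-values skip index is written with bits = 0x04 | 0x20 = 0x24, and the reader now treats a set 0x20 bit in a segment older than FORMAT_DOCVALUE_SKIPPER as corruption.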

View File

@ -67,6 +67,7 @@ import org.apache.lucene.store.IndexOutput;
*/
public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat {
static final String NAME = "Lucene99FlatVectorsFormat";
static final String META_CODEC_NAME = "Lucene99FlatVectorsFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene99FlatVectorsFormatData";
static final String META_EXTENSION = "vemf";
@ -80,6 +81,7 @@ public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat {
/** Constructs a format */
public Lucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) {
super(NAME);
this.vectorsScorer = vectorsScorer;
}

View File

@ -119,6 +119,11 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
return newField;
}
@Override
public FlatFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
return addField(fieldInfo, null);
}
@Override
public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
for (FieldWriter<?> field : fields) {

View File

@ -89,6 +89,7 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
*/
public Lucene99ScalarQuantizedVectorsFormat(
Float confidenceInterval, int bits, boolean compress) {
super(NAME);
if (confidenceInterval != null
&& confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL
&& (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL

View File

@ -28,6 +28,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
@ -346,6 +347,12 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
return producer == null ? null : producer.getSortedSet(field);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSkipper(field);
}
@Override
public void close() throws IOException {
IOUtils.close(formats.values());

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
/**
* Wrapper around a {@link TwoPhaseIterator} for a doc-values range query that speeds things up by
* taking advantage of a {@link DocValuesSkipper}.
*/
final class DocValuesRangeIterator extends TwoPhaseIterator {
enum Match {
/** None of the documents in the range match */
NO,
/** Document values need to be checked to verify matches */
MAYBE,
/** All documents in the range that have a value match */
IF_DOC_HAS_VALUE,
/** All docs in the range match */
YES;
}
private final Approximation approximation;
private final TwoPhaseIterator innerTwoPhase;
DocValuesRangeIterator(
TwoPhaseIterator twoPhase, DocValuesSkipper skipper, long lowerValue, long upperValue) {
super(new Approximation(twoPhase.approximation(), skipper, lowerValue, upperValue));
this.approximation = (Approximation) approximation();
this.innerTwoPhase = twoPhase;
}
static class Approximation extends DocIdSetIterator {
private final DocIdSetIterator innerApproximation;
private final DocValuesSkipper skipper;
private final long lowerValue;
private final long upperValue;
private int doc = -1;
// Track a decision for all doc IDs between the current doc ID and upTo inclusive.
Match match = Match.MAYBE;
int upTo = -1;
Approximation(
DocIdSetIterator innerApproximation,
DocValuesSkipper skipper,
long lowerValue,
long upperValue) {
this.innerApproximation = innerApproximation;
this.skipper = skipper;
this.lowerValue = lowerValue;
this.upperValue = upperValue;
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(docID() + 1);
}
@Override
public int advance(int target) throws IOException {
while (true) {
if (target > upTo) {
skipper.advance(target);
// If target doesn't have a value and is between two blocks, it is possible that advance()
// moved to a block that doesn't contain `target`.
target = Math.max(target, skipper.minDocID(0));
if (target == NO_MORE_DOCS) {
return doc = NO_MORE_DOCS;
}
upTo = skipper.maxDocID(0);
match = match(0);
// If we have a YES or NO decision, see if we still have the same decision on a higher
// level (= on a wider range of doc IDs)
int nextLevel = 1;
while (match != Match.MAYBE
&& nextLevel < skipper.numLevels()
&& match == match(nextLevel)) {
upTo = skipper.maxDocID(nextLevel);
nextLevel++;
}
}
switch (match) {
case YES:
return doc = target;
case MAYBE:
case IF_DOC_HAS_VALUE:
if (target > innerApproximation.docID()) {
target = innerApproximation.advance(target);
}
if (target <= upTo) {
return doc = target;
}
// Otherwise we are breaking the invariant that `doc` must always be <= upTo, so let
// the loop run one more iteration to advance the skipper.
break;
case NO:
if (upTo == DocIdSetIterator.NO_MORE_DOCS) {
return doc = NO_MORE_DOCS;
}
target = upTo + 1;
break;
default:
throw new AssertionError("Unknown enum constant: " + match);
}
}
}
@Override
public long cost() {
return innerApproximation.cost();
}
private Match match(int level) {
long minValue = skipper.minValue(level);
long maxValue = skipper.maxValue(level);
if (minValue > upperValue || maxValue < lowerValue) {
return Match.NO;
} else if (minValue >= lowerValue && maxValue <= upperValue) {
if (skipper.docCount(level) == skipper.maxDocID(level) - skipper.minDocID(level) + 1) {
return Match.YES;
} else {
return Match.IF_DOC_HAS_VALUE;
}
} else {
return Match.MAYBE;
}
}
}
@Override
public final boolean matches() throws IOException {
return switch (approximation.match) {
case YES -> true;
case IF_DOC_HAS_VALUE -> true;
case MAYBE -> innerTwoPhase.matches();
case NO -> throw new IllegalStateException("Unpositioned approximation");
};
}
@Override
public float matchCost() {
return innerTwoPhase.matchCost();
}
}
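
Concretely, for a query range [lower = 10, upper = 20] the match(level) decision above plays out as follows (block bounds invented for illustration): a block whose values span [25, 30] or [1, 5] is disjoint from the range and returns NO, so every document in it is skipped; a block spanning [12, 18] is fully contained, so it returns YES when its docCount equals the number of doc IDs in the block (every document matches) and IF_DOC_HAS_VALUE otherwise (every document that has a value matches); and a block spanning [5, 15] merely overlaps the range, so it returns MAYBE and each document falls back to the wrapped two-phase check.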

View File

@ -22,6 +22,7 @@ import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.VectorEncoding;
@ -40,6 +41,7 @@ public class FieldType implements IndexableFieldType {
private IndexOptions indexOptions = IndexOptions.NONE;
private boolean frozen;
private DocValuesType docValuesType = DocValuesType.NONE;
private boolean docValuesSkipIndex;
private int dimensionCount;
private int indexDimensionCount;
private int dimensionNumBytes;
@ -59,6 +61,7 @@ public class FieldType implements IndexableFieldType {
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValuesType = ref.docValuesType();
this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
this.dimensionCount = ref.pointDimensionCount();
this.indexDimensionCount = ref.pointIndexDimensionCount();
this.dimensionNumBytes = ref.pointNumBytes();
@ -504,6 +507,22 @@ public class FieldType implements IndexableFieldType {
docValuesType = type;
}
@Override
public boolean hasDocValuesSkipIndex() {
return docValuesSkipIndex;
}
/**
* Set whether to enable a skip index for doc values on this field. This is typically useful on
* fields that are part of the {@link IndexWriterConfig#setIndexSort index sort}, or that
* correlate with fields that are part of the index sort, so that values can be expected to be
* clustered in the doc ID space.
*/
public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
checkIfFrozen();
this.docValuesSkipIndex = docValuesSkipIndex;
}
@Override
public int hashCode() {
final int prime = 31;
@ -512,6 +531,7 @@ public class FieldType implements IndexableFieldType {
result = prime * result + indexDimensionCount;
result = prime * result + dimensionNumBytes;
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + Boolean.hashCode(docValuesSkipIndex);
result = prime * result + indexOptions.hashCode();
result = prime * result + (omitNorms ? 1231 : 1237);
result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);
@ -533,6 +553,7 @@ public class FieldType implements IndexableFieldType {
if (indexDimensionCount != other.indexDimensionCount) return false;
if (dimensionNumBytes != other.dimensionNumBytes) return false;
if (docValuesType != other.docValuesType) return false;
if (docValuesSkipIndex != other.docValuesSkipIndex) return false;
if (indexOptions != other.indexOptions) return false;
if (omitNorms != other.omitNorms) return false;
if (storeTermVectorOffsets != other.storeTermVectorOffsets) return false;
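
A short sketch of how the new option is meant to be used when deriving a custom FieldType; the pattern mirrors what the doc-values field classes later in this commit do, and the constant name is an invention:

import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;

final class SkipIndexedTypes {
  static final FieldType SKIP_INDEXED_NUMERIC;

  static {
    FieldType t = new FieldType(NumericDocValuesField.TYPE); // copy an existing doc-values type
    t.setDocValuesSkipIndex(true); // new in this change
    t.freeze(); // further calls to setDocValuesSkipIndex now throw
    SKIP_INDEXED_NUMERIC = t;
  }
}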

View File

@ -35,9 +35,27 @@ public class NumericDocValuesField extends Field {
/** Type for numeric DocValues. */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.NUMERIC);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param value 64-bit long value
* @throws IllegalArgumentException if the field name is null
*/
public static NumericDocValuesField indexedField(String name, long value) {
return new NumericDocValuesField(name, value, INDEXED_TYPE);
}
/**
@ -60,7 +78,11 @@ public class NumericDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public NumericDocValuesField(String name, Long value) {
super(name, TYPE);
this(name, value, TYPE);
}
private NumericDocValuesField(String name, Long value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
}
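A short usage sketch for the new indexedField factory; the field name and value are made up, and Document is the standard org.apache.lucene.document.Document:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;

class NumericSkipIndexUsageSketch {
  static Document buildDoc() {
    Document doc = new Document();
    // Stores the same numeric doc value as the plain constructor, but the field type
    // also asks for a skip index over the values.
    doc.add(NumericDocValuesField.indexedField("timestamp", 1_718_841_792L));
    return doc;
  }
}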

View File

@ -41,9 +41,27 @@ public class SortedDocValuesField extends Field {
/** Type for sorted bytes DocValues */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.SORTED);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedDocValuesField} with the specified {@link BytesRef} value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param bytes binary content
* @throws IllegalArgumentException if the field name is null
*/
public static SortedDocValuesField indexedField(String name, BytesRef bytes) {
return new SortedDocValuesField(name, bytes, INDEXED_TYPE);
}
/**
@ -54,7 +72,11 @@ public class SortedDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public SortedDocValuesField(String name, BytesRef bytes) {
super(name, TYPE);
this(name, bytes, TYPE);
}
private SortedDocValuesField(String name, BytesRef bytes, FieldType fieldType) {
super(name, fieldType);
fieldsData = bytes;
}
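The same pattern for the binary variant; the field name and value here are illustrative only:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.util.BytesRef;

class SortedSkipIndexUsageSketch {
  static Document buildDoc() {
    Document doc = new Document();
    // Single-valued binary doc value plus a skip index for the "category" field.
    doc.add(SortedDocValuesField.indexedField("category", new BytesRef("books")));
    return doc;
  }
}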

View File

@ -43,9 +43,27 @@ public class SortedNumericDocValuesField extends Field {
/** Type for sorted numeric DocValues. */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedNumericDocValuesField} with the specified 64-bit long value that
* also creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param value 64-bit long value
* @throws IllegalArgumentException if the field name is null
*/
public static SortedNumericDocValuesField indexedField(String name, long value) {
return new SortedNumericDocValuesField(name, value, INDEXED_TYPE);
}
/**
@ -56,8 +74,12 @@ public class SortedNumericDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public SortedNumericDocValuesField(String name, long value) {
super(name, TYPE);
fieldsData = Long.valueOf(value);
this(name, Long.valueOf(value), TYPE);
}
private SortedNumericDocValuesField(String name, Long value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
}
/**
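And a sketch for the multi-valued numeric variant; the "price" field and its values are made up:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;

class SortedNumericSkipIndexUsageSketch {
  static Document buildDoc() {
    Document doc = new Document();
    // Two values for the same field; both are recorded under the skip-index-enabled type.
    doc.add(SortedNumericDocValuesField.indexedField("price", 42L));
    doc.add(SortedNumericDocValuesField.indexedField("price", 99L));
    return doc;
  }
}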

View File

@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
@ -109,9 +110,17 @@ final class SortedNumericDocValuesRangeQuery extends Query {
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
return null;
}
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
if (skipper != null) {
if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
return null;
}
}
SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
final NumericDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
TwoPhaseIterator iterator;
if (singleton != null) {
iterator =
new TwoPhaseIterator(singleton) {
@ -149,6 +158,9 @@ final class SortedNumericDocValuesRangeQuery extends Query {
}
};
}
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue);
}
final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
return new DefaultScorerSupplier(scorer);
}
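The skipper check above lets the query return no scorer for a segment whose global min/max lie outside the requested range, and the DocValuesRangeIterator wrapper uses the same per-block bounds to skip runs of doc IDs. Below is a sketch of a query that would exercise this path, assuming the "price" field was indexed with the skip-index-enabled factory shown earlier; newSlowRangeQuery is the pre-existing doc-values range query factory, not something added in this diff:

import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.search.Query;

class DocValuesRangeQuerySketch {
  static Query priceBetween(long lower, long upper) {
    // Resolves to the doc-values range query patched above; with a skip index present,
    // blocks whose [min, max] do not intersect [lower, upper] can be skipped.
    return SortedNumericDocValuesField.newSlowRangeQuery("price", lower, upper);
  }
}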

Some files were not shown because too many files have changed in this diff.