Merge branch 'main' into java_21

Commit bfa64b0725 by Uwe Schindler, 2024-02-27 13:42:36 +01:00 (committed by GitHub)
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
12 changed files with 165 additions and 50 deletions

View File

@@ -39,6 +39,8 @@ import zipfile
 from collections import namedtuple
 import scriptutil
+BASE_JAVA_VERSION = "21"
 # This tool expects to find /lucene off the base URL. You
 # must have a working gpg, tar, unzip in your path. This has been
 # tested on Linux and on Cygwin under Windows 7.
@@ -144,10 +146,10 @@ def checkJARMetaData(desc, jarFile, gitRevision, version):
 'Implementation-Vendor: The Apache Software Foundation',
 'Specification-Title: Lucene Search Engine:',
 'Implementation-Title: org.apache.lucene',
-'X-Compile-Source-JDK: 21',
-'X-Compile-Target-JDK: 21',
+'X-Compile-Source-JDK: %s' % BASE_JAVA_VERSION,
+'X-Compile-Target-JDK: %s' % BASE_JAVA_VERSION,
 'Specification-Version: %s' % version,
-'X-Build-JDK: 21.',
+'X-Build-JDK: %s.' % BASE_JAVA_VERSION,
 'Extension-Name: org.apache.lucene'):
 if type(verify) is not tuple:
   verify = (verify,)
@@ -611,20 +613,21 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
 validateCmd = './gradlew --no-daemon check -p lucene/documentation'
 print(' run "%s"' % validateCmd)
-java.run_java21(validateCmd, '%s/validate.log' % unpackPath)
+java.run_java(validateCmd, '%s/validate.log' % unpackPath)
-print(" run tests w/ Java 21 and testArgs='%s'..." % testArgs)
-java.run_java21('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-print(" compile jars w/ Java 21")
-java.run_java21('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-testDemo(java.run_java21, isSrc, version, '21')
+print(" run tests w/ Java %s and testArgs='%s'..." % (BASE_JAVA_VERSION, testArgs))
+java.run_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+print(" compile jars w/ Java %s" % BASE_JAVA_VERSION)
+java.run_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)
-if java.run_java19:
-  print(" run tests w/ Java 19 and testArgs='%s'..." % testArgs)
-  java.run_java19('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-  print(" compile jars w/ Java 19")
-  java.run_java19('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-  testDemo(java.run_java19, isSrc, version, '19')
+if java.run_alt_javas:
+  for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
+    print(" run tests w/ Java %s and testArgs='%s'..." % (alt_java_version, testArgs))
+    run_alt_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+    print(" compile jars w/ Java %s" % alt_java_version)
+    run_alt_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+    testDemo(run_alt_java, isSrc, version, alt_java_version)
 print(' confirm all releases have coverage in TestBackwardsCompatibility')
 confirmAllReleasesAreTestedForBackCompat(version, unpackPath)
@@ -633,9 +636,10 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
 checkAllJARs(os.getcwd(), gitRevision, version)
-testDemo(java.run_java21, isSrc, version, '21')
-if java.run_java19:
-  testDemo(java.run_java19, isSrc, version, '19')
+testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)
+if java.run_alt_javas:
+  for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
+    testDemo(run_alt_java, isSrc, version, alt_java_version)
 testChangesText('.', version)
@@ -911,33 +915,49 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
 sys.stdout.write('.')

-def make_java_config(parser, java19_home):
-  def _make_runner(java_home, version):
-    print('Java %s JAVA_HOME=%s' % (version, java_home))
+def make_java_config(parser, alt_java_homes):
+  def _make_runner(java_home, is_base_version=False):
     if cygwin:
       java_home = subprocess.check_output('cygpath -u "%s"' % java_home, shell=True).decode('utf-8').strip()
     cmd_prefix = 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % \
         (java_home, java_home, java_home)
     s = subprocess.check_output('%s; java -version' % cmd_prefix,
                                 shell=True, stderr=subprocess.STDOUT).decode('utf-8')
-    if s.find(' version "%s' % version) == -1:
-      parser.error('got wrong version for java %s:\n%s' % (version, s))
+    actual_version = re.search(r'version "([1-9][0-9]*)', s).group(1)
+    print('Java %s JAVA_HOME=%s' % (actual_version, java_home))
+    # validate Java version
+    if is_base_version:
+      if BASE_JAVA_VERSION != actual_version:
+        parser.error('got wrong base version for java %s:\n%s' % (BASE_JAVA_VERSION, s))
+    else:
+      if int(actual_version) < int(BASE_JAVA_VERSION):
+        parser.error('got wrong version for java %s, less than base version %s:\n%s' % (actual_version, BASE_JAVA_VERSION, s))
     def run_java(cmd, logfile):
       run('%s; %s' % (cmd_prefix, cmd), logfile)
-    return run_java
-  java21_home = os.environ.get('JAVA_HOME')
-  if java21_home is None:
-    parser.error('JAVA_HOME must be set')
-  run_java21 = _make_runner(java21_home, '21')
-  run_java19 = None
-  if java19_home is not None:
-    run_java19 = _make_runner(java19_home, '19')
-  jc = namedtuple('JavaConfig', 'run_java21 java21_home run_java19 java19_home')
-  return jc(run_java21, java21_home, run_java19, java19_home)
+    return run_java, actual_version
+  java_home = os.environ.get('JAVA_HOME')
+  if java_home is None:
+    parser.error('JAVA_HOME must be set')
+  run_java, _ = _make_runner(java_home, True)
+  run_alt_javas = []
+  alt_java_versions = []
+  if alt_java_homes:
+    for alt_java_home in alt_java_homes:
+      run_alt_java, version = _make_runner(alt_java_home)
+      run_alt_javas.append(run_alt_java)
+      alt_java_versions.append(version)
+  jc = namedtuple('JavaConfig', 'run_java java_home run_alt_javas alt_java_homes alt_java_versions')
+  return jc(run_java, java_home, run_alt_javas, alt_java_homes, alt_java_versions)

 version_re = re.compile(r'(\d+\.\d+\.\d+(-ALPHA|-BETA)?)')
 revision_re = re.compile(r'rev-([a-f\d]+)')

 def parse_config():
   epilogue = textwrap.dedent('''
   Example usage:
@@ -956,8 +976,8 @@ def parse_config():
   help='GIT revision number that release was built with, defaults to that in URL')
 parser.add_argument('--version', metavar='X.Y.Z(-ALPHA|-BETA)?',
   help='Version of the release, defaults to that in URL')
-parser.add_argument('--test-java19', metavar='java19_home',
-  help='Path to Java home directory, to run tests with if specified')
+parser.add_argument('--test-alternative-java', action='append',
+  help='Path to alternative Java home directory, to run tests with if specified')
 parser.add_argument('--download-only', action='store_true', default=False,
   help='Only perform download and sha hash check steps')
 parser.add_argument('url', help='Url pointing to release to test')
@@ -984,7 +1004,7 @@ def parse_config():
 if c.local_keys is not None and not os.path.exists(c.local_keys):
   parser.error('Local KEYS file "%s" not found' % c.local_keys)
-c.java = make_java_config(parser, c.test_java19)
+c.java = make_java_config(parser, c.test_alternative_java)
 if c.tmp_dir:
   c.tmp_dir = os.path.abspath(c.tmp_dir)

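Net effect of the smoke-tester changes above: every JDK requirement now derives from the single BASE_JAVA_VERSION constant. JAVA_HOME must point at exactly that version, and any number of at-least-as-new JVMs can additionally be exercised by repeating the new flag, e.g. --test-alternative-java /path/to/jdk-22 (path illustrative); the runner parses the actual version out of the `java -version` output instead of matching a hard-coded string.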
View File

@@ -198,19 +198,25 @@ Improvements
 Optimizations
 ---------------------

 * GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao)

+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+
+* GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)
+
 Bug Fixes
 ---------------------
-(No changes)
+
+* GITHUB#13105: Fix ByteKnnVectorFieldSource & FloatKnnVectorFieldSource to work correctly when a segment does not contain
+  any docs with vectors (hossman)

 Other
 ---------------------

 * GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)

+* GITHUB#13077: Add public getter for SynonymQuery#field (Andrey Bozhko)
+
 ======================== Lucene 9.10.0 =======================

 API Changes

View File

@@ -128,10 +128,12 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       }

       // Read blocks that intersect with the interval we need
+      if (offsetInBlock < offset + length) {
+        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + offset + length - offsetInBlock);
+      }
       while (offsetInBlock < offset + length) {
         final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
         LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
-        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
         System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
         bytes.length += bytesToDecompress;
         offsetInBlock += blockLength;

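The shape of this optimization in isolation, as a runnable sketch (numbers are made up, and plain Arrays.copyOf stands in for Lucene's ArrayUtil#grow): reserve the full destination range once before the loop rather than regrowing on every decompressed block.

import java.util.Arrays;

public class GrowOnceSketch {
  public static void main(String[] args) {
    final int blockLength = 4, offset = 3, length = 10;
    int offsetInBlock = 0;
    byte[] dest = new byte[0];
    int destLength = 0;
    // One resize covering every block we are about to read (the added lines)...
    if (offsetInBlock < offset + length) {
      dest = Arrays.copyOf(dest, destLength + offset + length - offsetInBlock);
    }
    while (offsetInBlock < offset + length) {
      final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
      // ...instead of one copy per iteration (the removed line). A real
      // implementation would decompress bytesToDecompress bytes into dest here.
      destLength += bytesToDecompress;
      offsetInBlock += blockLength;
    }
    System.out.println(dest.length + " bytes reserved once, " + destLength + " filled");
  }
}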
View File

@@ -442,7 +442,7 @@ abstract class AbstractKnnVectorQuery extends Query {
     @Override
     public String toString(String field) {
-      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]";
+      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
     }

     @Override

View File

@@ -115,10 +115,16 @@ public final class SynonymQuery extends Query {
     this.field = Objects.requireNonNull(field);
   }

   /** Returns the terms of this {@link SynonymQuery} */
   public List<Term> getTerms() {
     return Arrays.stream(terms).map(t -> new Term(field, t.term)).toList();
   }

+  /** Returns the field name of this {@link SynonymQuery} */
+  public String getField() {
+    return field;
+  }
+
   @Override
   public String toString(String field) {
     StringBuilder builder = new StringBuilder("Synonym(");

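A minimal sketch of the new accessor next to the existing getTerms() (field and term values are illustrative, not from the patch):

import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SynonymQuery;

public class SynonymQueryGetterSketch {
  public static void main(String[] args) {
    SynonymQuery query =
        new SynonymQuery.Builder("body")
            .addTerm(new Term("body", "car"))
            .addTerm(new Term("body", "auto"))
            .build();
    System.out.println(query.getField()); // body  (the new accessor, GITHUB#13077)
    List<Term> terms = query.getTerms();  // [body:car, body:auto]
    System.out.println(terms);
  }
}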
View File

@@ -842,8 +842,8 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
     // The string should contain matching docIds and their score.
     // Since a forceMerge could occur in this test, we must not assert that a specific doc_id is
     // matched
-    // But that instead the string format is expected and that the score is 1.0
-    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[1.0,...]"));
+    // But that instead the string format is expected and that the max score is 1.0
+    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[\\d+.\\d+,...],1.0"));
   }

   /**

View File

@@ -87,6 +87,12 @@ public class TestSynonymQuery extends LuceneTestCase {
         new SynonymQuery.Builder("field2").addTerm(new Term("field2", "b"), 0.4f).build());
   }

+  public void testGetField() {
+    SynonymQuery query =
+        new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a")).build();
+    assertEquals("field1", query.getField());
+  }
+
   public void testBogusParams() {
     expectThrows(
         IllegalArgumentException.class,

View File

@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -39,11 +41,25 @@ public class ByteKnnVectorFieldSource extends ValueSource {
   public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
       throws IOException {
-    final ByteVectorValues vectorValues = readerContext.reader().getByteVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
+    final ByteVectorValues vectorValues = reader.getByteVectorValues(fieldName);
     if (vectorValues == null) {
-      throw new IllegalArgumentException(
-          "no byte vector value is indexed for field '" + fieldName + "'");
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.BYTE);
+
+      return new VectorFieldFunction(this) {
+        private final DocIdSetIterator empty = DocIdSetIterator.empty();
+
+        @Override
+        public byte[] byteVectorVal(int doc) throws IOException {
+          return null;
+        }
+
+        @Override
+        protected DocIdSetIterator getVectorIterator() {
+          return empty;
+        }
+      };
     }
     return new VectorFieldFunction(this) {

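A minimal sketch of the fallback behavior this patch introduces, against a throwaway in-memory index (class and field names are illustrative): a segment holding no docs with the requested vector field no longer raises an exception, it simply reports no value for every doc.

import java.util.HashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.valuesource.ByteKnnVectorFieldSource;
import org.apache.lucene.store.ByteBuffersDirectory;

public class VectorlessSegmentSketch {
  public static void main(String[] args) throws Exception {
    try (var dir = new ByteBuffersDirectory();
        var writer = new IndexWriter(dir, new IndexWriterConfig())) {
      writer.addDocument(new Document()); // a doc without any vector field
      writer.commit();
      try (var reader = DirectoryReader.open(writer)) {
        LeafReaderContext leaf = reader.leaves().get(0);
        FunctionValues values =
            new ByteKnnVectorFieldSource("knnByteField").getValues(new HashMap<>(), leaf);
        // Before this change the getValues call above threw
        // IllegalArgumentException; now the doc simply has no value:
        System.out.println(values.exists(0)); // false
      }
    }
  }
}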
View File

@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -39,12 +41,26 @@ public class FloatKnnVectorFieldSource extends ValueSource {
   public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
       throws IOException {
-    final FloatVectorValues vectorValues = readerContext.reader().getFloatVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
+    final FloatVectorValues vectorValues = reader.getFloatVectorValues(fieldName);
     if (vectorValues == null) {
-      throw new IllegalArgumentException(
-          "no float vector value is indexed for field '" + fieldName + "'");
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.FLOAT32);
+
+      return new VectorFieldFunction(this) {
+        private final DocIdSetIterator empty = DocIdSetIterator.empty();
+
+        @Override
+        public float[] floatVectorVal(int doc) throws IOException {
+          return null;
+        }
+
+        @Override
+        protected DocIdSetIterator getVectorIterator() {
+          return empty;
+        }
+      };
     }
     return new VectorFieldFunction(this) {
       @Override

View File

@@ -17,6 +17,9 @@
 package org.apache.lucene.queries.function.valuesource;

 import java.io.IOException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -53,4 +56,29 @@ public abstract class VectorFieldFunction extends FunctionValues {
     }
     return doc == curDocID;
   }
+
+  /**
+   * Checks the Vector Encoding of a field
+   *
+   * @throws IllegalStateException if {@code field} exists, but was not indexed with vectors.
+   * @throws IllegalStateException if {@code field} has vectors, but using a different encoding
+   * @lucene.internal
+   * @lucene.experimental
+   */
+  static void checkField(LeafReader in, String field, VectorEncoding expectedEncoding) {
+    FieldInfo fi = in.getFieldInfos().fieldInfo(field);
+    if (fi != null) {
+      final VectorEncoding actual = fi.hasVectorValues() ? fi.getVectorEncoding() : null;
+      if (expectedEncoding != actual) {
+        throw new IllegalStateException(
+            "Unexpected vector encoding ("
+                + actual
+                + ") for field "
+                + field
+                + "(expected="
+                + expectedEncoding
+                + ")");
+      }
+    }
+  }
 }

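The strict side of the same change, sketched against a throwaway index (names illustrative): requesting FLOAT32 values from a field indexed with BYTE vectors now fails fast with the IllegalStateException raised by checkField, where an IllegalArgumentException used to surface. A field that is missing entirely falls through to the empty-iterator fallback shown above instead.

import java.util.HashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource;
import org.apache.lucene.store.ByteBuffersDirectory;

public class EncodingMismatchSketch {
  public static void main(String[] args) throws Exception {
    try (var dir = new ByteBuffersDirectory();
        var writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      doc.add(new KnnByteVectorField("knnByteField", new byte[] {1, 2, 3}));
      writer.addDocument(doc);
      writer.commit();
      try (var reader = DirectoryReader.open(writer)) {
        LeafReaderContext leaf = reader.leaves().get(0);
        try {
          // BYTE-encoded field read through the float source: checkField fails fast.
          new FloatKnnVectorFieldSource("knnByteField").getValues(new HashMap<>(), leaf);
        } catch (IllegalStateException expected) {
          System.out.println(expected.getMessage()); // Unexpected vector encoding (BYTE) ...
        }
      }
    }
  }
}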
View File

@@ -78,6 +78,10 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
     document.add(new KnnByteVectorField("knnByteField2", new byte[] {4, 2, 3}));
     iw.addDocument(document);

+    if (usually(random())) {
+      iw.commit();
+    }
+
     Document document2 = new Document();
     document2.add(new StringField("id", "2", Field.Store.NO));
     document2.add(new SortedDocValuesField("id", new BytesRef("2")));
@@ -232,7 +236,7 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
         new ByteVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
     assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
         () -> searcher.search(new FunctionQuery(byteDenseVectorSimilarityFunction), 10));

     v1 = new FloatKnnVectorFieldSource("knnByteField1");
@@ -241,8 +245,16 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
         new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
     assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
         () -> searcher.search(new FunctionQuery(floatVectorSimilarityFunction), 10));
+
+    v1 = new FloatKnnVectorFieldSource("id");
+    FloatVectorSimilarityFunction idVectorSimilarityFunction =
+        new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
+    assertThrows(
+        IllegalStateException.class,
+        () -> searcher.search(new FunctionQuery(idVectorSimilarityFunction), 10));
   }

   private static void assertHits(Query q, float[] scores) throws Exception {

View File

@@ -241,6 +241,9 @@ public class MockRandomMergePolicy extends MergePolicy {
     @Override
     public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException {
       if (r.nextBoolean()) {
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("NOTE: MockRandomMergePolicy now reverses reader=" + reader);
+        }
         // Reverse the doc ID order
         final int maxDoc = reader.maxDoc();
         return new Sorter.DocMap() {