Merge branch 'main' into java_21

2024-02-27 13:42:36 +01:00 · 2024-02-27 13:42:36 +01:00 · bfa64b0725
parent 0ccb119495 42269203cc
commit bfa64b0725
12 changed files with 165 additions and 50 deletions
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@ -39,6 +39,8 @@ import zipfile
 from collections import namedtuple
 import scriptutil
 BASE_JAVA_VERSION = "21"
 # This tool expects to find /lucene off the base URL.  You
 # must have a working gpg, tar, unzip in your path.  This has been
 # tested on Linux and on Cygwin under Windows 7.
@ -144,10 +146,10 @@ def checkJARMetaData(desc, jarFile, gitRevision, version):
      'Implementation-Vendor: The Apache Software Foundation',
      'Specification-Title: Lucene Search Engine:',
      'Implementation-Title: org.apache.lucene',
-      'X-Compile-Source-JDK: 21',
+      'X-Compile-Source-JDK: %s' % BASE_JAVA_VERSION,
-      'X-Compile-Target-JDK: 21',
+      'X-Compile-Target-JDK: %s' % BASE_JAVA_VERSION,
      'Specification-Version: %s' % version,
-      'X-Build-JDK: 21.',
+      'X-Build-JDK: %s.' % BASE_JAVA_VERSION,
      'Extension-Name: org.apache.lucene'):
      if type(verify) is not tuple:
        verify = (verify,)
@ -611,20 +613,21 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
    validateCmd = './gradlew --no-daemon check -p lucene/documentation'
    print('    run "%s"' % validateCmd)
-    java.run_java21(validateCmd, '%s/validate.log' % unpackPath)
+    java.run_java(validateCmd, '%s/validate.log' % unpackPath)
-    print("    run tests w/ Java 21 and testArgs='%s'..." % testArgs)
+    print("    run tests w/ Java %s and testArgs='%s'..." % (BASE_JAVA_VERSION, testArgs))
-    java.run_java21('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+    java.run_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-    print("    compile jars w/ Java 21")
+    print("    compile jars w/ Java %s" % BASE_JAVA_VERSION)
-    java.run_java21('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+    java.run_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-    testDemo(java.run_java21, isSrc, version, '21')
+    testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)
-    if java.run_java19:
+    if java.run_alt_javas:
-      print("    run tests w/ Java 19 and testArgs='%s'..." % testArgs)
+      for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
-      java.run_java19('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+        print("    run tests w/ Java %s and testArgs='%s'..." % (alt_java_version, testArgs))
-      print("    compile jars w/ Java 19")
+        run_alt_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-      java.run_java19('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+        print("    compile jars w/ Java %s" % alt_java_version)
-      testDemo(java.run_java19, isSrc, version, '19')
+        run_alt_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
        testDemo(run_alt_java, isSrc, version, alt_java_version)
    print('  confirm all releases have coverage in TestBackwardsCompatibility')
    confirmAllReleasesAreTestedForBackCompat(version, unpackPath)
@ -633,9 +636,10 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
    checkAllJARs(os.getcwd(), gitRevision, version)
-    testDemo(java.run_java21, isSrc, version, '21')
+    testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)
-    if java.run_java19:
+    if java.run_alt_javas:
-      testDemo(java.run_java19, isSrc, version, '19')
+      for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
        testDemo(run_alt_java, isSrc, version, alt_java_version)
  testChangesText('.', version)
@ -911,33 +915,49 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
        sys.stdout.write('.')
-def make_java_config(parser, java19_home):
+def make_java_config(parser, alt_java_homes):
-  def _make_runner(java_home, version):
+  def _make_runner(java_home, is_base_version=False):
    print('Java %s JAVA_HOME=%s' % (version, java_home))
    if cygwin:
      java_home = subprocess.check_output('cygpath -u "%s"' % java_home, shell=True).decode('utf-8').strip()
    cmd_prefix = 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % \
                 (java_home, java_home, java_home)
    s = subprocess.check_output('%s; java -version' % cmd_prefix,
                                shell=True, stderr=subprocess.STDOUT).decode('utf-8')
-    if s.find(' version "%s' % version) == -1:
+
-      parser.error('got wrong version for java %s:\n%s' % (version, s))
+    actual_version = re.search(r'version "([1-9][0-9]*)', s).group(1)
    print('Java %s JAVA_HOME=%s' % (actual_version, java_home))
    # validate Java version
    if is_base_version:
      if BASE_JAVA_VERSION != actual_version:
        parser.error('got wrong base version for java %s:\n%s' % (BASE_JAVA_VERSION, s))
    else:
      if int(actual_version) < int(BASE_JAVA_VERSION):
        parser.error('got wrong version for java %s, less than base version %s:\n%s' % (actual_version, BASE_JAVA_VERSION, s))
    def run_java(cmd, logfile):
      run('%s; %s' % (cmd_prefix, cmd), logfile)
    return run_java
  java21_home =  os.environ.get('JAVA_HOME')
  if java21_home is None:
    parser.error('JAVA_HOME must be set')
  run_java21 = _make_runner(java21_home, '21')
  run_java19 = None
  if java19_home is not None:
    run_java19 = _make_runner(java19_home, '19')
-  jc = namedtuple('JavaConfig', 'run_java21 java21_home run_java19 java19_home')
+    return run_java, actual_version
-  return jc(run_java21, java21_home, run_java19, java19_home)
+
  java_home =  os.environ.get('JAVA_HOME')
  if java_home is None:
    parser.error('JAVA_HOME must be set')
  run_java, _ = _make_runner(java_home, True)
  run_alt_javas = []
  alt_java_versions = []
  if alt_java_homes:
    for alt_java_home in alt_java_homes:
      run_alt_java, version = _make_runner(alt_java_home)
      run_alt_javas.append(run_alt_java)
      alt_java_versions.append(version)
  jc = namedtuple('JavaConfig', 'run_java java_home run_alt_javas alt_java_homes alt_java_versions')
  return jc(run_java, java_home, run_alt_javas, alt_java_homes, alt_java_versions)
 version_re = re.compile(r'(\d+\.\d+\.\d+(-ALPHA|-BETA)?)')
 revision_re = re.compile(r'rev-([a-f\d]+)')
 def parse_config():
  epilogue = textwrap.dedent('''
    Example usage:
@ -956,8 +976,8 @@ def parse_config():
                      help='GIT revision number that release was built with, defaults to that in URL')
  parser.add_argument('--version', metavar='X.Y.Z(-ALPHA|-BETA)?',
                      help='Version of the release, defaults to that in URL')
-  parser.add_argument('--test-java19', metavar='java19_home',
+  parser.add_argument('--test-alternative-java', action='append',
-                      help='Path to Java home directory, to run tests with if specified')
+                      help='Path to alternative Java home directory, to run tests with if specified')
  parser.add_argument('--download-only', action='store_true', default=False,
                      help='Only perform download and sha hash check steps')
  parser.add_argument('url', help='Url pointing to release to test')
@ -984,7 +1004,7 @@ def parse_config():
  if c.local_keys is not None and not os.path.exists(c.local_keys):
    parser.error('Local KEYS file "%s" not found' % c.local_keys)
-  c.java = make_java_config(parser, c.test_java19)
+  c.java = make_java_config(parser, c.test_alternative_java)
  if c.tmp_dir:
    c.tmp_dir = os.path.abspath(c.tmp_dir)
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -198,19 +198,25 @@ Improvements
 Optimizations
 ---------------------
 * GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao)
 * GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
 * GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)
 Bug Fixes
 ---------------------
-(No changes)
+
 * GITHUB#13105: Fix ByteKnnVectorFieldSource & FloatKnnVectorFieldSource to work correctly when a segment does not contain
  any docs with vectors (hossman)
 Other
 ---------------------
 * GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
 * GITHUB#13077: Add public getter for SynonymQuery#field (Andrey Bozhko)
 ======================== Lucene 9.10.0 =======================
 API Changes
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java
@ -128,10 +128,12 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
      }
      // Read blocks that intersect with the interval we need
      if (offsetInBlock < offset + length) {
        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + offset + length - offsetInBlock);
      }
      while (offsetInBlock < offset + length) {
        final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
        LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
        System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
        bytes.length += bytesToDecompress;
        offsetInBlock += blockLength;
--- a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java
@ -442,7 +442,7 @@ abstract class AbstractKnnVectorQuery extends Query {
    @Override
    public String toString(String field) {
-      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]";
+      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
    }
    @Override
--- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
@ -115,10 +115,16 @@ public final class SynonymQuery extends Query {
    this.field = Objects.requireNonNull(field);
  }
  /** Returns every term of this {@link SynonymQuery}, each one bound to the query's field. */
  public List<Term> getTerms() {
    return Arrays.stream(terms)
        .map(entry -> new Term(field, entry.term))
        .toList();
  }
  /** Returns the field name held by this {@link SynonymQuery}. */
  public String getField() {
    return this.field;
  }
  @Override
  public String toString(String field) {
    StringBuilder builder = new StringBuilder("Synonym(");
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@ -842,8 +842,8 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
    // The string should contain matching docIds and their score.
    // Since a forceMerge could occur in this test, we must not assert that a specific doc_id is
    // matched
-    // But that instead the string format is expected and that the score is 1.0
+    // But that instead the string format is expected and that the max score is 1.0
-    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[1.0,...]"));
+    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[\\d+.\\d+,...],1.0"));
  }
  /**
--- a/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java
@ -87,6 +87,12 @@ public class TestSynonymQuery extends LuceneTestCase {
        new SynonymQuery.Builder("field2").addTerm(new Term("field2", "b"), 0.4f).build());
  }
  /** Checks that {@code getField()} reports the field name the builder was created with. */
  public void testGetField() {
    final String fieldName = "field1";
    final Term term = new Term(fieldName, "a");
    final SynonymQuery query = new SynonymQuery.Builder(fieldName).addTerm(term).build();
    assertEquals(fieldName, query.getField());
  }
  public void testBogusParams() {
    expectThrows(
        IllegalArgumentException.class,
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ByteKnnVectorFieldSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ByteKnnVectorFieldSource.java
@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.ByteVectorValues;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@ -39,11 +41,25 @@ public class ByteKnnVectorFieldSource extends ValueSource {
  public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
      throws IOException {
-    final ByteVectorValues vectorValues = readerContext.reader().getByteVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
    final ByteVectorValues vectorValues = reader.getByteVectorValues(fieldName);
    if (vectorValues == null) {
-      throw new IllegalArgumentException(
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.BYTE);
-          "no byte vector value is indexed for field '" + fieldName + "'");
+
      return new VectorFieldFunction(this) {
        private final DocIdSetIterator empty = DocIdSetIterator.empty();
        @Override
        public byte[] byteVectorVal(int doc) throws IOException {
          return null;
        }
        @Override
        protected DocIdSetIterator getVectorIterator() {
          return empty;
        }
      };
    }
    return new VectorFieldFunction(this) {
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatKnnVectorFieldSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatKnnVectorFieldSource.java
@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.FloatVectorValues;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@ -39,12 +41,26 @@ public class FloatKnnVectorFieldSource extends ValueSource {
  public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
      throws IOException {
-    final FloatVectorValues vectorValues = readerContext.reader().getFloatVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
    final FloatVectorValues vectorValues = reader.getFloatVectorValues(fieldName);
    if (vectorValues == null) {
-      throw new IllegalArgumentException(
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.FLOAT32);
-          "no float vector value is indexed for field '" + fieldName + "'");
+      return new VectorFieldFunction(this) {
        private final DocIdSetIterator empty = DocIdSetIterator.empty();
        @Override
        public float[] floatVectorVal(int doc) throws IOException {
          return null;
        }
        @Override
        protected DocIdSetIterator getVectorIterator() {
          return empty;
        }
      };
    }
    return new VectorFieldFunction(this) {
      @Override
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/VectorFieldFunction.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/VectorFieldFunction.java
@ -17,6 +17,9 @@
 package org.apache.lucene.queries.function.valuesource;
 import java.io.IOException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@ -53,4 +56,29 @@ public abstract class VectorFieldFunction extends FunctionValues {
    }
    return doc == curDocID;
  }
  /**
   * Checks the Vector Encoding of a field.
   *
   * <p>If {@code in} has no {@link FieldInfo} for {@code field}, nothing is checked and the method
   * returns normally. Otherwise the field must have vector values whose encoding is {@code
   * expectedEncoding}.
   *
   * @param in reader whose field infos are consulted
   * @param field name of the field to check
   * @param expectedEncoding encoding the caller requires the field's vectors to use
   * @throws IllegalStateException if {@code field} exists, but was not indexed with vectors.
   * @throws IllegalStateException if {@code field} has vectors, but using a different encoding
   * @lucene.internal
   * @lucene.experimental
   */
  static void checkField(LeafReader in, String field, VectorEncoding expectedEncoding) {
    final FieldInfo fi = in.getFieldInfos().fieldInfo(field);
    if (fi == null) {
      // Field is unknown to this reader: there is nothing to validate.
      return;
    }
    // null stands for "field exists but has no vector values", so it mismatches any non-null
    // expected encoding and is reported through the same exception below.
    final VectorEncoding actual = fi.hasVectorValues() ? fi.getVectorEncoding() : null;
    if (expectedEncoding != actual) {
      throw new IllegalStateException(
          "Unexpected vector encoding ("
              + actual
              + ") for field "
              + field
              + " (expected="
              + expectedEncoding
              + ")");
    }
  }
 }
--- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestKnnVectorSimilarityFunctions.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestKnnVectorSimilarityFunctions.java
@ -78,6 +78,10 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
    document.add(new KnnByteVectorField("knnByteField2", new byte[] {4, 2, 3}));
    iw.addDocument(document);
    if (usually(random())) {
      iw.commit();
    }
    Document document2 = new Document();
    document2.add(new StringField("id", "2", Field.Store.NO));
    document2.add(new SortedDocValuesField("id", new BytesRef("2")));
@ -232,7 +236,7 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
        new ByteVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
    assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
        () -> searcher.search(new FunctionQuery(byteDenseVectorSimilarityFunction), 10));
    v1 = new FloatKnnVectorFieldSource("knnByteField1");
@ -241,8 +245,16 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
        new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
    assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
        () -> searcher.search(new FunctionQuery(floatVectorSimilarityFunction), 10));
    v1 = new FloatKnnVectorFieldSource("id");
    FloatVectorSimilarityFunction idVectorSimilarityFunction =
        new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
    assertThrows(
        IllegalStateException.class,
        () -> searcher.search(new FunctionQuery(idVectorSimilarityFunction), 10));
  }
  private static void assertHits(Query q, float[] scores) throws Exception {
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java
@ -241,6 +241,9 @@ public class MockRandomMergePolicy extends MergePolicy {
    @Override
    public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException {
      if (r.nextBoolean()) {
        if (LuceneTestCase.VERBOSE) {
          System.out.println("NOTE: MockRandomMergePolicy now reverses reader=" + reader);
        }
        // Reverse the doc ID order
        final int maxDoc = reader.maxDoc();
        return new Sorter.DocMap() {