mirror of https://github.com/apache/lucene.git

commit bfa64b0725: Merge branch 'main' into java_21
@@ -39,6 +39,8 @@ import zipfile
 from collections import namedtuple
 import scriptutil

+BASE_JAVA_VERSION = "21"
+
 # This tool expects to find /lucene off the base URL. You
 # must have a working gpg, tar, unzip in your path. This has been
 # tested on Linux and on Cygwin under Windows 7.
@@ -144,10 +146,10 @@ def checkJARMetaData(desc, jarFile, gitRevision, version):
                   'Implementation-Vendor: The Apache Software Foundation',
                   'Specification-Title: Lucene Search Engine:',
                   'Implementation-Title: org.apache.lucene',
-                  'X-Compile-Source-JDK: 21',
-                  'X-Compile-Target-JDK: 21',
+                  'X-Compile-Source-JDK: %s' % BASE_JAVA_VERSION,
+                  'X-Compile-Target-JDK: %s' % BASE_JAVA_VERSION,
                   'Specification-Version: %s' % version,
-                  'X-Build-JDK: 21.',
+                  'X-Build-JDK: %s.' % BASE_JAVA_VERSION,
                   'Extension-Name: org.apache.lucene'):
     if type(verify) is not tuple:
       verify = (verify,)
@@ -611,20 +613,21 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):

     validateCmd = './gradlew --no-daemon check -p lucene/documentation'
     print(' run "%s"' % validateCmd)
-    java.run_java21(validateCmd, '%s/validate.log' % unpackPath)
+    java.run_java(validateCmd, '%s/validate.log' % unpackPath)

-    print(" run tests w/ Java 21 and testArgs='%s'..." % testArgs)
-    java.run_java21('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-    print(" compile jars w/ Java 21")
-    java.run_java21('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-    testDemo(java.run_java21, isSrc, version, '21')
+    print(" run tests w/ Java %s and testArgs='%s'..." % (BASE_JAVA_VERSION, testArgs))
+    java.run_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+    print(" compile jars w/ Java %s" % BASE_JAVA_VERSION)
+    java.run_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+    testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)

-    if java.run_java19:
-      print(" run tests w/ Java 19 and testArgs='%s'..." % testArgs)
-      java.run_java19('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-      print(" compile jars w/ Java 19")
-      java.run_java19('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-      testDemo(java.run_java19, isSrc, version, '19')
+    if java.run_alt_javas:
+      for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
+        print(" run tests w/ Java %s and testArgs='%s'..." % (alt_java_version, testArgs))
+        run_alt_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+        print(" compile jars w/ Java %s" % alt_java_version)
+        run_alt_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+        testDemo(run_alt_java, isSrc, version, alt_java_version)

     print(' confirm all releases have coverage in TestBackwardsCompatibility')
     confirmAllReleasesAreTestedForBackCompat(version, unpackPath)
@@ -633,9 +636,10 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):

     checkAllJARs(os.getcwd(), gitRevision, version)

-    testDemo(java.run_java21, isSrc, version, '21')
-    if java.run_java19:
-      testDemo(java.run_java19, isSrc, version, '19')
+    testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)
+    if java.run_alt_javas:
+      for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
+        testDemo(run_alt_java, isSrc, version, alt_java_version)

     testChangesText('.', version)
@@ -664,7 +668,7 @@ def testDemo(run_java, isSrc, version, jdk):
   checkIndexCmd = 'java -ea %s --module org.apache.lucene.core/org.apache.lucene.index.CheckIndex index' % cp
   indexFilesCmd = 'java -Dsmoketester=true %s --module org.apache.lucene.demo/org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir)
   searchFilesCmd = 'java %s --module org.apache.lucene.demo/org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp

   run_java(indexFilesCmd, 'index.log')
   run_java(searchFilesCmd, 'search.log')
   reMatchingDocs = re.compile('(\d+) total matching documents')
@@ -911,33 +915,49 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
       sys.stdout.write('.')


-def make_java_config(parser, java19_home):
-  def _make_runner(java_home, version):
-    print('Java %s JAVA_HOME=%s' % (version, java_home))
+def make_java_config(parser, alt_java_homes):
+  def _make_runner(java_home, is_base_version=False):
     if cygwin:
       java_home = subprocess.check_output('cygpath -u "%s"' % java_home, shell=True).decode('utf-8').strip()
     cmd_prefix = 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % \
                  (java_home, java_home, java_home)
     s = subprocess.check_output('%s; java -version' % cmd_prefix,
                                 shell=True, stderr=subprocess.STDOUT).decode('utf-8')
-    if s.find(' version "%s' % version) == -1:
-      parser.error('got wrong version for java %s:\n%s' % (version, s))
+
+    actual_version = re.search(r'version "([1-9][0-9]*)', s).group(1)
+    print('Java %s JAVA_HOME=%s' % (actual_version, java_home))
+
+    # validate Java version
+    if is_base_version:
+      if BASE_JAVA_VERSION != actual_version:
+        parser.error('got wrong base version for java %s:\n%s' % (BASE_JAVA_VERSION, s))
+    else:
+      if int(actual_version) < int(BASE_JAVA_VERSION):
+        parser.error('got wrong version for java %s, less than base version %s:\n%s' % (actual_version, BASE_JAVA_VERSION, s))

     def run_java(cmd, logfile):
       run('%s; %s' % (cmd_prefix, cmd), logfile)
-    return run_java
-  java21_home = os.environ.get('JAVA_HOME')
-  if java21_home is None:
-    parser.error('JAVA_HOME must be set')
-  run_java21 = _make_runner(java21_home, '21')
-  run_java19 = None
-  if java19_home is not None:
-    run_java19 = _make_runner(java19_home, '19')
-
-  jc = namedtuple('JavaConfig', 'run_java21 java21_home run_java19 java19_home')
-  return jc(run_java21, java21_home, run_java19, java19_home)
+    return run_java, actual_version
+
+  java_home = os.environ.get('JAVA_HOME')
+  if java_home is None:
+    parser.error('JAVA_HOME must be set')
+  run_java, _ = _make_runner(java_home, True)
+  run_alt_javas = []
+  alt_java_versions = []
+  if alt_java_homes:
+    for alt_java_home in alt_java_homes:
+      run_alt_java, version = _make_runner(alt_java_home)
+      run_alt_javas.append(run_alt_java)
+      alt_java_versions.append(version)
+
+  jc = namedtuple('JavaConfig', 'run_java java_home run_alt_javas alt_java_homes alt_java_versions')
+  return jc(run_java, java_home, run_alt_javas, alt_java_homes, alt_java_versions)

 version_re = re.compile(r'(\d+\.\d+\.\d+(-ALPHA|-BETA)?)')
 revision_re = re.compile(r'rev-([a-f\d]+)')

 def parse_config():
   epilogue = textwrap.dedent('''
   Example usage:
@@ -956,8 +976,8 @@ def parse_config():
                       help='GIT revision number that release was built with, defaults to that in URL')
   parser.add_argument('--version', metavar='X.Y.Z(-ALPHA|-BETA)?',
                       help='Version of the release, defaults to that in URL')
-  parser.add_argument('--test-java19', metavar='java19_home',
-                      help='Path to Java home directory, to run tests with if specified')
+  parser.add_argument('--test-alternative-java', action='append',
+                      help='Path to alternative Java home directory, to run tests with if specified')
   parser.add_argument('--download-only', action='store_true', default=False,
                       help='Only perform download and sha hash check steps')
   parser.add_argument('url', help='Url pointing to release to test')
@@ -984,7 +1004,7 @@ def parse_config():
   if c.local_keys is not None and not os.path.exists(c.local_keys):
     parser.error('Local KEYS file "%s" not found' % c.local_keys)

-  c.java = make_java_config(parser, c.test_java19)
+  c.java = make_java_config(parser, c.test_alternative_java)

   if c.tmp_dir:
     c.tmp_dir = os.path.abspath(c.tmp_dir)
@@ -198,19 +198,25 @@ Improvements
 Optimizations
 ---------------------

 * GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao)

+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+
 * GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)

 Bug Fixes
 ---------------------
-(No changes)
+
+* GITHUB#13105: Fix ByteKnnVectorFieldSource & FloatKnnVectorFieldSource to work correctly when a segment does not contain
+  any docs with vectors (hossman)

 Other
 ---------------------

 * GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)

+* GITHUB#13077: Add public getter for SynonymQuery#field (Andrey Bozhko)
+
 ======================== Lucene 9.10.0 =======================

 API Changes
@@ -128,10 +128,12 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       }

       // Read blocks that intersect with the interval we need
+      if (offsetInBlock < offset + length) {
+        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + offset + length - offsetInBlock);
+      }
       while (offsetInBlock < offset + length) {
         final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
         LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
-        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
         System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
         bytes.length += bytesToDecompress;
         offsetInBlock += blockLength;
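The hunk above is the change recorded as GITHUB#12996 in the CHANGES.txt diff earlier: the destination array is grown once, to a size derived from offset + length - offsetInBlock, before the decompression loop, rather than re-grown on every block. A minimal standalone sketch of the same pattern, with invented sizes (assumes only lucene-core on the classpath for ArrayUtil):

import org.apache.lucene.util.ArrayUtil;

public class PreGrowSketch {
  public static void main(String[] args) {
    byte[] dest = new byte[0];
    int destLen = 0;
    final int total = 1000; // bytes we know up front we will append
    final int block = 128;  // per-iteration chunk size

    // Single oversizing call before the loop, as in the patched decompress ...
    dest = ArrayUtil.grow(dest, destLen + total);
    for (int copied = 0; copied < total; ) {
      int n = Math.min(block, total - copied);
      // ... instead of calling ArrayUtil.grow(dest, destLen + n) on every pass.
      // (Here the decompressed bytes would be copied into dest[destLen .. destLen + n).)
      destLen += n;
      copied += n;
    }
    System.out.println("capacity=" + dest.length + " length=" + destLen);
  }
}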
@@ -442,7 +442,7 @@ abstract class AbstractKnnVectorQuery extends Query {

     @Override
     public String toString(String field) {
-      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]";
+      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
     }

     @Override
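DocAndScoreQuery#toString now appends the query's max score after the truncated doc and score previews. A purely illustrative sketch of the resulting string shape; the doc id, score, and max score here are made up:

public class DocAndScoreToStringSketch {
  public static void main(String[] args) {
    int[] docs = {42};
    float[] scores = {0.73f};
    float maxScore = 1.0f;
    // Mirrors the patched concatenation above
    String s = "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
    System.out.println(s); // DocAndScoreQuery[42,...][0.73,...],1.0
  }
}

The adjusted regex in the BaseKnnVectorQueryTestCase hunk further down matches exactly this shape.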
@@ -115,10 +115,16 @@ public final class SynonymQuery extends Query {
     this.field = Objects.requireNonNull(field);
   }

   /** Returns the terms of this {@link SynonymQuery} */
   public List<Term> getTerms() {
     return Arrays.stream(terms).map(t -> new Term(field, t.term)).toList();
   }

+  /** Returns the field name of this {@link SynonymQuery} */
+  public String getField() {
+    return field;
+  }
+
   @Override
   public String toString(String field) {
     StringBuilder builder = new StringBuilder("Synonym(");
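A short usage sketch of the new accessor (GITHUB#13077) next to the existing getTerms, using the Builder API visible elsewhere in this diff; the field name and terms are illustrative:

import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SynonymQuery;

public class SynonymQueryGetFieldSketch {
  public static void main(String[] args) {
    SynonymQuery query =
        new SynonymQuery.Builder("body")
            .addTerm(new Term("body", "fast"))
            .addTerm(new Term("body", "quick"))
            .build();
    System.out.println(query.getField()); // body
    List<Term> terms = query.getTerms();  // [body:fast, body:quick]
    System.out.println(terms);
  }
}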
@@ -842,8 +842,8 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
     // The string should contain matching docIds and their score.
     // Since a forceMerge could occur in this test, we must not assert that a specific doc_id is
     // matched
-    // But that instead the string format is expected and that the score is 1.0
-    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[1.0,...]"));
+    // But that instead the string format is expected and that the max score is 1.0
+    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[\\d+.\\d+,...],1.0"));
   }

   /**
@@ -87,6 +87,12 @@ public class TestSynonymQuery extends LuceneTestCase {
         new SynonymQuery.Builder("field2").addTerm(new Term("field2", "b"), 0.4f).build());
   }

+  public void testGetField() {
+    SynonymQuery query =
+        new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a")).build();
+    assertEquals("field1", query.getField());
+  }
+
   public void testBogusParams() {
     expectThrows(
         IllegalArgumentException.class,
@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -39,11 +41,25 @@ public class ByteKnnVectorFieldSource extends ValueSource {
   public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
       throws IOException {

-    final ByteVectorValues vectorValues = readerContext.reader().getByteVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
+    final ByteVectorValues vectorValues = reader.getByteVectorValues(fieldName);

     if (vectorValues == null) {
-      throw new IllegalArgumentException(
-          "no byte vector value is indexed for field '" + fieldName + "'");
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.BYTE);
+
+      return new VectorFieldFunction(this) {
+        private final DocIdSetIterator empty = DocIdSetIterator.empty();
+
+        @Override
+        public byte[] byteVectorVal(int doc) throws IOException {
+          return null;
+        }
+
+        @Override
+        protected DocIdSetIterator getVectorIterator() {
+          return empty;
+        }
+      };
     }

     return new VectorFieldFunction(this) {
@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -39,12 +41,26 @@ public class FloatKnnVectorFieldSource extends ValueSource {
   public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
       throws IOException {

-    final FloatVectorValues vectorValues = readerContext.reader().getFloatVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
+    final FloatVectorValues vectorValues = reader.getFloatVectorValues(fieldName);

     if (vectorValues == null) {
-      throw new IllegalArgumentException(
-          "no float vector value is indexed for field '" + fieldName + "'");
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.FLOAT32);
+      return new VectorFieldFunction(this) {
+        private final DocIdSetIterator empty = DocIdSetIterator.empty();
+
+        @Override
+        public float[] floatVectorVal(int doc) throws IOException {
+          return null;
+        }
+
+        @Override
+        protected DocIdSetIterator getVectorIterator() {
+          return empty;
+        }
+      };
     }

     return new VectorFieldFunction(this) {

       @Override
@@ -17,6 +17,9 @@
 package org.apache.lucene.queries.function.valuesource;

 import java.io.IOException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -53,4 +56,29 @@ public abstract class VectorFieldFunction extends FunctionValues {
     }
     return doc == curDocID;
   }
+
+  /**
+   * Checks the Vector Encoding of a field
+   *
+   * @throws IllegalStateException if {@code field} exists, but was not indexed with vectors.
+   * @throws IllegalStateException if {@code field} has vectors, but using a different encoding
+   * @lucene.internal
+   * @lucene.experimental
+   */
+  static void checkField(LeafReader in, String field, VectorEncoding expectedEncoding) {
+    FieldInfo fi = in.getFieldInfos().fieldInfo(field);
+    if (fi != null) {
+      final VectorEncoding actual = fi.hasVectorValues() ? fi.getVectorEncoding() : null;
+      if (expectedEncoding != actual) {
+        throw new IllegalStateException(
+            "Unexpected vector encoding ("
+                + actual
+                + ") for field "
+                + field
+                + "(expected="
+                + expectedEncoding
+                + ")");
+      }
+    }
+  }
+}
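A hedged end-to-end sketch of what checkField changes in practice, modeled on the TestKnnVectorSimilarityFunctions hunks below: requesting float vectors from a field indexed with byte vectors now fails inside checkField with IllegalStateException, where the field sources previously threw IllegalArgumentException for any null vector values. The directory choice and field name are illustrative; assumes lucene-core and lucene-queries on the classpath:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class CheckFieldSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig())) {
        Document doc = new Document();
        // Field indexed with BYTE encoding
        doc.add(new KnnByteVectorField("knnByteField", new byte[] {1, 2, 3}));
        iw.addDocument(doc);
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Float source over a byte-encoded field -> encoding mismatch in checkField
        FunctionQuery q = new FunctionQuery(new FloatKnnVectorFieldSource("knnByteField"));
        try {
          searcher.search(q, 10);
        } catch (IllegalStateException expected) {
          System.out.println("caught: " + expected.getMessage());
        }
      }
    }
  }
}

When a field simply has no vectors in a given segment (the GITHUB#13105 case), the sources now return a FunctionValues backed by an empty iterator instead of throwing, as the two getValues hunks above show.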
@@ -78,6 +78,10 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
     document.add(new KnnByteVectorField("knnByteField2", new byte[] {4, 2, 3}));
     iw.addDocument(document);

+    if (usually(random())) {
+      iw.commit();
+    }
+
     Document document2 = new Document();
     document2.add(new StringField("id", "2", Field.Store.NO));
     document2.add(new SortedDocValuesField("id", new BytesRef("2")));
@@ -232,7 +236,7 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
         new ByteVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);

     assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
         () -> searcher.search(new FunctionQuery(byteDenseVectorSimilarityFunction), 10));

     v1 = new FloatKnnVectorFieldSource("knnByteField1");
@@ -241,8 +245,16 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
         new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);

     assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
         () -> searcher.search(new FunctionQuery(floatVectorSimilarityFunction), 10));

+    v1 = new FloatKnnVectorFieldSource("id");
+    FloatVectorSimilarityFunction idVectorSimilarityFunction =
+        new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
+
+    assertThrows(
+        IllegalStateException.class,
+        () -> searcher.search(new FunctionQuery(idVectorSimilarityFunction), 10));
   }

   private static void assertHits(Query q, float[] scores) throws Exception {
@@ -241,6 +241,9 @@ public class MockRandomMergePolicy extends MergePolicy {
     @Override
     public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException {
+      if (r.nextBoolean()) {
        if (LuceneTestCase.VERBOSE) {
          System.out.println("NOTE: MockRandomMergePolicy now reverses reader=" + reader);
        }
        // Reverse the doc ID order
        final int maxDoc = reader.maxDoc();
        return new Sorter.DocMap() {