mirror of https://github.com/apache/lucene.git

commit bfa64b0725: Merge branch 'main' into java_21
@@ -39,6 +39,8 @@ import zipfile
 from collections import namedtuple
 import scriptutil

+BASE_JAVA_VERSION = "21"
+
 # This tool expects to find /lucene off the base URL. You
 # must have a working gpg, tar, unzip in your path. This has been
 # tested on Linux and on Cygwin under Windows 7.
@@ -144,10 +146,10 @@ def checkJARMetaData(desc, jarFile, gitRevision, version):
                   'Implementation-Vendor: The Apache Software Foundation',
                   'Specification-Title: Lucene Search Engine:',
                   'Implementation-Title: org.apache.lucene',
-                  'X-Compile-Source-JDK: 21',
-                  'X-Compile-Target-JDK: 21',
+                  'X-Compile-Source-JDK: %s' % BASE_JAVA_VERSION,
+                  'X-Compile-Target-JDK: %s' % BASE_JAVA_VERSION,
                   'Specification-Version: %s' % version,
-                  'X-Build-JDK: 21.',
+                  'X-Build-JDK: %s.' % BASE_JAVA_VERSION,
                   'Extension-Name: org.apache.lucene'):
     if type(verify) is not tuple:
       verify = (verify,)
@@ -611,20 +613,21 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):

     validateCmd = './gradlew --no-daemon check -p lucene/documentation'
     print(' run "%s"' % validateCmd)
-    java.run_java21(validateCmd, '%s/validate.log' % unpackPath)
+    java.run_java(validateCmd, '%s/validate.log' % unpackPath)

-    print(" run tests w/ Java 21 and testArgs='%s'..." % testArgs)
-    java.run_java21('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-    print(" compile jars w/ Java 21")
-    java.run_java21('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-    testDemo(java.run_java21, isSrc, version, '21')
+    print(" run tests w/ Java %s and testArgs='%s'..." % (BASE_JAVA_VERSION, testArgs))
+    java.run_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+    print(" compile jars w/ Java %s" % BASE_JAVA_VERSION)
+    java.run_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+    testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)

-    if java.run_java19:
-      print(" run tests w/ Java 19 and testArgs='%s'..." % testArgs)
-      java.run_java19('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-      print(" compile jars w/ Java 19")
-      java.run_java19('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-      testDemo(java.run_java19, isSrc, version, '19')
+    if java.run_alt_javas:
+      for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
+        print(" run tests w/ Java %s and testArgs='%s'..." % (alt_java_version, testArgs))
+        run_alt_java('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+        print(" compile jars w/ Java %s" % alt_java_version)
+        run_alt_java('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+        testDemo(run_alt_java, isSrc, version, alt_java_version)

     print(' confirm all releases have coverage in TestBackwardsCompatibility')
     confirmAllReleasesAreTestedForBackCompat(version, unpackPath)
@@ -633,9 +636,10 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):

     checkAllJARs(os.getcwd(), gitRevision, version)

-    testDemo(java.run_java21, isSrc, version, '21')
-    if java.run_java19:
-      testDemo(java.run_java19, isSrc, version, '19')
+    testDemo(java.run_java, isSrc, version, BASE_JAVA_VERSION)
+    if java.run_alt_javas:
+      for run_alt_java, alt_java_version in zip(java.run_alt_javas, java.alt_java_versions):
+        testDemo(run_alt_java, isSrc, version, alt_java_version)

     testChangesText('.', version)
@@ -664,7 +668,7 @@ def testDemo(run_java, isSrc, version, jdk):
   checkIndexCmd = 'java -ea %s --module org.apache.lucene.core/org.apache.lucene.index.CheckIndex index' % cp
   indexFilesCmd = 'java -Dsmoketester=true %s --module org.apache.lucene.demo/org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir)
   searchFilesCmd = 'java %s --module org.apache.lucene.demo/org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp

   run_java(indexFilesCmd, 'index.log')
   run_java(searchFilesCmd, 'search.log')
   reMatchingDocs = re.compile('(\d+) total matching documents')
@@ -911,33 +915,49 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
       sys.stdout.write('.')


-def make_java_config(parser, java19_home):
-  def _make_runner(java_home, version):
-    print('Java %s JAVA_HOME=%s' % (version, java_home))
+def make_java_config(parser, alt_java_homes):
+  def _make_runner(java_home, is_base_version=False):
     if cygwin:
       java_home = subprocess.check_output('cygpath -u "%s"' % java_home, shell=True).decode('utf-8').strip()
     cmd_prefix = 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % \
                  (java_home, java_home, java_home)
     s = subprocess.check_output('%s; java -version' % cmd_prefix,
                                 shell=True, stderr=subprocess.STDOUT).decode('utf-8')
-    if s.find(' version "%s' % version) == -1:
-      parser.error('got wrong version for java %s:\n%s' % (version, s))
+
+    actual_version = re.search(r'version "([1-9][0-9]*)', s).group(1)
+    print('Java %s JAVA_HOME=%s' % (actual_version, java_home))
+
+    # validate Java version
+    if is_base_version:
+      if BASE_JAVA_VERSION != actual_version:
+        parser.error('got wrong base version for java %s:\n%s' % (BASE_JAVA_VERSION, s))
+    else:
+      if int(actual_version) < int(BASE_JAVA_VERSION):
+        parser.error('got wrong version for java %s, less than base version %s:\n%s' % (actual_version, BASE_JAVA_VERSION, s))

     def run_java(cmd, logfile):
       run('%s; %s' % (cmd_prefix, cmd), logfile)
-    return run_java
-  java21_home = os.environ.get('JAVA_HOME')
-  if java21_home is None:
-    parser.error('JAVA_HOME must be set')
-  run_java21 = _make_runner(java21_home, '21')
-  run_java19 = None
-  if java19_home is not None:
-    run_java19 = _make_runner(java19_home, '19')
-
-  jc = namedtuple('JavaConfig', 'run_java21 java21_home run_java19 java19_home')
-  return jc(run_java21, java21_home, run_java19, java19_home)
+    return run_java, actual_version
+
+  java_home = os.environ.get('JAVA_HOME')
+  if java_home is None:
+    parser.error('JAVA_HOME must be set')
+  run_java, _ = _make_runner(java_home, True)
+  run_alt_javas = []
+  alt_java_versions = []
+  if alt_java_homes:
+    for alt_java_home in alt_java_homes:
+      run_alt_java, version = _make_runner(alt_java_home)
+      run_alt_javas.append(run_alt_java)
+      alt_java_versions.append(version)
+
+  jc = namedtuple('JavaConfig', 'run_java java_home run_alt_javas alt_java_homes alt_java_versions')
+  return jc(run_java, java_home, run_alt_javas, alt_java_homes, alt_java_versions)

 version_re = re.compile(r'(\d+\.\d+\.\d+(-ALPHA|-BETA)?)')
 revision_re = re.compile(r'rev-([a-f\d]+)')

 def parse_config():
   epilogue = textwrap.dedent('''
   Example usage:
@@ -956,8 +976,8 @@ def parse_config():
                       help='GIT revision number that release was built with, defaults to that in URL')
   parser.add_argument('--version', metavar='X.Y.Z(-ALPHA|-BETA)?',
                       help='Version of the release, defaults to that in URL')
-  parser.add_argument('--test-java19', metavar='java19_home',
-                      help='Path to Java home directory, to run tests with if specified')
+  parser.add_argument('--test-alternative-java', action='append',
+                      help='Path to alternative Java home directory, to run tests with if specified')
   parser.add_argument('--download-only', action='store_true', default=False,
                       help='Only perform download and sha hash check steps')
   parser.add_argument('url', help='Url pointing to release to test')
@@ -984,7 +1004,7 @@ def parse_config():
   if c.local_keys is not None and not os.path.exists(c.local_keys):
     parser.error('Local KEYS file "%s" not found' % c.local_keys)

-  c.java = make_java_config(parser, c.test_java19)
+  c.java = make_java_config(parser, c.test_alternative_java)

   if c.tmp_dir:
     c.tmp_dir = os.path.abspath(c.tmp_dir)
@@ -198,19 +198,25 @@ Improvements
 Optimizations
 ---------------------

 * GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao)

+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+
 * GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)

 Bug Fixes
 ---------------------
-(No changes)
+
+* GITHUB#13105: Fix ByteKnnVectorFieldSource & FloatKnnVectorFieldSource to work correctly when a segment does not contain
+  any docs with vectors (hossman)

 Other
 ---------------------

 * GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)

+* GITHUB#13077: Add public getter for SynonymQuery#field (Andrey Bozhko)
+
 ======================== Lucene 9.10.0 =======================

 API Changes
@@ -128,10 +128,12 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       }

       // Read blocks that intersect with the interval we need
+      if (offsetInBlock < offset + length) {
+        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + offset + length - offsetInBlock);
+      }
       while (offsetInBlock < offset + length) {
         final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
         LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
-        bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
         System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
         bytes.length += bytesToDecompress;
         offsetInBlock += blockLength;
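The hunk above is the change recorded as GITHUB#12996 in the CHANGES.txt diff earlier: the destination array is grown once, to a size derived from offset + length - offsetInBlock, before the decompression loop, rather than re-grown on every block. A minimal standalone sketch of the same pattern, with invented sizes (assumes only lucene-core on the classpath for ArrayUtil):

import org.apache.lucene.util.ArrayUtil;

public class PreGrowSketch {
  public static void main(String[] args) {
    byte[] dest = new byte[0];
    int destLen = 0;
    final int total = 1000; // bytes we know up front we will append
    final int block = 128;  // per-iteration chunk size

    // Single oversizing call before the loop, as in the patched decompress ...
    dest = ArrayUtil.grow(dest, destLen + total);
    for (int copied = 0; copied < total; ) {
      int n = Math.min(block, total - copied);
      // ... instead of calling ArrayUtil.grow(dest, destLen + n) on every pass.
      // (Here the decompressed bytes would be copied into dest[destLen .. destLen + n).)
      destLen += n;
      copied += n;
    }
    System.out.println("capacity=" + dest.length + " length=" + destLen);
  }
}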
@@ -442,7 +442,7 @@ abstract class AbstractKnnVectorQuery extends Query {

     @Override
     public String toString(String field) {
-      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]";
+      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
     }

     @Override
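DocAndScoreQuery#toString now appends the query's max score after the truncated doc and score previews. A purely illustrative sketch of the resulting string shape; the doc id, score, and max score here are made up:

public class DocAndScoreToStringSketch {
  public static void main(String[] args) {
    int[] docs = {42};
    float[] scores = {0.73f};
    float maxScore = 1.0f;
    // Mirrors the patched concatenation above
    String s = "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
    System.out.println(s); // DocAndScoreQuery[42,...][0.73,...],1.0
  }
}

The adjusted regex in the BaseKnnVectorQueryTestCase hunk further down matches exactly this shape.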
@@ -115,10 +115,16 @@ public final class SynonymQuery extends Query {
     this.field = Objects.requireNonNull(field);
   }

   /** Returns the terms of this {@link SynonymQuery} */
   public List<Term> getTerms() {
     return Arrays.stream(terms).map(t -> new Term(field, t.term)).toList();
   }

+  /** Returns the field name of this {@link SynonymQuery} */
+  public String getField() {
+    return field;
+  }
+
   @Override
   public String toString(String field) {
     StringBuilder builder = new StringBuilder("Synonym(");
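A short usage sketch of the new accessor (GITHUB#13077) next to the existing getTerms, using the Builder API visible elsewhere in this diff; the field name and terms are illustrative:

import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SynonymQuery;

public class SynonymQueryGetFieldSketch {
  public static void main(String[] args) {
    SynonymQuery query =
        new SynonymQuery.Builder("body")
            .addTerm(new Term("body", "fast"))
            .addTerm(new Term("body", "quick"))
            .build();
    System.out.println(query.getField()); // body
    List<Term> terms = query.getTerms();  // [body:fast, body:quick]
    System.out.println(terms);
  }
}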
@@ -842,8 +842,8 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
     // The string should contain matching docIds and their score.
     // Since a forceMerge could occur in this test, we must not assert that a specific doc_id is
     // matched
-    // But that instead the string format is expected and that the score is 1.0
-    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[1.0,...]"));
+    // But that instead the string format is expected and that the max score is 1.0
+    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[\\d+.\\d+,...],1.0"));
   }

   /**
@@ -87,6 +87,12 @@ public class TestSynonymQuery extends LuceneTestCase {
         new SynonymQuery.Builder("field2").addTerm(new Term("field2", "b"), 0.4f).build());
   }

+  public void testGetField() {
+    SynonymQuery query =
+        new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a")).build();
+    assertEquals("field1", query.getField());
+  }
+
   public void testBogusParams() {
     expectThrows(
         IllegalArgumentException.class,
@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -39,11 +41,25 @@ public class ByteKnnVectorFieldSource extends ValueSource {
   public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
       throws IOException {

-    final ByteVectorValues vectorValues = readerContext.reader().getByteVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
+    final ByteVectorValues vectorValues = reader.getByteVectorValues(fieldName);

     if (vectorValues == null) {
-      throw new IllegalArgumentException(
-          "no byte vector value is indexed for field '" + fieldName + "'");
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.BYTE);
+
+      return new VectorFieldFunction(this) {
+        private final DocIdSetIterator empty = DocIdSetIterator.empty();
+
+        @Override
+        public byte[] byteVectorVal(int doc) throws IOException {
+          return null;
+        }
+
+        @Override
+        protected DocIdSetIterator getVectorIterator() {
+          return empty;
+        }
+      };
     }

     return new VectorFieldFunction(this) {
@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -39,12 +41,26 @@ public class FloatKnnVectorFieldSource extends ValueSource {
   public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
       throws IOException {

-    final FloatVectorValues vectorValues = readerContext.reader().getFloatVectorValues(fieldName);
+    final LeafReader reader = readerContext.reader();
+    final FloatVectorValues vectorValues = reader.getFloatVectorValues(fieldName);

     if (vectorValues == null) {
-      throw new IllegalArgumentException(
-          "no float vector value is indexed for field '" + fieldName + "'");
+      VectorFieldFunction.checkField(reader, fieldName, VectorEncoding.FLOAT32);
+      return new VectorFieldFunction(this) {
+        private final DocIdSetIterator empty = DocIdSetIterator.empty();
+
+        @Override
+        public float[] floatVectorVal(int doc) throws IOException {
+          return null;
+        }
+
+        @Override
+        protected DocIdSetIterator getVectorIterator() {
+          return empty;
+        }
+      };
     }

     return new VectorFieldFunction(this) {

       @Override
@@ -17,6 +17,9 @@
 package org.apache.lucene.queries.function.valuesource;

 import java.io.IOException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -53,4 +56,29 @@ public abstract class VectorFieldFunction extends FunctionValues {
     }
     return doc == curDocID;
   }
+
+  /**
+   * Checks the Vector Encoding of a field
+   *
+   * @throws IllegalStateException if {@code field} exists, but was not indexed with vectors.
+   * @throws IllegalStateException if {@code field} has vectors, but using a different encoding
+   * @lucene.internal
+   * @lucene.experimental
+   */
+  static void checkField(LeafReader in, String field, VectorEncoding expectedEncoding) {
+    FieldInfo fi = in.getFieldInfos().fieldInfo(field);
+    if (fi != null) {
+      final VectorEncoding actual = fi.hasVectorValues() ? fi.getVectorEncoding() : null;
+      if (expectedEncoding != actual) {
+        throw new IllegalStateException(
+            "Unexpected vector encoding ("
+                + actual
+                + ") for field "
+                + field
+                + "(expected="
+                + expectedEncoding
+                + ")");
+      }
+    }
+  }
+}
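A hedged end-to-end sketch of what checkField changes in practice, modeled on the TestKnnVectorSimilarityFunctions hunks below: requesting float vectors from a field indexed with byte vectors now fails inside checkField with IllegalStateException, where the field sources previously threw IllegalArgumentException for any null vector values. The directory choice and field name are illustrative; assumes lucene-core and lucene-queries on the classpath:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class CheckFieldSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig())) {
        Document doc = new Document();
        // Field indexed with BYTE encoding
        doc.add(new KnnByteVectorField("knnByteField", new byte[] {1, 2, 3}));
        iw.addDocument(doc);
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Float source over a byte-encoded field -> encoding mismatch in checkField
        FunctionQuery q = new FunctionQuery(new FloatKnnVectorFieldSource("knnByteField"));
        try {
          searcher.search(q, 10);
        } catch (IllegalStateException expected) {
          System.out.println("caught: " + expected.getMessage());
        }
      }
    }
  }
}

When a field simply has no vectors in a given segment (the GITHUB#13105 case), the sources now return a FunctionValues backed by an empty iterator instead of throwing, as the two getValues hunks above show.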
@@ -78,6 +78,10 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
     document.add(new KnnByteVectorField("knnByteField2", new byte[] {4, 2, 3}));
     iw.addDocument(document);

+    if (usually(random())) {
+      iw.commit();
+    }
+
     Document document2 = new Document();
     document2.add(new StringField("id", "2", Field.Store.NO));
     document2.add(new SortedDocValuesField("id", new BytesRef("2")));
@@ -232,7 +236,7 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
         new ByteVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);

     assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
         () -> searcher.search(new FunctionQuery(byteDenseVectorSimilarityFunction), 10));

     v1 = new FloatKnnVectorFieldSource("knnByteField1");
@@ -241,8 +245,16 @@ public class TestKnnVectorSimilarityFunctions extends LuceneTestCase {
         new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);

     assertThrows(
-        IllegalArgumentException.class,
+        IllegalStateException.class,
         () -> searcher.search(new FunctionQuery(floatVectorSimilarityFunction), 10));

+    v1 = new FloatKnnVectorFieldSource("id");
+    FloatVectorSimilarityFunction idVectorSimilarityFunction =
+        new FloatVectorSimilarityFunction(VectorSimilarityFunction.EUCLIDEAN, v1, v2);
+
+    assertThrows(
+        IllegalStateException.class,
+        () -> searcher.search(new FunctionQuery(idVectorSimilarityFunction), 10));
   }

   private static void assertHits(Query q, float[] scores) throws Exception {
@@ -241,6 +241,9 @@ public class MockRandomMergePolicy extends MergePolicy {
     @Override
     public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException {
+      if (r.nextBoolean()) {
        if (LuceneTestCase.VERBOSE) {
          System.out.println("NOTE: MockRandomMergePolicy now reverses reader=" + reader);
        }
        // Reverse the doc ID order
        final int maxDoc = reader.maxDoc();
        return new Sorter.DocMap() {