diff --git a/buildSrc/src/main/java/org/apache/lucene/gradle/ErrorReportingTestListener.java b/buildSrc/src/main/java/org/apache/lucene/gradle/ErrorReportingTestListener.java
index 9ba150909d8..64abfa5aec8 100644
--- a/buildSrc/src/main/java/org/apache/lucene/gradle/ErrorReportingTestListener.java
+++ b/buildSrc/src/main/java/org/apache/lucene/gradle/ErrorReportingTestListener.java
@@ -113,7 +113,7 @@ public class ErrorReportingTestListener implements TestOutputListener, TestListe
if (echoOutput && !verboseMode) {
synchronized (this) {
- System.out.println("");
+ System.out.println();
System.out.println(suite.getClassName() + " > test suite's output saved to " + outputLog + ", copied below:");
try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
char[] buf = new char[1024];
diff --git a/dev-tools/doap/lucene.rdf b/dev-tools/doap/lucene.rdf
index 72a976a94fa..06a400ef883 100644
--- a/dev-tools/doap/lucene.rdf
+++ b/dev-tools/doap/lucene.rdf
@@ -67,6 +67,13 @@
+    <release>
+      <Version>
+        <name>lucene-9.10.0</name>
+        <created>2024-02-20</created>
+        <revision>9.10.0</revision>
+      </Version>
+    </release>
lucene-9.9.2
diff --git a/dev-tools/scripts/addBackcompatIndexes.py b/dev-tools/scripts/addBackcompatIndexes.py
index bbaf0b40630..7faacb8b8e3 100755
--- a/dev-tools/scripts/addBackcompatIndexes.py
+++ b/dev-tools/scripts/addBackcompatIndexes.py
@@ -45,16 +45,13 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'emptyIndex': 'empty'
}[indextype]
if indextype in ('cfs', 'nocfs'):
- dirname = 'index.%s' % indextype
filename = '%s.%s-%s.zip' % (prefix, index_version, indextype)
else:
- dirname = indextype
filename = '%s.%s.zip' % (prefix, index_version)
print(' creating %s...' % filename, end='', flush=True)
module = 'backward-codecs'
index_dir = os.path.join('lucene', module, 'src/test/org/apache/lucene/backward_index')
- test_file = os.path.join(index_dir, filename)
if os.path.exists(os.path.join(index_dir, filename)):
print('uptodate')
return
@@ -76,24 +73,20 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'-Dtests.codec=default'
])
base_dir = os.getcwd()
- bc_index_dir = os.path.join(temp_dir, dirname)
- bc_index_file = os.path.join(bc_index_dir, filename)
+ bc_index_file = os.path.join(temp_dir, filename)
if os.path.exists(bc_index_file):
print('alreadyexists')
else:
- if os.path.exists(bc_index_dir):
- shutil.rmtree(bc_index_dir)
os.chdir(source)
scriptutil.run('./gradlew %s' % gradle_args)
- os.chdir(bc_index_dir)
- scriptutil.run('zip %s *' % filename)
+ if not os.path.exists(bc_index_file):
+ raise Exception("Expected file can't be found: %s" %bc_index_file)
print('done')
print(' adding %s...' % filename, end='', flush=True)
scriptutil.run('cp %s %s' % (bc_index_file, os.path.join(base_dir, index_dir)))
os.chdir(base_dir)
- scriptutil.run('rm -rf %s' % bc_index_dir)
print('done')
def update_backcompat_tests(index_version, current_version):
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8eff18dfaf8..860961c11b8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -197,7 +197,10 @@ Improvements
Optimizations
---------------------
-(No changes)
+
+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+
+* GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)
Bug Fixes
---------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
index f999c25133f..94b684cd3d5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
@@ -278,7 +278,7 @@ class BrazilianStemmer {
return false;
}
- return value.substring(value.length() - suffix.length()).equals(suffix);
+ return value.endsWith(suffix);
}
/**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java
index 3f746491da5..7f176cccb30 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java
@@ -142,7 +142,7 @@ public class PatternParser extends DefaultHandler {
break;
}
}
- token.append(chars.toString().substring(0, i));
+ token.append(chars, 0, i);
// chars.delete(0,i);
for (int countr = i; countr < chars.length(); countr++) {
chars.setCharAt(countr - i, chars.charAt(countr));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilter.java
index 8e786aa59cc..d5122406261 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilter.java
@@ -669,7 +669,7 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
builder.append((char) ch);
}
} catch (Exception e) {
- if (gold.equals(builder.toString())) {
+ if (gold.contentEquals(builder)) {
throw e;
}
throw new Exception(
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
index 5b315cf3ce9..33348e26d7a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
@@ -30,19 +30,13 @@ import org.apache.lucene.tests.analysis.Token;
public class TestTrimFilter extends BaseTokenStreamTestCase {
public void testTrim() throws Exception {
- char[] a = " a ".toCharArray();
- char[] b = "b ".toCharArray();
- char[] ccc = "cCc".toCharArray();
- char[] whitespace = " ".toCharArray();
- char[] empty = "".toCharArray();
-
TokenStream ts =
new CannedTokenStream(
- new Token(new String(a, 0, a.length), 1, 5),
- new Token(new String(b, 0, b.length), 6, 10),
- new Token(new String(ccc, 0, ccc.length), 11, 15),
- new Token(new String(whitespace, 0, whitespace.length), 16, 20),
- new Token(new String(empty, 0, empty.length), 21, 21));
+ new Token(" a ", 1, 5),
+ new Token("b ", 6, 10),
+ new Token("cCc", 11, 15),
+ new Token(" ", 16, 20),
+ new Token("", 21, 21));
ts = new TrimFilter(ts);
assertTokenStreamContents(ts, new String[] {"a", "b", "cCc", "", ""});
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
index e39bffb9c64..06553575e5f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
@@ -82,8 +82,8 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
indexMatched.append((cs.correctOffset(i) < 0 ? "-" : input.charAt(cs.correctOffset(i))));
}
- boolean outputGood = expectedOutput.equals(output.toString());
- boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());
+ boolean outputGood = expectedOutput.contentEquals(output);
+ boolean indexMatchedGood = expectedIndexMatchedOutput.contentEquals(indexMatched);
if (!outputGood || !indexMatchedGood || false) {
System.out.println("Pattern : " + pattern);
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java
index 8df28d40dbc..04baa47425e 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java
@@ -26,6 +26,7 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
@@ -38,11 +39,17 @@ import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
@@ -253,10 +260,23 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
protected abstract void createIndex(Directory directory) throws IOException;
public final void createBWCIndex() throws IOException {
- Path indexDir = getIndexDir().resolve(indexName(Version.LATEST));
- Files.deleteIfExists(indexDir);
- try (Directory dir = newFSDirectory(indexDir)) {
+ Path zipFile = getIndexDir().resolve(indexName(Version.LATEST));
+ Files.deleteIfExists(zipFile);
+ Path tmpDir = createTempDir();
+
+ try (Directory dir = FSDirectory.open(tmpDir);
+ ZipOutputStream zipOut =
+ new ZipOutputStream(
+ Files.newOutputStream(
+ zipFile, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW))) {
createIndex(dir);
+ for (String file : dir.listAll()) {
+ try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
+ zipOut.putNextEntry(new ZipEntry(file));
+ new OutputStreamDataOutput(zipOut).copyBytes(in, in.length());
+ zipOut.closeEntry();
+ }
+ }
}
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java
index 0cd9f37d5c3..c7b1ea3fb4a 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java
@@ -20,8 +20,10 @@ import static org.apache.lucene.backward_index.BackwardsCompatibilityTestBase.cr
import java.io.IOException;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
import org.apache.lucene.util.Version;
+@SuppressFileSystems("ExtrasFS")
public class TestGenerateBwcIndices extends LuceneTestCase {
// Backcompat index generation, described below, is mostly automated in:
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java
index c57f319213f..82de070189c 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java
@@ -55,7 +55,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
static final String INDEX_NAME = "sorted";
static final String SUFFIX = "";
- private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
+ private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_11_0;
private static final String PARENT_FIELD_NAME = "___parent";
public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-cfs.zip
new file mode 100644
index 00000000000..59d38f72b4f
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-cfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-nocfs.zip
new file mode 100644
index 00000000000..6da00722761
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-nocfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.10.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.10.0.zip
new file mode 100644
index 00000000000..131d82c5acf
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.10.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
index 8d3c001554d..8aa27313b67 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
@@ -37,4 +37,5 @@
9.8.0
9.9.0
9.9.1
-9.9.2
\ No newline at end of file
+9.9.2
+9.10.0
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
index 8caa25bd8d6..cd0daa42d4b 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
@@ -112,13 +112,13 @@ public class EnwikiContentSource extends ContentSource {
String time(String original) {
StringBuilder buffer = new StringBuilder();
- buffer.append(original.substring(8, 10));
+ buffer.append(original, 8, 10);
buffer.append('-');
buffer.append(months[Integer.parseInt(original.substring(5, 7)) - 1]);
buffer.append('-');
- buffer.append(original.substring(0, 4));
+ buffer.append(original, 0, 4);
buffer.append(' ');
- buffer.append(original.substring(11, 19));
+ buffer.append(original, 11, 19);
buffer.append(".000");
return buffer.toString();
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java
index cdce0b479fb..f6ebae4158f 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java
@@ -60,7 +60,7 @@ public class TrecFBISParser extends TrecDocParser {
docData.setName(name);
docData.setDate(date);
docData.setTitle(title);
- docData.setBody(stripTags(docBuf, mark).toString());
+ docData.setBody(stripTags(docBuf, mark));
return docData;
}
}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java
index e2dae3ab775..06a4f0fbdd1 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java
@@ -53,14 +53,14 @@ public class TrecFR94Parser extends TrecDocParser {
// date...
String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
if (dateStr != null) {
- dateStr = stripTags(dateStr, 0).toString();
+ dateStr = stripTags(dateStr, 0);
date = trecSrc.parseDate(dateStr.trim());
}
}
docData.clear();
docData.setName(name);
docData.setDate(date);
- docData.setBody(stripTags(docBuf, mark).toString());
+ docData.setBody(stripTags(docBuf, mark));
return docData;
}
}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java
index 57762e884f0..a0d8e570cfa 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java
@@ -52,7 +52,7 @@ public class TrecFTParser extends TrecDocParser {
docData.setName(name);
docData.setDate(date);
docData.setTitle(title);
- docData.setBody(stripTags(docBuf, mark).toString());
+ docData.setBody(stripTags(docBuf, mark));
return docData;
}
}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java
index 933859e0ddb..186465a6a0d 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java
@@ -49,7 +49,7 @@ public class TrecLATimesParser extends TrecDocParser {
if (d2a > 0) {
dateStr = dateStr.substring(0, d2a + 3); // we need the "day" part
}
- dateStr = stripTags(dateStr, 0).toString();
+ dateStr = stripTags(dateStr, 0);
date = trecSrc.parseDate(dateStr.trim());
}
@@ -59,14 +59,14 @@ public class TrecLATimesParser extends TrecDocParser {
title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
}
if (title != null) {
- title = stripTags(title, 0).toString().trim();
+ title = stripTags(title, 0).trim();
}
docData.clear();
docData.setName(name);
docData.setDate(date);
docData.setTitle(title);
- docData.setBody(stripTags(docBuf, mark).toString());
+ docData.setBody(stripTags(docBuf, mark));
return docData;
}
}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
index e8eb8f27042..8dc1ace3177 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
@@ -59,7 +59,7 @@ public class SearchWithSortTask extends ReadTask {
String typeString;
if (index != -1) {
fieldName = field.substring(0, index);
- typeString = field.substring(1 + index, field.length());
+ typeString = field.substring(1 + index);
} else {
throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
index b0c19e8526e..d95e82f62ec 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
@@ -169,7 +169,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
if (type == TYPE_STRING) {
byte[] bytes = new byte[scratch.length() - VALUE.length];
System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
- visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
+ visitor.stringField(fieldInfo, new String(bytes, StandardCharsets.UTF_8));
} else if (type == TYPE_BINARY) {
byte[] copy = new byte[scratch.length() - VALUE.length];
System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 525381d60c9..bdfa78af87f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -380,7 +380,7 @@ public final class CodecUtil {
int suffixLength = in.readByte() & 0xFF;
byte[] suffixBytes = new byte[suffixLength];
in.readBytes(suffixBytes, 0, suffixBytes.length);
- String suffix = new String(suffixBytes, 0, suffixBytes.length, StandardCharsets.UTF_8);
+ String suffix = new String(suffixBytes, StandardCharsets.UTF_8);
if (!suffix.equals(expectedSuffix)) {
throw new CorruptIndexException(
"file mismatch, expected suffix=" + expectedSuffix + ", got=" + suffix, in);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
index 97c05435b96..341e28c36f5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene94;
import java.io.IOException;
import java.util.Collections;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
@@ -111,6 +112,8 @@ import org.apache.lucene.store.IndexOutput;
* 0: EUCLIDEAN distance. ({@link VectorSimilarityFunction#EUCLIDEAN})
* 1: DOT_PRODUCT similarity. ({@link VectorSimilarityFunction#DOT_PRODUCT})
* 2: COSINE similarity. ({@link VectorSimilarityFunction#COSINE})
+ *       <li>3: MAXIMUM_INNER_PRODUCT similarity. ({@link
+ *           VectorSimilarityFunction#MAXIMUM_INNER_PRODUCT})
*
*
*
@@ -284,10 +287,38 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
}
private static VectorSimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
- if (b < 0 || b >= VectorSimilarityFunction.values().length) {
- throw new CorruptIndexException("invalid distance function: " + b, input);
+ try {
+ return distOrdToFunc(b);
+ } catch (IllegalArgumentException e) {
+ throw new CorruptIndexException("invalid distance function: " + b, input, e);
}
- return VectorSimilarityFunction.values()[b];
+ }
+
+ // List of vector similarity functions. This list is defined here, in order
+ // to avoid an undesirable dependency on the declaration and order of values
+ // in VectorSimilarityFunction. The list values and order have been chosen to
+ // match that of VectorSimilarityFunction in, at least, Lucene 9.10. Values
+  static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+      List.of(
+ VectorSimilarityFunction.EUCLIDEAN,
+ VectorSimilarityFunction.DOT_PRODUCT,
+ VectorSimilarityFunction.COSINE,
+ VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+
+ static VectorSimilarityFunction distOrdToFunc(byte i) {
+ if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+ throw new IllegalArgumentException("invalid distance function: " + i);
+ }
+ return SIMILARITY_FUNCTIONS.get(i);
+ }
+
+ static byte distFuncToOrd(VectorSimilarityFunction func) {
+ for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+ if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+ return (byte) i;
+ }
+ }
+ throw new IllegalArgumentException("invalid distance function: " + func);
}
static {
@@ -378,7 +409,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
}
output.writeVInt(fi.getVectorDimension());
output.writeByte((byte) fi.getVectorEncoding().ordinal());
- output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
+ output.writeByte(distFuncToOrd(fi.getVectorSimilarityFunction()));
}
CodecUtil.writeFooter(output);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java
index 9ebac62ce9b..efb51c963e0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java
@@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FlatVectorsReader;
@@ -171,15 +172,24 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
}
}
+ // List of vector similarity functions. This list is defined here, in order
+ // to avoid an undesirable dependency on the declaration and order of values
+ // in VectorSimilarityFunction. The list values and order must be identical
+ // to that of {@link o.a.l.c.l.Lucene94FieldInfosFormat#SIMILARITY_FUNCTIONS}.
+  public static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+      List.of(
+ VectorSimilarityFunction.EUCLIDEAN,
+ VectorSimilarityFunction.DOT_PRODUCT,
+ VectorSimilarityFunction.COSINE,
+ VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+
public static VectorSimilarityFunction readSimilarityFunction(DataInput input)
throws IOException {
- int similarityFunctionId = input.readInt();
- if (similarityFunctionId < 0
- || similarityFunctionId >= VectorSimilarityFunction.values().length) {
- throw new CorruptIndexException(
- "Invalid similarity function id: " + similarityFunctionId, input);
+ int i = input.readInt();
+ if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+ throw new IllegalArgumentException("invalid distance function: " + i);
}
- return VectorSimilarityFunction.values()[similarityFunctionId];
+ return SIMILARITY_FUNCTIONS.get(i);
}
public static VectorEncoding readVectorEncoding(DataInput input) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
index 174c65db9ac..a236dd7c65b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
@@ -18,6 +18,7 @@
package org.apache.lucene.codecs.lucene99;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
import java.io.IOException;
import java.util.ArrayList;
@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.store.IndexOutput;
@@ -436,7 +438,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
throws IOException {
meta.writeInt(field.number);
meta.writeInt(field.getVectorEncoding().ordinal());
- meta.writeInt(field.getVectorSimilarityFunction().ordinal());
+ meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
meta.writeVLong(vectorIndexOffset);
meta.writeVLong(vectorIndexLength);
meta.writeVInt(field.getVectorDimension());
@@ -500,6 +502,15 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
IOUtils.close(meta, vectorIndex, flatVectorWriter);
}
+ static int distFuncToOrd(VectorSimilarityFunction func) {
+ for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+ if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+ return (byte) i;
+ }
+ }
+ throw new IllegalArgumentException("invalid distance function: " + func);
+ }
+
private static class FieldWriter<T> extends KnnFieldVectorsWriter<T> {
private static final long SHALLOW_SIZE =
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
index ec6ced68002..d85df1cf8cd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -384,7 +384,7 @@ final class DocumentsWriter implements Closeable, Accountable {
ensureOpen();
boolean hasEvents = false;
while (flushControl.anyStalledThreads()
- || (flushControl.numQueuedFlushes() > 0 && config.checkPendingFlushOnUpdate)) {
+ || (config.checkPendingFlushOnUpdate && flushControl.numQueuedFlushes() > 0)) {
// Help out flushing any queued DWPTs so we can un-stall:
// Try pickup pending threads here if possible
// no need to loop over the next pending flushes... doFlush will take care of this
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
index 968900c7f0c..b8459e42afa 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
@@ -191,7 +191,7 @@ public final class IndexFileNames {
if (idx == -1) {
return null;
} else {
- return filename.substring(idx + 1, filename.length());
+ return filename.substring(idx + 1);
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
index 8fbe0359698..b51cdf86306 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
@@ -677,16 +677,11 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
public void testMaxTokenLengthDefault() throws Exception {
StandardAnalyzer a = new StandardAnalyzer();
- StringBuilder bToken = new StringBuilder();
// exact max length:
- for (int i = 0; i < StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; i++) {
- bToken.append('b');
- }
-
- String bString = bToken.toString();
+ String bString = "b".repeat(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
// first bString is exact max default length; next one is 1 too long
String input = "x " + bString + " " + bString + "b";
- assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+ assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
a.close();
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene94/TestLucene94FieldInfosFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene94/TestLucene94FieldInfosFormat.java
new file mode 100644
index 00000000000..c69eeadf5e6
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene94/TestLucene94FieldInfosFormat.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene94;
+
+import java.util.Arrays;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.tests.index.BaseFieldInfoFormatTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+
+public class TestLucene94FieldInfosFormat extends BaseFieldInfoFormatTestCase {
+ @Override
+ protected Codec getCodec() {
+ return TestUtil.getDefaultCodec();
+ }
+
+ // Ensures that all expected vector similarity functions are translatable
+ // in the format.
+ public void testVectorSimilarityFuncs() {
+ // This does not necessarily have to be all similarity functions, but
+ // differences should be considered carefully.
+ var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+
+ assertEquals(Lucene94FieldInfosFormat.SIMILARITY_FUNCTIONS, expectedValues);
+ }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
index be0b01f3e0b..382389bc8f3 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene99;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
@@ -186,4 +187,13 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
new Lucene99HnswScalarQuantizedVectorsFormat(
20, 100, 1, null, new SameThreadExecutorService()));
}
+
+ // Ensures that all expected vector similarity functions are translatable
+ // in the format.
+ public void testVectorSimilarityFuncs() {
+ // This does not necessarily have to be all similarity functions, but
+ // differences should be considered carefully.
+ var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+ assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java
index e05f3ae6633..4695c5b1f5e 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.StringReader;
-import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
@@ -289,12 +288,10 @@ public class TestPayloads extends LuceneTestCase {
reader.close();
}
- static final Charset utf8 = StandardCharsets.UTF_8;
-
private void generateRandomData(byte[] data) {
// this test needs the random data to be valid unicode
String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length);
- byte[] b = s.getBytes(utf8);
+ byte[] b = s.getBytes(StandardCharsets.UTF_8);
assert b.length == data.length;
System.arraycopy(b, 0, data, 0, b.length);
}
@@ -493,7 +490,7 @@ public class TestPayloads extends LuceneTestCase {
this.pool = pool;
payload = pool.get();
generateRandomData(payload);
- term = new String(payload, 0, payload.length, utf8);
+ term = new String(payload, StandardCharsets.UTF_8);
first = true;
payloadAtt = addAttribute(PayloadAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java
index 80f61e6d171..0cb8b083057 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java
@@ -107,7 +107,7 @@ public class TestPrefixRandom extends LuceneTestCase {
@Override
public String toString(String field) {
- return field.toString() + ":" + prefix.toString();
+ return field + ":" + prefix;
}
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
index 4b5f0eb5c08..5f48b0861bb 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
@@ -143,7 +143,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
@Override
public String toString(String field) {
- return field.toString() + automaton.toString();
+ return field + automaton;
}
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
index 32fc2005cdd..c8adb8751b9 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
@@ -213,10 +213,10 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
List list = new ArrayList<>();
for (int i = 0; i < s.length() - 1; i++) {
StringBuilder sb = new StringBuilder();
- sb.append(s.substring(0, i));
+ sb.append(s, 0, i);
sb.append(s.charAt(i + 1));
sb.append(s.charAt(i));
- sb.append(s.substring(i + 2, s.length()));
+ sb.append(s, i + 2, s.length());
String st = sb.toString();
if (!st.equals(s)) {
list.add(Automata.makeString(st));
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
index c83f2631845..c934108115d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
@@ -119,7 +119,7 @@ public class TestRegExp extends LuceneTestCase {
// Add any head to the result, unchanged
if (substitutionPoint > 0) {
- result.append(docValue.substring(0, substitutionPoint));
+ result.append(docValue, 0, substitutionPoint);
}
// Modify the middle...
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index 4f2d4af6da0..dfedf7974fd 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -1398,7 +1398,7 @@ public class UnifiedHighlighter {
curValueBuilder.append(curValue);
}
curValueBuilder.append(valueSeparator);
- curValueBuilder.append(value.substring(0, Math.min(lengthBudget - 1, value.length())));
+ curValueBuilder.append(value, 0, Math.min(lengthBudget - 1, value.length()));
values[currentField] = curValueBuilder;
}
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java b/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java
index 457d72bc4a3..d9c8b29caef 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java
@@ -49,7 +49,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntroSorter;
+import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.packed.PackedInts;
@@ -251,17 +251,17 @@ public final class BPIndexReorderer {
private class IndexReorderingTask extends BaseRecursiveAction {
private final IntsRef docIDs;
- private final float[] gains;
+ private final float[] biases;
private final CloseableThreadLocal threadLocal;
IndexReorderingTask(
IntsRef docIDs,
- float[] gains,
+ float[] biases,
CloseableThreadLocal threadLocal,
int depth) {
super(depth);
this.docIDs = docIDs;
- this.gains = gains;
+ this.biases = biases;
this.threadLocal = threadLocal;
}
@@ -293,14 +293,14 @@ public final class BPIndexReorderer {
assert sorted(docIDs);
}
- int leftSize = docIDs.length / 2;
- if (leftSize < minPartitionSize) {
+ int halfLength = docIDs.length / 2;
+ if (halfLength < minPartitionSize) {
return;
}
- int rightSize = docIDs.length - leftSize;
- IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, leftSize);
- IntsRef right = new IntsRef(docIDs.ints, docIDs.offset + leftSize, rightSize);
+ IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, halfLength);
+ IntsRef right =
+ new IntsRef(docIDs.ints, docIDs.offset + halfLength, docIDs.length - halfLength);
PerThreadState state = threadLocal.get();
ForwardIndex forwardIndex = state.forwardIndex;
@@ -313,7 +313,9 @@ public final class BPIndexReorderer {
for (int iter = 0; iter < maxIters; ++iter) {
boolean moved;
try {
- moved = shuffle(forwardIndex, left, right, leftDocFreqs, rightDocFreqs, gains, iter);
+ moved =
+ shuffle(
+ forwardIndex, docIDs, right.offset, leftDocFreqs, rightDocFreqs, biases, iter);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
@@ -322,10 +324,11 @@ public final class BPIndexReorderer {
}
}
- // It is fine for all tasks to share the same docs / gains array since they all work on
+ // It is fine for all tasks to share the same docs / biases array since they all work on
// different slices of the array at a given point in time.
- IndexReorderingTask leftTask = new IndexReorderingTask(left, gains, threadLocal, depth + 1);
- IndexReorderingTask rightTask = new IndexReorderingTask(right, gains, threadLocal, depth + 1);
+ IndexReorderingTask leftTask = new IndexReorderingTask(left, biases, threadLocal, depth + 1);
+ IndexReorderingTask rightTask =
+ new IndexReorderingTask(right, biases, threadLocal, depth + 1);
if (shouldFork(docIDs.length, docIDs.ints.length)) {
invokeAll(leftTask, rightTask);
@@ -341,116 +344,94 @@ public final class BPIndexReorderer {
*/
private boolean shuffle(
ForwardIndex forwardIndex,
- IntsRef left,
- IntsRef right,
+ IntsRef docIDs,
+ int midPoint,
int[] leftDocFreqs,
int[] rightDocFreqs,
- float[] gains,
+ float[] biases,
int iter)
throws IOException {
- assert left.ints == right.ints;
- assert left.offset + left.length == right.offset;
- // Computing gains is typically a bottleneck, because each iteration needs to iterate over all
- // postings to recompute gains, and the total number of postings is usually one order of
+ // Computing biases is typically a bottleneck, because each iteration needs to iterate over
+ // all postings to recompute biases, and the total number of postings is usually one order of
// magnitude or more larger than the number of docs. So we try to parallelize it.
- ComputeGainsTask leftGainsTask =
- new ComputeGainsTask(
- left.ints,
- gains,
- left.offset,
- left.offset + left.length,
+ new ComputeBiasTask(
+ docIDs.ints,
+ biases,
+ docIDs.offset,
+ docIDs.offset + docIDs.length,
leftDocFreqs,
rightDocFreqs,
threadLocal,
- depth);
- ComputeGainsTask rightGainsTask =
- new ComputeGainsTask(
- right.ints,
- gains,
- right.offset,
- right.offset + right.length,
- rightDocFreqs,
- leftDocFreqs,
- threadLocal,
- depth);
- if (shouldFork(docIDs.length, docIDs.ints.length)) {
- invokeAll(leftGainsTask, rightGainsTask);
- } else {
- leftGainsTask.compute();
- rightGainsTask.compute();
+ depth)
+ .compute();
+
+ float maxLeftBias = Float.NEGATIVE_INFINITY;
+ for (int i = docIDs.offset; i < midPoint; ++i) {
+ maxLeftBias = Math.max(maxLeftBias, biases[i]);
+ }
+ float minRightBias = Float.POSITIVE_INFINITY;
+ for (int i = midPoint, end = docIDs.offset + docIDs.length; i < end; ++i) {
+ minRightBias = Math.min(minRightBias, biases[i]);
+ }
+ float gain = maxLeftBias - minRightBias;
+ // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
+ // Bipartite Graph Partitioning" by comparing the gain of swapping the doc from the left side
+ // that is most attracted to the right and the doc from the right side that is most attracted
+ // to the left against `iter` rather than zero.
+ if (gain <= iter) {
+ return false;
}
- class ByDescendingGainSorter extends IntroSorter {
+ new IntroSelector() {
int pivotDoc;
- float pivotGain;
+ float pivotBias;
@Override
protected void setPivot(int i) {
- pivotDoc = left.ints[i];
- pivotGain = gains[i];
+ pivotDoc = docIDs.ints[i];
+ pivotBias = biases[i];
}
@Override
protected int comparePivot(int j) {
- // Compare in reverse order to get a descending sort
- int cmp = Float.compare(gains[j], pivotGain);
+ int cmp = Float.compare(pivotBias, biases[j]);
if (cmp == 0) {
// Tie break on the doc ID to preserve doc ID ordering as much as possible
- cmp = pivotDoc - left.ints[j];
+ cmp = pivotDoc - docIDs.ints[j];
}
return cmp;
}
@Override
protected void swap(int i, int j) {
- int tmpDoc = left.ints[i];
- left.ints[i] = left.ints[j];
- left.ints[j] = tmpDoc;
+ float tmpBias = biases[i];
+ biases[i] = biases[j];
+ biases[j] = tmpBias;
- float tmpGain = gains[i];
- gains[i] = gains[j];
- gains[j] = tmpGain;
- }
- }
-
- Runnable leftSorter =
- () -> new ByDescendingGainSorter().sort(left.offset, left.offset + left.length);
- Runnable rightSorter =
- () -> new ByDescendingGainSorter().sort(right.offset, right.offset + right.length);
-
- if (shouldFork(docIDs.length, docIDs.ints.length)) {
- // TODO: run it on more than 2 threads at most
- invokeAll(adapt(leftSorter), adapt(rightSorter));
- } else {
- leftSorter.run();
- rightSorter.run();
- }
-
- for (int i = 0; i < left.length; ++i) {
- // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
- // Bipartite Graph Partitioning" by comparing the gain against `iter` rather than zero.
- if (gains[left.offset + i] + gains[right.offset + i] <= iter) {
- if (i == 0) {
- return false;
+ if (i < midPoint == j < midPoint) {
+ int tmpDoc = docIDs.ints[i];
+ docIDs.ints[i] = docIDs.ints[j];
+ docIDs.ints[j] = tmpDoc;
+ } else {
+ // If we're swapping docs across the left and right sides, we need to keep doc freqs
+ // up-to-date.
+ int left = Math.min(i, j);
+ int right = Math.max(i, j);
+ try {
+ swapDocsAndFreqs(docIDs.ints, left, right, forwardIndex, leftDocFreqs, rightDocFreqs);
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
}
- break;
}
-
- swap(
- left.ints,
- left.offset + i,
- right.offset + i,
- forwardIndex,
- leftDocFreqs,
- rightDocFreqs);
- }
+ }.select(docIDs.offset, docIDs.offset + docIDs.length, midPoint);
return true;
}
- private static void swap(
+ private static void swapDocsAndFreqs(
int[] docs,
int left,
int right,
@@ -492,19 +473,19 @@ public final class BPIndexReorderer {
}
}
- private class ComputeGainsTask extends BaseRecursiveAction {
+ private class ComputeBiasTask extends BaseRecursiveAction {
private final int[] docs;
- private final float[] gains;
+ private final float[] biases;
private final int from;
private final int to;
private final int[] fromDocFreqs;
private final int[] toDocFreqs;
private final CloseableThreadLocal threadLocal;
- ComputeGainsTask(
+ ComputeBiasTask(
int[] docs,
- float[] gains,
+ float[] biases,
int from,
int to,
int[] fromDocFreqs,
@@ -513,7 +494,7 @@ public final class BPIndexReorderer {
int depth) {
super(depth);
this.docs = docs;
- this.gains = gains;
+ this.biases = biases;
this.from = from;
this.to = to;
this.fromDocFreqs = fromDocFreqs;
@@ -527,15 +508,15 @@ public final class BPIndexReorderer {
if (problemSize > 1 && shouldFork(problemSize, docs.length)) {
final int mid = (from + to) >>> 1;
invokeAll(
- new ComputeGainsTask(
- docs, gains, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
- new ComputeGainsTask(
- docs, gains, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
+ new ComputeBiasTask(
+ docs, biases, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
+ new ComputeBiasTask(
+ docs, biases, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
} else {
ForwardIndex forwardIndex = threadLocal.get().forwardIndex;
try {
for (int i = from; i < to; ++i) {
- gains[i] = computeGain(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
+ biases[i] = computeBias(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -547,11 +528,11 @@ public final class BPIndexReorderer {
* Compute a float that is negative when a document is attracted to the left and positive
* otherwise.
*/
- private static float computeGain(
+ private static float computeBias(
int docID, ForwardIndex forwardIndex, int[] fromDocFreqs, int[] toDocFreqs)
throws IOException {
forwardIndex.seek(docID);
- double gain = 0;
+ double bias = 0;
for (IntsRef terms = forwardIndex.nextTerms();
terms.length != 0;
terms = forwardIndex.nextTerms()) {
@@ -561,12 +542,12 @@ public final class BPIndexReorderer {
final int toDocFreq = toDocFreqs[termID];
assert fromDocFreq >= 0;
assert toDocFreq >= 0;
- gain +=
+ bias +=
(toDocFreq == 0 ? 0 : fastLog2(toDocFreq))
- (fromDocFreq == 0 ? 0 : fastLog2(fromDocFreq));
}
}
- return (float) gain;
+ return (float) bias;
}
}
@@ -869,7 +850,7 @@ public final class BPIndexReorderer {
}
private static long docRAMRequirements(int maxDoc) {
- // We need one int per doc for the doc map, plus one float to store the gain associated with
+ // We need one int per doc for the doc map, plus one float to store the bias associated with
// this doc.
return 2L * Integer.BYTES * maxDoc;
}
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/surround/query/BooleanQueryTestFacade.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/surround/query/BooleanQueryTestFacade.java
index 83b1d78d685..d5da92c5300 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/surround/query/BooleanQueryTestFacade.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/surround/query/BooleanQueryTestFacade.java
@@ -114,7 +114,7 @@ public class BooleanQueryTestFacade {
public void doTest() throws Exception {
if (verbose) {
- System.out.println("");
+ System.out.println();
System.out.println("Query: " + queryText);
}
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
index 8fe372feec3..642f466bc03 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
@@ -113,7 +113,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
if (pointsOnly) str.append(",pointsOnly");
if (pruneLeafyBranches) str.append(",pruneLeafyBranches");
if (prefixGridScanLevel != grid.getMaxLevels() - 4)
- str.append(",prefixGridScanLevel:").append("").append(prefixGridScanLevel);
+ str.append(",prefixGridScanLevel:").append(prefixGridScanLevel);
if (!multiOverlappingIndexedShapes) str.append(",!multiOverlappingIndexedShapes");
return str.append(')').toString();
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index e1772102098..afaf304a4ad 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -927,7 +927,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
return;
}
sb.append("");
- sb.append(surface.substring(0, prefixToken.length()));
+ sb.append(surface, 0, prefixToken.length());
sb.append("");
sb.append(surface.substring(prefixToken.length()));
}
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestAnalyzingInfixSuggester.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestAnalyzingInfixSuggester.java
index 2361f6b0c31..6983c7eba59 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestAnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestAnalyzingInfixSuggester.java
@@ -892,7 +892,7 @@ public class TestAnalyzingInfixSuggester extends LuceneTestCase {
b.append("");
b.append(queryTerm);
b.append("");
- b.append(inputTerm.substring(queryTerm.length(), inputTerm.length()));
+ b.append(inputTerm.substring(queryTerm.length()));
matched = true;
break;
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/search/CheckHits.java b/lucene/test-framework/src/java/org/apache/lucene/tests/search/CheckHits.java
index e01fc3877a8..3c6bb891c92 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/search/CheckHits.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/search/CheckHits.java
@@ -793,6 +793,10 @@ public class CheckHits {
assertTrue(s2 == null || s2.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
continue;
}
+ if (s2 == null) {
+ assertTrue(s1.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
+ continue;
+ }
TwoPhaseIterator twoPhase1 = s1.twoPhaseIterator();
TwoPhaseIterator twoPhase2 = s2.twoPhaseIterator();
DocIdSetIterator approx1 = twoPhase1 == null ? s1.iterator() : twoPhase1.approximation();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/English.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/English.java
index c78fb7ac362..5b48b617e72 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/English.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/English.java
@@ -166,7 +166,6 @@ public final class English {
result.append("one ");
break;
case 0:
- result.append("");
break;
}
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java
index e0158c4c542..91b897ffb70 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java
@@ -307,7 +307,7 @@ public class LineFileDocs implements Closeable {
throw new RuntimeException("line: [" + line + "] is in an invalid format !");
}
- docState.body.setStringValue(line.substring(1 + spot2, line.length()));
+ docState.body.setStringValue(line.substring(1 + spot2));
final String title = line.substring(0, spot);
docState.title.setStringValue(title);
docState.titleTokenized.setStringValue(title);