Merge branch 'main' into java_21

commit 54b6248a8d
Author: ChrisHegarty
Date: 2024-02-23 17:07:54 +00:00

46 changed files with 284 additions and 186 deletions

View File

@@ -113,7 +113,7 @@ public class ErrorReportingTestListener implements TestOutputListener, TestListener {
 if (echoOutput && !verboseMode) {
 synchronized (this) {
-System.out.println("");
+System.out.println();
 System.out.println(suite.getClassName() + " > test suite's output saved to " + outputLog + ", copied below:");
 try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
 char[] buf = new char[1024];

View File

@@ -67,6 +67,13 @@
 </maintainer>
 <!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
+<release>
+<Version>
+<name>lucene-9.10.0</name>
+<created>2024-02-20</created>
+<revision>9.10.0</revision>
+</Version>
+</release>
 <release>
 <Version>
 <name>lucene-9.9.2</name>

View File

@@ -45,16 +45,13 @@ def create_and_add_index(source, indextype, index_version, current_version, temp_dir):
 'emptyIndex': 'empty'
 }[indextype]
 if indextype in ('cfs', 'nocfs'):
-dirname = 'index.%s' % indextype
 filename = '%s.%s-%s.zip' % (prefix, index_version, indextype)
 else:
-dirname = indextype
 filename = '%s.%s.zip' % (prefix, index_version)
 print(' creating %s...' % filename, end='', flush=True)
 module = 'backward-codecs'
 index_dir = os.path.join('lucene', module, 'src/test/org/apache/lucene/backward_index')
 test_file = os.path.join(index_dir, filename)
 if os.path.exists(os.path.join(index_dir, filename)):
 print('uptodate')
 return
@@ -76,24 +73,20 @@ def create_and_add_index(source, indextype, index_version, current_version, temp_dir):
 '-Dtests.codec=default'
 ])
 base_dir = os.getcwd()
-bc_index_dir = os.path.join(temp_dir, dirname)
-bc_index_file = os.path.join(bc_index_dir, filename)
+bc_index_file = os.path.join(temp_dir, filename)
 if os.path.exists(bc_index_file):
 print('alreadyexists')
 else:
-if os.path.exists(bc_index_dir):
-shutil.rmtree(bc_index_dir)
 os.chdir(source)
 scriptutil.run('./gradlew %s' % gradle_args)
-os.chdir(bc_index_dir)
-scriptutil.run('zip %s *' % filename)
+if not os.path.exists(bc_index_file):
+raise Exception("Expected file can't be found: %s" %bc_index_file)
 print('done')
 print(' adding %s...' % filename, end='', flush=True)
 scriptutil.run('cp %s %s' % (bc_index_file, os.path.join(base_dir, index_dir)))
 os.chdir(base_dir)
-scriptutil.run('rm -rf %s' % bc_index_dir)
 print('done')
 def update_backcompat_tests(index_version, current_version):

View File

@@ -197,7 +197,10 @@ Improvements
 Optimizations
 ---------------------
-(No changes)
+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+* GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)
 Bug Fixes
 ---------------------

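Both entries are micro-optimizations. GITHUB#13085 is the theme of most hunks below: replacing allocation-heavy String calls with equivalents that compare or copy in place. A minimal standalone sketch of the equivalences relied on (values are hypothetical, not from the commit):

    public class StringAllocationDemo {
      public static void main(String[] args) {
        String value = "characters";
        String suffix = "ers";
        // substring(...).equals(...) allocates a temporary String just to compare it;
        // endsWith(...) compares in place.
        boolean before = value.substring(value.length() - suffix.length()).equals(suffix);
        boolean after = value.endsWith(suffix);
        System.out.println(before + " " + after); // true true
        // The one-argument substring is equivalent to passing length() explicitly.
        System.out.println(value.substring(4).equals(value.substring(4, value.length()))); // true
        // StringBuilder.append(CharSequence, start, end) avoids substring() entirely.
        StringBuilder sb = new StringBuilder();
        sb.append(value, 0, 4); // same output as sb.append(value.substring(0, 4))
        System.out.println(sb); // char
      }
    }
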
View File

@@ -278,7 +278,7 @@ class BrazilianStemmer {
 return false;
 }
-return value.substring(value.length() - suffix.length()).equals(suffix);
+return value.endsWith(suffix);
 }
 /**

View File

@@ -142,7 +142,7 @@ public class PatternParser extends DefaultHandler {
 break;
 }
 }
-token.append(chars.toString().substring(0, i));
+token.append(chars, 0, i);
 // chars.delete(0,i);
 for (int countr = i; countr < chars.length(); countr++) {
 chars.setCharAt(countr - i, chars.charAt(countr));

View File

@@ -669,7 +669,7 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
 builder.append((char) ch);
 }
 } catch (Exception e) {
-if (gold.equals(builder.toString())) {
+if (gold.contentEquals(builder)) {
 throw e;
 }
 throw new Exception(

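String.contentEquals accepts any CharSequence, so a StringBuilder can be compared against a String without first materializing builder.toString(). A minimal sketch of the pattern used in this test and the next:

    public class ContentEqualsDemo {
      public static void main(String[] args) {
        String gold = "expected output";
        StringBuilder builder = new StringBuilder("expected output");
        // Before: allocates a throwaway String for the comparison.
        boolean viaToString = gold.equals(builder.toString());
        // After: walks the builder's characters directly.
        boolean viaContentEquals = gold.contentEquals(builder);
        System.out.println(viaToString + " " + viaContentEquals); // true true
      }
    }
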
View File

@@ -30,19 +30,13 @@ import org.apache.lucene.tests.analysis.Token;
 public class TestTrimFilter extends BaseTokenStreamTestCase {
 public void testTrim() throws Exception {
-char[] a = " a ".toCharArray();
-char[] b = "b ".toCharArray();
-char[] ccc = "cCc".toCharArray();
-char[] whitespace = " ".toCharArray();
-char[] empty = "".toCharArray();
 TokenStream ts =
 new CannedTokenStream(
-new Token(new String(a, 0, a.length), 1, 5),
-new Token(new String(b, 0, b.length), 6, 10),
-new Token(new String(ccc, 0, ccc.length), 11, 15),
-new Token(new String(whitespace, 0, whitespace.length), 16, 20),
-new Token(new String(empty, 0, empty.length), 21, 21));
+new Token(" a ", 1, 5),
+new Token("b ", 6, 10),
+new Token("cCc", 11, 15),
+new Token(" ", 16, 20),
+new Token("", 21, 21));
 ts = new TrimFilter(ts);
 assertTokenStreamContents(ts, new String[] {"a", "b", "cCc", "", ""});

View File

@@ -82,8 +82,8 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
 indexMatched.append((cs.correctOffset(i) < 0 ? "-" : input.charAt(cs.correctOffset(i))));
 }
-boolean outputGood = expectedOutput.equals(output.toString());
-boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());
+boolean outputGood = expectedOutput.contentEquals(output);
+boolean indexMatchedGood = expectedIndexMatchedOutput.contentEquals(indexMatched);
 if (!outputGood || !indexMatchedGood || false) {
 System.out.println("Pattern : " + pattern);

View File

@@ -26,6 +26,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -38,11 +39,17 @@ import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.BytesRef;
@@ -253,10 +260,23 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
 protected abstract void createIndex(Directory directory) throws IOException;
 public final void createBWCIndex() throws IOException {
-Path indexDir = getIndexDir().resolve(indexName(Version.LATEST));
-Files.deleteIfExists(indexDir);
-try (Directory dir = newFSDirectory(indexDir)) {
+Path zipFile = getIndexDir().resolve(indexName(Version.LATEST));
+Files.deleteIfExists(zipFile);
+Path tmpDir = createTempDir();
+try (Directory dir = FSDirectory.open(tmpDir);
+ZipOutputStream zipOut =
+new ZipOutputStream(
+Files.newOutputStream(
+zipFile, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW))) {
 createIndex(dir);
+for (String file : dir.listAll()) {
+try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
+zipOut.putNextEntry(new ZipEntry(file));
+new OutputStreamDataOutput(zipOut).copyBytes(in, in.length());
+zipOut.closeEntry();
+}
+}
 }
 }

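createBWCIndex now builds the index in a temp directory and streams each index file straight into the target zip, instead of leaving a loose directory for the python script to package. A rough standalone equivalent of the zipping step using only java.util.zip and java.nio (paths are hypothetical; Lucene's version goes through Directory/IndexInput instead):

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.file.DirectoryStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipOutputStream;

    public class ZipDirectoryDemo {
      // Streams every regular file in srcDir into a single zip archive.
      static void zipDirectory(Path srcDir, Path zipFile) throws IOException {
        try (OutputStream out = Files.newOutputStream(zipFile);
            ZipOutputStream zipOut = new ZipOutputStream(out);
            DirectoryStream<Path> files = Files.newDirectoryStream(srcDir)) {
          for (Path file : files) {
            if (!Files.isRegularFile(file)) {
              continue;
            }
            zipOut.putNextEntry(new ZipEntry(file.getFileName().toString()));
            Files.copy(file, zipOut); // entry body; closeEntry() finishes it
            zipOut.closeEntry();
          }
        }
      }

      public static void main(String[] args) throws IOException {
        zipDirectory(Path.of(args[0]), Path.of(args[1]));
      }
    }
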
View File

@@ -20,8 +20,10 @@ import static org.apache.lucene.backward_index.BackwardsCompatibilityTestBase.cr
 import java.io.IOException;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
 import org.apache.lucene.util.Version;
+@SuppressFileSystems("ExtrasFS")
 public class TestGenerateBwcIndices extends LuceneTestCase {
 // Backcompat index generation, described below, is mostly automated in:

View File

@@ -55,7 +55,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityTestBase {
 static final String INDEX_NAME = "sorted";
 static final String SUFFIX = "";
-private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
+private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_11_0;
 private static final String PARENT_FIELD_NAME = "___parent";
 public TestIndexSortBackwardsCompatibility(Version version, String pattern) {

View File

@ -37,4 +37,5 @@
9.8.0
9.9.0
9.9.1
9.9.2
9.9.2
9.10.0

View File

@@ -112,13 +112,13 @@ public class EnwikiContentSource extends ContentSource {
 String time(String original) {
 StringBuilder buffer = new StringBuilder();
-buffer.append(original.substring(8, 10));
+buffer.append(original, 8, 10);
 buffer.append('-');
 buffer.append(months[Integer.parseInt(original.substring(5, 7)) - 1]);
 buffer.append('-');
-buffer.append(original.substring(0, 4));
+buffer.append(original, 0, 4);
 buffer.append(' ');
-buffer.append(original.substring(11, 19));
+buffer.append(original, 11, 19);
 buffer.append(".000");
 return buffer.toString();

View File

@@ -60,7 +60,7 @@ public class TrecFBISParser extends TrecDocParser {
 docData.setName(name);
 docData.setDate(date);
 docData.setTitle(title);
-docData.setBody(stripTags(docBuf, mark).toString());
+docData.setBody(stripTags(docBuf, mark));
 return docData;
 }
 }

View File

@@ -53,14 +53,14 @@ public class TrecFR94Parser extends TrecDocParser {
 // date...
 String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
 if (dateStr != null) {
-dateStr = stripTags(dateStr, 0).toString();
+dateStr = stripTags(dateStr, 0);
 date = trecSrc.parseDate(dateStr.trim());
 }
 }
 docData.clear();
 docData.setName(name);
 docData.setDate(date);
-docData.setBody(stripTags(docBuf, mark).toString());
+docData.setBody(stripTags(docBuf, mark));
 return docData;
 }
 }

View File

@@ -52,7 +52,7 @@ public class TrecFTParser extends TrecDocParser {
 docData.setName(name);
 docData.setDate(date);
 docData.setTitle(title);
-docData.setBody(stripTags(docBuf, mark).toString());
+docData.setBody(stripTags(docBuf, mark));
 return docData;
 }
 }

View File

@@ -49,7 +49,7 @@ public class TrecLATimesParser extends TrecDocParser {
 if (d2a > 0) {
 dateStr = dateStr.substring(0, d2a + 3); // we need the "day" part
 }
-dateStr = stripTags(dateStr, 0).toString();
+dateStr = stripTags(dateStr, 0);
 date = trecSrc.parseDate(dateStr.trim());
 }
@@ -59,14 +59,14 @@ public class TrecLATimesParser extends TrecDocParser {
 title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
 }
 if (title != null) {
-title = stripTags(title, 0).toString().trim();
+title = stripTags(title, 0).trim();
 }
 docData.clear();
 docData.setName(name);
 docData.setDate(date);
 docData.setTitle(title);
-docData.setBody(stripTags(docBuf, mark).toString());
+docData.setBody(stripTags(docBuf, mark));
 return docData;
 }
 }

View File

@@ -59,7 +59,7 @@ public class SearchWithSortTask extends ReadTask {
 String typeString;
 if (index != -1) {
 fieldName = field.substring(0, index);
-typeString = field.substring(1 + index, field.length());
+typeString = field.substring(1 + index);
 } else {
 throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
 }

View File

@@ -169,7 +169,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
 if (type == TYPE_STRING) {
 byte[] bytes = new byte[scratch.length() - VALUE.length];
 System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
-visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
+visitor.stringField(fieldInfo, new String(bytes, StandardCharsets.UTF_8));
 } else if (type == TYPE_BINARY) {
 byte[] copy = new byte[scratch.length() - VALUE.length];
 System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);

View File

@@ -380,7 +380,7 @@ public final class CodecUtil {
 int suffixLength = in.readByte() & 0xFF;
 byte[] suffixBytes = new byte[suffixLength];
 in.readBytes(suffixBytes, 0, suffixBytes.length);
-String suffix = new String(suffixBytes, 0, suffixBytes.length, StandardCharsets.UTF_8);
+String suffix = new String(suffixBytes, StandardCharsets.UTF_8);
 if (!suffix.equals(expectedSuffix)) {
 throw new CorruptIndexException(
 "file mismatch, expected suffix=" + expectedSuffix + ", got=" + suffix, in);

View File

@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene94;
 import java.io.IOException;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesFormat;
@@ -111,6 +112,8 @@ import org.apache.lucene.store.IndexOutput;
 * <li>0: EUCLIDEAN distance. ({@link VectorSimilarityFunction#EUCLIDEAN})
 * <li>1: DOT_PRODUCT similarity. ({@link VectorSimilarityFunction#DOT_PRODUCT})
 * <li>2: COSINE similarity. ({@link VectorSimilarityFunction#COSINE})
+* <li>3: MAXIMUM_INNER_PRODUCT similarity. ({@link
+* VectorSimilarityFunction#MAXIMUM_INNER_PRODUCT})
 * </ul>
 * </ul>
 *
@@ -284,10 +287,38 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
 }
 private static VectorSimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
-if (b < 0 || b >= VectorSimilarityFunction.values().length) {
-throw new CorruptIndexException("invalid distance function: " + b, input);
+try {
+return distOrdToFunc(b);
+} catch (IllegalArgumentException e) {
+throw new CorruptIndexException("invalid distance function: " + b, input, e);
 }
-return VectorSimilarityFunction.values()[b];
 }
+// List of vector similarity functions. This list is defined here, in order
+// to avoid an undesirable dependency on the declaration and order of values
+// in VectorSimilarityFunction. The list values and order have been chosen to
+// match that of VectorSimilarityFunction in, at least, Lucene 9.10. Values
+// are written and, implicitly, read.
+static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+List.of(
+VectorSimilarityFunction.EUCLIDEAN,
+VectorSimilarityFunction.DOT_PRODUCT,
+VectorSimilarityFunction.COSINE,
+VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+static VectorSimilarityFunction distOrdToFunc(byte i) {
+if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+throw new IllegalArgumentException("invalid distance function: " + i);
+}
+return SIMILARITY_FUNCTIONS.get(i);
+}
+static byte distFuncToOrd(VectorSimilarityFunction func) {
+for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+return (byte) i;
+}
+}
+throw new IllegalArgumentException("invalid distance function: " + func);
+}
 static {
@@ -378,7 +409,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
 }
 output.writeVInt(fi.getVectorDimension());
 output.writeByte((byte) fi.getVectorEncoding().ordinal());
-output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
+output.writeByte(distFuncToOrd(fi.getVectorSimilarityFunction()));
 }
 CodecUtil.writeFooter(output);
 }

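The point of distOrdToFunc/distFuncToOrd is that on-disk bytes must not silently change if someone reorders or extends the VectorSimilarityFunction enum: the format pins its own table and fails loudly on unknown values. A minimal sketch of the same pattern with a made-up enum (not Lucene code):

    import java.util.List;

    public class StableOrdinalDemo {
      // Reordering this enum must not corrupt previously written data.
      enum Shape { CIRCLE, SQUARE, TRIANGLE }

      // On-disk ordering, fixed once released; deliberately independent of Shape.ordinal().
      static final List<Shape> WIRE_ORDER = List.of(Shape.CIRCLE, Shape.SQUARE, Shape.TRIANGLE);

      static byte toWire(Shape s) {
        int i = WIRE_ORDER.indexOf(s);
        if (i < 0) {
          throw new IllegalArgumentException("unknown shape: " + s);
        }
        return (byte) i;
      }

      static Shape fromWire(byte b) {
        if (b < 0 || b >= WIRE_ORDER.size()) {
          throw new IllegalArgumentException("invalid shape ordinal: " + b);
        }
        return WIRE_ORDER.get(b);
      }

      public static void main(String[] args) {
        for (Shape s : Shape.values()) {
          System.out.println(s + " -> " + toWire(s) + " -> " + fromWire(toWire(s)));
        }
      }
    }
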
View File

@@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FlatVectorsReader;
@@ -171,15 +172,24 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
 }
 }
+// List of vector similarity functions. This list is defined here, in order
+// to avoid an undesirable dependency on the declaration and order of values
+// in VectorSimilarityFunction. The list values and order must be identical
+// to that of {@link o.a.l.c.l.Lucene94FieldInfosFormat#SIMILARITY_FUNCTIONS}.
+public static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+List.of(
+VectorSimilarityFunction.EUCLIDEAN,
+VectorSimilarityFunction.DOT_PRODUCT,
+VectorSimilarityFunction.COSINE,
+VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
 public static VectorSimilarityFunction readSimilarityFunction(DataInput input)
 throws IOException {
-int similarityFunctionId = input.readInt();
-if (similarityFunctionId < 0
-|| similarityFunctionId >= VectorSimilarityFunction.values().length) {
-throw new CorruptIndexException(
-"Invalid similarity function id: " + similarityFunctionId, input);
+int i = input.readInt();
+if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+throw new IllegalArgumentException("invalid distance function: " + i);
 }
-return VectorSimilarityFunction.values()[similarityFunctionId];
+return SIMILARITY_FUNCTIONS.get(i);
 }
 public static VectorEncoding readVectorEncoding(DataInput input) throws IOException {

View File

@@ -18,6 +18,7 @@
 package org.apache.lucene.codecs.lucene99;
 import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.TaskExecutor;
 import org.apache.lucene.store.IndexOutput;
@@ -436,7 +438,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
 throws IOException {
 meta.writeInt(field.number);
 meta.writeInt(field.getVectorEncoding().ordinal());
-meta.writeInt(field.getVectorSimilarityFunction().ordinal());
+meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
 meta.writeVLong(vectorIndexOffset);
 meta.writeVLong(vectorIndexLength);
 meta.writeVInt(field.getVectorDimension());
@@ -500,6 +502,15 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
 IOUtils.close(meta, vectorIndex, flatVectorWriter);
 }
+static int distFuncToOrd(VectorSimilarityFunction func) {
+for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+return (byte) i;
+}
+}
+throw new IllegalArgumentException("invalid distance function: " + func);
+}
 private static class FieldWriter<T> extends KnnFieldVectorsWriter<T> {
 private static final long SHALLOW_SIZE =

View File

@@ -384,7 +384,7 @@ final class DocumentsWriter implements Closeable, Accountable {
 ensureOpen();
 boolean hasEvents = false;
 while (flushControl.anyStalledThreads()
-|| (flushControl.numQueuedFlushes() > 0 && config.checkPendingFlushOnUpdate)) {
+|| (config.checkPendingFlushOnUpdate && flushControl.numQueuedFlushes() > 0)) {
 // Help out flushing any queued DWPTs so we can un-stall:
 // Try pickup pending threads here if possible
 // no need to loop over the next pending flushes... doFlush will take care of this

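The reordering matters because && evaluates left to right and stops at the first false operand: with the cheap config flag first, a disabled feature never pays for the queue inspection (this is the GITHUB#13115 entry above). A small sketch, with hypothetical stand-in names:

    public class ShortCircuitDemo {
      static boolean checkPendingFlushOnUpdate = false;

      static int numQueuedFlushes() {
        System.out.println("inspecting flush queue..."); // stands in for the costlier check
        return 3;
      }

      public static void main(String[] args) {
        // Before: the queue is inspected even though the flag is false.
        if (numQueuedFlushes() > 0 && checkPendingFlushOnUpdate) { /* help flush */ }
        // After: && short-circuits on the flag, so the queue is never touched.
        if (checkPendingFlushOnUpdate && numQueuedFlushes() > 0) { /* help flush */ }
      }
    }
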
View File

@@ -191,7 +191,7 @@ public final class IndexFileNames {
 if (idx == -1) {
 return null;
 } else {
-return filename.substring(idx + 1, filename.length());
+return filename.substring(idx + 1);
 }
 }

View File

@@ -677,16 +677,11 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
 public void testMaxTokenLengthDefault() throws Exception {
 StandardAnalyzer a = new StandardAnalyzer();
-StringBuilder bToken = new StringBuilder();
-// exact max length:
-for (int i = 0; i < StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; i++) {
-bToken.append('b');
-}
-String bString = bToken.toString();
+String bString = "b".repeat(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
 // first bString is exact max default length; next one is 1 too long
 String input = "x " + bString + " " + bString + "b";
-assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
 a.close();
 }

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene94;
import java.util.Arrays;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.index.BaseFieldInfoFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;
public class TestLucene94FieldInfosFormat extends BaseFieldInfoFormatTestCase {
@Override
protected Codec getCodec() {
return TestUtil.getDefaultCodec();
}
// Ensures that all expected vector similarity functions are translatable
// in the format.
public void testVectorSimilarityFuncs() {
// This does not necessarily have to be all similarity functions, but
// differences should be considered carefully.
var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
assertEquals(Lucene94FieldInfosFormat.SIMILARITY_FUNCTIONS, expectedValues);
}
}

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene99;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.FilterCodec;
@@ -186,4 +187,13 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormatTestCase {
 new Lucene99HnswScalarQuantizedVectorsFormat(
 20, 100, 1, null, new SameThreadExecutorService()));
 }
+// Ensures that all expected vector similarity functions are translatable
+// in the format.
+public void testVectorSimilarityFuncs() {
+// This does not necessarily have to be all similarity functions, but
+// differences should be considered carefully.
+var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
+}
 }

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.io.StringReader;
-import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -289,12 +288,10 @@ public class TestPayloads extends LuceneTestCase {
 reader.close();
 }
-static final Charset utf8 = StandardCharsets.UTF_8;
 private void generateRandomData(byte[] data) {
 // this test needs the random data to be valid unicode
 String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length);
-byte[] b = s.getBytes(utf8);
+byte[] b = s.getBytes(StandardCharsets.UTF_8);
 assert b.length == data.length;
 System.arraycopy(b, 0, data, 0, b.length);
 }
@@ -493,7 +490,7 @@
 this.pool = pool;
 payload = pool.get();
 generateRandomData(payload);
-term = new String(payload, 0, payload.length, utf8);
+term = new String(payload, StandardCharsets.UTF_8);
 first = true;
 payloadAtt = addAttribute(PayloadAttribute.class);
 termAtt = addAttribute(CharTermAttribute.class);

View File

@@ -107,7 +107,7 @@ public class TestPrefixRandom extends LuceneTestCase {
 @Override
 public String toString(String field) {
-return field.toString() + ":" + prefix.toString();
+return field + ":" + prefix;
 }
 @Override

View File

@@ -143,7 +143,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
 @Override
 public String toString(String field) {
-return field.toString() + automaton.toString();
+return field + automaton;
 }
 @Override

View File

@@ -213,10 +213,10 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
 List<Automaton> list = new ArrayList<>();
 for (int i = 0; i < s.length() - 1; i++) {
 StringBuilder sb = new StringBuilder();
-sb.append(s.substring(0, i));
+sb.append(s, 0, i);
 sb.append(s.charAt(i + 1));
 sb.append(s.charAt(i));
-sb.append(s.substring(i + 2, s.length()));
+sb.append(s, i + 2, s.length());
 String st = sb.toString();
 if (!st.equals(s)) {
 list.add(Automata.makeString(st));

View File

@@ -119,7 +119,7 @@ public class TestRegExp extends LuceneTestCase {
 // Add any head to the result, unchanged
 if (substitutionPoint > 0) {
-result.append(docValue.substring(0, substitutionPoint));
+result.append(docValue, 0, substitutionPoint);
 }
 // Modify the middle...

View File

@@ -1398,7 +1398,7 @@ public class UnifiedHighlighter {
 curValueBuilder.append(curValue);
 }
 curValueBuilder.append(valueSeparator);
-curValueBuilder.append(value.substring(0, Math.min(lengthBudget - 1, value.length())));
+curValueBuilder.append(value, 0, Math.min(lengthBudget - 1, value.length()));
 values[currentField] = curValueBuilder;
 }
}

View File

@@ -49,7 +49,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntroSorter;
+import org.apache.lucene.util.IntroSelector;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.packed.PackedInts;
@@ -251,17 +251,17 @@ public final class BPIndexReorderer {
 private class IndexReorderingTask extends BaseRecursiveAction {
 private final IntsRef docIDs;
-private final float[] gains;
+private final float[] biases;
 private final CloseableThreadLocal<PerThreadState> threadLocal;
 IndexReorderingTask(
 IntsRef docIDs,
-float[] gains,
+float[] biases,
 CloseableThreadLocal<PerThreadState> threadLocal,
 int depth) {
 super(depth);
 this.docIDs = docIDs;
-this.gains = gains;
+this.biases = biases;
 this.threadLocal = threadLocal;
 }
@@ -293,14 +293,14 @@
 assert sorted(docIDs);
 }
-int leftSize = docIDs.length / 2;
-if (leftSize < minPartitionSize) {
+int halfLength = docIDs.length / 2;
+if (halfLength < minPartitionSize) {
 return;
 }
-int rightSize = docIDs.length - leftSize;
-IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, leftSize);
-IntsRef right = new IntsRef(docIDs.ints, docIDs.offset + leftSize, rightSize);
+IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, halfLength);
+IntsRef right =
+new IntsRef(docIDs.ints, docIDs.offset + halfLength, docIDs.length - halfLength);
 PerThreadState state = threadLocal.get();
 ForwardIndex forwardIndex = state.forwardIndex;
@@ -313,7 +313,9 @@
 for (int iter = 0; iter < maxIters; ++iter) {
 boolean moved;
 try {
-moved = shuffle(forwardIndex, left, right, leftDocFreqs, rightDocFreqs, gains, iter);
+moved =
+shuffle(
+forwardIndex, docIDs, right.offset, leftDocFreqs, rightDocFreqs, biases, iter);
 } catch (IOException e) {
 throw new UncheckedIOException(e);
 }
@@ -322,10 +324,11 @@
 }
 }
-// It is fine for all tasks to share the same docs / gains array since they all work on
+// It is fine for all tasks to share the same docs / biases array since they all work on
 // different slices of the array at a given point in time.
-IndexReorderingTask leftTask = new IndexReorderingTask(left, gains, threadLocal, depth + 1);
-IndexReorderingTask rightTask = new IndexReorderingTask(right, gains, threadLocal, depth + 1);
+IndexReorderingTask leftTask = new IndexReorderingTask(left, biases, threadLocal, depth + 1);
+IndexReorderingTask rightTask =
+new IndexReorderingTask(right, biases, threadLocal, depth + 1);
 if (shouldFork(docIDs.length, docIDs.ints.length)) {
 invokeAll(leftTask, rightTask);
@@ -341,116 +344,94 @@
 */
 private boolean shuffle(
 ForwardIndex forwardIndex,
-IntsRef left,
-IntsRef right,
+IntsRef docIDs,
+int midPoint,
 int[] leftDocFreqs,
 int[] rightDocFreqs,
-float[] gains,
+float[] biases,
 int iter)
 throws IOException {
-assert left.ints == right.ints;
-assert left.offset + left.length == right.offset;
-// Computing gains is typically a bottleneck, because each iteration needs to iterate over all
-// postings to recompute gains, and the total number of postings is usually one order of
+// Computing biases is typically a bottleneck, because each iteration needs to iterate over
+// all postings to recompute biases, and the total number of postings is usually one order of
 // magnitude or more larger than the number of docs. So we try to parallelize it.
-ComputeGainsTask leftGainsTask =
-new ComputeGainsTask(
-left.ints,
-gains,
-left.offset,
-left.offset + left.length,
+new ComputeBiasTask(
+docIDs.ints,
+biases,
+docIDs.offset,
+docIDs.offset + docIDs.length,
 leftDocFreqs,
 rightDocFreqs,
 threadLocal,
-depth);
-ComputeGainsTask rightGainsTask =
-new ComputeGainsTask(
-right.ints,
-gains,
-right.offset,
-right.offset + right.length,
-rightDocFreqs,
-leftDocFreqs,
-threadLocal,
-depth);
-if (shouldFork(docIDs.length, docIDs.ints.length)) {
-invokeAll(leftGainsTask, rightGainsTask);
-} else {
-leftGainsTask.compute();
-rightGainsTask.compute();
+depth)
+.compute();
+float maxLeftBias = Float.NEGATIVE_INFINITY;
+for (int i = docIDs.offset; i < midPoint; ++i) {
+maxLeftBias = Math.max(maxLeftBias, biases[i]);
 }
+float minRightBias = Float.POSITIVE_INFINITY;
+for (int i = midPoint, end = docIDs.offset + docIDs.length; i < end; ++i) {
+minRightBias = Math.min(minRightBias, biases[i]);
+}
+float gain = maxLeftBias - minRightBias;
+// This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
+// Bipartite Graph Partitioning" by comparing the gain of swapping the doc from the left side
+// that is most attracted to the right and the doc from the right side that is most attracted
+// to the left against `iter` rather than zero.
+if (gain <= iter) {
+return false;
+}
-class ByDescendingGainSorter extends IntroSorter {
+new IntroSelector() {
 int pivotDoc;
-float pivotGain;
+float pivotBias;
 @Override
 protected void setPivot(int i) {
-pivotDoc = left.ints[i];
-pivotGain = gains[i];
+pivotDoc = docIDs.ints[i];
+pivotBias = biases[i];
 }
 @Override
 protected int comparePivot(int j) {
-// Compare in reverse order to get a descending sort
-int cmp = Float.compare(gains[j], pivotGain);
+int cmp = Float.compare(pivotBias, biases[j]);
 if (cmp == 0) {
 // Tie break on the doc ID to preserve doc ID ordering as much as possible
-cmp = pivotDoc - left.ints[j];
+cmp = pivotDoc - docIDs.ints[j];
 }
 return cmp;
 }
 @Override
 protected void swap(int i, int j) {
-int tmpDoc = left.ints[i];
-left.ints[i] = left.ints[j];
-left.ints[j] = tmpDoc;
+float tmpBias = biases[i];
+biases[i] = biases[j];
+biases[j] = tmpBias;
-float tmpGain = gains[i];
-gains[i] = gains[j];
-gains[j] = tmpGain;
-}
-}
-Runnable leftSorter =
-() -> new ByDescendingGainSorter().sort(left.offset, left.offset + left.length);
-Runnable rightSorter =
-() -> new ByDescendingGainSorter().sort(right.offset, right.offset + right.length);
-if (shouldFork(docIDs.length, docIDs.ints.length)) {
-// TODO: run it on more than 2 threads at most
-invokeAll(adapt(leftSorter), adapt(rightSorter));
-} else {
-leftSorter.run();
-rightSorter.run();
-}
-for (int i = 0; i < left.length; ++i) {
-// This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
-// Bipartite Graph Partitioning" by comparing the gain against `iter` rather than zero.
-if (gains[left.offset + i] + gains[right.offset + i] <= iter) {
-if (i == 0) {
-return false;
+if (i < midPoint == j < midPoint) {
+int tmpDoc = docIDs.ints[i];
+docIDs.ints[i] = docIDs.ints[j];
+docIDs.ints[j] = tmpDoc;
+} else {
+// If we're swapping docs across the left and right sides, we need to keep doc freqs
+// up-to-date.
+int left = Math.min(i, j);
+int right = Math.max(i, j);
+try {
+swapDocsAndFreqs(docIDs.ints, left, right, forwardIndex, leftDocFreqs, rightDocFreqs);
+} catch (IOException e) {
+throw new UncheckedIOException(e);
+}
+}
 }
-break;
 }
-swap(
-left.ints,
-left.offset + i,
-right.offset + i,
-forwardIndex,
-leftDocFreqs,
-rightDocFreqs);
-}
+}.select(docIDs.offset, docIDs.offset + docIDs.length, midPoint);
 return true;
 }
-private static void swap(
+private static void swapDocsAndFreqs(
 int[] docs,
 int left,
 int right,
@@ -492,19 +473,19 @@
 }
 }
-private class ComputeGainsTask extends BaseRecursiveAction {
+private class ComputeBiasTask extends BaseRecursiveAction {
 private final int[] docs;
-private final float[] gains;
+private final float[] biases;
 private final int from;
 private final int to;
 private final int[] fromDocFreqs;
 private final int[] toDocFreqs;
 private final CloseableThreadLocal<PerThreadState> threadLocal;
-ComputeGainsTask(
+ComputeBiasTask(
 int[] docs,
-float[] gains,
+float[] biases,
 int from,
 int to,
 int[] fromDocFreqs,
@@ -513,7 +494,7 @@
 int depth) {
 super(depth);
 this.docs = docs;
-this.gains = gains;
+this.biases = biases;
 this.from = from;
 this.to = to;
 this.fromDocFreqs = fromDocFreqs;
@@ -527,15 +508,15 @@
 if (problemSize > 1 && shouldFork(problemSize, docs.length)) {
 final int mid = (from + to) >>> 1;
 invokeAll(
-new ComputeGainsTask(
-docs, gains, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
-new ComputeGainsTask(
-docs, gains, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
+new ComputeBiasTask(
+docs, biases, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
+new ComputeBiasTask(
+docs, biases, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
 } else {
 ForwardIndex forwardIndex = threadLocal.get().forwardIndex;
 try {
 for (int i = from; i < to; ++i) {
-gains[i] = computeGain(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
+biases[i] = computeBias(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
 }
 } catch (IOException e) {
 throw new UncheckedIOException(e);
@@ -547,11 +528,11 @@
 * Compute a float that is negative when a document is attracted to the left and positive
 * otherwise.
 */
-private static float computeGain(
+private static float computeBias(
 int docID, ForwardIndex forwardIndex, int[] fromDocFreqs, int[] toDocFreqs)
 throws IOException {
 forwardIndex.seek(docID);
-double gain = 0;
+double bias = 0;
 for (IntsRef terms = forwardIndex.nextTerms();
 terms.length != 0;
 terms = forwardIndex.nextTerms()) {
@@ -561,12 +542,12 @@
 final int toDocFreq = toDocFreqs[termID];
 assert fromDocFreq >= 0;
 assert toDocFreq >= 0;
-gain +=
+bias +=
 (toDocFreq == 0 ? 0 : fastLog2(toDocFreq))
 - (fromDocFreq == 0 ? 0 : fastLog2(fromDocFreq));
 }
 }
-return (float) gain;
+return (float) bias;
 }
 }
@@ -869,7 +850,7 @@
 }
 private static long docRAMRequirements(int maxDoc) {
-// We need one int per doc for the doc map, plus one float to store the gain associated with
+// We need one int per doc for the doc map, plus one float to store the bias associated with
 // this doc.
 return 2L * Integer.BYTES * maxDoc;
 }

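The rename from gains to biases comes with an algorithmic change: instead of fully sorting both halves by gain, the code now computes one bias per doc and merely partitions the whole range around the midpoint with an IntroSelector, which is expected linear time rather than O(n log n). A toy quickselect sketch (not Lucene's IntroSelector) showing the kind of partitioning that select() performs:

    import java.util.Random;

    public class SelectVsSortDemo {
      // Partitions a[from..to) so a[k] lands in its sorted position, with everything
      // left of k <= a[k]; expected O(n), versus O(n log n) for a full sort.
      static void quickSelect(float[] a, int from, int to, int k) {
        Random rnd = new Random(42);
        while (to - from > 1) {
          int pivotIndex = from + rnd.nextInt(to - from);
          float pivot = a[pivotIndex];
          swap(a, pivotIndex, to - 1);
          int store = from;
          for (int i = from; i < to - 1; i++) {
            if (a[i] < pivot) {
              swap(a, i, store++);
            }
          }
          swap(a, store, to - 1);
          if (store == k) {
            return;
          } else if (store < k) {
            from = store + 1;
          } else {
            to = store;
          }
        }
      }

      static void swap(float[] a, int i, int j) {
        float tmp = a[i];
        a[i] = a[j];
        a[j] = tmp;
      }

      public static void main(String[] args) {
        float[] biases = {0.3f, -1.2f, 2.5f, 0.0f, -0.7f, 1.1f};
        int mid = biases.length / 2;
        quickSelect(biases, 0, biases.length, mid);
        // Everything left of mid is now <= everything from mid on; halves stay unsorted.
        for (float b : biases) {
          System.out.print(b + " ");
        }
      }
    }
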
View File

@@ -114,7 +114,7 @@ public class BooleanQueryTestFacade {
 public void doTest() throws Exception {
 if (verbose) {
-System.out.println("");
+System.out.println();
 System.out.println("Query: " + queryText);
 }

View File

@@ -113,7 +113,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
 if (pointsOnly) str.append(",pointsOnly");
 if (pruneLeafyBranches) str.append(",pruneLeafyBranches");
 if (prefixGridScanLevel != grid.getMaxLevels() - 4)
-str.append(",prefixGridScanLevel:").append("").append(prefixGridScanLevel);
+str.append(",prefixGridScanLevel:").append(prefixGridScanLevel);
 if (!multiOverlappingIndexedShapes) str.append(",!multiOverlappingIndexedShapes");
 return str.append(')').toString();
 }

View File

@@ -927,7 +927,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
 return;
 }
 sb.append("<b>");
-sb.append(surface.substring(0, prefixToken.length()));
+sb.append(surface, 0, prefixToken.length());
 sb.append("</b>");
 sb.append(surface.substring(prefixToken.length()));
 }

View File

@@ -892,7 +892,7 @@ public class TestAnalyzingInfixSuggester extends LuceneTestCase {
 b.append("<b>");
 b.append(queryTerm);
 b.append("</b>");
-b.append(inputTerm.substring(queryTerm.length(), inputTerm.length()));
+b.append(inputTerm.substring(queryTerm.length()));
 matched = true;
 break;
 }

View File

@@ -793,6 +793,10 @@ public class CheckHits {
 assertTrue(s2 == null || s2.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
 continue;
 }
+if (s2 == null) {
+assertTrue(s1.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
+continue;
+}
 TwoPhaseIterator twoPhase1 = s1.twoPhaseIterator();
 TwoPhaseIterator twoPhase2 = s2.twoPhaseIterator();
 DocIdSetIterator approx1 = twoPhase1 == null ? s1.iterator() : twoPhase1.approximation();

View File

@@ -166,7 +166,6 @@ public final class English {
 result.append("one ");
 break;
 case 0:
-result.append("");
 break;
 }
 }

View File

@@ -307,7 +307,7 @@ public class LineFileDocs implements Closeable {
 throw new RuntimeException("line: [" + line + "] is in an invalid format !");
 }
-docState.body.setStringValue(line.substring(1 + spot2, line.length()));
+docState.body.setStringValue(line.substring(1 + spot2));
 final String title = line.substring(0, spot);
 docState.title.setStringValue(title);
 docState.titleTokenized.setStringValue(title);