Merge branch 'main' into java_21

commit 54b6248a8d
ChrisHegarty, 2024-02-23 17:07:54 +00:00
46 changed files with 284 additions and 186 deletions

View File

@@ -113,7 +113,7 @@ public class ErrorReportingTestListener implements TestOutputListener, TestListe
     if (echoOutput && !verboseMode) {
       synchronized (this) {
-        System.out.println("");
+        System.out.println();
         System.out.println(suite.getClassName() + " > test suite's output saved to " + outputLog + ", copied below:");
         try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
           char[] buf = new char[1024];

View File

@@ -67,6 +67,13 @@
   </maintainer>
   <!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
+  <release>
+    <Version>
+      <name>lucene-9.10.0</name>
+      <created>2024-02-20</created>
+      <revision>9.10.0</revision>
+    </Version>
+  </release>
   <release>
     <Version>
       <name>lucene-9.9.2</name>

View File

@@ -45,16 +45,13 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
     'emptyIndex': 'empty'
   }[indextype]
   if indextype in ('cfs', 'nocfs'):
-    dirname = 'index.%s' % indextype
     filename = '%s.%s-%s.zip' % (prefix, index_version, indextype)
   else:
-    dirname = indextype
     filename = '%s.%s.zip' % (prefix, index_version)
   print('  creating %s...' % filename, end='', flush=True)
   module = 'backward-codecs'
   index_dir = os.path.join('lucene', module, 'src/test/org/apache/lucene/backward_index')
-  test_file = os.path.join(index_dir, filename)
   if os.path.exists(os.path.join(index_dir, filename)):
     print('uptodate')
     return
@@ -76,24 +73,20 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
     '-Dtests.codec=default'
   ])
   base_dir = os.getcwd()
-  bc_index_dir = os.path.join(temp_dir, dirname)
-  bc_index_file = os.path.join(bc_index_dir, filename)
+  bc_index_file = os.path.join(temp_dir, filename)
   if os.path.exists(bc_index_file):
     print('alreadyexists')
   else:
-    if os.path.exists(bc_index_dir):
-      shutil.rmtree(bc_index_dir)
     os.chdir(source)
     scriptutil.run('./gradlew %s' % gradle_args)
-    os.chdir(bc_index_dir)
-    scriptutil.run('zip %s *' % filename)
+    if not os.path.exists(bc_index_file):
+      raise Exception("Expected file can't be found: %s" % bc_index_file)
    print('done')
  print('  adding %s...' % filename, end='', flush=True)
  scriptutil.run('cp %s %s' % (bc_index_file, os.path.join(base_dir, index_dir)))
  os.chdir(base_dir)
-  scriptutil.run('rm -rf %s' % bc_index_dir)
  print('done')

 def update_backcompat_tests(index_version, current_version):

View File

@@ -197,7 +197,10 @@ Improvements
 Optimizations
 ---------------------
-(No changes)
+
+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+
+* GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)

 Bug Fixes
 ---------------------

View File

@@ -278,7 +278,7 @@ class BrazilianStemmer {
       return false;
     }
-    return value.substring(value.length() - suffix.length()).equals(suffix);
+    return value.endsWith(suffix);
   }

   /**
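
One instance of the GITHUB#13085 cleanup noted in CHANGES.txt above: endsWith performs the same
suffix comparison in place, without allocating the tail substring first. A minimal standalone
sketch of the equivalence (hypothetical example, not part of this commit; the surrounding code
already rejects suffixes longer than the value, which is the one case where the forms differ):

    class SuffixCheckExample {
      public static void main(String[] args) {
        String value = "falando", suffix = "ndo";
        // Allocates a temporary String for the tail, then compares it:
        boolean viaSubstring = value.substring(value.length() - suffix.length()).equals(suffix);
        // Compares in place, no temporary allocation:
        boolean viaEndsWith = value.endsWith(suffix);
        System.out.println(viaSubstring + " " + viaEndsWith); // prints: true true
      }
    }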

View File

@@ -142,7 +142,7 @@ public class PatternParser extends DefaultHandler {
             break;
           }
         }
-        token.append(chars.toString().substring(0, i));
+        token.append(chars, 0, i);
         // chars.delete(0,i);
         for (int countr = i; countr < chars.length(); countr++) {
           chars.setCharAt(countr - i, chars.charAt(countr));

View File

@@ -669,7 +669,7 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
         builder.append((char) ch);
       }
     } catch (Exception e) {
-      if (gold.equals(builder.toString())) {
+      if (gold.contentEquals(builder)) {
         throw e;
       }
       throw new Exception(

View File

@@ -30,19 +30,13 @@ import org.apache.lucene.tests.analysis.Token;
 public class TestTrimFilter extends BaseTokenStreamTestCase {

   public void testTrim() throws Exception {
-    char[] a = " a ".toCharArray();
-    char[] b = "b ".toCharArray();
-    char[] ccc = "cCc".toCharArray();
-    char[] whitespace = " ".toCharArray();
-    char[] empty = "".toCharArray();
     TokenStream ts =
         new CannedTokenStream(
-            new Token(new String(a, 0, a.length), 1, 5),
-            new Token(new String(b, 0, b.length), 6, 10),
-            new Token(new String(ccc, 0, ccc.length), 11, 15),
-            new Token(new String(whitespace, 0, whitespace.length), 16, 20),
-            new Token(new String(empty, 0, empty.length), 21, 21));
+            new Token(" a ", 1, 5),
+            new Token("b ", 6, 10),
+            new Token("cCc", 11, 15),
+            new Token(" ", 16, 20),
+            new Token("", 21, 21));
     ts = new TrimFilter(ts);

     assertTokenStreamContents(ts, new String[] {"a", "b", "cCc", "", ""});

View File

@@ -82,8 +82,8 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
       indexMatched.append((cs.correctOffset(i) < 0 ? "-" : input.charAt(cs.correctOffset(i))));
     }

-    boolean outputGood = expectedOutput.equals(output.toString());
-    boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());
+    boolean outputGood = expectedOutput.contentEquals(output);
+    boolean indexMatchedGood = expectedIndexMatchedOutput.contentEquals(indexMatched);

     if (!outputGood || !indexMatchedGood || false) {
       System.out.println("Pattern : " + pattern);

View File

@@ -26,6 +26,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -38,11 +39,17 @@ import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.BytesRef;
@@ -253,10 +260,23 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
   protected abstract void createIndex(Directory directory) throws IOException;

   public final void createBWCIndex() throws IOException {
-    Path indexDir = getIndexDir().resolve(indexName(Version.LATEST));
-    Files.deleteIfExists(indexDir);
-    try (Directory dir = newFSDirectory(indexDir)) {
+    Path zipFile = getIndexDir().resolve(indexName(Version.LATEST));
+    Files.deleteIfExists(zipFile);
+    Path tmpDir = createTempDir();
+    try (Directory dir = FSDirectory.open(tmpDir);
+        ZipOutputStream zipOut =
+            new ZipOutputStream(
+                Files.newOutputStream(
+                    zipFile, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW))) {
       createIndex(dir);
+      for (String file : dir.listAll()) {
+        try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
+          zipOut.putNextEntry(new ZipEntry(file));
+          new OutputStreamDataOutput(zipOut).copyBytes(in, in.length());
+          zipOut.closeEntry();
+        }
+      }
     }
   }
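
The rewritten createBWCIndex streams each file of the freshly built index straight into the zip
instead of leaving a directory for an external zip tool. The copy idiom is the interesting part:
OutputStreamDataOutput adapts the ZipOutputStream to Lucene's DataOutput API, so copyBytes can
transfer a whole Directory file without a manual buffer loop. A minimal sketch of just that idiom
(hypothetical helper, not part of this commit):

    import java.io.IOException;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipOutputStream;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.store.OutputStreamDataOutput;

    class ZipDirectoryExample {
      // Writes every file in dir as one entry of the already-open zipOut.
      static void zipAll(Directory dir, ZipOutputStream zipOut) throws IOException {
        for (String file : dir.listAll()) {
          try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
            zipOut.putNextEntry(new ZipEntry(file));
            new OutputStreamDataOutput(zipOut).copyBytes(in, in.length());
            zipOut.closeEntry();
          }
        }
      }
    }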

View File

@@ -20,8 +20,10 @@ import static org.apache.lucene.backward_index.BackwardsCompatibilityTestBase.cr
 import java.io.IOException;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
 import org.apache.lucene.util.Version;

+@SuppressFileSystems("ExtrasFS")
 public class TestGenerateBwcIndices extends LuceneTestCase {
   // Backcompat index generation, described below, is mostly automated in:

View File

@@ -55,7 +55,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT
   static final String INDEX_NAME = "sorted";
   static final String SUFFIX = "";
-  private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
+  private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_11_0;
   private static final String PARENT_FIELD_NAME = "___parent";

   public TestIndexSortBackwardsCompatibility(Version version, String pattern) {

View File

@@ -38,3 +38,4 @@
 9.9.0
 9.9.1
 9.9.2
+9.10.0

View File

@@ -112,13 +112,13 @@ public class EnwikiContentSource extends ContentSource {
     String time(String original) {
       StringBuilder buffer = new StringBuilder();

-      buffer.append(original.substring(8, 10));
+      buffer.append(original, 8, 10);
       buffer.append('-');
       buffer.append(months[Integer.parseInt(original.substring(5, 7)) - 1]);
       buffer.append('-');
-      buffer.append(original.substring(0, 4));
+      buffer.append(original, 0, 4);
       buffer.append(' ');
-      buffer.append(original.substring(11, 19));
+      buffer.append(original, 11, 19);
       buffer.append(".000");

       return buffer.toString();
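
These rewrites rely on StringBuilder.append(CharSequence, int, int), which copies the [start, end)
character range directly from the source instead of first materializing a substring; the same
pattern recurs in several files below (GITHUB#13085). A minimal standalone sketch (hypothetical
example, not part of this commit):

    class AppendRangeExample {
      public static void main(String[] args) {
        String original = "2024-02-20 12:34:56";
        StringBuilder buffer = new StringBuilder();
        buffer.append(original.substring(0, 4)); // allocates a temporary "2024"
        buffer.append('/');
        buffer.append(original, 5, 7); // copies "02" with no temporary String
        System.out.println(buffer); // prints: 2024/02
      }
    }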

View File

@@ -60,7 +60,7 @@ public class TrecFBISParser extends TrecDocParser {
     docData.setName(name);
     docData.setDate(date);
     docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }

View File

@@ -53,14 +53,14 @@ public class TrecFR94Parser extends TrecDocParser {
       // date...
       String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
       if (dateStr != null) {
-        dateStr = stripTags(dateStr, 0).toString();
+        dateStr = stripTags(dateStr, 0);
         date = trecSrc.parseDate(dateStr.trim());
       }
     }
     docData.clear();
     docData.setName(name);
     docData.setDate(date);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }

View File

@@ -52,7 +52,7 @@ public class TrecFTParser extends TrecDocParser {
     docData.setName(name);
     docData.setDate(date);
     docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }

View File

@@ -49,7 +49,7 @@ public class TrecLATimesParser extends TrecDocParser {
       if (d2a > 0) {
         dateStr = dateStr.substring(0, d2a + 3); // we need the "day" part
       }
-      dateStr = stripTags(dateStr, 0).toString();
+      dateStr = stripTags(dateStr, 0);
       date = trecSrc.parseDate(dateStr.trim());
     }

@@ -59,14 +59,14 @@ public class TrecLATimesParser extends TrecDocParser {
       title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
     }
     if (title != null) {
-      title = stripTags(title, 0).toString().trim();
+      title = stripTags(title, 0).trim();
     }

     docData.clear();
     docData.setName(name);
     docData.setDate(date);
     docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }

View File

@@ -59,7 +59,7 @@ public class SearchWithSortTask extends ReadTask {
       String typeString;
       if (index != -1) {
         fieldName = field.substring(0, index);
-        typeString = field.substring(1 + index, field.length());
+        typeString = field.substring(1 + index);
       } else {
         throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
       }

View File

@@ -169,7 +169,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
     if (type == TYPE_STRING) {
       byte[] bytes = new byte[scratch.length() - VALUE.length];
       System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
-      visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
+      visitor.stringField(fieldInfo, new String(bytes, StandardCharsets.UTF_8));
     } else if (type == TYPE_BINARY) {
       byte[] copy = new byte[scratch.length() - VALUE.length];
       System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);

View File

@@ -380,7 +380,7 @@ public final class CodecUtil {
     int suffixLength = in.readByte() & 0xFF;
     byte[] suffixBytes = new byte[suffixLength];
     in.readBytes(suffixBytes, 0, suffixBytes.length);
-    String suffix = new String(suffixBytes, 0, suffixBytes.length, StandardCharsets.UTF_8);
+    String suffix = new String(suffixBytes, StandardCharsets.UTF_8);
     if (!suffix.equals(expectedSuffix)) {
       throw new CorruptIndexException(
           "file mismatch, expected suffix=" + expectedSuffix + ", got=" + suffix, in);

View File

@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene94;

 import java.io.IOException;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesFormat;
@@ -111,6 +112,8 @@ import org.apache.lucene.store.IndexOutput;
  *       <li>0: EUCLIDEAN distance. ({@link VectorSimilarityFunction#EUCLIDEAN})
  *       <li>1: DOT_PRODUCT similarity. ({@link VectorSimilarityFunction#DOT_PRODUCT})
  *       <li>2: COSINE similarity. ({@link VectorSimilarityFunction#COSINE})
+ *       <li>3: MAXIMUM_INNER_PRODUCT similarity. ({@link
+ *           VectorSimilarityFunction#MAXIMUM_INNER_PRODUCT})
  *     </ul>
  * </ul>
  *
@@ -284,10 +287,38 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
   }

   private static VectorSimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
-    if (b < 0 || b >= VectorSimilarityFunction.values().length) {
-      throw new CorruptIndexException("invalid distance function: " + b, input);
+    try {
+      return distOrdToFunc(b);
+    } catch (IllegalArgumentException e) {
+      throw new CorruptIndexException("invalid distance function: " + b, input, e);
     }
-    return VectorSimilarityFunction.values()[b];
   }

+  // List of vector similarity functions. This list is defined here, in order
+  // to avoid an undesirable dependency on the declaration and order of values
+  // in VectorSimilarityFunction. The list values and order have been chosen to
+  // match that of VectorSimilarityFunction in, at least, Lucene 9.10. Values
+  static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+      List.of(
+          VectorSimilarityFunction.EUCLIDEAN,
+          VectorSimilarityFunction.DOT_PRODUCT,
+          VectorSimilarityFunction.COSINE,
+          VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+
+  static VectorSimilarityFunction distOrdToFunc(byte i) {
+    if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+      throw new IllegalArgumentException("invalid distance function: " + i);
+    }
+    return SIMILARITY_FUNCTIONS.get(i);
+  }
+
+  static byte distFuncToOrd(VectorSimilarityFunction func) {
+    for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+      if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+        return (byte) i;
+      }
+    }
+    throw new IllegalArgumentException("invalid distance function: " + func);
+  }

   static {

@@ -378,7 +409,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
       }
       output.writeVInt(fi.getVectorDimension());
       output.writeByte((byte) fi.getVectorEncoding().ordinal());
-      output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
+      output.writeByte(distFuncToOrd(fi.getVectorSimilarityFunction()));
     }
     CodecUtil.writeFooter(output);
   }
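
The explicit SIMILARITY_FUNCTIONS list pins the byte values written to disk to a frozen order, so
the file format no longer depends on the declaration order of the VectorSimilarityFunction enum.
A minimal standalone sketch of the hazard this removes (hypothetical types, not part of this
commit):

    import java.util.List;

    class StableCodesExample {
      // If on-disk codes were ordinal(), inserting a constant before CIRCLE in
      // a later release would shift every ordinal and silently change how old
      // files decode. A frozen, append-only list keeps old data readable.
      enum Shape { CIRCLE, SQUARE }

      static final List<Shape> CODES = List.of(Shape.CIRCLE, Shape.SQUARE);

      static byte toOrd(Shape s) {
        int i = CODES.indexOf(s);
        if (i < 0) throw new IllegalArgumentException("unencodable: " + s);
        return (byte) i;
      }

      static Shape fromOrd(byte b) {
        if (b < 0 || b >= CODES.size()) throw new IllegalArgumentException("invalid code: " + b);
        return CODES.get(b);
      }
    }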

View File

@@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FlatVectorsReader;
@@ -171,15 +172,24 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
     }
   }

+  // List of vector similarity functions. This list is defined here, in order
+  // to avoid an undesirable dependency on the declaration and order of values
+  // in VectorSimilarityFunction. The list values and order must be identical
+  // to that of {@link o.a.l.c.l.Lucene94FieldInfosFormat#SIMILARITY_FUNCTIONS}.
+  public static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+      List.of(
+          VectorSimilarityFunction.EUCLIDEAN,
+          VectorSimilarityFunction.DOT_PRODUCT,
+          VectorSimilarityFunction.COSINE,
+          VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+
   public static VectorSimilarityFunction readSimilarityFunction(DataInput input)
       throws IOException {
-    int similarityFunctionId = input.readInt();
-    if (similarityFunctionId < 0
-        || similarityFunctionId >= VectorSimilarityFunction.values().length) {
-      throw new CorruptIndexException(
-          "Invalid similarity function id: " + similarityFunctionId, input);
+    int i = input.readInt();
+    if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+      throw new IllegalArgumentException("invalid distance function: " + i);
     }
-    return VectorSimilarityFunction.values()[similarityFunctionId];
+    return SIMILARITY_FUNCTIONS.get(i);
   }

   public static VectorEncoding readVectorEncoding(DataInput input) throws IOException {

View File

@@ -18,6 +18,7 @@
 package org.apache.lucene.codecs.lucene99;

 import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;

 import java.io.IOException;
 import java.util.ArrayList;

@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.TaskExecutor;
 import org.apache.lucene.store.IndexOutput;
@@ -436,7 +438,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
       throws IOException {
     meta.writeInt(field.number);
     meta.writeInt(field.getVectorEncoding().ordinal());
-    meta.writeInt(field.getVectorSimilarityFunction().ordinal());
+    meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
     meta.writeVLong(vectorIndexOffset);
     meta.writeVLong(vectorIndexLength);
     meta.writeVInt(field.getVectorDimension());

@@ -500,6 +502,15 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
     IOUtils.close(meta, vectorIndex, flatVectorWriter);
   }

+  static int distFuncToOrd(VectorSimilarityFunction func) {
+    for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+      if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+        return (byte) i;
+      }
+    }
+    throw new IllegalArgumentException("invalid distance function: " + func);
+  }
+
   private static class FieldWriter<T> extends KnnFieldVectorsWriter<T> {

     private static final long SHALLOW_SIZE =

View File

@@ -384,7 +384,7 @@ final class DocumentsWriter implements Closeable, Accountable {
     ensureOpen();
     boolean hasEvents = false;
     while (flushControl.anyStalledThreads()
-        || (flushControl.numQueuedFlushes() > 0 && config.checkPendingFlushOnUpdate)) {
+        || (config.checkPendingFlushOnUpdate && flushControl.numQueuedFlushes() > 0)) {
       // Help out flushing any queued DWPTs so we can un-stall:
       // Try pickup pending threads here if possible
       // no need to loop over the next pending flushes... doFlush will take care of this
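
Reordering the && operands puts the cheap, effectively-constant config flag first, so the
potentially contended numQueuedFlushes() call is skipped entirely whenever flush-on-update is
disabled (the GITHUB#13115 entry above). A minimal standalone sketch of the short-circuit
behavior (hypothetical names, not part of this commit):

    class ShortCircuitExample {
      static boolean flagEnabled = false; // stands in for config.checkPendingFlushOnUpdate

      static int countQueued() { // stands in for flushControl.numQueuedFlushes()
        System.out.println("expensive check ran");
        return 3;
      }

      public static void main(String[] args) {
        // && evaluates its right operand only when the left one is true, so
        // countQueued() never runs here and nothing is printed.
        if (flagEnabled && countQueued() > 0) {
          System.out.println("would flush");
        }
      }
    }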

View File

@@ -191,7 +191,7 @@ public final class IndexFileNames {
     if (idx == -1) {
       return null;
     } else {
-      return filename.substring(idx + 1, filename.length());
+      return filename.substring(idx + 1);
     }
   }

View File

@@ -677,16 +677,11 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {

   public void testMaxTokenLengthDefault() throws Exception {
     StandardAnalyzer a = new StandardAnalyzer();

-    StringBuilder bToken = new StringBuilder();
     // exact max length:
-    for (int i = 0; i < StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; i++) {
-      bToken.append('b');
-    }
-
-    String bString = bToken.toString();
+    String bString = "b".repeat(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
     // first bString is exact max default length; next one is 1 too long
     String input = "x " + bString + " " + bString + "b";
-    assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+    assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
     a.close();
   }

View File

@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene94;
+
+import java.util.Arrays;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.tests.index.BaseFieldInfoFormatTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+
+public class TestLucene94FieldInfosFormat extends BaseFieldInfoFormatTestCase {
+  @Override
+  protected Codec getCodec() {
+    return TestUtil.getDefaultCodec();
+  }
+
+  // Ensures that all expected vector similarity functions are translatable
+  // in the format.
+  public void testVectorSimilarityFuncs() {
+    // This does not necessarily have to be all similarity functions, but
+    // differences should be considered carefully.
+    var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+    assertEquals(Lucene94FieldInfosFormat.SIMILARITY_FUNCTIONS, expectedValues);
+  }
+}

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene99;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.FilterCodec;

@@ -186,4 +187,13 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
             new Lucene99HnswScalarQuantizedVectorsFormat(
                 20, 100, 1, null, new SameThreadExecutorService()));
   }
+
+  // Ensures that all expected vector similarity functions are translatable
+  // in the format.
+  public void testVectorSimilarityFuncs() {
+    // This does not necessarily have to be all similarity functions, but
+    // differences should be considered carefully.
+    var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+    assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
+  }
 }

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.index;

 import java.io.IOException;
 import java.io.StringReader;
-import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;

@@ -289,12 +288,10 @@ public class TestPayloads extends LuceneTestCase {
     reader.close();
   }

-  static final Charset utf8 = StandardCharsets.UTF_8;
-
   private void generateRandomData(byte[] data) {
     // this test needs the random data to be valid unicode
     String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length);
-    byte[] b = s.getBytes(utf8);
+    byte[] b = s.getBytes(StandardCharsets.UTF_8);
     assert b.length == data.length;
     System.arraycopy(b, 0, data, 0, b.length);
   }

@@ -493,7 +490,7 @@ public class TestPayloads extends LuceneTestCase {
       this.pool = pool;
       payload = pool.get();
       generateRandomData(payload);
-      term = new String(payload, 0, payload.length, utf8);
+      term = new String(payload, StandardCharsets.UTF_8);
       first = true;
       payloadAtt = addAttribute(PayloadAttribute.class);
       termAtt = addAttribute(CharTermAttribute.class);

View File

@@ -107,7 +107,7 @@ public class TestPrefixRandom extends LuceneTestCase {

     @Override
     public String toString(String field) {
-      return field.toString() + ":" + prefix.toString();
+      return field + ":" + prefix;
     }

     @Override

View File

@@ -143,7 +143,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {

     @Override
     public String toString(String field) {
-      return field.toString() + automaton.toString();
+      return field + automaton;
     }

     @Override

View File

@@ -213,10 +213,10 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
     List<Automaton> list = new ArrayList<>();
     for (int i = 0; i < s.length() - 1; i++) {
       StringBuilder sb = new StringBuilder();
-      sb.append(s.substring(0, i));
+      sb.append(s, 0, i);
       sb.append(s.charAt(i + 1));
       sb.append(s.charAt(i));
-      sb.append(s.substring(i + 2, s.length()));
+      sb.append(s, i + 2, s.length());
       String st = sb.toString();
       if (!st.equals(s)) {
         list.add(Automata.makeString(st));

View File

@@ -119,7 +119,7 @@ public class TestRegExp extends LuceneTestCase {

     // Add any head to the result, unchanged
     if (substitutionPoint > 0) {
-      result.append(docValue.substring(0, substitutionPoint));
+      result.append(docValue, 0, substitutionPoint);
     }

     // Modify the middle...
// Modify the middle... // Modify the middle...

View File

@@ -1398,7 +1398,7 @@ public class UnifiedHighlighter {
         curValueBuilder.append(curValue);
       }
       curValueBuilder.append(valueSeparator);
-      curValueBuilder.append(value.substring(0, Math.min(lengthBudget - 1, value.length())));
+      curValueBuilder.append(value, 0, Math.min(lengthBudget - 1, value.length()));
       values[currentField] = curValueBuilder;
     }

View File

@@ -49,7 +49,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntroSorter;
+import org.apache.lucene.util.IntroSelector;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.packed.PackedInts;
@@ -251,17 +251,17 @@ public final class BPIndexReorderer {
   private class IndexReorderingTask extends BaseRecursiveAction {

     private final IntsRef docIDs;
-    private final float[] gains;
+    private final float[] biases;
     private final CloseableThreadLocal<PerThreadState> threadLocal;

     IndexReorderingTask(
         IntsRef docIDs,
-        float[] gains,
+        float[] biases,
         CloseableThreadLocal<PerThreadState> threadLocal,
         int depth) {
       super(depth);
       this.docIDs = docIDs;
-      this.gains = gains;
+      this.biases = biases;
       this.threadLocal = threadLocal;
     }
@@ -293,14 +293,14 @@ public final class BPIndexReorderer {
       assert sorted(docIDs);
     }

-    int leftSize = docIDs.length / 2;
-    if (leftSize < minPartitionSize) {
+    int halfLength = docIDs.length / 2;
+    if (halfLength < minPartitionSize) {
       return;
     }

-    int rightSize = docIDs.length - leftSize;
-    IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, leftSize);
-    IntsRef right = new IntsRef(docIDs.ints, docIDs.offset + leftSize, rightSize);
+    IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, halfLength);
+    IntsRef right =
+        new IntsRef(docIDs.ints, docIDs.offset + halfLength, docIDs.length - halfLength);

     PerThreadState state = threadLocal.get();
     ForwardIndex forwardIndex = state.forwardIndex;

@@ -313,7 +313,9 @@ public final class BPIndexReorderer {
     for (int iter = 0; iter < maxIters; ++iter) {
       boolean moved;
       try {
-        moved = shuffle(forwardIndex, left, right, leftDocFreqs, rightDocFreqs, gains, iter);
+        moved =
+            shuffle(
+                forwardIndex, docIDs, right.offset, leftDocFreqs, rightDocFreqs, biases, iter);
       } catch (IOException e) {
         throw new UncheckedIOException(e);
       }

@@ -322,10 +324,11 @@ public final class BPIndexReorderer {
       }
     }

-    // It is fine for all tasks to share the same docs / gains array since they all work on
+    // It is fine for all tasks to share the same docs / biases array since they all work on
     // different slices of the array at a given point in time.
-    IndexReorderingTask leftTask = new IndexReorderingTask(left, gains, threadLocal, depth + 1);
-    IndexReorderingTask rightTask = new IndexReorderingTask(right, gains, threadLocal, depth + 1);
+    IndexReorderingTask leftTask = new IndexReorderingTask(left, biases, threadLocal, depth + 1);
+    IndexReorderingTask rightTask =
+        new IndexReorderingTask(right, biases, threadLocal, depth + 1);

     if (shouldFork(docIDs.length, docIDs.ints.length)) {
       invokeAll(leftTask, rightTask);
@@ -341,116 +344,94 @@ public final class BPIndexReorderer {
    */
   private boolean shuffle(
       ForwardIndex forwardIndex,
-      IntsRef left,
-      IntsRef right,
+      IntsRef docIDs,
+      int midPoint,
       int[] leftDocFreqs,
       int[] rightDocFreqs,
-      float[] gains,
+      float[] biases,
       int iter)
       throws IOException {
-    assert left.ints == right.ints;
-    assert left.offset + left.length == right.offset;

-    // Computing gains is typically a bottleneck, because each iteration needs to iterate over all
-    // postings to recompute gains, and the total number of postings is usually one order of
+    // Computing biases is typically a bottleneck, because each iteration needs to iterate over
+    // all postings to recompute biases, and the total number of postings is usually one order of
     // magnitude or more larger than the number of docs. So we try to parallelize it.
-    ComputeGainsTask leftGainsTask =
-        new ComputeGainsTask(
-            left.ints,
-            gains,
-            left.offset,
-            left.offset + left.length,
-            leftDocFreqs,
-            rightDocFreqs,
-            threadLocal,
-            depth);
-    ComputeGainsTask rightGainsTask =
-        new ComputeGainsTask(
-            right.ints,
-            gains,
-            right.offset,
-            right.offset + right.length,
-            rightDocFreqs,
-            leftDocFreqs,
-            threadLocal,
-            depth);
-    if (shouldFork(docIDs.length, docIDs.ints.length)) {
-      invokeAll(leftGainsTask, rightGainsTask);
-    } else {
-      leftGainsTask.compute();
-      rightGainsTask.compute();
-    }
+    new ComputeBiasTask(
+            docIDs.ints,
+            biases,
+            docIDs.offset,
+            docIDs.offset + docIDs.length,
+            leftDocFreqs,
+            rightDocFreqs,
+            threadLocal,
+            depth)
+        .compute();
+
+    float maxLeftBias = Float.NEGATIVE_INFINITY;
+    for (int i = docIDs.offset; i < midPoint; ++i) {
+      maxLeftBias = Math.max(maxLeftBias, biases[i]);
+    }
+    float minRightBias = Float.POSITIVE_INFINITY;
+    for (int i = midPoint, end = docIDs.offset + docIDs.length; i < end; ++i) {
+      minRightBias = Math.min(minRightBias, biases[i]);
+    }
+    float gain = maxLeftBias - minRightBias;
+    // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
+    // Bipartite Graph Partitioning" by comparing the gain of swapping the doc from the left side
+    // that is most attracted to the right and the doc from the right side that is most attracted
+    // to the left against `iter` rather than zero.
+    if (gain <= iter) {
+      return false;
+    }

-    class ByDescendingGainSorter extends IntroSorter {
+    new IntroSelector() {
       int pivotDoc;
-      float pivotGain;
+      float pivotBias;

       @Override
       protected void setPivot(int i) {
-        pivotDoc = left.ints[i];
-        pivotGain = gains[i];
+        pivotDoc = docIDs.ints[i];
+        pivotBias = biases[i];
       }

       @Override
       protected int comparePivot(int j) {
-        // Compare in reverse order to get a descending sort
-        int cmp = Float.compare(gains[j], pivotGain);
+        int cmp = Float.compare(pivotBias, biases[j]);
         if (cmp == 0) {
           // Tie break on the doc ID to preserve doc ID ordering as much as possible
-          cmp = pivotDoc - left.ints[j];
+          cmp = pivotDoc - docIDs.ints[j];
         }
         return cmp;
       }

       @Override
       protected void swap(int i, int j) {
-        int tmpDoc = left.ints[i];
-        left.ints[i] = left.ints[j];
-        left.ints[j] = tmpDoc;
-
-        float tmpGain = gains[i];
-        gains[i] = gains[j];
-        gains[j] = tmpGain;
-      }
-    }
-
-    Runnable leftSorter =
-        () -> new ByDescendingGainSorter().sort(left.offset, left.offset + left.length);
-    Runnable rightSorter =
-        () -> new ByDescendingGainSorter().sort(right.offset, right.offset + right.length);
-    if (shouldFork(docIDs.length, docIDs.ints.length)) {
-      // TODO: run it on more than 2 threads at most
-      invokeAll(adapt(leftSorter), adapt(rightSorter));
-    } else {
-      leftSorter.run();
-      rightSorter.run();
-    }
-
-    for (int i = 0; i < left.length; ++i) {
-      // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
-      // Bipartite Graph Partitioning" by comparing the gain against `iter` rather than zero.
-      if (gains[left.offset + i] + gains[right.offset + i] <= iter) {
-        if (i == 0) {
-          return false;
-        }
-        break;
-      }
-      swap(
-          left.ints,
-          left.offset + i,
-          right.offset + i,
-          forwardIndex,
-          leftDocFreqs,
-          rightDocFreqs);
-    }
+        float tmpBias = biases[i];
+        biases[i] = biases[j];
+        biases[j] = tmpBias;
+
+        if (i < midPoint == j < midPoint) {
+          int tmpDoc = docIDs.ints[i];
+          docIDs.ints[i] = docIDs.ints[j];
+          docIDs.ints[j] = tmpDoc;
+        } else {
+          // If we're swapping docs across the left and right sides, we need to keep doc freqs
+          // up-to-date.
+          int left = Math.min(i, j);
+          int right = Math.max(i, j);
+          try {
+            swapDocsAndFreqs(docIDs.ints, left, right, forwardIndex, leftDocFreqs, rightDocFreqs);
+          } catch (IOException e) {
+            throw new UncheckedIOException(e);
+          }
+        }
+      }
+    }.select(docIDs.offset, docIDs.offset + docIDs.length, midPoint);

     return true;
   }

-  private static void swap(
+  private static void swapDocsAndFreqs(
       int[] docs,
       int left,
       int right,
@@ -492,19 +473,19 @@ public final class BPIndexReorderer {
     }
   }

-  private class ComputeGainsTask extends BaseRecursiveAction {
+  private class ComputeBiasTask extends BaseRecursiveAction {

     private final int[] docs;
-    private final float[] gains;
+    private final float[] biases;
     private final int from;
     private final int to;
     private final int[] fromDocFreqs;
     private final int[] toDocFreqs;
     private final CloseableThreadLocal<PerThreadState> threadLocal;

-    ComputeGainsTask(
+    ComputeBiasTask(
         int[] docs,
-        float[] gains,
+        float[] biases,
         int from,
         int to,
         int[] fromDocFreqs,

@@ -513,7 +494,7 @@ public final class BPIndexReorderer {
         int depth) {
       super(depth);
       this.docs = docs;
-      this.gains = gains;
+      this.biases = biases;
       this.from = from;
       this.to = to;
       this.fromDocFreqs = fromDocFreqs;

@@ -527,15 +508,15 @@ public final class BPIndexReorderer {
       if (problemSize > 1 && shouldFork(problemSize, docs.length)) {
         final int mid = (from + to) >>> 1;
         invokeAll(
-            new ComputeGainsTask(
-                docs, gains, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
-            new ComputeGainsTask(
-                docs, gains, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
+            new ComputeBiasTask(
+                docs, biases, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
+            new ComputeBiasTask(
+                docs, biases, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
       } else {
         ForwardIndex forwardIndex = threadLocal.get().forwardIndex;
         try {
           for (int i = from; i < to; ++i) {
-            gains[i] = computeGain(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
+            biases[i] = computeBias(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
           }
         } catch (IOException e) {
           throw new UncheckedIOException(e);
@@ -547,11 +528,11 @@ public final class BPIndexReorderer {
    * Compute a float that is negative when a document is attracted to the left and positive
    * otherwise.
    */
-  private static float computeGain(
+  private static float computeBias(
       int docID, ForwardIndex forwardIndex, int[] fromDocFreqs, int[] toDocFreqs)
       throws IOException {
     forwardIndex.seek(docID);
-    double gain = 0;
+    double bias = 0;
     for (IntsRef terms = forwardIndex.nextTerms();
         terms.length != 0;
         terms = forwardIndex.nextTerms()) {

@@ -561,12 +542,12 @@ public final class BPIndexReorderer {
         final int toDocFreq = toDocFreqs[termID];
         assert fromDocFreq >= 0;
         assert toDocFreq >= 0;
-        gain +=
+        bias +=
             (toDocFreq == 0 ? 0 : fastLog2(toDocFreq))
                 - (fromDocFreq == 0 ? 0 : fastLog2(fromDocFreq));
       }
     }
-    return (float) gain;
+    return (float) bias;
   }

@@ -869,7 +850,7 @@ public final class BPIndexReorderer {
   }

   private static long docRAMRequirements(int maxDoc) {
-    // We need one int per doc for the doc map, plus one float to store the gain associated with
+    // We need one int per doc for the doc map, plus one float to store the bias associated with
     // this doc.
     return 2L * Integer.BYTES * maxDoc;
   }
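
The heart of this refactor: instead of fully sorting each half by descending gain and then pairing
docs up, the new code computes a bias per doc and runs a single IntroSelector.select around
midPoint, which partitions the combined range so the docs most attracted to the other side meet at
the boundary; selection is linear where sorting was linearithmic, and the swap callback keeps doc
frequencies consistent whenever a doc crosses sides. A minimal standalone quickselect sketch of
the selection idea (hypothetical, not part of this commit):

    import java.util.Arrays;

    class SelectVsSortExample {
      // Returns the k-th smallest value via Hoare partitioning (quickselect):
      // only the partition containing index k is ever refined, so no total
      // order is established -- the same reason select() beats sort() above.
      static float kthSmallest(float[] a, int k) {
        int lo = 0, hi = a.length - 1;
        while (true) {
          float pivot = a[(lo + hi) >>> 1];
          int i = lo, j = hi;
          while (i <= j) {
            while (a[i] < pivot) i++;
            while (a[j] > pivot) j--;
            if (i <= j) { float t = a[i]; a[i++] = a[j]; a[j--] = t; }
          }
          if (k <= j) hi = j;
          else if (k >= i) lo = i;
          else return a[k];
        }
      }

      public static void main(String[] args) {
        float[] biases = {0.3f, -1.2f, 2.5f, 0.0f, -0.7f};
        // Sorted, the biases would be [-1.2, -0.7, 0.0, 0.3, 2.5]; index 2 is 0.0.
        System.out.println(kthSmallest(biases.clone(), 2)); // prints: 0.0
        System.out.println(Arrays.toString(biases)); // original untouched (clone)
      }
    }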

View File

@@ -114,7 +114,7 @@ public class BooleanQueryTestFacade {

   public void doTest() throws Exception {
     if (verbose) {
-      System.out.println("");
+      System.out.println();
       System.out.println("Query: " + queryText);
     }

View File

@@ -113,7 +113,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
     if (pointsOnly) str.append(",pointsOnly");
     if (pruneLeafyBranches) str.append(",pruneLeafyBranches");
     if (prefixGridScanLevel != grid.getMaxLevels() - 4)
-      str.append(",prefixGridScanLevel:").append("").append(prefixGridScanLevel);
+      str.append(",prefixGridScanLevel:").append(prefixGridScanLevel);
     if (!multiOverlappingIndexedShapes) str.append(",!multiOverlappingIndexedShapes");
     return str.append(')').toString();
   }

View File

@@ -927,7 +927,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
       return;
     }
     sb.append("<b>");
-    sb.append(surface.substring(0, prefixToken.length()));
+    sb.append(surface, 0, prefixToken.length());
     sb.append("</b>");
     sb.append(surface.substring(prefixToken.length()));
   }

View File

@@ -892,7 +892,7 @@ public class TestAnalyzingInfixSuggester extends LuceneTestCase {
         b.append("<b>");
         b.append(queryTerm);
         b.append("</b>");
-        b.append(inputTerm.substring(queryTerm.length(), inputTerm.length()));
+        b.append(inputTerm.substring(queryTerm.length()));
         matched = true;
         break;
       }

View File

@@ -793,6 +793,10 @@ public class CheckHits {
         assertTrue(s2 == null || s2.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
         continue;
       }
+      if (s2 == null) {
+        assertTrue(s1.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
+        continue;
+      }
       TwoPhaseIterator twoPhase1 = s1.twoPhaseIterator();
       TwoPhaseIterator twoPhase2 = s2.twoPhaseIterator();
       DocIdSetIterator approx1 = twoPhase1 == null ? s1.iterator() : twoPhase1.approximation();

View File

@@ -166,7 +166,6 @@ public final class English {
           result.append("one ");
           break;
         case 0:
-          result.append("");
           break;
       }
     }
} }

View File

@@ -307,7 +307,7 @@ public class LineFileDocs implements Closeable {
       throw new RuntimeException("line: [" + line + "] is in an invalid format !");
     }

-    docState.body.setStringValue(line.substring(1 + spot2, line.length()));
+    docState.body.setStringValue(line.substring(1 + spot2));
     final String title = line.substring(0, spot);
     docState.title.setStringValue(title);
     docState.titleTokenized.setStringValue(title);