mirror of https://github.com/apache/lucene.git
commit 54b6248a8d: Merge branch 'main' into java_21
@@ -113,7 +113,7 @@ public class ErrorReportingTestListener implements TestOutputListener, TestListener
 
     if (echoOutput && !verboseMode) {
       synchronized (this) {
-        System.out.println("");
+        System.out.println();
         System.out.println(suite.getClassName() + " > test suite's output saved to " + outputLog + ", copied below:");
         try (BufferedReader reader = Files.newBufferedReader(outputLog, StandardCharsets.UTF_8)) {
           char[] buf = new char[1024];
@@ -67,6 +67,13 @@
   </maintainer>

   <!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
+  <release>
+    <Version>
+      <name>lucene-9.10.0</name>
+      <created>2024-02-20</created>
+      <revision>9.10.0</revision>
+    </Version>
+  </release>
   <release>
     <Version>
       <name>lucene-9.9.2</name>
@@ -45,16 +45,13 @@ def create_and_add_index(source, indextype, index_version, current_version, temp_dir):
     'emptyIndex': 'empty'
   }[indextype]
   if indextype in ('cfs', 'nocfs'):
-    dirname = 'index.%s' % indextype
     filename = '%s.%s-%s.zip' % (prefix, index_version, indextype)
   else:
-    dirname = indextype
     filename = '%s.%s.zip' % (prefix, index_version)

   print('  creating %s...' % filename, end='', flush=True)
   module = 'backward-codecs'
   index_dir = os.path.join('lucene', module, 'src/test/org/apache/lucene/backward_index')
   test_file = os.path.join(index_dir, filename)
   if os.path.exists(os.path.join(index_dir, filename)):
     print('uptodate')
     return
@@ -76,24 +73,20 @@ def create_and_add_index(source, indextype, index_version, current_version, temp_dir):
       '-Dtests.codec=default'
   ])
   base_dir = os.getcwd()
-  bc_index_dir = os.path.join(temp_dir, dirname)
-  bc_index_file = os.path.join(bc_index_dir, filename)
+  bc_index_file = os.path.join(temp_dir, filename)

   if os.path.exists(bc_index_file):
     print('alreadyexists')
   else:
-    if os.path.exists(bc_index_dir):
-      shutil.rmtree(bc_index_dir)
     os.chdir(source)
     scriptutil.run('./gradlew %s' % gradle_args)
-    os.chdir(bc_index_dir)
-    scriptutil.run('zip %s *' % filename)
+    if not os.path.exists(bc_index_file):
+      raise Exception("Expected file can't be found: %s" % bc_index_file)
     print('done')

   print('  adding %s...' % filename, end='', flush=True)
   scriptutil.run('cp %s %s' % (bc_index_file, os.path.join(base_dir, index_dir)))
   os.chdir(base_dir)
-  scriptutil.run('rm -rf %s' % bc_index_dir)
   print('done')

 def update_backcompat_tests(index_version, current_version):
@@ -197,7 +197,10 @@ Improvements
 
 Optimizations
 ---------------------
-(No changes)
+
+* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)
+
+* GITHUB#13085: Remove unnecessary toString() / substring() calls to save some String allocations (Dmitry Cherniachenko)
 
 Bug Fixes
 ---------------------
@@ -278,7 +278,7 @@ class BrazilianStemmer {
       return false;
     }
 
-    return value.substring(value.length() - suffix.length()).equals(suffix);
+    return value.endsWith(suffix);
   }
 
   /**
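Note on this hunk: given the length guard above it, the two forms return the same result, but substring() copies the tail of the string into a new String on every call while endsWith() compares in place. A minimal standalone sketch (hypothetical SuffixDemo class, not part of the patch):

    public class SuffixDemo {
      public static void main(String[] args) {
        String value = "cantar";
        String suffix = "ar";
        // Old form: copies the tail of value into a temporary String, then compares.
        boolean viaSubstring = value.substring(value.length() - suffix.length()).equals(suffix);
        // New form: compares in place, no allocation.
        boolean viaEndsWith = value.endsWith(suffix);
        System.out.println(viaSubstring + " " + viaEndsWith); // prints: true true
      }
    }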
@@ -142,7 +142,7 @@ public class PatternParser extends DefaultHandler {
           break;
         }
       }
-      token.append(chars.toString().substring(0, i));
+      token.append(chars, 0, i);
       // chars.delete(0,i);
       for (int countr = i; countr < chars.length(); countr++) {
         chars.setCharAt(countr - i, chars.charAt(countr));
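The StringBuilder.append(CharSequence, int, int) overload used here copies the character range directly, where the old chain first materialized the whole builder as a String and then copied a substring of it. A small sketch of the equivalence (illustrative values):

    public class AppendRangeDemo {
      public static void main(String[] args) {
        StringBuilder chars = new StringBuilder("hyphenation");
        int i = 6;
        StringBuilder token = new StringBuilder();
        token.append(chars.toString().substring(0, i)); // builds two temporary Strings
        token.append(chars, 0, i);                      // copies the range, no temporaries
        System.out.println(token); // prints: hyphenhyphen
      }
    }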
@@ -669,7 +669,7 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
           builder.append((char) ch);
         }
       } catch (Exception e) {
-        if (gold.equals(builder.toString())) {
+        if (gold.contentEquals(builder)) {
           throw e;
         }
         throw new Exception(
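String.contentEquals(CharSequence) compares directly against the builder's current contents, so the toString() call existed only to feed equals(). Sketch:

    public class ContentEqualsDemo {
      public static void main(String[] args) {
        String gold = "expected";
        StringBuilder builder = new StringBuilder("expected");
        System.out.println(gold.equals(builder.toString())); // true, but copies the builder
        System.out.println(gold.contentEquals(builder));     // true, without the copy
      }
    }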
@@ -30,19 +30,13 @@ import org.apache.lucene.tests.analysis.Token;
 public class TestTrimFilter extends BaseTokenStreamTestCase {
 
   public void testTrim() throws Exception {
-    char[] a = " a ".toCharArray();
-    char[] b = "b   ".toCharArray();
-    char[] ccc = "cCc".toCharArray();
-    char[] whitespace = "   ".toCharArray();
-    char[] empty = "".toCharArray();
-
     TokenStream ts =
         new CannedTokenStream(
-            new Token(new String(a, 0, a.length), 1, 5),
-            new Token(new String(b, 0, b.length), 6, 10),
-            new Token(new String(ccc, 0, ccc.length), 11, 15),
-            new Token(new String(whitespace, 0, whitespace.length), 16, 20),
-            new Token(new String(empty, 0, empty.length), 21, 21));
+            new Token(" a ", 1, 5),
+            new Token("b   ", 6, 10),
+            new Token("cCc", 11, 15),
+            new Token("   ", 16, 20),
+            new Token("", 21, 21));
     ts = new TrimFilter(ts);
 
     assertTokenStreamContents(ts, new String[] {"a", "b", "cCc", "", ""});
@@ -82,8 +82,8 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
       indexMatched.append((cs.correctOffset(i) < 0 ? "-" : input.charAt(cs.correctOffset(i))));
     }
 
-    boolean outputGood = expectedOutput.equals(output.toString());
-    boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());
+    boolean outputGood = expectedOutput.contentEquals(output);
+    boolean indexMatchedGood = expectedIndexMatchedOutput.contentEquals(indexMatched);
 
     if (!outputGood || !indexMatchedGood || false) {
       System.out.println("Pattern : " + pattern);
@@ -26,6 +26,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -38,11 +39,17 @@ import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.BytesRef;
@@ -253,10 +260,23 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
   protected abstract void createIndex(Directory directory) throws IOException;
 
   public final void createBWCIndex() throws IOException {
-    Path indexDir = getIndexDir().resolve(indexName(Version.LATEST));
-    Files.deleteIfExists(indexDir);
-    try (Directory dir = newFSDirectory(indexDir)) {
+    Path zipFile = getIndexDir().resolve(indexName(Version.LATEST));
+    Files.deleteIfExists(zipFile);
+    Path tmpDir = createTempDir();
+
+    try (Directory dir = FSDirectory.open(tmpDir);
+        ZipOutputStream zipOut =
+            new ZipOutputStream(
+                Files.newOutputStream(
+                    zipFile, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW))) {
       createIndex(dir);
+      for (String file : dir.listAll()) {
+        try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
+          zipOut.putNextEntry(new ZipEntry(file));
+          new OutputStreamDataOutput(zipOut).copyBytes(in, in.length());
+          zipOut.closeEntry();
+        }
+      }
     }
   }
 
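With this change the test writes each index file straight into a zip entry, which is what lets the Python script above drop its zip/rm steps. The same idiom with only java.util.zip and plain files, as a self-contained sketch (illustrative names, non-recursive):

    import java.io.IOException;
    import java.nio.file.DirectoryStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipOutputStream;

    public class ZipDirDemo {
      // Packs every regular file in srcDir into zipFile, one entry per file.
      static void zipDirectory(Path srcDir, Path zipFile) throws IOException {
        try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile));
            DirectoryStream<Path> files = Files.newDirectoryStream(srcDir)) {
          for (Path file : files) {
            if (!Files.isRegularFile(file)) continue;
            zipOut.putNextEntry(new ZipEntry(file.getFileName().toString()));
            Files.copy(file, zipOut); // stream the file body into the open entry
            zipOut.closeEntry();
          }
        }
      }
    }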
@@ -20,8 +20,10 @@ import static org.apache.lucene.backward_index.BackwardsCompatibilityTestBase.cr
 
 import java.io.IOException;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
 import org.apache.lucene.util.Version;
 
+@SuppressFileSystems("ExtrasFS")
 public class TestGenerateBwcIndices extends LuceneTestCase {
 
   // Backcompat index generation, described below, is mostly automated in:
@@ -55,7 +55,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityTestBase {
 
   static final String INDEX_NAME = "sorted";
   static final String SUFFIX = "";
-  private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_10_0;
+  private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_11_0;
   private static final String PARENT_FIELD_NAME = "___parent";
 
   public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -37,4 +37,5 @@
 9.8.0
 9.9.0
 9.9.1
 9.9.2
+9.10.0
@@ -112,13 +112,13 @@ public class EnwikiContentSource extends ContentSource {
   String time(String original) {
     StringBuilder buffer = new StringBuilder();
 
-    buffer.append(original.substring(8, 10));
+    buffer.append(original, 8, 10);
     buffer.append('-');
     buffer.append(months[Integer.parseInt(original.substring(5, 7)) - 1]);
     buffer.append('-');
-    buffer.append(original.substring(0, 4));
+    buffer.append(original, 0, 4);
     buffer.append(' ');
-    buffer.append(original.substring(11, 19));
+    buffer.append(original, 11, 19);
     buffer.append(".000");
 
     return buffer.toString();
@@ -60,7 +60,7 @@ public class TrecFBISParser extends TrecDocParser {
     docData.setName(name);
     docData.setDate(date);
     docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }
@@ -53,14 +53,14 @@ public class TrecFR94Parser extends TrecDocParser {
       // date...
       String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
       if (dateStr != null) {
-        dateStr = stripTags(dateStr, 0).toString();
+        dateStr = stripTags(dateStr, 0);
         date = trecSrc.parseDate(dateStr.trim());
       }
     }
     docData.clear();
     docData.setName(name);
     docData.setDate(date);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }
@@ -52,7 +52,7 @@ public class TrecFTParser extends TrecDocParser {
     docData.setName(name);
     docData.setDate(date);
     docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }
@@ -49,7 +49,7 @@ public class TrecLATimesParser extends TrecDocParser {
       if (d2a > 0) {
         dateStr = dateStr.substring(0, d2a + 3); // we need the "day" part
       }
-      dateStr = stripTags(dateStr, 0).toString();
+      dateStr = stripTags(dateStr, 0);
       date = trecSrc.parseDate(dateStr.trim());
     }
 
@@ -59,14 +59,14 @@ public class TrecLATimesParser extends TrecDocParser {
       title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
     }
     if (title != null) {
-      title = stripTags(title, 0).toString().trim();
+      title = stripTags(title, 0).trim();
     }
 
     docData.clear();
     docData.setName(name);
     docData.setDate(date);
     docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
+    docData.setBody(stripTags(docBuf, mark));
     return docData;
   }
 }
@@ -59,7 +59,7 @@ public class SearchWithSortTask extends ReadTask {
     String typeString;
     if (index != -1) {
       fieldName = field.substring(0, index);
-      typeString = field.substring(1 + index, field.length());
+      typeString = field.substring(1 + index);
     } else {
       throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
     }
@@ -169,7 +169,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
     if (type == TYPE_STRING) {
       byte[] bytes = new byte[scratch.length() - VALUE.length];
       System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
-      visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
+      visitor.stringField(fieldInfo, new String(bytes, StandardCharsets.UTF_8));
     } else if (type == TYPE_BINARY) {
       byte[] copy = new byte[scratch.length() - VALUE.length];
       System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);
@@ -380,7 +380,7 @@ public final class CodecUtil {
     int suffixLength = in.readByte() & 0xFF;
     byte[] suffixBytes = new byte[suffixLength];
     in.readBytes(suffixBytes, 0, suffixBytes.length);
-    String suffix = new String(suffixBytes, 0, suffixBytes.length, StandardCharsets.UTF_8);
+    String suffix = new String(suffixBytes, StandardCharsets.UTF_8);
     if (!suffix.equals(expectedSuffix)) {
       throw new CorruptIndexException(
           "file mismatch, expected suffix=" + expectedSuffix + ", got=" + suffix, in);
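new String(bytes, charset) decodes the entire array, so passing offset 0 and the full length was redundant; the four-argument constructor remains the right tool for genuine sub-ranges. Tiny sketch:

    public class DecodeDemo {
      public static void main(String[] args) {
        byte[] suffixBytes = {104, 105}; // "hi" in UTF-8
        String whole = new String(suffixBytes, java.nio.charset.StandardCharsets.UTF_8);
        String head = new String(suffixBytes, 0, 1, java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(whole + " " + head); // prints: hi h
      }
    }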
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene94;
 
 import java.io.IOException;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesFormat;
@@ -111,6 +112,8 @@ import org.apache.lucene.store.IndexOutput;
  *       <li>0: EUCLIDEAN distance. ({@link VectorSimilarityFunction#EUCLIDEAN})
  *       <li>1: DOT_PRODUCT similarity. ({@link VectorSimilarityFunction#DOT_PRODUCT})
  *       <li>2: COSINE similarity. ({@link VectorSimilarityFunction#COSINE})
+ *       <li>3: MAXIMUM_INNER_PRODUCT similarity. ({@link
+ *           VectorSimilarityFunction#MAXIMUM_INNER_PRODUCT})
  *     </ul>
  * </ul>
  *
@@ -284,10 +287,38 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
   }
 
   private static VectorSimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
-    if (b < 0 || b >= VectorSimilarityFunction.values().length) {
-      throw new CorruptIndexException("invalid distance function: " + b, input);
+    try {
+      return distOrdToFunc(b);
+    } catch (IllegalArgumentException e) {
+      throw new CorruptIndexException("invalid distance function: " + b, input, e);
     }
-    return VectorSimilarityFunction.values()[b];
   }
 
+  // List of vector similarity functions. This list is defined here, in order
+  // to avoid an undesirable dependency on the declaration and order of values
+  // in VectorSimilarityFunction. The list values and order have been chosen to
+  // match that of VectorSimilarityFunction in, at least, Lucene 9.10. Values
+  static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+      List.of(
+          VectorSimilarityFunction.EUCLIDEAN,
+          VectorSimilarityFunction.DOT_PRODUCT,
+          VectorSimilarityFunction.COSINE,
+          VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+
+  static VectorSimilarityFunction distOrdToFunc(byte i) {
+    if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+      throw new IllegalArgumentException("invalid distance function: " + i);
+    }
+    return SIMILARITY_FUNCTIONS.get(i);
+  }
+
+  static byte distFuncToOrd(VectorSimilarityFunction func) {
+    for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+      if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+        return (byte) i;
+      }
+    }
+    throw new IllegalArgumentException("invalid distance function: " + func);
+  }
+
   static {
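The motivation for the explicit list: the byte written to disk used to be Enum.ordinal(), so any reordering of or insertion into VectorSimilarityFunction would silently remap the values stored in existing indices. Pinning the wire order in a separate list decouples the file format from the enum declaration. A reduced sketch of the pattern with a hypothetical enum:

    import java.util.List;

    public class WireOrderDemo {
      enum Shape { CIRCLE, SQUARE, TRIANGLE }

      // Frozen on-disk order: append-only, independent of enum declaration order.
      static final List<Shape> WIRE_ORDER = List.of(Shape.CIRCLE, Shape.SQUARE, Shape.TRIANGLE);

      static byte toWire(Shape s) {
        int i = WIRE_ORDER.indexOf(s);
        if (i < 0) throw new IllegalArgumentException("unknown: " + s);
        return (byte) i;
      }

      static Shape fromWire(byte b) {
        if (b < 0 || b >= WIRE_ORDER.size()) throw new IllegalArgumentException("invalid: " + b);
        return WIRE_ORDER.get(b);
      }

      public static void main(String[] args) {
        byte onDisk = toWire(Shape.SQUARE);
        // Decodes to SQUARE even if someone later reorders the enum constants.
        System.out.println(fromWire(onDisk));
      }
    }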
@@ -378,7 +409,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
       }
       output.writeVInt(fi.getVectorDimension());
       output.writeByte((byte) fi.getVectorEncoding().ordinal());
-      output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
+      output.writeByte(distFuncToOrd(fi.getVectorSimilarityFunction()));
     }
     CodecUtil.writeFooter(output);
   }
@@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FlatVectorsReader;
@@ -171,15 +172,24 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
     }
   }
 
+  // List of vector similarity functions. This list is defined here, in order
+  // to avoid an undesirable dependency on the declaration and order of values
+  // in VectorSimilarityFunction. The list values and order must be identical
+  // to that of {@link o.a.l.c.l.Lucene94FieldInfosFormat#SIMILARITY_FUNCTIONS}.
+  public static final List<VectorSimilarityFunction> SIMILARITY_FUNCTIONS =
+      List.of(
+          VectorSimilarityFunction.EUCLIDEAN,
+          VectorSimilarityFunction.DOT_PRODUCT,
+          VectorSimilarityFunction.COSINE,
+          VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
+
   public static VectorSimilarityFunction readSimilarityFunction(DataInput input)
       throws IOException {
-    int similarityFunctionId = input.readInt();
-    if (similarityFunctionId < 0
-        || similarityFunctionId >= VectorSimilarityFunction.values().length) {
-      throw new CorruptIndexException(
-          "Invalid similarity function id: " + similarityFunctionId, input);
+    int i = input.readInt();
+    if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) {
+      throw new IllegalArgumentException("invalid distance function: " + i);
     }
-    return VectorSimilarityFunction.values()[similarityFunctionId];
+    return SIMILARITY_FUNCTIONS.get(i);
   }
 
   public static VectorEncoding readVectorEncoding(DataInput input) throws IOException {
@@ -18,6 +18,7 @@
 package org.apache.lucene.codecs.lucene99;
 
 import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.TaskExecutor;
 import org.apache.lucene.store.IndexOutput;
@@ -436,7 +438,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
       throws IOException {
     meta.writeInt(field.number);
     meta.writeInt(field.getVectorEncoding().ordinal());
-    meta.writeInt(field.getVectorSimilarityFunction().ordinal());
+    meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
     meta.writeVLong(vectorIndexOffset);
     meta.writeVLong(vectorIndexLength);
     meta.writeVInt(field.getVectorDimension());
@@ -500,6 +502,15 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
     IOUtils.close(meta, vectorIndex, flatVectorWriter);
   }
 
+  static int distFuncToOrd(VectorSimilarityFunction func) {
+    for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
+      if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
+        return (byte) i;
+      }
+    }
+    throw new IllegalArgumentException("invalid distance function: " + func);
+  }
+
   private static class FieldWriter<T> extends KnnFieldVectorsWriter<T> {
 
     private static final long SHALLOW_SIZE =
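A quick round-trip check makes the writer/reader pairing concrete; this mirrors the testVectorSimilarityFuncs test added later in this commit (sketch only, assumes same-package access to the package-private distFuncToOrd):

    for (VectorSimilarityFunction func : VectorSimilarityFunction.values()) {
      int ord = Lucene99HnswVectorsWriter.distFuncToOrd(func);
      assert Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS.get(ord) == func;
    }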
@@ -384,7 +384,7 @@ final class DocumentsWriter implements Closeable, Accountable {
     ensureOpen();
     boolean hasEvents = false;
     while (flushControl.anyStalledThreads()
-        || (flushControl.numQueuedFlushes() > 0 && config.checkPendingFlushOnUpdate)) {
+        || (config.checkPendingFlushOnUpdate && flushControl.numQueuedFlushes() > 0)) {
       // Help out flushing any queued DWPTs so we can un-stall:
       // Try pickup pending threads here if possible
       // no need to loop over the next pending flushes... doFlush will take care of this
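The swap matters because && short-circuits left to right: with checkPendingFlushOnUpdate false, the numQueuedFlushes() call is now never made on this hot path. Illustration with hypothetical stand-ins:

    public class ShortCircuitDemo {
      static boolean featureEnabled = false;

      static int expensiveCount() {
        System.out.println("expensiveCount() called");
        return 1;
      }

      public static void main(String[] args) {
        if (expensiveCount() > 0 && featureEnabled) { } // old order: always pays for the count
        if (featureEnabled && expensiveCount() > 0) { } // new order: skips it when disabled
        // Output: "expensiveCount() called" exactly once, from the first line.
      }
    }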
@@ -191,7 +191,7 @@ public final class IndexFileNames {
     if (idx == -1) {
       return null;
     } else {
-      return filename.substring(idx + 1, filename.length());
+      return filename.substring(idx + 1);
     }
   }
 
@@ -677,16 +677,11 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   public void testMaxTokenLengthDefault() throws Exception {
     StandardAnalyzer a = new StandardAnalyzer();
 
-    StringBuilder bToken = new StringBuilder();
-    // exact max length:
-    for (int i = 0; i < StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; i++) {
-      bToken.append('b');
-    }
-
-    String bString = bToken.toString();
+    String bString = "b".repeat(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
     // first bString is exact max default length; next one is 1 too long
     String input = "x " + bString + " " + bString + "b";
-    assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+    assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
     a.close();
   }
 
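String.repeat(int), available since Java 11, produces the same token as the removed loop. Equivalence sketch (255 stands in for StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH):

    public class RepeatDemo {
      public static void main(String[] args) {
        int n = 255;
        StringBuilder bToken = new StringBuilder();
        for (int i = 0; i < n; i++) {
          bToken.append('b');
        }
        System.out.println(bToken.toString().equals("b".repeat(n))); // prints: true
      }
    }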
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene94;
+
+import java.util.Arrays;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.tests.index.BaseFieldInfoFormatTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+
+public class TestLucene94FieldInfosFormat extends BaseFieldInfoFormatTestCase {
+  @Override
+  protected Codec getCodec() {
+    return TestUtil.getDefaultCodec();
+  }
+
+  // Ensures that all expected vector similarity functions are translatable
+  // in the format.
+  public void testVectorSimilarityFuncs() {
+    // This does not necessarily have to be all similarity functions, but
+    // differences should be considered carefully.
+    var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+
+    assertEquals(Lucene94FieldInfosFormat.SIMILARITY_FUNCTIONS, expectedValues);
+  }
+}
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene99;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.FilterCodec;
@@ -186,4 +187,13 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormatTestCase {
         new Lucene99HnswScalarQuantizedVectorsFormat(
             20, 100, 1, null, new SameThreadExecutorService()));
   }
+
+  // Ensures that all expected vector similarity functions are translatable
+  // in the format.
+  public void testVectorSimilarityFuncs() {
+    // This does not necessarily have to be all similarity functions, but
+    // differences should be considered carefully.
+    var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
+    assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
+  }
 }
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -289,12 +288,10 @@ public class TestPayloads extends LuceneTestCase {
     reader.close();
   }
 
-  static final Charset utf8 = StandardCharsets.UTF_8;
-
   private void generateRandomData(byte[] data) {
     // this test needs the random data to be valid unicode
     String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length);
-    byte[] b = s.getBytes(utf8);
+    byte[] b = s.getBytes(StandardCharsets.UTF_8);
     assert b.length == data.length;
     System.arraycopy(b, 0, data, 0, b.length);
   }
@@ -493,7 +490,7 @@ public class TestPayloads extends LuceneTestCase {
       this.pool = pool;
       payload = pool.get();
       generateRandomData(payload);
-      term = new String(payload, 0, payload.length, utf8);
+      term = new String(payload, StandardCharsets.UTF_8);
       first = true;
       payloadAtt = addAttribute(PayloadAttribute.class);
       termAtt = addAttribute(CharTermAttribute.class);
@@ -107,7 +107,7 @@ public class TestPrefixRandom extends LuceneTestCase {
 
     @Override
     public String toString(String field) {
-      return field.toString() + ":" + prefix.toString();
+      return field + ":" + prefix;
     }
 
     @Override
@@ -143,7 +143,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
 
     @Override
    public String toString(String field) {
-      return field.toString() + automaton.toString();
+      return field + automaton;
     }
 
     @Override
@@ -213,10 +213,10 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
     List<Automaton> list = new ArrayList<>();
     for (int i = 0; i < s.length() - 1; i++) {
       StringBuilder sb = new StringBuilder();
-      sb.append(s.substring(0, i));
+      sb.append(s, 0, i);
       sb.append(s.charAt(i + 1));
       sb.append(s.charAt(i));
-      sb.append(s.substring(i + 2, s.length()));
+      sb.append(s, i + 2, s.length());
       String st = sb.toString();
       if (!st.equals(s)) {
         list.add(Automata.makeString(st));
@@ -119,7 +119,7 @@ public class TestRegExp extends LuceneTestCase {
 
     // Add any head to the result, unchanged
     if (substitutionPoint > 0) {
-      result.append(docValue.substring(0, substitutionPoint));
+      result.append(docValue, 0, substitutionPoint);
     }
 
     // Modify the middle...
@@ -1398,7 +1398,7 @@ public class UnifiedHighlighter {
       curValueBuilder.append(curValue);
     }
     curValueBuilder.append(valueSeparator);
-    curValueBuilder.append(value.substring(0, Math.min(lengthBudget - 1, value.length())));
+    curValueBuilder.append(value, 0, Math.min(lengthBudget - 1, value.length()));
     values[currentField] = curValueBuilder;
   }
 
@@ -49,7 +49,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntroSorter;
+import org.apache.lucene.util.IntroSelector;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -251,17 +251,17 @@ public final class BPIndexReorderer {
   private class IndexReorderingTask extends BaseRecursiveAction {
 
     private final IntsRef docIDs;
-    private final float[] gains;
+    private final float[] biases;
     private final CloseableThreadLocal<PerThreadState> threadLocal;
 
     IndexReorderingTask(
         IntsRef docIDs,
-        float[] gains,
+        float[] biases,
         CloseableThreadLocal<PerThreadState> threadLocal,
         int depth) {
       super(depth);
       this.docIDs = docIDs;
-      this.gains = gains;
+      this.biases = biases;
       this.threadLocal = threadLocal;
     }
 
@@ -293,14 +293,14 @@ public final class BPIndexReorderer {
       assert sorted(docIDs);
     }
 
-    int leftSize = docIDs.length / 2;
-    if (leftSize < minPartitionSize) {
+    int halfLength = docIDs.length / 2;
+    if (halfLength < minPartitionSize) {
       return;
     }
 
-    int rightSize = docIDs.length - leftSize;
-    IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, leftSize);
-    IntsRef right = new IntsRef(docIDs.ints, docIDs.offset + leftSize, rightSize);
+    IntsRef left = new IntsRef(docIDs.ints, docIDs.offset, halfLength);
+    IntsRef right =
+        new IntsRef(docIDs.ints, docIDs.offset + halfLength, docIDs.length - halfLength);
 
     PerThreadState state = threadLocal.get();
     ForwardIndex forwardIndex = state.forwardIndex;
@@ -313,7 +313,9 @@ public final class BPIndexReorderer {
     for (int iter = 0; iter < maxIters; ++iter) {
       boolean moved;
       try {
-        moved = shuffle(forwardIndex, left, right, leftDocFreqs, rightDocFreqs, gains, iter);
+        moved =
+            shuffle(
+                forwardIndex, docIDs, right.offset, leftDocFreqs, rightDocFreqs, biases, iter);
       } catch (IOException e) {
         throw new UncheckedIOException(e);
       }
@@ -322,10 +324,11 @@ public final class BPIndexReorderer {
       }
     }
 
-    // It is fine for all tasks to share the same docs / gains array since they all work on
+    // It is fine for all tasks to share the same docs / biases array since they all work on
     // different slices of the array at a given point in time.
-    IndexReorderingTask leftTask = new IndexReorderingTask(left, gains, threadLocal, depth + 1);
-    IndexReorderingTask rightTask = new IndexReorderingTask(right, gains, threadLocal, depth + 1);
+    IndexReorderingTask leftTask = new IndexReorderingTask(left, biases, threadLocal, depth + 1);
+    IndexReorderingTask rightTask =
+        new IndexReorderingTask(right, biases, threadLocal, depth + 1);
 
     if (shouldFork(docIDs.length, docIDs.ints.length)) {
       invokeAll(leftTask, rightTask);
@@ -341,116 +344,94 @@ public final class BPIndexReorderer {
      */
     private boolean shuffle(
         ForwardIndex forwardIndex,
-        IntsRef left,
-        IntsRef right,
+        IntsRef docIDs,
+        int midPoint,
         int[] leftDocFreqs,
         int[] rightDocFreqs,
-        float[] gains,
+        float[] biases,
         int iter)
         throws IOException {
-      assert left.ints == right.ints;
-      assert left.offset + left.length == right.offset;
-
-      // Computing gains is typically a bottleneck, because each iteration needs to iterate over all
-      // postings to recompute gains, and the total number of postings is usually one order of
+      // Computing biases is typically a bottleneck, because each iteration needs to iterate over
+      // all postings to recompute biases, and the total number of postings is usually one order of
       // magnitude or more larger than the number of docs. So we try to parallelize it.
-      ComputeGainsTask leftGainsTask =
-          new ComputeGainsTask(
-              left.ints,
-              gains,
-              left.offset,
-              left.offset + left.length,
+      new ComputeBiasTask(
+              docIDs.ints,
+              biases,
+              docIDs.offset,
+              docIDs.offset + docIDs.length,
               leftDocFreqs,
               rightDocFreqs,
               threadLocal,
-              depth);
-      ComputeGainsTask rightGainsTask =
-          new ComputeGainsTask(
-              right.ints,
-              gains,
-              right.offset,
-              right.offset + right.length,
-              rightDocFreqs,
-              leftDocFreqs,
-              threadLocal,
-              depth);
-      if (shouldFork(docIDs.length, docIDs.ints.length)) {
-        invokeAll(leftGainsTask, rightGainsTask);
-      } else {
-        leftGainsTask.compute();
-        rightGainsTask.compute();
+              depth)
+          .compute();
+
+      float maxLeftBias = Float.NEGATIVE_INFINITY;
+      for (int i = docIDs.offset; i < midPoint; ++i) {
+        maxLeftBias = Math.max(maxLeftBias, biases[i]);
+      }
+      float minRightBias = Float.POSITIVE_INFINITY;
+      for (int i = midPoint, end = docIDs.offset + docIDs.length; i < end; ++i) {
+        minRightBias = Math.min(minRightBias, biases[i]);
+      }
+      float gain = maxLeftBias - minRightBias;
+      // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
+      // Bipartite Graph Partitioning" by comparing the gain of swapping the doc from the left side
+      // that is most attracted to the right and the doc from the right side that is most attracted
+      // to the left against `iter` rather than zero.
+      if (gain <= iter) {
+        return false;
       }
 
-      class ByDescendingGainSorter extends IntroSorter {
+      new IntroSelector() {
 
         int pivotDoc;
-        float pivotGain;
+        float pivotBias;
 
         @Override
         protected void setPivot(int i) {
-          pivotDoc = left.ints[i];
-          pivotGain = gains[i];
+          pivotDoc = docIDs.ints[i];
+          pivotBias = biases[i];
         }
 
         @Override
         protected int comparePivot(int j) {
           // Compare in reverse order to get a descending sort
-          int cmp = Float.compare(gains[j], pivotGain);
+          int cmp = Float.compare(pivotBias, biases[j]);
           if (cmp == 0) {
             // Tie break on the doc ID to preserve doc ID ordering as much as possible
-            cmp = pivotDoc - left.ints[j];
+            cmp = pivotDoc - docIDs.ints[j];
           }
           return cmp;
         }
 
         @Override
         protected void swap(int i, int j) {
-          int tmpDoc = left.ints[i];
-          left.ints[i] = left.ints[j];
-          left.ints[j] = tmpDoc;
+          float tmpBias = biases[i];
+          biases[i] = biases[j];
+          biases[j] = tmpBias;
 
-          float tmpGain = gains[i];
-          gains[i] = gains[j];
-          gains[j] = tmpGain;
-        }
-      }
-
-      Runnable leftSorter =
-          () -> new ByDescendingGainSorter().sort(left.offset, left.offset + left.length);
-      Runnable rightSorter =
-          () -> new ByDescendingGainSorter().sort(right.offset, right.offset + right.length);
-
-      if (shouldFork(docIDs.length, docIDs.ints.length)) {
-        // TODO: run it on more than 2 threads at most
-        invokeAll(adapt(leftSorter), adapt(rightSorter));
-      } else {
-        leftSorter.run();
-        rightSorter.run();
-      }
-
-      for (int i = 0; i < left.length; ++i) {
-        // This uses the simulated annealing proposed by Mackenzie et al in "Tradeoff Options for
-        // Bipartite Graph Partitioning" by comparing the gain against `iter` rather than zero.
-        if (gains[left.offset + i] + gains[right.offset + i] <= iter) {
-          if (i == 0) {
-            return false;
+          if (i < midPoint == j < midPoint) {
+            int tmpDoc = docIDs.ints[i];
+            docIDs.ints[i] = docIDs.ints[j];
+            docIDs.ints[j] = tmpDoc;
+          } else {
+            // If we're swapping docs across the left and right sides, we need to keep doc freqs
+            // up-to-date.
+            int left = Math.min(i, j);
+            int right = Math.max(i, j);
+            try {
+              swapDocsAndFreqs(docIDs.ints, left, right, forwardIndex, leftDocFreqs, rightDocFreqs);
+            } catch (IOException e) {
+              throw new UncheckedIOException(e);
+            }
           }
-          break;
         }
-
-        swap(
-            left.ints,
-            left.offset + i,
-            right.offset + i,
-            forwardIndex,
-            leftDocFreqs,
-            rightDocFreqs);
-      }
+      }.select(docIDs.offset, docIDs.offset + docIDs.length, midPoint);
 
       return true;
     }
 
-    private static void swap(
+    private static void swapDocsAndFreqs(
         int[] docs,
         int left,
         int right,
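The shape of this rewrite: the old code fully sorted both halves by gain each iteration and swapped pairwise until the annealing test failed; the new code computes one bias per doc, applies the annealing test once to the best candidate pair, then partitions around the midpoint with IntroSelector, which is linear on average instead of O(n log n) per iteration. A reduced, JDK-only sketch of partition-by-selection (quickselect on a float array; Lucene's IntroSelector additionally swaps doc IDs and doc freqs alongside, as the hunk above shows):

    import java.util.Arrays;

    public class SelectDemo {
      // Rearranges a[from..to] so the k smallest values end up below index k, in some order.
      // Average O(n); a full Arrays.sort() would be O(n log n) and move more elements.
      static void select(float[] a, int from, int to, int k) {
        while (true) {
          float pivot = a[(from + to) >>> 1];
          int i = from, j = to;
          while (i <= j) {
            while (a[i] < pivot) i++;
            while (a[j] > pivot) j--;
            if (i <= j) {
              float tmp = a[i]; a[i] = a[j]; a[j] = tmp;
              i++; j--;
            }
          }
          if (k <= j) to = j;
          else if (k >= i) from = i;
          else return;
        }
      }

      public static void main(String[] args) {
        float[] biases = {0.3f, -1.2f, 2.0f, 0.0f, -0.5f, 1.1f};
        int mid = 3;
        select(biases, 0, biases.length - 1, mid);
        // The three smallest (most "left-attracted") biases now occupy the left half.
        System.out.println(Arrays.toString(biases));
      }
    }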
@@ -492,19 +473,19 @@ public final class BPIndexReorderer {
     }
   }
 
-  private class ComputeGainsTask extends BaseRecursiveAction {
+  private class ComputeBiasTask extends BaseRecursiveAction {
 
     private final int[] docs;
-    private final float[] gains;
+    private final float[] biases;
     private final int from;
     private final int to;
     private final int[] fromDocFreqs;
     private final int[] toDocFreqs;
     private final CloseableThreadLocal<PerThreadState> threadLocal;
 
-    ComputeGainsTask(
+    ComputeBiasTask(
         int[] docs,
-        float[] gains,
+        float[] biases,
         int from,
         int to,
         int[] fromDocFreqs,
@@ -513,7 +494,7 @@ public final class BPIndexReorderer {
         int depth) {
       super(depth);
       this.docs = docs;
-      this.gains = gains;
+      this.biases = biases;
      this.from = from;
      this.to = to;
      this.fromDocFreqs = fromDocFreqs;
@@ -527,15 +508,15 @@ public final class BPIndexReorderer {
       if (problemSize > 1 && shouldFork(problemSize, docs.length)) {
         final int mid = (from + to) >>> 1;
         invokeAll(
-            new ComputeGainsTask(
-                docs, gains, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
-            new ComputeGainsTask(
-                docs, gains, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
+            new ComputeBiasTask(
+                docs, biases, from, mid, fromDocFreqs, toDocFreqs, threadLocal, depth),
+            new ComputeBiasTask(
+                docs, biases, mid, to, fromDocFreqs, toDocFreqs, threadLocal, depth));
       } else {
         ForwardIndex forwardIndex = threadLocal.get().forwardIndex;
         try {
           for (int i = from; i < to; ++i) {
-            gains[i] = computeGain(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
+            biases[i] = computeBias(docs[i], forwardIndex, fromDocFreqs, toDocFreqs);
           }
         } catch (IOException e) {
           throw new UncheckedIOException(e);
@@ -547,11 +528,11 @@ public final class BPIndexReorderer {
      * Compute a float that is negative when a document is attracted to the left and positive
      * otherwise.
      */
-    private static float computeGain(
+    private static float computeBias(
        int docID, ForwardIndex forwardIndex, int[] fromDocFreqs, int[] toDocFreqs)
        throws IOException {
      forwardIndex.seek(docID);
-      double gain = 0;
+      double bias = 0;
      for (IntsRef terms = forwardIndex.nextTerms();
          terms.length != 0;
          terms = forwardIndex.nextTerms()) {
@@ -561,12 +542,12 @@ public final class BPIndexReorderer {
         final int toDocFreq = toDocFreqs[termID];
         assert fromDocFreq >= 0;
         assert toDocFreq >= 0;
-        gain +=
+        bias +=
             (toDocFreq == 0 ? 0 : fastLog2(toDocFreq))
                 - (fromDocFreq == 0 ? 0 : fastLog2(fromDocFreq));
       }
     }
-    return (float) gain;
+    return (float) bias;
   }
 }
 
@@ -869,7 +850,7 @@ public final class BPIndexReorderer {
   }
 
   private static long docRAMRequirements(int maxDoc) {
-    // We need one int per doc for the doc map, plus one float to store the gain associated with
+    // We need one int per doc for the doc map, plus one float to store the bias associated with
     // this doc.
     return 2L * Integer.BYTES * maxDoc;
   }
@@ -114,7 +114,7 @@ public class BooleanQueryTestFacade {
   public void doTest() throws Exception {
 
     if (verbose) {
-      System.out.println("");
+      System.out.println();
       System.out.println("Query: " + queryText);
     }
 
@@ -113,7 +113,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
     if (pointsOnly) str.append(",pointsOnly");
     if (pruneLeafyBranches) str.append(",pruneLeafyBranches");
     if (prefixGridScanLevel != grid.getMaxLevels() - 4)
-      str.append(",prefixGridScanLevel:").append("").append(prefixGridScanLevel);
+      str.append(",prefixGridScanLevel:").append(prefixGridScanLevel);
     if (!multiOverlappingIndexedShapes) str.append(",!multiOverlappingIndexedShapes");
     return str.append(')').toString();
   }
@@ -927,7 +927,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
       return;
     }
     sb.append("<b>");
-    sb.append(surface.substring(0, prefixToken.length()));
+    sb.append(surface, 0, prefixToken.length());
     sb.append("</b>");
     sb.append(surface.substring(prefixToken.length()));
   }
@@ -892,7 +892,7 @@ public class TestAnalyzingInfixSuggester extends LuceneTestCase {
         b.append("<b>");
         b.append(queryTerm);
         b.append("</b>");
-        b.append(inputTerm.substring(queryTerm.length(), inputTerm.length()));
+        b.append(inputTerm.substring(queryTerm.length()));
         matched = true;
         break;
       }
@@ -793,6 +793,10 @@ public class CheckHits {
       assertTrue(s2 == null || s2.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
       continue;
     }
+    if (s2 == null) {
+      assertTrue(s1.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
+      continue;
+    }
     TwoPhaseIterator twoPhase1 = s1.twoPhaseIterator();
     TwoPhaseIterator twoPhase2 = s2.twoPhaseIterator();
     DocIdSetIterator approx1 = twoPhase1 == null ? s1.iterator() : twoPhase1.approximation();
@@ -166,7 +166,6 @@ public final class English {
         result.append("one ");
         break;
       case 0:
-        result.append("");
         break;
     }
   }
@@ -307,7 +307,7 @@ public class LineFileDocs implements Closeable {
       throw new RuntimeException("line: [" + line + "] is in an invalid format !");
     }
 
-    docState.body.setStringValue(line.substring(1 + spot2, line.length()));
+    docState.body.setStringValue(line.substring(1 + spot2));
     final String title = line.substring(0, spot);
     docState.title.setStringValue(title);
     docState.titleTokenized.setStringValue(title);