Revert "LUCENE-9856: fail precommit on unused local variables (#34)"

This reverts commit 20dba278bb.
Robert Muir, 2021-03-23 12:46:36 -04:00
parent 20dba278bb
commit e6c4956cf6
GPG Key ID: 817AE1DD322D7ECA (no known key found for this signature in database)
219 changed files with 1180 additions and 329 deletions
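Context for the changes below: the reverted commit had made ECJ treat unused local variables as a build error (org.eclipse.jdt.core.compiler.problem.unusedLocal=error) and added @SuppressWarnings("unused") to generated parsers and tokenizers; this revert restores the older ECJ version and relaxes those settings. As a minimal, hypothetical Java sketch (not code from this commit), this is the kind of pattern the unusedLocal check flags:

class UnusedLocalExample {
  static int parse(String s) {
    // ECJ reports "The value of the local variable unused is not used".
    // With unusedLocal=error the precommit check fails; after this revert the setting is ignore.
    int unused = s.length();
    return Integer.parseInt(s);
  }
}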

View File

@ -86,7 +86,7 @@ ext {
scriptDepVersions = [
"apache-rat": "0.11",
"commons-codec": "1.13",
"ecj": "3.25.0",
"ecj": "3.19.0",
"javacc": "7.0.4",
"jflex": "1.7.0",
"jgit": "5.9.0.202009080501-r",

View File

@ -95,12 +95,6 @@ def commonCleanups = { FileTree generatedFiles ->
text = text.replace(
"public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }",
"// (setDebugStream omitted).")
text = text.replace(
"public class QueryParserTokenManager ",
'@SuppressWarnings("unused") public class QueryParserTokenManager ')
text = text.replace(
"public class StandardSyntaxParserTokenManager ",
'@SuppressWarnings("unused") public class StandardSyntaxParserTokenManager ')
return text
})
}
@ -129,9 +123,6 @@ configure(project(":lucene:queryparser")) {
text = text.replace(
"final private LookaheadSuccess jj_ls =",
"static final private LookaheadSuccess jj_ls =")
text = text.replace(
"public class QueryParser ",
'@SuppressWarnings("unused") public class QueryParser ')
return text
})
}
@ -154,9 +145,6 @@ configure(project(":lucene:queryparser")) {
text = text.replace(
"new java.util.ArrayList<int[]>",
"new java.util.ArrayList<>")
text = text.replace(
"public class QueryParser ",
'@SuppressWarnings("unused") public class QueryParser ')
return text
})
}
@ -233,9 +221,6 @@ configure(project(":lucene:queryparser")) {
text = text.replace(
"Collections.<QueryNode> singletonList",
"Collections.singletonList")
text = text.replace(
"public class StandardSyntaxParser ",
'@SuppressWarnings("unused") public class StandardSyntaxParser ')
return text
})
}

View File

@ -3,7 +3,6 @@ eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=11
org.eclipse.jdt.core.compiler.compliance=11
org.eclipse.jdt.core.compiler.doc.comment.support=enabled
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=enabled
org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=error
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.comparingIdentical=error
@ -33,7 +32,6 @@ org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error
org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=error
org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=error
org.eclipse.jdt.core.compiler.problem.unusedImport=error
org.eclipse.jdt.core.compiler.problem.unusedLocal=error
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=error
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
org.eclipse.jdt.core.compiler.source=11

View File

@ -1,24 +1,13 @@
#Sun Sep 23 20:55:03 EDT 2012
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled
org.eclipse.jdt.core.compiler.annotation.missingNonNullByDefaultAnnotation=ignore
org.eclipse.jdt.core.compiler.annotation.nonnull=org.eclipse.jdt.annotation.NonNull
org.eclipse.jdt.core.compiler.annotation.nonnull.secondary=
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault=org.eclipse.jdt.annotation.NonNullByDefault
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault.secondary=
org.eclipse.jdt.core.compiler.annotation.nonnullisdefault=disabled
org.eclipse.jdt.core.compiler.annotation.nullable=org.eclipse.jdt.annotation.Nullable
org.eclipse.jdt.core.compiler.annotation.nullable.secondary=
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=11
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=11
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.doc.comment.support=enabled
org.eclipse.jdt.core.compiler.problem.APILeak=error
org.eclipse.jdt.core.compiler.problem.annotatedTypeArgumentToUnannotated=error
org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=error
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
@ -29,9 +18,7 @@ org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
org.eclipse.jdt.core.compiler.problem.discouragedReference=error
org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.explicitlyClosedAutoCloseable=ignore
org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
@ -50,10 +37,8 @@ org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=disabled
org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=private
org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=error
org.eclipse.jdt.core.compiler.problem.missingDefaultCase=ignore
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=error
org.eclipse.jdt.core.compiler.problem.missingEnumCaseDespiteDefault=disabled
org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=error
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore
org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled
org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public
@ -69,63 +54,43 @@ org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignor
org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error
org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=error
org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
org.eclipse.jdt.core.compiler.problem.nonnullParameterAnnotationDropped=error
org.eclipse.jdt.core.compiler.problem.nonnullTypeVariableFromLegacyInvocation=error
org.eclipse.jdt.core.compiler.problem.nullAnnotationInferenceConflict=error
org.eclipse.jdt.core.compiler.problem.nullReference=ignore
org.eclipse.jdt.core.compiler.problem.nullSpecViolation=error
org.eclipse.jdt.core.compiler.problem.nullUncheckedConversion=error
org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=error
org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
org.eclipse.jdt.core.compiler.problem.pessimisticNullAnalysisForFreeTypeVariables=error
org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
org.eclipse.jdt.core.compiler.problem.potentiallyUnclosedCloseable=ignore
org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
org.eclipse.jdt.core.compiler.problem.redundantNullAnnotation=error
org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
org.eclipse.jdt.core.compiler.problem.redundantSpecificationOfTypeArguments=ignore
org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=error
org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
org.eclipse.jdt.core.compiler.problem.reportMethodCanBePotentiallyStatic=ignore
org.eclipse.jdt.core.compiler.problem.reportMethodCanBeStatic=ignore
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=error
org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=ignore
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=enabled
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
org.eclipse.jdt.core.compiler.problem.suppressWarningsNotFullyAnalysed=error
org.eclipse.jdt.core.compiler.problem.syntacticNullAnalysisForFields=disabled
org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
org.eclipse.jdt.core.compiler.problem.terminalDeprecation=ignore
org.eclipse.jdt.core.compiler.problem.typeParameterHiding=ignore
org.eclipse.jdt.core.compiler.problem.unavoidableGenericTypeProblems=enabled
org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
org.eclipse.jdt.core.compiler.problem.unclosedCloseable=ignore
org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=ignore
org.eclipse.jdt.core.compiler.problem.unlikelyCollectionMethodArgumentType=error
org.eclipse.jdt.core.compiler.problem.unlikelyCollectionMethodArgumentTypeStrict=disabled
org.eclipse.jdt.core.compiler.problem.unlikelyEqualsArgumentType=error
org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
org.eclipse.jdt.core.compiler.problem.unstableAutoModuleName=ignore
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
org.eclipse.jdt.core.compiler.problem.unusedExceptionParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedImport=error
org.eclipse.jdt.core.compiler.problem.unusedLabel=error
org.eclipse.jdt.core.compiler.problem.unusedLocal=error
org.eclipse.jdt.core.compiler.problem.unusedLabel=ignore
org.eclipse.jdt.core.compiler.problem.unusedLocal=ignore
org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=error
org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=ignore
org.eclipse.jdt.core.compiler.problem.unusedWarningToken=ignore
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=error
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=11

View File

@ -32,7 +32,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
* A CharFilter that wraps another Reader and attempts to strip out HTML constructs.
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
public final class HTMLStripCharFilter extends BaseCharFilter {

View File

@ -30,7 +30,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
* A CharFilter that wraps another Reader and attempts to strip out HTML constructs.
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
%%
%unicode 9.0

View File

@ -22,7 +22,7 @@ package org.apache.lucene.analysis.classic;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/** This class implements the classic lucene StandardTokenizer up until 3.0 */
@SuppressWarnings({"unused", "fallthrough"})
@SuppressWarnings("fallthrough")
class ClassicTokenizerImpl {
/** This character denotes the end of file */

View File

@ -22,7 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class implements the classic lucene StandardTokenizer up until 3.0
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
%%
%class ClassicTokenizerImpl

View File

@ -22,6 +22,7 @@ import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
@ -340,4 +341,24 @@ public class PatternParser extends DefaultHandler {
word = readToken(chars);
}
}
/** Returns a string of the location. */
private String getLocationString(SAXParseException ex) {
StringBuilder str = new StringBuilder();
String systemId = ex.getSystemId();
if (systemId != null) {
int index = systemId.lastIndexOf('/');
if (index != -1) {
systemId = systemId.substring(index + 1);
}
str.append(systemId);
}
str.append(':');
str.append(ex.getLineNumber());
str.append(':');
str.append(ex.getColumnNumber());
return str.toString();
} // getLocationString(SAXParseException):String
}

View File

@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
* </ul>
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
public final class UAX29URLEmailTokenizerImpl {

View File

@ -40,7 +40,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
* </ul>
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
%%
%unicode 9.0

View File

@ -619,6 +619,10 @@ public class KStemmer {
* CharArrayMap<String>(maxCacheSize,false); }
***/
private char finalChar() {
return word.charAt(k);
}
private char penultChar() {
return word.charAt(k - 1);
}

View File

@ -59,6 +59,8 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
private static final String PARAM_DICTIONARY = "dictionary";
private static final String PARAM_AFFIX = "affix";
// NOTE: this one is currently unused?:
private static final String PARAM_RECURSION_CAP = "recursionCap";
private static final String PARAM_IGNORE_CASE = "ignoreCase";
private static final String PARAM_LONGEST_ONLY = "longestOnly";

View File

@ -348,7 +348,7 @@ public final class ConcatenateGraphFilter extends TokenStream {
* @lucene.internal
*/
public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl
implements BytesRefBuilderTermAttribute {
implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute {
private final BytesRefBuilder bytes = new BytesRefBuilder();
private transient CharsRefBuilder charsRef;

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
public final class FixBrokenOffsetsFilter extends TokenFilter {
private int lastStartOffset;
private int lastEndOffset;
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@ -57,6 +58,7 @@ public final class FixBrokenOffsetsFilter extends TokenFilter {
public void reset() throws IOException {
super.reset();
lastStartOffset = 0;
lastEndOffset = 0;
}
private void fixOffsets() {
@ -70,5 +72,6 @@ public final class FixBrokenOffsetsFilter extends TokenFilter {
}
offsetAtt.setOffset(startOffset, endOffset);
lastStartOffset = startOffset;
lastEndOffset = endOffset;
}
}

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.ResourceLoader;
import org.apache.lucene.util.ResourceLoaderAware;
/**
* Factory for a {@link ProtectedTermFilter}
@ -81,7 +82,8 @@ import org.apache.lucene.util.ResourceLoader;
* @since 7.4.0
* @lucene.spi {@value #NAME}
*/
public class ProtectedTermFilterFactory extends ConditionalTokenFilterFactory {
public class ProtectedTermFilterFactory extends ConditionalTokenFilterFactory
implements ResourceLoaderAware {
public static final String NAME = "protectedTerm";

View File

@ -23,7 +23,7 @@ import org.apache.lucene.util.BytesRef;
*
* @see org.apache.lucene.analysis.payloads.PayloadHelper#encodeFloat(float, byte[], int)
*/
public class FloatEncoder extends AbstractEncoder {
public class FloatEncoder extends AbstractEncoder implements PayloadEncoder {
@Override
public BytesRef encode(char[] buffer, int offset, int length) {

View File

@ -23,7 +23,7 @@ import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.BytesRef;
/** Does nothing other than convert the char array to a byte array using the specified encoding. */
public class IdentityEncoder extends AbstractEncoder {
public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder {
protected Charset charset = StandardCharsets.UTF_8;
public IdentityEncoder() {}

View File

@ -24,7 +24,7 @@ import org.apache.lucene.util.BytesRef;
*
* <p>See {@link org.apache.lucene.analysis.payloads.PayloadHelper#encodeInt(int, byte[], int)}.
*/
public class IntegerEncoder extends AbstractEncoder {
public class IntegerEncoder extends AbstractEncoder implements PayloadEncoder {
@Override
public BytesRef encode(char[] buffer, int offset, int length) {

View File

@ -22,7 +22,7 @@ package org.apache.lucene.analysis.wikipedia;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/** JFlex-generated tokenizer that is aware of Wikipedia syntax. */
@SuppressWarnings({"unused", "fallthrough"})
@SuppressWarnings("fallthrough")
class WikipediaTokenizerImpl {
/** This character denotes the end of file */

View File

@ -22,7 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* JFlex-generated tokenizer that is aware of Wikipedia syntax.
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
%%
%class WikipediaTokenizerImpl

View File

@ -358,8 +358,9 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
static void assertLegalOffsets(String in) throws Exception {
int length = in.length();
HTMLStripCharFilter reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
int ch = 0;
int off = 0;
while (reader.read() != -1) {
while ((ch = reader.read()) != -1) {
int correction = reader.correctOffset(off);
assertTrue(
"invalid offset correction: " + off + "->" + correction + " for doc of length: " + length,

View File

@ -56,10 +56,11 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamFactoryTestCase {
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
expectThrows(
IllegalArgumentException.class,
() -> {
tokenFilterFactory("CJKBigram", "bogusArg", "bogusValue");
});
IllegalArgumentException expected =
expectThrows(
IllegalArgumentException.class,
() -> {
tokenFilterFactory("CJKBigram", "bogusArg", "bogusValue");
});
}
}

View File

@ -673,4 +673,16 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
null,
false);
}
private Analyzer getAnalyzer(final int flags) {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(
tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
}
};
}
}

View File

@ -800,6 +800,16 @@ public class TestWordDelimiterGraphFilter extends BaseTokenStreamTestCase {
return (flags & flag) != 0;
}
private static boolean isEnglishPossessive(String text, int pos) {
if (pos > 2) {
if ((text.charAt(pos - 1) == 's' || text.charAt(pos - 1) == 'S')
&& (pos == text.length() || text.charAt(pos) != '-')) {
text = text.substring(0, text.length() - 2);
}
}
return true;
}
private static class WordPart {
final String part;
final int startOffset;

View File

@ -44,7 +44,7 @@ public class TestNGramTokenizer extends BaseTokenStreamTestCase {
expectThrows(
IllegalArgumentException.class,
() -> {
new NGramTokenizer(2, 1);
NGramTokenizer tok = new NGramTokenizer(2, 1);
});
}

View File

@ -70,7 +70,7 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
public void testNoTokens() throws Exception {
Tokenizer t = new SimplePatternSplitTokenizer(".*");
t.getAttribute(CharTermAttribute.class);
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
String s;
while (true) {
s = TestUtil.randomUnicodeString(random());
@ -95,7 +95,7 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
public void testSplitSingleCharWhitespace() throws Exception {
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]");
t.getAttribute(CharTermAttribute.class);
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
t.setReader(new StringReader("a \tb c"));
assertTokenStreamContents(
t, new String[] {"a", "b", "c"}, new int[] {0, 3, 7}, new int[] {1, 4, 8});
@ -103,7 +103,7 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
public void testSplitMultiCharWhitespace() throws Exception {
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
t.getAttribute(CharTermAttribute.class);
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
t.setReader(new StringReader("a \tb c"));
assertTokenStreamContents(
t, new String[] {"a", "b", "c"}, new int[] {0, 3, 7}, new int[] {1, 4, 8});
@ -111,21 +111,21 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
public void testLeadingNonToken() throws Exception {
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
t.getAttribute(CharTermAttribute.class);
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
t.setReader(new StringReader(" a c"));
assertTokenStreamContents(t, new String[] {"a", "c"}, new int[] {4, 6}, new int[] {5, 7});
}
public void testTrailingNonToken() throws Exception {
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
t.getAttribute(CharTermAttribute.class);
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
t.setReader(new StringReader("a c "));
assertTokenStreamContents(t, new String[] {"a", "c"}, new int[] {0, 2}, new int[] {1, 3});
}
public void testEmptyStringPatternOneMatch() throws Exception {
Tokenizer t = new SimplePatternSplitTokenizer("a*");
t.getAttribute(CharTermAttribute.class);
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
t.setReader(new StringReader("bbab"));
assertTokenStreamContents(t, new String[] {"bb", "b"}, new int[] {0, 3}, new int[] {2, 4});
}

View File

@ -62,6 +62,14 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig);
}
private void assertEquals(CharTermAttribute term, String expected) {
assertEquals(expected.length(), term.length());
final char[] buffer = term.buffer();
for (int chIDX = 0; chIDX < expected.length(); chIDX++) {
assertEquals(expected.charAt(chIDX), buffer[chIDX]);
}
}
// For the output string: separate positions with a space,
// and separate multiple tokens at each position with a
// /. If a token should have end offset != the input

View File

@ -156,7 +156,8 @@ public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCas
expectThrows(
IllegalArgumentException.class,
() -> {
tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
Tokenizer tf =
tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
});
assertTrue(
expected

View File

@ -25,7 +25,7 @@ import org.apache.lucene.util.AttributeReflector;
*
* @lucene.experimental
*/
public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute {
public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable {
private int code = UScript.COMMON;
/** Initializes this attribute with <code>UScript.COMMON</code> */

View File

@ -21,7 +21,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Attribute for {@link Token#getBaseForm()}. */
public class BaseFormAttributeImpl extends AttributeImpl implements BaseFormAttribute {
public class BaseFormAttributeImpl extends AttributeImpl implements BaseFormAttribute, Cloneable {
private Token token;
@Override

View File

@ -22,7 +22,8 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Attribute for Kuromoji inflection data. */
public class InflectionAttributeImpl extends AttributeImpl implements InflectionAttribute {
public class InflectionAttributeImpl extends AttributeImpl
implements InflectionAttribute, Cloneable {
private Token token;
@Override

View File

@ -22,7 +22,8 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Attribute for {@link Token#getPartOfSpeech()}. */
public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute {
public class PartOfSpeechAttributeImpl extends AttributeImpl
implements PartOfSpeechAttribute, Cloneable {
private Token token;
@Override

View File

@ -22,7 +22,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Attribute for Kuromoji reading data */
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute {
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute, Cloneable {
private Token token;
@Override

View File

@ -27,7 +27,7 @@ import org.apache.lucene.util.AttributeReflector;
* @see MorphosyntacticTagsAttribute
*/
public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
implements MorphosyntacticTagsAttribute {
implements MorphosyntacticTagsAttribute, Cloneable {
/** Initializes this attribute with no tags */
public MorphosyntacticTagsAttributeImpl() {}

View File

@ -28,7 +28,8 @@ import org.apache.lucene.util.AttributeReflector;
*
* @lucene.experimental
*/
public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute {
public class PartOfSpeechAttributeImpl extends AttributeImpl
implements PartOfSpeechAttribute, Cloneable {
private Token token;
@Override

View File

@ -25,7 +25,7 @@ import org.apache.lucene.util.AttributeReflector;
*
* @lucene.experimental
*/
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute {
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute, Cloneable {
private Token token;
@Override

View File

@ -43,6 +43,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
private int termNum = 0;
private int sentenceStart = 0;
private NLPSentenceDetectorOp sentenceOp = null;
private NLPTokenizerOp tokenizerOp = null;
public OpenNLPTokenizer(
@ -53,6 +54,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
throw new IllegalArgumentException(
"OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
}
this.sentenceOp = sentenceOp;
this.tokenizerOp = tokenizerOp;
}

View File

@ -82,9 +82,10 @@ public class TestOpenNLPTokenizerFactory extends BaseTokenStreamTestCase {
expectThrows(
IllegalArgumentException.class,
() -> {
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
.withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
.build();
CustomAnalyzer analyzer =
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
.withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
.build();
});
assertTrue(
expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
@ -96,9 +97,10 @@ public class TestOpenNLPTokenizerFactory extends BaseTokenStreamTestCase {
expectThrows(
IllegalArgumentException.class,
() -> {
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
.withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
.build();
CustomAnalyzer analyzer =
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
.withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
.build();
});
assertTrue(
expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));

View File

@ -27,6 +27,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/** Filter for DoubleMetaphone (supporting secondary codes) */
public final class DoubleMetaphoneFilter extends TokenFilter {
private static final String TOKEN_TYPE = "DoubleMetaphone";
private final LinkedList<State> remainingTokens = new LinkedList<>();
private final DoubleMetaphone encoder = new DoubleMetaphone();
private final boolean inject;

View File

@ -53,6 +53,8 @@ class BigramDictionary extends AbstractDictionary {
private int max = 0;
private int repeat = 0;
// static Logger log = Logger.getLogger(BigramDictionary.class);
public static synchronized BigramDictionary getInstance() {
@ -141,7 +143,7 @@ class BigramDictionary extends AbstractDictionary {
*/
public void loadFromFile(String dctFilePath) throws IOException {
int i, cnt, length;
int i, cnt, length, total = 0;
// The file only counted 6763 Chinese characters plus 5 reserved slots 3756~3760.
// The 3756th is used (as a header) to store information.
int[] buffer = new int[3];
@ -161,6 +163,7 @@ class BigramDictionary extends AbstractDictionary {
if (cnt <= 0) {
continue;
}
total += cnt;
int j = 0;
while (j < cnt) {
dctFile.read(intBuffer);
@ -229,11 +232,13 @@ class BigramDictionary extends AbstractDictionary {
if (hash2 < 0) hash2 = PRIME_BIGRAM_LENGTH + hash2;
int index = hash1;
int i = 1;
repeat++;
while (bigramHashTable[index] != 0
&& bigramHashTable[index] != hashId
&& i < PRIME_BIGRAM_LENGTH) {
index = (hash1 + i * hash2) % PRIME_BIGRAM_LENGTH;
i++;
repeat++;
if (i > max) max = i;
}
// System.out.println(i - 1);

View File

@ -228,6 +228,7 @@ public class Trie {
int cmd = -1;
StrEnum e = new StrEnum(key, forward);
Character ch = null;
Character aux = null;
for (int i = 0; i < key.length(); ) {
ch = e.next();
@ -242,7 +243,7 @@ public class Trie {
for (int skip = c.skip; skip > 0; skip--) {
if (i < key.length()) {
e.next();
aux = e.next();
} else {
return null;
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene50.compressing;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
@ -57,7 +58,8 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
public final class Lucene50CompressingTermVectorsReader extends TermVectorsReader {
public final class Lucene50CompressingTermVectorsReader extends TermVectorsReader
implements Closeable {
// hard limit on the maximum number of documents per chunk
static final int MAX_DOCUMENTS_PER_CHUNK = 128;

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene60;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -31,7 +32,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
/** Reads point values previously written with Lucene60PointsWriter */
public class Lucene60PointsReader extends PointsReader {
public class Lucene60PointsReader extends PointsReader implements Closeable {
final IndexInput dataIn;
final SegmentReadState readState;
final Map<Integer, BKDReader> readers = new HashMap<>();

View File

@ -20,6 +20,7 @@ import static org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat
import static org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE;
import java.io.Closeable; // javadocs
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
@ -53,7 +54,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.DirectWriter;
/** writer for {@link Lucene70DocValuesFormat} */
final class Lucene70DocValuesConsumer extends DocValuesConsumer {
final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Closeable {
IndexOutput data, meta;
final int maxDoc;

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene70;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -48,7 +49,7 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.packed.DirectReader;
/** reader for {@link Lucene70DocValuesFormat} */
final class Lucene70DocValuesProducer extends DocValuesProducer {
final class Lucene70DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();

View File

@ -327,6 +327,20 @@ final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
};
}
private IndexInput getDisiInput2(FieldInfo field, NormsEntry entry) throws IOException {
IndexInput slice = null;
if (merging) {
slice = disiInputs.get(field.number);
}
if (slice == null) {
slice = data.slice("docs", entry.docsWithFieldOffset, entry.docsWithFieldLength);
if (merging) {
disiInputs.put(field.number, slice);
}
}
return slice;
}
@Override
public NumericDocValues getNorms(FieldInfo field) throws IOException {
final NormsEntry entry = norms.get(field.number);

View File

@ -106,14 +106,17 @@ public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
Throwable priorE = null;
SegmentInfo si = null;
try {
CodecUtil.checkIndexHeader(
input,
Lucene70SegmentInfoFormat.CODEC_NAME,
Lucene70SegmentInfoFormat.VERSION_START,
Lucene70SegmentInfoFormat.VERSION_CURRENT,
segmentID,
"");
int format =
CodecUtil.checkIndexHeader(
input,
Lucene70SegmentInfoFormat.CODEC_NAME,
Lucene70SegmentInfoFormat.VERSION_START,
Lucene70SegmentInfoFormat.VERSION_CURRENT,
segmentID,
"");
si = parseSegmentInfo(dir, input, segment, segmentID);
} catch (Throwable exception) {
priorE = exception;
} finally {

View File

@ -62,7 +62,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.DirectWriter;
/** writer for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesConsumer extends DocValuesConsumer {
final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Closeable {
final Lucene80DocValuesFormat.Mode mode;
IndexOutput data, meta;

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene80;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -51,7 +52,7 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.packed.DirectReader;
/** reader for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesProducer extends DocValuesProducer {
final class Lucene80DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene60;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@ -40,7 +41,7 @@ import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
/** Writes dimensional values */
public class Lucene60PointsWriter extends PointsWriter {
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
/** Output used to write the BKD tree data file */
protected final IndexOutput dataOut;

View File

@ -97,6 +97,7 @@ public class TestIndexedDISI extends LuceneTestCase {
private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException {
final int cardinality = set.cardinality();
final byte denseRankPower = 9; // Not tested here so fixed to isolate factors
long length;
int jumpTableentryCount;
try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
jumpTableentryCount =
@ -433,7 +434,9 @@ public class TestIndexedDISI extends LuceneTestCase {
length = out.getFilePointer();
}
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPowerRead, set.cardinality());
IndexedDISI disi =
new IndexedDISI(
in, 0L, length, jumpTableEntryCount, denseRankPowerRead, set.cardinality());
}
// This tests the legality of the denseRankPower only, so we don't do anything with the disi
}

View File

@ -36,7 +36,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
* A QueryMaker that uses common and uncommon actual Wikipedia queries for searching the English
* Wikipedia collection. 90 queries total.
*/
public class EnwikiQueryMaker extends AbstractQueryMaker {
public class EnwikiQueryMaker extends AbstractQueryMaker implements QueryMaker {
// common and a few uncommon queries from wikipedia search logs
private static String[] STANDARD_QUERIES = {

View File

@ -43,7 +43,7 @@ import org.apache.lucene.util.IOUtils;
* <pre>file.query.maker.file=c:/myqueries.txt
* file.query.maker.default.field=body</pre>
*/
public class FileBasedQueryMaker extends AbstractQueryMaker {
public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMaker {
@Override
protected Query[] prepareQueries() throws Exception {

View File

@ -34,7 +34,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
* A QueryMaker that makes queries devised manually (by Grant Ingersoll) for searching in the
* Reuters collection.
*/
public class ReutersQueryMaker extends AbstractQueryMaker {
public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker {
private static String[] STANDARD_QUERIES = {
// Start with some short queries

View File

@ -30,7 +30,7 @@ import org.apache.lucene.search.TermQuery;
* A QueryMaker that makes queries for a collection created using {@link
* org.apache.lucene.benchmark.byTask.feeds.SingleDocSource}.
*/
public class SimpleQueryMaker extends AbstractQueryMaker {
public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
/**
* Prepare the queries for this test. Extending classes can override this method for preparing

View File

@ -97,6 +97,12 @@ public class ReadTokensTask extends PerfTask {
int left;
String s;
void init(String s) {
this.s = s;
left = s.length();
this.upto = 0;
}
@Override
public int read(char[] c) {
return read(c, 0, c.length);

View File

@ -178,7 +178,6 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception;
}
@SuppressWarnings("unused")
private volatile int preventOptimizeAway = 0;
private class StandardHLImpl implements HLImpl {

View File

@ -437,7 +437,7 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
}
}
}
return new NewsPost(body.toString(), subject, groupName);
return new NewsPost(body.toString(), subject, groupName, number);
} catch (Throwable e) {
return null;
}
@ -447,11 +447,13 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
private final String body;
private final String subject;
private final String group;
private final String number;
private NewsPost(String body, String subject, String group) {
private NewsPost(String body, String subject, String group, String number) {
this.body = body;
this.subject = subject;
this.group = group;
this.number = number;
}
public String getBody() {
@ -465,5 +467,9 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
public String getGroup() {
return group;
}
public String getNumber() {
return number;
}
}
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.blockterms;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@ -51,7 +52,7 @@ import org.apache.lucene.util.IOUtils;
*
* @lucene.experimental
*/
public class BlockTermsWriter extends FieldsConsumer {
public class BlockTermsWriter extends FieldsConsumer implements Closeable {
static final String CODEC_NAME = "BlockTermsWriter";

View File

@ -1920,6 +1920,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
public HighFreqDocsEnum() {}
public int[] getDocIDs() {
return docIDs;
}
public int[] getFreqs() {
return freqs;
}
public PostingsEnum reset(int[] docIDs, int[] freqs) {
this.docIDs = docIDs;
this.freqs = freqs;
@ -2098,6 +2106,18 @@ public final class DirectPostingsFormat extends PostingsFormat {
posJump = hasOffsets ? 3 : 1;
}
public int[] getDocIDs() {
return docIDs;
}
public int[][] getPositions() {
return positions;
}
public int getPosJump() {
return posJump;
}
public PostingsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
this.docIDs = docIDs;
this.freqs = freqs;

View File

@ -559,7 +559,7 @@ public class FSTTermsReader extends FieldsProducer {
if (term == null) {
return SeekStatus.END;
} else {
return term.get().equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
}

View File

@ -22,6 +22,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_V
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@ -194,6 +195,59 @@ final class SimpleTextBKDReader extends PointValues implements Accountable {
}
}
private void visitCompressedDocValues(
int[] commonPrefixLengths,
byte[] scratchPackedValue,
IndexInput in,
int[] docIDs,
int count,
IntersectVisitor visitor,
int compressedDim)
throws IOException {
// the byte at `compressedByteOffset` is compressed using run-length compression,
// other suffix bytes are stored verbatim
final int compressedByteOffset =
compressedDim * bytesPerDim + commonPrefixLengths[compressedDim];
commonPrefixLengths[compressedDim]++;
int i;
for (i = 0; i < count; ) {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
for (int dim = 0; dim < numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim * bytesPerDim + prefix, bytesPerDim - prefix);
}
visitor.visit(docIDs[i + j], scratchPackedValue);
}
i += runLen;
}
if (i != count) {
throw new CorruptIndexException(
"Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
if (compressedDim < -1 || compressedDim >= numIndexDims) {
throw new CorruptIndexException("Got compressedDim=" + compressedDim, in);
}
return compressedDim;
}
private void readCommonPrefixes(
int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
for (int dim = 0; dim < numDims; dim++) {
int prefix = in.readVInt();
commonPrefixLengths[dim] = prefix;
if (prefix > 0) {
in.readBytes(scratchPackedValue, dim * bytesPerDim, prefix);
}
// System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
}
}
private void intersect(
IntersectState state, int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked)
throws IOException {

View File

@ -816,6 +816,40 @@ final class SimpleTextBKDWriter implements Closeable {
}
}
private void writeLeafBlockPackedValuesRange(
IndexOutput out,
int[] commonPrefixLengths,
int start,
int end,
IntFunction<BytesRef> packedValues)
throws IOException {
for (int i = start; i < end; ++i) {
BytesRef ref = packedValues.apply(i);
assert ref.length == config.packedBytesLength;
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
out.writeBytes(
ref.bytes, ref.offset + dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
}
}
private static int runLen(
IntFunction<BytesRef> packedValues, int start, int end, int byteOffset) {
BytesRef first = packedValues.apply(start);
byte b = first.bytes[first.offset + byteOffset];
for (int i = start + 1; i < end; ++i) {
BytesRef ref = packedValues.apply(i);
byte b2 = ref.bytes[ref.offset + byteOffset];
assert Byte.toUnsignedInt(b2) >= Byte.toUnsignedInt(b);
if (b != b2) {
return i - start;
}
}
return end - start;
}
@Override
public void close() throws IOException {
if (tempInput != null) {

View File

@ -157,6 +157,14 @@ class SimpleTextPointsWriter extends PointsWriter {
SimpleTextUtil.write(out, s, scratch);
}
private void writeInt(IndexOutput out, int x) throws IOException {
SimpleTextUtil.write(out, Integer.toString(x), scratch);
}
private void writeLong(IndexOutput out, long x) throws IOException {
SimpleTextUtil.write(out, Long.toString(x), scratch);
}
private void write(IndexOutput out, BytesRef b) throws IOException {
SimpleTextUtil.write(out, b);
}

View File

@ -74,8 +74,8 @@ public class SimpleTextVectorWriter extends VectorWriter {
public void writeField(FieldInfo fieldInfo, VectorValues vectors) throws IOException {
long vectorDataOffset = vectorData.getFilePointer();
List<Integer> docIds = new ArrayList<>();
int docV;
for (docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc()) {
int docV, ord = 0;
for (docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc(), ord++) {
writeVectorValue(vectors);
docIds.add(docV);
}

View File

@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
* </ul>
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
public final class StandardTokenizerImpl {

View File

@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
* </ul>
*/
@SuppressWarnings({"unused","fallthrough"})
@SuppressWarnings("fallthrough")
%%
%unicode 9.0

View File

@ -26,7 +26,8 @@ import org.apache.lucene.util.BytesRef;
*
* @lucene.internal
*/
public class BytesTermAttributeImpl extends AttributeImpl implements BytesTermAttribute {
public class BytesTermAttributeImpl extends AttributeImpl
implements BytesTermAttribute, TermToBytesRefAttribute {
private BytesRef bytes;
/** Initialize this attribute with no bytes. */

View File

@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRefBuilder;
/** Default implementation of {@link CharTermAttribute}. */
public class CharTermAttributeImpl extends AttributeImpl
implements CharTermAttribute, TermToBytesRefAttribute {
implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
private static int MIN_BUFFER_SIZE = 10;
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, Character.BYTES)];

View File

@ -20,7 +20,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link FlagsAttribute}. */
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute {
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
private int flags = 0;
/** Initialize this attribute with no bits set */

View File

@ -20,7 +20,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link OffsetAttribute}. */
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute {
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
private int startOffset;
private int endOffset;

View File

@ -21,7 +21,7 @@ import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
/** Default implementation of {@link PayloadAttribute}. */
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute {
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
private BytesRef payload;
/** Initialize this attribute with no payload. */

View File

@ -21,7 +21,7 @@ import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link PositionIncrementAttribute}. */
public class PositionIncrementAttributeImpl extends AttributeImpl
implements PositionIncrementAttribute {
implements PositionIncrementAttribute, Cloneable {
private int positionIncrement = 1;
/** Initialize this attribute with position increment of 1 */

View File

@ -20,7 +20,8 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link PositionLengthAttribute}. */
public class PositionLengthAttributeImpl extends AttributeImpl implements PositionLengthAttribute {
public class PositionLengthAttributeImpl extends AttributeImpl
implements PositionLengthAttribute, Cloneable {
private int positionLength = 1;
/** Initializes this attribute with position length of 1. */

View File

@ -20,7 +20,8 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link TermFrequencyAttribute}. */
public class TermFrequencyAttributeImpl extends AttributeImpl implements TermFrequencyAttribute {
public class TermFrequencyAttributeImpl extends AttributeImpl
implements TermFrequencyAttribute, Cloneable {
private int termFrequency = 1;
/** Initialize this attribute with term frequency of 1 */

View File

@ -20,7 +20,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link TypeAttribute}. */
public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute {
public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable {
private String type;
/** Initialize this attribute with {@link TypeAttribute#DEFAULT_TYPE} */

View File

@ -118,6 +118,7 @@ public abstract class VectorWriter implements Closeable {
/** Tracks state of one sub-reader that we are merging */
private static class VectorValuesSub extends DocIDMerger.Sub {
final MergeState.DocMap docMap;
final VectorValues values;
final int segmentIndex;
int count;
@ -126,6 +127,7 @@ public abstract class VectorWriter implements Closeable {
super(docMap);
this.values = values;
this.segmentIndex = segmentIndex;
this.docMap = docMap;
assert values.docID() == -1;
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.lucene86;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -32,7 +33,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
/** Reads point values previously written with {@link Lucene86PointsWriter} */
public class Lucene86PointsReader extends PointsReader {
public class Lucene86PointsReader extends PointsReader implements Closeable {
final IndexInput indexIn, dataIn;
final SegmentReadState readState;
final Map<Integer, BKDReader> readers = new HashMap<>();

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.lucene86;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@ -38,7 +39,7 @@ import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
/** Writes dimensional values */
public class Lucene86PointsWriter extends PointsWriter {
public class Lucene86PointsWriter extends PointsWriter implements Closeable {
/** Outputs used to write the BKD tree data files. */
protected final IndexOutput metaOut, indexOut, dataOut;

View File

@ -103,9 +103,12 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
Throwable priorE = null;
SegmentInfo si = null;
try {
CodecUtil.checkIndexHeader(
input, CODEC_NAME, VERSION_START, VERSION_CURRENT, segmentID, "");
int format =
CodecUtil.checkIndexHeader(
input, CODEC_NAME, VERSION_START, VERSION_CURRENT, segmentID, "");
si = parseSegmentInfo(dir, input, segment, segmentID);
} catch (Throwable exception) {
priorE = exception;
} finally {

View File

@ -62,7 +62,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.DirectWriter;
/** writer for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesConsumer extends DocValuesConsumer {
final class Lucene90DocValuesConsumer extends DocValuesConsumer implements Closeable {
final Lucene90DocValuesFormat.Mode mode;
IndexOutput data, meta;

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.lucene90;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -51,7 +52,7 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.packed.DirectReader;
/** reader for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesProducer extends DocValuesProducer {
final class Lucene90DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();

View File

@ -125,13 +125,14 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
Throwable priorE = null;
FieldInfo infos[] = null;
try {
CodecUtil.checkIndexHeader(
input,
Lucene90FieldInfosFormat.CODEC_NAME,
Lucene90FieldInfosFormat.FORMAT_START,
Lucene90FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(),
segmentSuffix);
int version =
CodecUtil.checkIndexHeader(
input,
Lucene90FieldInfosFormat.CODEC_NAME,
Lucene90FieldInfosFormat.FORMAT_START,
Lucene90FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(),
segmentSuffix);
final int size = input.readVInt(); // read in the size
infos = new FieldInfo[size];

View File

@ -21,6 +21,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@ -323,6 +324,7 @@ public final class Lucene90VectorReader extends VectorReader {
final BytesRef binaryValue;
final ByteBuffer byteBuffer;
final FloatBuffer floatBuffer;
final int byteSize;
final float[] value;
@ -334,6 +336,7 @@ public final class Lucene90VectorReader extends VectorReader {
this.dataIn = dataIn;
byteSize = Float.BYTES * fieldEntry.dimension;
byteBuffer = ByteBuffer.allocate(byteSize);
floatBuffer = byteBuffer.asFloatBuffer();
value = new float[fieldEntry.dimension];
binaryValue = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
}

View File

@ -97,12 +97,14 @@ final class IntersectTermsEnumFrame {
int suffix;
private final IntersectTermsEnum ite;
private final int version;
public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
this.ite = ite;
this.ord = ord;
this.termState = ite.fr.parent.postingsReader.newTermState();
this.termState.totalTermFreq = -1;
this.version = ite.fr.parent.version;
suffixLengthBytes = new byte[32];
suffixLengthsReader = new ByteArrayDataInput();
}

View File

@ -271,6 +271,13 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
return bytes;
}
/** Seek {@code input} to the directory offset. */
private static void seekDir(IndexInput input) throws IOException {
input.seek(input.length() - CodecUtil.footerLength() - 8);
long offset = input.readLong();
input.seek(offset);
}
// for debugging
// private static String toHex(int v) {
// return "0x" + Integer.toHexString(v);

View File

@ -94,12 +94,14 @@ final class SegmentTermsEnumFrame {
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
private final SegmentTermsEnum ste;
private final int version;
public SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException {
this.ste = ste;
this.ord = ord;
this.state = ste.fr.parent.postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.version = ste.fr.parent.version;
suffixLengthBytes = new byte[32];
suffixLengthsReader = new ByteArrayDataInput();
}

View File

@ -29,6 +29,7 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
@ -70,7 +71,8 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader {
public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
implements Closeable {
private final FieldInfos fieldInfos;
final FieldsIndex indexReader;


@ -64,6 +64,8 @@ import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* Basic tool and API to check the health of an index and write a new segments file that removes
@ -1092,6 +1094,171 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Visits all terms in the range minTerm (inclusive) to maxTerm (exclusive), marking all doc IDs
* encountered into allDocsSeen, and returning the total number of terms visited.
*/
private static long getDocsFromTermRange(
String field,
int maxDoc,
TermsEnum termsEnum,
FixedBitSet docsSeen,
BytesRef minTerm,
BytesRef maxTerm,
boolean isIntersect)
throws IOException {
docsSeen.clear(0, docsSeen.length());
long termCount = 0;
PostingsEnum postingsEnum = null;
BytesRefBuilder lastTerm = null;
while (true) {
BytesRef term;
// Kinda messy: for intersect, we must first next(), but for "normal", we are already on our
// first term:
if (isIntersect || termCount != 0) {
term = termsEnum.next();
} else {
term = termsEnum.term();
}
if (term == null) {
if (isIntersect == false) {
throw new RuntimeException("didn't see max term field=" + field + " term=" + maxTerm);
}
// System.out.println(" terms=" + termCount);
return termCount;
}
assert term.isValid();
if (lastTerm == null) {
lastTerm = new BytesRefBuilder();
lastTerm.copyBytes(term);
} else {
if (lastTerm.get().compareTo(term) >= 0) {
throw new RuntimeException(
"terms out of order: lastTerm=" + lastTerm.get() + " term=" + term);
}
lastTerm.copyBytes(term);
}
// System.out.println(" term=" + term);
// Caller already ensured terms enum positioned >= minTerm:
if (term.compareTo(minTerm) < 0) {
throw new RuntimeException("saw term before min term field=" + field + " term=" + minTerm);
}
if (isIntersect == false) {
int cmp = term.compareTo(maxTerm);
if (cmp == 0) {
// Done!
// System.out.println(" terms=" + termCount);
return termCount;
} else if (cmp > 0) {
throw new RuntimeException("didn't see end term field=" + field + " term=" + maxTerm);
}
}
postingsEnum = termsEnum.postings(postingsEnum, 0);
int lastDoc = -1;
while (true) {
int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (doc <= lastDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
}
if (doc >= maxDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
}
// System.out.println(" doc=" + doc);
docsSeen.set(doc);
lastDoc = doc;
}
termCount++;
}
}
/**
* Test Terms.intersect on this range, and validates that it returns the same doc ids as using
* non-intersect TermsEnum. Returns true if any fake terms were seen.
*/
private static boolean checkSingleTermRange(
String field,
int maxDoc,
Terms terms,
BytesRef minTerm,
BytesRef maxTerm,
FixedBitSet normalDocs,
FixedBitSet intersectDocs)
throws IOException {
// System.out.println(" check minTerm=" + minTerm.utf8ToString() + " maxTerm=" +
// maxTerm.utf8ToString());
assert minTerm.compareTo(maxTerm) <= 0;
TermsEnum termsEnum = terms.iterator();
TermsEnum.SeekStatus status = termsEnum.seekCeil(minTerm);
if (status != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException(
"failed to seek to existing term field=" + field + " term=" + minTerm);
}
// Do "dumb" iteration to visit all terms in the range:
long normalTermCount =
getDocsFromTermRange(field, maxDoc, termsEnum, normalDocs, minTerm, maxTerm, false);
// Now do the same operation using intersect:
long intersectTermCount =
getDocsFromTermRange(
field,
maxDoc,
terms.intersect(
new CompiledAutomaton(
Automata.makeBinaryInterval(minTerm, true, maxTerm, false),
true,
false,
Integer.MAX_VALUE,
true),
null),
intersectDocs,
minTerm,
maxTerm,
true);
if (intersectTermCount > normalTermCount) {
throw new RuntimeException(
"intersect returned too many terms: field="
+ field
+ " intersectTermCount="
+ intersectTermCount
+ " normalTermCount="
+ normalTermCount);
}
if (normalDocs.equals(intersectDocs) == false) {
throw new RuntimeException(
"intersect visited different docs than straight terms enum: "
+ normalDocs.cardinality()
+ " for straight enum, vs "
+ intersectDocs.cardinality()
+ " for intersect, minTerm="
+ minTerm
+ " maxTerm="
+ maxTerm);
}
// System.out.println(" docs=" + normalTermCount);
// System.out.println(" " + intersectTermCount + " vs " + normalTermCount);
return intersectTermCount != normalTermCount;
}
/**
* checks Fields api is consistent with itself. searcher is optional, to verify with queries. Can
* be null.
@ -2386,6 +2553,7 @@ public final class CheckIndex implements Closeable {
public static class VerifyPointsVisitor implements PointValues.IntersectVisitor {
private long pointCountSeen;
private int lastDocID = -1;
private final int maxDoc;
private final FixedBitSet docsSeen;
private final byte[] lastMinPackedValue;
private final byte[] lastMaxPackedValue;
@ -2402,6 +2570,7 @@ public final class CheckIndex implements Closeable {
/** Sole constructor */
public VerifyPointsVisitor(String fieldName, int maxDoc, PointValues values)
throws IOException {
this.maxDoc = maxDoc;
this.fieldName = fieldName;
numDataDims = values.getNumDimensions();
numIndexDims = values.getNumIndexDimensions();


@ -49,6 +49,7 @@ public class OrdinalMap implements Accountable {
// TODO: use more efficient packed ints structures?
private static class TermsEnumIndex {
public static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;


@ -35,6 +35,8 @@ public class SpanScorer extends Scorer {
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
private float freq;
/** number of matches (computed in setFreqCurrentDoc) */
private int numMatches;
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
@ -75,12 +77,13 @@ public class SpanScorer extends Scorer {
}
/**
* Sets {@link #freq} for the current document.
* Sets {@link #freq} and {@link #numMatches} for the current document.
*
* <p>This will be called at most once per document.
*/
protected final void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
spans.doStartCurrentDoc();
@ -99,6 +102,7 @@ public class SpanScorer extends Scorer {
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased
// endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos=" + endPos;
numMatches++;
if (docScorer == null) { // scores not required, break out here
freq = 1;
return;


@ -37,7 +37,7 @@ import java.io.IOException;
*
* @see Directory
*/
public abstract class IndexInput extends DataInput implements Closeable {
public abstract class IndexInput extends DataInput implements Cloneable, Closeable {
private final String resourceDescription;


@ -28,7 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
*
* @lucene.internal
*/
public final class FixedBitSet extends BitSet {
public final class FixedBitSet extends BitSet implements Bits, Accountable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);


@ -374,6 +374,7 @@ public class OfflineSorter {
/** Merge the most recent {@code maxTempFile} partitions into a new partition. */
void mergePartitions(Directory trackingDir, List<Future<Partition>> segments) throws IOException {
long start = System.currentTimeMillis();
List<Future<Partition>> segmentsToMerge;
if (segments.size() > maxTempFiles) {
segmentsToMerge = segments.subList(segments.size() - maxTempFiles, segments.size());
@ -428,6 +429,7 @@ public class OfflineSorter {
long start = System.currentTimeMillis();
SortableBytesRefArray buffer;
boolean exhausted = false;
int count;
if (valueLength != -1) {
// fixed length case
buffer = new FixedLengthBytesRefArray(valueLength);
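
The mergePartitions hunk above keeps at most the maxTempFiles most recent pending partitions via List.subList before merging. A tiny self-contained illustration of that selection logic (the values and class name are made up for the example):

import java.util.Arrays;
import java.util.List;

public class SubListSketch {
  public static void main(String[] args) {
    int maxTempFiles = 10;                                 // illustrative limit
    List<Integer> segments =
        Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); // 12 pending partitions
    List<Integer> toMerge =
        segments.size() > maxTempFiles
            ? segments.subList(segments.size() - maxTempFiles, segments.size())
            : segments;
    System.out.println(toMerge); // [2, 3, 4, 5, 6, 7, 8, 9, 10, 11] -- the most recent 10
  }
}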


@ -33,7 +33,7 @@ import org.apache.lucene.search.DocIdSetIterator;
*
* @lucene.internal
*/
public class SparseFixedBitSet extends BitSet {
public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class);


@ -71,6 +71,8 @@ public final class FST<T> implements Accountable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FST.class);
private static final long ARC_SHALLOW_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(Arc.class);
private static final int BIT_FINAL_ARC = 1 << 0;
static final int BIT_LAST_ARC = 1 << 1;


@ -59,6 +59,7 @@ import org.apache.lucene.util.SparseFixedBitSet;
public final class HnswGraph extends KnnGraphValues {
private final int maxConn;
private final VectorValues.SearchStrategy searchStrategy;
// Each entry lists the top maxConn neighbors of a node. The nodes correspond to vectors added to
// HnswBuilder, and the
@ -69,12 +70,13 @@ public final class HnswGraph extends KnnGraphValues {
private int upto;
private NeighborArray cur;
HnswGraph(int maxConn) {
HnswGraph(int maxConn, VectorValues.SearchStrategy searchStrategy) {
graph = new ArrayList<>();
// Typically with diversity criteria we see nodes not fully occupied; average fanout seems to be
// about 1/2 maxConn. There is some indexing time penalty for under-allocating, but saves RAM
graph.add(new NeighborArray(Math.max(32, maxConn / 4)));
this.maxConn = maxConn;
this.searchStrategy = searchStrategy;
}
/**


@ -99,7 +99,7 @@ public final class HnswGraphBuilder {
}
this.maxConn = maxConn;
this.beamWidth = beamWidth;
this.hnsw = new HnswGraph(maxConn);
this.hnsw = new HnswGraph(maxConn, searchStrategy);
bound = BoundsChecker.create(searchStrategy.reversed);
random = new Random(seed);
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));


@ -28,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil;
public class NeighborArray {
private int size;
private int upto;
float[] score;
int[] node;

Some files were not shown because too many files have changed in this diff.