mirror of https://github.com/apache/lucene.git
Revert "LUCENE-9856: fail precommit on unused local variables (#34)"
This reverts commit 20dba278bb
.
This commit is contained in:
parent
20dba278bb
commit
e6c4956cf6
|
@ -86,7 +86,7 @@ ext {
|
|||
scriptDepVersions = [
|
||||
"apache-rat": "0.11",
|
||||
"commons-codec": "1.13",
|
||||
"ecj": "3.25.0",
|
||||
"ecj": "3.19.0",
|
||||
"javacc": "7.0.4",
|
||||
"jflex": "1.7.0",
|
||||
"jgit": "5.9.0.202009080501-r",
|
||||
|
|
|
@ -95,12 +95,6 @@ def commonCleanups = { FileTree generatedFiles ->
|
|||
text = text.replace(
|
||||
"public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }",
|
||||
"// (setDebugStream omitted).")
|
||||
text = text.replace(
|
||||
"public class QueryParserTokenManager ",
|
||||
'@SuppressWarnings("unused") public class QueryParserTokenManager ')
|
||||
text = text.replace(
|
||||
"public class StandardSyntaxParserTokenManager ",
|
||||
'@SuppressWarnings("unused") public class StandardSyntaxParserTokenManager ')
|
||||
return text
|
||||
})
|
||||
}
|
||||
|
@ -129,9 +123,6 @@ configure(project(":lucene:queryparser")) {
|
|||
text = text.replace(
|
||||
"final private LookaheadSuccess jj_ls =",
|
||||
"static final private LookaheadSuccess jj_ls =")
|
||||
text = text.replace(
|
||||
"public class QueryParser ",
|
||||
'@SuppressWarnings("unused") public class QueryParser ')
|
||||
return text
|
||||
})
|
||||
}
|
||||
|
@ -154,9 +145,6 @@ configure(project(":lucene:queryparser")) {
|
|||
text = text.replace(
|
||||
"new java.util.ArrayList<int[]>",
|
||||
"new java.util.ArrayList<>")
|
||||
text = text.replace(
|
||||
"public class QueryParser ",
|
||||
'@SuppressWarnings("unused") public class QueryParser ')
|
||||
return text
|
||||
})
|
||||
}
|
||||
|
@ -233,9 +221,6 @@ configure(project(":lucene:queryparser")) {
|
|||
text = text.replace(
|
||||
"Collections.<QueryNode> singletonList",
|
||||
"Collections.singletonList")
|
||||
text = text.replace(
|
||||
"public class StandardSyntaxParser ",
|
||||
'@SuppressWarnings("unused") public class StandardSyntaxParser ')
|
||||
return text
|
||||
})
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@ eclipse.preferences.version=1
|
|||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=11
|
||||
org.eclipse.jdt.core.compiler.compliance=11
|
||||
org.eclipse.jdt.core.compiler.doc.comment.support=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=error
|
||||
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.problem.comparingIdentical=error
|
||||
|
@ -33,7 +32,6 @@ org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error
|
|||
org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=error
|
||||
org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=error
|
||||
org.eclipse.jdt.core.compiler.problem.unusedImport=error
|
||||
org.eclipse.jdt.core.compiler.problem.unusedLocal=error
|
||||
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=error
|
||||
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
|
||||
org.eclipse.jdt.core.compiler.source=11
|
||||
|
|
|
@ -1,24 +1,13 @@
|
|||
#Sun Sep 23 20:55:03 EDT 2012
|
||||
eclipse.preferences.version=1
|
||||
org.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled
|
||||
org.eclipse.jdt.core.compiler.annotation.missingNonNullByDefaultAnnotation=ignore
|
||||
org.eclipse.jdt.core.compiler.annotation.nonnull=org.eclipse.jdt.annotation.NonNull
|
||||
org.eclipse.jdt.core.compiler.annotation.nonnull.secondary=
|
||||
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault=org.eclipse.jdt.annotation.NonNullByDefault
|
||||
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault.secondary=
|
||||
org.eclipse.jdt.core.compiler.annotation.nonnullisdefault=disabled
|
||||
org.eclipse.jdt.core.compiler.annotation.nullable=org.eclipse.jdt.annotation.Nullable
|
||||
org.eclipse.jdt.core.compiler.annotation.nullable.secondary=
|
||||
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
|
||||
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
|
||||
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
|
||||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=11
|
||||
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
|
||||
org.eclipse.jdt.core.compiler.compliance=11
|
||||
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
|
||||
org.eclipse.jdt.core.compiler.debug.localVariable=generate
|
||||
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
|
||||
org.eclipse.jdt.core.compiler.doc.comment.support=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.APILeak=error
|
||||
org.eclipse.jdt.core.compiler.problem.annotatedTypeArgumentToUnannotated=error
|
||||
org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=error
|
||||
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
|
||||
|
@ -29,9 +18,7 @@ org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
|
|||
org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.discouragedReference=error
|
||||
org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.problem.explicitlyClosedAutoCloseable=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
|
||||
|
@ -50,10 +37,8 @@ org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=disabled
|
|||
org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=private
|
||||
org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=error
|
||||
org.eclipse.jdt.core.compiler.problem.missingDefaultCase=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=error
|
||||
org.eclipse.jdt.core.compiler.problem.missingEnumCaseDespiteDefault=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=error
|
||||
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public
|
||||
|
@ -69,63 +54,43 @@ org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignor
|
|||
org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error
|
||||
org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=error
|
||||
org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.nonnullParameterAnnotationDropped=error
|
||||
org.eclipse.jdt.core.compiler.problem.nonnullTypeVariableFromLegacyInvocation=error
|
||||
org.eclipse.jdt.core.compiler.problem.nullAnnotationInferenceConflict=error
|
||||
org.eclipse.jdt.core.compiler.problem.nullReference=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.nullSpecViolation=error
|
||||
org.eclipse.jdt.core.compiler.problem.nullUncheckedConversion=error
|
||||
org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=error
|
||||
org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.pessimisticNullAnalysisForFreeTypeVariables=error
|
||||
org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.potentiallyUnclosedCloseable=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.redundantNullAnnotation=error
|
||||
org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.redundantSpecificationOfTypeArguments=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=error
|
||||
org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.reportMethodCanBePotentiallyStatic=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.reportMethodCanBeStatic=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=error
|
||||
org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.suppressWarningsNotFullyAnalysed=error
|
||||
org.eclipse.jdt.core.compiler.problem.syntacticNullAnalysisForFields=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.terminalDeprecation=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.typeParameterHiding=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unavoidableGenericTypeProblems=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unclosedCloseable=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unlikelyCollectionMethodArgumentType=error
|
||||
org.eclipse.jdt.core.compiler.problem.unlikelyCollectionMethodArgumentTypeStrict=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.unlikelyEqualsArgumentType=error
|
||||
org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unstableAutoModuleName=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.unusedExceptionParameter=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedImport=error
|
||||
org.eclipse.jdt.core.compiler.problem.unusedLabel=error
|
||||
org.eclipse.jdt.core.compiler.problem.unusedLocal=error
|
||||
org.eclipse.jdt.core.compiler.problem.unusedLabel=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedLocal=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
|
||||
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=error
|
||||
org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.unusedWarningToken=ignore
|
||||
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=error
|
||||
org.eclipse.jdt.core.compiler.release=disabled
|
||||
org.eclipse.jdt.core.compiler.source=11
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
|||
/**
|
||||
* A CharFilter that wraps another Reader and attempts to strip out HTML constructs.
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
|
||||
public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
|||
/**
|
||||
* A CharFilter that wraps another Reader and attempts to strip out HTML constructs.
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
%%
|
||||
|
||||
%unicode 9.0
|
||||
|
|
|
@ -22,7 +22,7 @@ package org.apache.lucene.analysis.classic;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/** This class implements the classic lucene StandardTokenizer up until 3.0 */
|
||||
@SuppressWarnings({"unused", "fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
class ClassicTokenizerImpl {
|
||||
|
||||
/** This character denotes the end of file */
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
/**
|
||||
* This class implements the classic lucene StandardTokenizer up until 3.0
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
%%
|
||||
|
||||
%class ClassicTokenizerImpl
|
||||
|
|
|
@ -22,6 +22,7 @@ import javax.xml.parsers.SAXParserFactory;
|
|||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
import org.xml.sax.XMLReader;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
|
@ -340,4 +341,24 @@ public class PatternParser extends DefaultHandler {
|
|||
word = readToken(chars);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a string of the location. */
|
||||
private String getLocationString(SAXParseException ex) {
|
||||
StringBuilder str = new StringBuilder();
|
||||
|
||||
String systemId = ex.getSystemId();
|
||||
if (systemId != null) {
|
||||
int index = systemId.lastIndexOf('/');
|
||||
if (index != -1) {
|
||||
systemId = systemId.substring(index + 1);
|
||||
}
|
||||
str.append(systemId);
|
||||
}
|
||||
str.append(':');
|
||||
str.append(ex.getLineNumber());
|
||||
str.append(':');
|
||||
str.append(ex.getColumnNumber());
|
||||
|
||||
return str.toString();
|
||||
} // getLocationString(SAXParseException):String
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
* <li><EMOJI>: A sequence of Emoji characters</li>
|
||||
* </ul>
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
|
||||
public final class UAX29URLEmailTokenizerImpl {
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
* <li><EMOJI>: A sequence of Emoji characters</li>
|
||||
* </ul>
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
%%
|
||||
|
||||
%unicode 9.0
|
||||
|
|
|
@ -619,6 +619,10 @@ public class KStemmer {
|
|||
* CharArrayMap<String>(maxCacheSize,false); }
|
||||
***/
|
||||
|
||||
private char finalChar() {
|
||||
return word.charAt(k);
|
||||
}
|
||||
|
||||
private char penultChar() {
|
||||
return word.charAt(k - 1);
|
||||
}
|
||||
|
|
|
@ -59,6 +59,8 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
|
|||
|
||||
private static final String PARAM_DICTIONARY = "dictionary";
|
||||
private static final String PARAM_AFFIX = "affix";
|
||||
// NOTE: this one is currently unused?:
|
||||
private static final String PARAM_RECURSION_CAP = "recursionCap";
|
||||
private static final String PARAM_IGNORE_CASE = "ignoreCase";
|
||||
private static final String PARAM_LONGEST_ONLY = "longestOnly";
|
||||
|
||||
|
|
|
@ -348,7 +348,7 @@ public final class ConcatenateGraphFilter extends TokenStream {
|
|||
* @lucene.internal
|
||||
*/
|
||||
public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl
|
||||
implements BytesRefBuilderTermAttribute {
|
||||
implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute {
|
||||
private final BytesRefBuilder bytes = new BytesRefBuilder();
|
||||
private transient CharsRefBuilder charsRef;
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|||
public final class FixBrokenOffsetsFilter extends TokenFilter {
|
||||
|
||||
private int lastStartOffset;
|
||||
private int lastEndOffset;
|
||||
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
|
@ -57,6 +58,7 @@ public final class FixBrokenOffsetsFilter extends TokenFilter {
|
|||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
lastStartOffset = 0;
|
||||
lastEndOffset = 0;
|
||||
}
|
||||
|
||||
private void fixOffsets() {
|
||||
|
@ -70,5 +72,6 @@ public final class FixBrokenOffsetsFilter extends TokenFilter {
|
|||
}
|
||||
offsetAtt.setOffset(startOffset, endOffset);
|
||||
lastStartOffset = startOffset;
|
||||
lastEndOffset = endOffset;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.analysis.CharArraySet;
|
|||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Factory for a {@link ProtectedTermFilter}
|
||||
|
@ -81,7 +82,8 @@ import org.apache.lucene.util.ResourceLoader;
|
|||
* @since 7.4.0
|
||||
* @lucene.spi {@value #NAME}
|
||||
*/
|
||||
public class ProtectedTermFilterFactory extends ConditionalTokenFilterFactory {
|
||||
public class ProtectedTermFilterFactory extends ConditionalTokenFilterFactory
|
||||
implements ResourceLoaderAware {
|
||||
|
||||
public static final String NAME = "protectedTerm";
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* @see org.apache.lucene.analysis.payloads.PayloadHelper#encodeFloat(float, byte[], int)
|
||||
*/
|
||||
public class FloatEncoder extends AbstractEncoder {
|
||||
public class FloatEncoder extends AbstractEncoder implements PayloadEncoder {
|
||||
|
||||
@Override
|
||||
public BytesRef encode(char[] buffer, int offset, int length) {
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.nio.charset.StandardCharsets;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Does nothing other than convert the char array to a byte array using the specified encoding. */
|
||||
public class IdentityEncoder extends AbstractEncoder {
|
||||
public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder {
|
||||
protected Charset charset = StandardCharsets.UTF_8;
|
||||
|
||||
public IdentityEncoder() {}
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* <p>See {@link org.apache.lucene.analysis.payloads.PayloadHelper#encodeInt(int, byte[], int)}.
|
||||
*/
|
||||
public class IntegerEncoder extends AbstractEncoder {
|
||||
public class IntegerEncoder extends AbstractEncoder implements PayloadEncoder {
|
||||
|
||||
@Override
|
||||
public BytesRef encode(char[] buffer, int offset, int length) {
|
||||
|
|
|
@ -22,7 +22,7 @@ package org.apache.lucene.analysis.wikipedia;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/** JFlex-generated tokenizer that is aware of Wikipedia syntax. */
|
||||
@SuppressWarnings({"unused", "fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
class WikipediaTokenizerImpl {
|
||||
|
||||
/** This character denotes the end of file */
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
/**
|
||||
* JFlex-generated tokenizer that is aware of Wikipedia syntax.
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
%%
|
||||
|
||||
%class WikipediaTokenizerImpl
|
||||
|
|
|
@ -358,8 +358,9 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
|
|||
static void assertLegalOffsets(String in) throws Exception {
|
||||
int length = in.length();
|
||||
HTMLStripCharFilter reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
|
||||
int ch = 0;
|
||||
int off = 0;
|
||||
while (reader.read() != -1) {
|
||||
while ((ch = reader.read()) != -1) {
|
||||
int correction = reader.correctOffset(off);
|
||||
assertTrue(
|
||||
"invalid offset correction: " + off + "->" + correction + " for doc of length: " + length,
|
||||
|
|
|
@ -56,10 +56,11 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamFactoryTestCase {
|
|||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
tokenFilterFactory("CJKBigram", "bogusArg", "bogusValue");
|
||||
});
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
tokenFilterFactory("CJKBigram", "bogusArg", "bogusValue");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -673,4 +673,16 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
|
|||
null,
|
||||
false);
|
||||
}
|
||||
|
||||
private Analyzer getAnalyzer(final int flags) {
|
||||
return new Analyzer() {
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(
|
||||
tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -800,6 +800,16 @@ public class TestWordDelimiterGraphFilter extends BaseTokenStreamTestCase {
|
|||
return (flags & flag) != 0;
|
||||
}
|
||||
|
||||
private static boolean isEnglishPossessive(String text, int pos) {
|
||||
if (pos > 2) {
|
||||
if ((text.charAt(pos - 1) == 's' || text.charAt(pos - 1) == 'S')
|
||||
&& (pos == text.length() || text.charAt(pos) != '-')) {
|
||||
text = text.substring(0, text.length() - 2);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static class WordPart {
|
||||
final String part;
|
||||
final int startOffset;
|
||||
|
|
|
@ -44,7 +44,7 @@ public class TestNGramTokenizer extends BaseTokenStreamTestCase {
|
|||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new NGramTokenizer(2, 1);
|
||||
NGramTokenizer tok = new NGramTokenizer(2, 1);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNoTokens() throws Exception {
|
||||
Tokenizer t = new SimplePatternSplitTokenizer(".*");
|
||||
t.getAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
|
||||
String s;
|
||||
while (true) {
|
||||
s = TestUtil.randomUnicodeString(random());
|
||||
|
@ -95,7 +95,7 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testSplitSingleCharWhitespace() throws Exception {
|
||||
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]");
|
||||
t.getAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
|
||||
t.setReader(new StringReader("a \tb c"));
|
||||
assertTokenStreamContents(
|
||||
t, new String[] {"a", "b", "c"}, new int[] {0, 3, 7}, new int[] {1, 4, 8});
|
||||
|
@ -103,7 +103,7 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testSplitMultiCharWhitespace() throws Exception {
|
||||
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
|
||||
t.getAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
|
||||
t.setReader(new StringReader("a \tb c"));
|
||||
assertTokenStreamContents(
|
||||
t, new String[] {"a", "b", "c"}, new int[] {0, 3, 7}, new int[] {1, 4, 8});
|
||||
|
@ -111,21 +111,21 @@ public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testLeadingNonToken() throws Exception {
|
||||
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
|
||||
t.getAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
|
||||
t.setReader(new StringReader(" a c"));
|
||||
assertTokenStreamContents(t, new String[] {"a", "c"}, new int[] {4, 6}, new int[] {5, 7});
|
||||
}
|
||||
|
||||
public void testTrailingNonToken() throws Exception {
|
||||
Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
|
||||
t.getAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
|
||||
t.setReader(new StringReader("a c "));
|
||||
assertTokenStreamContents(t, new String[] {"a", "c"}, new int[] {0, 2}, new int[] {1, 3});
|
||||
}
|
||||
|
||||
public void testEmptyStringPatternOneMatch() throws Exception {
|
||||
Tokenizer t = new SimplePatternSplitTokenizer("a*");
|
||||
t.getAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
|
||||
t.setReader(new StringReader("bbab"));
|
||||
assertTokenStreamContents(t, new String[] {"bb", "b"}, new int[] {0, 3}, new int[] {2, 4});
|
||||
}
|
||||
|
|
|
@ -62,6 +62,14 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
|
|||
b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig);
|
||||
}
|
||||
|
||||
private void assertEquals(CharTermAttribute term, String expected) {
|
||||
assertEquals(expected.length(), term.length());
|
||||
final char[] buffer = term.buffer();
|
||||
for (int chIDX = 0; chIDX < expected.length(); chIDX++) {
|
||||
assertEquals(expected.charAt(chIDX), buffer[chIDX]);
|
||||
}
|
||||
}
|
||||
|
||||
// For the output string: separate positions with a space,
|
||||
// and separate multiple tokens at each position with a
|
||||
// /. If a token should have end offset != the input
|
||||
|
|
|
@ -156,7 +156,8 @@ public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCas
|
|||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
|
||||
Tokenizer tf =
|
||||
tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
|
||||
});
|
||||
assertTrue(
|
||||
expected
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.util.AttributeReflector;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute {
|
||||
public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable {
|
||||
private int code = UScript.COMMON;
|
||||
|
||||
/** Initializes this attribute with <code>UScript.COMMON</code> */
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.util.AttributeImpl;
|
|||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
/** Attribute for {@link Token#getBaseForm()}. */
|
||||
public class BaseFormAttributeImpl extends AttributeImpl implements BaseFormAttribute {
|
||||
public class BaseFormAttributeImpl extends AttributeImpl implements BaseFormAttribute, Cloneable {
|
||||
private Token token;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,7 +22,8 @@ import org.apache.lucene.util.AttributeImpl;
|
|||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
/** Attribute for Kuromoji inflection data. */
|
||||
public class InflectionAttributeImpl extends AttributeImpl implements InflectionAttribute {
|
||||
public class InflectionAttributeImpl extends AttributeImpl
|
||||
implements InflectionAttribute, Cloneable {
|
||||
private Token token;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,7 +22,8 @@ import org.apache.lucene.util.AttributeImpl;
|
|||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
/** Attribute for {@link Token#getPartOfSpeech()}. */
|
||||
public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute {
|
||||
public class PartOfSpeechAttributeImpl extends AttributeImpl
|
||||
implements PartOfSpeechAttribute, Cloneable {
|
||||
private Token token;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.util.AttributeImpl;
|
|||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
/** Attribute for Kuromoji reading data */
|
||||
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute {
|
||||
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute, Cloneable {
|
||||
private Token token;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.util.AttributeReflector;
|
|||
* @see MorphosyntacticTagsAttribute
|
||||
*/
|
||||
public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
|
||||
implements MorphosyntacticTagsAttribute {
|
||||
implements MorphosyntacticTagsAttribute, Cloneable {
|
||||
|
||||
/** Initializes this attribute with no tags */
|
||||
public MorphosyntacticTagsAttributeImpl() {}
|
||||
|
|
|
@ -28,7 +28,8 @@ import org.apache.lucene.util.AttributeReflector;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute {
|
||||
public class PartOfSpeechAttributeImpl extends AttributeImpl
|
||||
implements PartOfSpeechAttribute, Cloneable {
|
||||
private Token token;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.util.AttributeReflector;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute {
|
||||
public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute, Cloneable {
|
||||
private Token token;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,6 +43,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
|
|||
private int termNum = 0;
|
||||
private int sentenceStart = 0;
|
||||
|
||||
private NLPSentenceDetectorOp sentenceOp = null;
|
||||
private NLPTokenizerOp tokenizerOp = null;
|
||||
|
||||
public OpenNLPTokenizer(
|
||||
|
@ -53,6 +54,7 @@ public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
|
|||
throw new IllegalArgumentException(
|
||||
"OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
|
||||
}
|
||||
this.sentenceOp = sentenceOp;
|
||||
this.tokenizerOp = tokenizerOp;
|
||||
}
|
||||
|
||||
|
|
|
@ -82,9 +82,10 @@ public class TestOpenNLPTokenizerFactory extends BaseTokenStreamTestCase {
|
|||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
|
||||
.withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
|
||||
.build();
|
||||
CustomAnalyzer analyzer =
|
||||
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
|
||||
.withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
|
||||
.build();
|
||||
});
|
||||
assertTrue(
|
||||
expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
|
||||
|
@ -96,9 +97,10 @@ public class TestOpenNLPTokenizerFactory extends BaseTokenStreamTestCase {
|
|||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
|
||||
.withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
|
||||
.build();
|
||||
CustomAnalyzer analyzer =
|
||||
CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
|
||||
.withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
|
||||
.build();
|
||||
});
|
||||
assertTrue(
|
||||
expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));
|
||||
|
|
|
@ -27,6 +27,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||
/** Filter for DoubleMetaphone (supporting secondary codes) */
|
||||
public final class DoubleMetaphoneFilter extends TokenFilter {
|
||||
|
||||
private static final String TOKEN_TYPE = "DoubleMetaphone";
|
||||
|
||||
private final LinkedList<State> remainingTokens = new LinkedList<>();
|
||||
private final DoubleMetaphone encoder = new DoubleMetaphone();
|
||||
private final boolean inject;
|
||||
|
|
|
@ -53,6 +53,8 @@ class BigramDictionary extends AbstractDictionary {
|
|||
|
||||
private int max = 0;
|
||||
|
||||
private int repeat = 0;
|
||||
|
||||
// static Logger log = Logger.getLogger(BigramDictionary.class);
|
||||
|
||||
public static synchronized BigramDictionary getInstance() {
|
||||
|
@ -141,7 +143,7 @@ class BigramDictionary extends AbstractDictionary {
|
|||
*/
|
||||
public void loadFromFile(String dctFilePath) throws IOException {
|
||||
|
||||
int i, cnt, length;
|
||||
int i, cnt, length, total = 0;
|
||||
// The file only counted 6763 Chinese characters plus 5 reserved slots 3756~3760.
|
||||
// The 3756th is used (as a header) to store information.
|
||||
int[] buffer = new int[3];
|
||||
|
@ -161,6 +163,7 @@ class BigramDictionary extends AbstractDictionary {
|
|||
if (cnt <= 0) {
|
||||
continue;
|
||||
}
|
||||
total += cnt;
|
||||
int j = 0;
|
||||
while (j < cnt) {
|
||||
dctFile.read(intBuffer);
|
||||
|
@ -229,11 +232,13 @@ class BigramDictionary extends AbstractDictionary {
|
|||
if (hash2 < 0) hash2 = PRIME_BIGRAM_LENGTH + hash2;
|
||||
int index = hash1;
|
||||
int i = 1;
|
||||
repeat++;
|
||||
while (bigramHashTable[index] != 0
|
||||
&& bigramHashTable[index] != hashId
|
||||
&& i < PRIME_BIGRAM_LENGTH) {
|
||||
index = (hash1 + i * hash2) % PRIME_BIGRAM_LENGTH;
|
||||
i++;
|
||||
repeat++;
|
||||
if (i > max) max = i;
|
||||
}
|
||||
// System.out.println(i - 1);
|
||||
|
|
|
@ -228,6 +228,7 @@ public class Trie {
|
|||
int cmd = -1;
|
||||
StrEnum e = new StrEnum(key, forward);
|
||||
Character ch = null;
|
||||
Character aux = null;
|
||||
|
||||
for (int i = 0; i < key.length(); ) {
|
||||
ch = e.next();
|
||||
|
@ -242,7 +243,7 @@ public class Trie {
|
|||
|
||||
for (int skip = c.skip; skip > 0; skip--) {
|
||||
if (i < key.length()) {
|
||||
e.next();
|
||||
aux = e.next();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene50.compressing;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -57,7 +58,8 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene50CompressingTermVectorsReader extends TermVectorsReader {
|
||||
public final class Lucene50CompressingTermVectorsReader extends TermVectorsReader
|
||||
implements Closeable {
|
||||
|
||||
// hard limit on the maximum number of documents per chunk
|
||||
static final int MAX_DOCUMENTS_PER_CHUNK = 128;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene60;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -31,7 +32,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.bkd.BKDReader;
|
||||
|
||||
/** Reads point values previously written with Lucene60PointsWriter */
|
||||
public class Lucene60PointsReader extends PointsReader {
|
||||
public class Lucene60PointsReader extends PointsReader implements Closeable {
|
||||
final IndexInput dataIn;
|
||||
final SegmentReadState readState;
|
||||
final Map<Integer, BKDReader> readers = new HashMap<>();
|
||||
|
|
|
@ -20,6 +20,7 @@ import static org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat
|
|||
import static org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SHIFT;
|
||||
import static org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE;
|
||||
|
||||
import java.io.Closeable; // javadocs
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
@ -53,7 +54,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
|
|||
import org.apache.lucene.util.packed.DirectWriter;
|
||||
|
||||
/** writer for {@link Lucene70DocValuesFormat} */
|
||||
final class Lucene70DocValuesConsumer extends DocValuesConsumer {
|
||||
final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||
|
||||
IndexOutput data, meta;
|
||||
final int maxDoc;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene70;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -48,7 +49,7 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
|
|||
import org.apache.lucene.util.packed.DirectReader;
|
||||
|
||||
/** reader for {@link Lucene70DocValuesFormat} */
|
||||
final class Lucene70DocValuesProducer extends DocValuesProducer {
|
||||
final class Lucene70DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||
private final Map<String, NumericEntry> numerics = new HashMap<>();
|
||||
private final Map<String, BinaryEntry> binaries = new HashMap<>();
|
||||
private final Map<String, SortedEntry> sorted = new HashMap<>();
|
||||
|
|
|
@ -327,6 +327,20 @@ final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
|
|||
};
|
||||
}
|
||||
|
||||
private IndexInput getDisiInput2(FieldInfo field, NormsEntry entry) throws IOException {
|
||||
IndexInput slice = null;
|
||||
if (merging) {
|
||||
slice = disiInputs.get(field.number);
|
||||
}
|
||||
if (slice == null) {
|
||||
slice = data.slice("docs", entry.docsWithFieldOffset, entry.docsWithFieldLength);
|
||||
if (merging) {
|
||||
disiInputs.put(field.number, slice);
|
||||
}
|
||||
}
|
||||
return slice;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNorms(FieldInfo field) throws IOException {
|
||||
final NormsEntry entry = norms.get(field.number);
|
||||
|
|
|
@ -106,14 +106,17 @@ public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
|
|||
Throwable priorE = null;
|
||||
SegmentInfo si = null;
|
||||
try {
|
||||
CodecUtil.checkIndexHeader(
|
||||
input,
|
||||
Lucene70SegmentInfoFormat.CODEC_NAME,
|
||||
Lucene70SegmentInfoFormat.VERSION_START,
|
||||
Lucene70SegmentInfoFormat.VERSION_CURRENT,
|
||||
segmentID,
|
||||
"");
|
||||
int format =
|
||||
CodecUtil.checkIndexHeader(
|
||||
input,
|
||||
Lucene70SegmentInfoFormat.CODEC_NAME,
|
||||
Lucene70SegmentInfoFormat.VERSION_START,
|
||||
Lucene70SegmentInfoFormat.VERSION_CURRENT,
|
||||
segmentID,
|
||||
"");
|
||||
|
||||
si = parseSegmentInfo(dir, input, segment, segmentID);
|
||||
|
||||
} catch (Throwable exception) {
|
||||
priorE = exception;
|
||||
} finally {
|
||||
|
|
|
@ -62,7 +62,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
|
|||
import org.apache.lucene.util.packed.DirectWriter;
|
||||
|
||||
/** writer for {@link Lucene80DocValuesFormat} */
|
||||
final class Lucene80DocValuesConsumer extends DocValuesConsumer {
|
||||
final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||
|
||||
final Lucene80DocValuesFormat.Mode mode;
|
||||
IndexOutput data, meta;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene80;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -51,7 +52,7 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
|
|||
import org.apache.lucene.util.packed.DirectReader;
|
||||
|
||||
/** reader for {@link Lucene80DocValuesFormat} */
|
||||
final class Lucene80DocValuesProducer extends DocValuesProducer {
|
||||
final class Lucene80DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||
private final Map<String, NumericEntry> numerics = new HashMap<>();
|
||||
private final Map<String, BinaryEntry> binaries = new HashMap<>();
|
||||
private final Map<String, SortedEntry> sorted = new HashMap<>();
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene60;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
@ -40,7 +41,7 @@ import org.apache.lucene.util.bkd.BKDReader;
|
|||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/** Writes dimensional values */
|
||||
public class Lucene60PointsWriter extends PointsWriter {
|
||||
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
||||
|
||||
/** Output used to write the BKD tree data file */
|
||||
protected final IndexOutput dataOut;
|
||||
|
|
|
@ -97,6 +97,7 @@ public class TestIndexedDISI extends LuceneTestCase {
|
|||
private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException {
|
||||
final int cardinality = set.cardinality();
|
||||
final byte denseRankPower = 9; // Not tested here so fixed to isolate factors
|
||||
long length;
|
||||
int jumpTableentryCount;
|
||||
try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
|
||||
jumpTableentryCount =
|
||||
|
@ -433,7 +434,9 @@ public class TestIndexedDISI extends LuceneTestCase {
|
|||
length = out.getFilePointer();
|
||||
}
|
||||
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
|
||||
new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPowerRead, set.cardinality());
|
||||
IndexedDISI disi =
|
||||
new IndexedDISI(
|
||||
in, 0L, length, jumpTableEntryCount, denseRankPowerRead, set.cardinality());
|
||||
}
|
||||
// This tests the legality of the denseRankPower only, so we don't do anything with the disi
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
|
|||
* A QueryMaker that uses common and uncommon actual Wikipedia queries for searching the English
|
||||
* Wikipedia collection. 90 queries total.
|
||||
*/
|
||||
public class EnwikiQueryMaker extends AbstractQueryMaker {
|
||||
public class EnwikiQueryMaker extends AbstractQueryMaker implements QueryMaker {
|
||||
|
||||
// common and a few uncommon queries from wikipedia search logs
|
||||
private static String[] STANDARD_QUERIES = {
|
||||
|
|
|
@ -43,7 +43,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
* <pre>file.query.maker.file=c:/myqueries.txt
|
||||
* file.query.maker.default.field=body</pre>
|
||||
*/
|
||||
public class FileBasedQueryMaker extends AbstractQueryMaker {
|
||||
public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMaker {
|
||||
|
||||
@Override
|
||||
protected Query[] prepareQueries() throws Exception {
|
||||
|
|
|
@ -34,7 +34,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
|
|||
* A QueryMaker that makes queries devised manually (by Grant Ingersoll) for searching in the
|
||||
* Reuters collection.
|
||||
*/
|
||||
public class ReutersQueryMaker extends AbstractQueryMaker {
|
||||
public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker {
|
||||
|
||||
private static String[] STANDARD_QUERIES = {
|
||||
// Start with some short queries
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
* A QueryMaker that makes queries for a collection created using {@link
|
||||
* org.apache.lucene.benchmark.byTask.feeds.SingleDocSource}.
|
||||
*/
|
||||
public class SimpleQueryMaker extends AbstractQueryMaker {
|
||||
public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
|
||||
|
||||
/**
|
||||
* Prepare the queries for this test. Extending classes can override this method for preparing
|
||||
|
|
|
@ -97,6 +97,12 @@ public class ReadTokensTask extends PerfTask {
|
|||
int left;
|
||||
String s;
|
||||
|
||||
void init(String s) {
|
||||
this.s = s;
|
||||
left = s.length();
|
||||
this.upto = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] c) {
|
||||
return read(c, 0, c.length);
|
||||
|
|
|
@ -178,7 +178,6 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
|
|||
void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private volatile int preventOptimizeAway = 0;
|
||||
|
||||
private class StandardHLImpl implements HLImpl {
|
||||
|
|
|
@ -437,7 +437,7 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
return new NewsPost(body.toString(), subject, groupName);
|
||||
return new NewsPost(body.toString(), subject, groupName, number);
|
||||
} catch (Throwable e) {
|
||||
return null;
|
||||
}
|
||||
|
@ -447,11 +447,13 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
|||
private final String body;
|
||||
private final String subject;
|
||||
private final String group;
|
||||
private final String number;
|
||||
|
||||
private NewsPost(String body, String subject, String group) {
|
||||
private NewsPost(String body, String subject, String group, String number) {
|
||||
this.body = body;
|
||||
this.subject = subject;
|
||||
this.group = group;
|
||||
this.number = number;
|
||||
}
|
||||
|
||||
public String getBody() {
|
||||
|
@ -465,5 +467,9 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
|||
public String getGroup() {
|
||||
return group;
|
||||
}
|
||||
|
||||
public String getNumber() {
|
||||
return number;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.codecs.blockterms;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -51,7 +52,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class BlockTermsWriter extends FieldsConsumer {
|
||||
public class BlockTermsWriter extends FieldsConsumer implements Closeable {
|
||||
|
||||
static final String CODEC_NAME = "BlockTermsWriter";
|
||||
|
||||
|
|
|
@ -1920,6 +1920,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
|
||||
public HighFreqDocsEnum() {}
|
||||
|
||||
public int[] getDocIDs() {
|
||||
return docIDs;
|
||||
}
|
||||
|
||||
public int[] getFreqs() {
|
||||
return freqs;
|
||||
}
|
||||
|
||||
public PostingsEnum reset(int[] docIDs, int[] freqs) {
|
||||
this.docIDs = docIDs;
|
||||
this.freqs = freqs;
|
||||
|
@ -2098,6 +2106,18 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
posJump = hasOffsets ? 3 : 1;
|
||||
}
|
||||
|
||||
public int[] getDocIDs() {
|
||||
return docIDs;
|
||||
}
|
||||
|
||||
public int[][] getPositions() {
|
||||
return positions;
|
||||
}
|
||||
|
||||
public int getPosJump() {
|
||||
return posJump;
|
||||
}
|
||||
|
||||
public PostingsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
|
||||
this.docIDs = docIDs;
|
||||
this.freqs = freqs;
|
||||
|
|
|
@ -559,7 +559,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (term == null) {
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
return term.get().equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
|
||||
return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_V
|
|||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
|
@ -194,6 +195,59 @@ final class SimpleTextBKDReader extends PointValues implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
private void visitCompressedDocValues(
|
||||
int[] commonPrefixLengths,
|
||||
byte[] scratchPackedValue,
|
||||
IndexInput in,
|
||||
int[] docIDs,
|
||||
int count,
|
||||
IntersectVisitor visitor,
|
||||
int compressedDim)
|
||||
throws IOException {
|
||||
// the byte at `compressedByteOffset` is compressed using run-length compression,
|
||||
// other suffix bytes are stored verbatim
|
||||
final int compressedByteOffset =
|
||||
compressedDim * bytesPerDim + commonPrefixLengths[compressedDim];
|
||||
commonPrefixLengths[compressedDim]++;
|
||||
int i;
|
||||
for (i = 0; i < count; ) {
|
||||
scratchPackedValue[compressedByteOffset] = in.readByte();
|
||||
final int runLen = Byte.toUnsignedInt(in.readByte());
|
||||
for (int j = 0; j < runLen; ++j) {
|
||||
for (int dim = 0; dim < numDims; dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
in.readBytes(scratchPackedValue, dim * bytesPerDim + prefix, bytesPerDim - prefix);
|
||||
}
|
||||
visitor.visit(docIDs[i + j], scratchPackedValue);
|
||||
}
|
||||
i += runLen;
|
||||
}
|
||||
if (i != count) {
|
||||
throw new CorruptIndexException(
|
||||
"Sub blocks do not add up to the expected count: " + count + " != " + i, in);
|
||||
}
|
||||
}
|
||||
|
||||
private int readCompressedDim(IndexInput in) throws IOException {
|
||||
int compressedDim = in.readByte();
|
||||
if (compressedDim < -1 || compressedDim >= numIndexDims) {
|
||||
throw new CorruptIndexException("Got compressedDim=" + compressedDim, in);
|
||||
}
|
||||
return compressedDim;
|
||||
}
|
||||
|
||||
private void readCommonPrefixes(
|
||||
int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
|
||||
for (int dim = 0; dim < numDims; dim++) {
|
||||
int prefix = in.readVInt();
|
||||
commonPrefixLengths[dim] = prefix;
|
||||
if (prefix > 0) {
|
||||
in.readBytes(scratchPackedValue, dim * bytesPerDim, prefix);
|
||||
}
|
||||
// System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
|
||||
}
|
||||
}
|
||||
|
||||
private void intersect(
|
||||
IntersectState state, int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked)
|
||||
throws IOException {
|
||||
|
|
|
@ -816,6 +816,40 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
private void writeLeafBlockPackedValuesRange(
|
||||
IndexOutput out,
|
||||
int[] commonPrefixLengths,
|
||||
int start,
|
||||
int end,
|
||||
IntFunction<BytesRef> packedValues)
|
||||
throws IOException {
|
||||
for (int i = start; i < end; ++i) {
|
||||
BytesRef ref = packedValues.apply(i);
|
||||
assert ref.length == config.packedBytesLength;
|
||||
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
out.writeBytes(
|
||||
ref.bytes, ref.offset + dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int runLen(
|
||||
IntFunction<BytesRef> packedValues, int start, int end, int byteOffset) {
|
||||
BytesRef first = packedValues.apply(start);
|
||||
byte b = first.bytes[first.offset + byteOffset];
|
||||
for (int i = start + 1; i < end; ++i) {
|
||||
BytesRef ref = packedValues.apply(i);
|
||||
byte b2 = ref.bytes[ref.offset + byteOffset];
|
||||
assert Byte.toUnsignedInt(b2) >= Byte.toUnsignedInt(b);
|
||||
if (b != b2) {
|
||||
return i - start;
|
||||
}
|
||||
}
|
||||
return end - start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (tempInput != null) {
|
||||
|
|
|
@ -157,6 +157,14 @@ class SimpleTextPointsWriter extends PointsWriter {
|
|||
SimpleTextUtil.write(out, s, scratch);
|
||||
}
|
||||
|
||||
private void writeInt(IndexOutput out, int x) throws IOException {
|
||||
SimpleTextUtil.write(out, Integer.toString(x), scratch);
|
||||
}
|
||||
|
||||
private void writeLong(IndexOutput out, long x) throws IOException {
|
||||
SimpleTextUtil.write(out, Long.toString(x), scratch);
|
||||
}
|
||||
|
||||
private void write(IndexOutput out, BytesRef b) throws IOException {
|
||||
SimpleTextUtil.write(out, b);
|
||||
}
|
||||
|
|
|
@ -74,8 +74,8 @@ public class SimpleTextVectorWriter extends VectorWriter {
|
|||
public void writeField(FieldInfo fieldInfo, VectorValues vectors) throws IOException {
|
||||
long vectorDataOffset = vectorData.getFilePointer();
|
||||
List<Integer> docIds = new ArrayList<>();
|
||||
int docV;
|
||||
for (docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc()) {
|
||||
int docV, ord = 0;
|
||||
for (docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc(), ord++) {
|
||||
writeVectorValue(vectors);
|
||||
docIds.add(docV);
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
* <li><EMOJI>: A sequence of Emoji characters</li>
|
||||
* </ul>
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
|
||||
public final class StandardTokenizerImpl {
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
* <li><EMOJI>: A sequence of Emoji characters</li>
|
||||
* </ul>
|
||||
*/
|
||||
@SuppressWarnings({"unused","fallthrough"})
|
||||
@SuppressWarnings("fallthrough")
|
||||
%%
|
||||
|
||||
%unicode 9.0
|
||||
|
|
|
@ -26,7 +26,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class BytesTermAttributeImpl extends AttributeImpl implements BytesTermAttribute {
|
||||
public class BytesTermAttributeImpl extends AttributeImpl
|
||||
implements BytesTermAttribute, TermToBytesRefAttribute {
|
||||
private BytesRef bytes;
|
||||
|
||||
/** Initialize this attribute with no bytes. */
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
|
||||
/** Default implementation of {@link CharTermAttribute}. */
|
||||
public class CharTermAttributeImpl extends AttributeImpl
|
||||
implements CharTermAttribute, TermToBytesRefAttribute {
|
||||
implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
|
||||
private static int MIN_BUFFER_SIZE = 10;
|
||||
|
||||
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, Character.BYTES)];
|
||||
|
|
|
@@ -20,7 +20,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

/** Default implementation of {@link FlagsAttribute}. */
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute {
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
private int flags = 0;

/** Initialize this attribute with no bits set */

@@ -20,7 +20,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

/** Default implementation of {@link OffsetAttribute}. */
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute {
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
private int startOffset;
private int endOffset;

@@ -21,7 +21,7 @@ import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;

/** Default implementation of {@link PayloadAttribute}. */
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute {
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
private BytesRef payload;

/** Initialize this attribute with no payload. */

@@ -21,7 +21,7 @@ import org.apache.lucene.util.AttributeReflector;

/** Default implementation of {@link PositionIncrementAttribute}. */
public class PositionIncrementAttributeImpl extends AttributeImpl
implements PositionIncrementAttribute {
implements PositionIncrementAttribute, Cloneable {
private int positionIncrement = 1;

/** Initialize this attribute with position increment of 1 */

@@ -20,7 +20,8 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

/** Default implementation of {@link PositionLengthAttribute}. */
public class PositionLengthAttributeImpl extends AttributeImpl implements PositionLengthAttribute {
public class PositionLengthAttributeImpl extends AttributeImpl
implements PositionLengthAttribute, Cloneable {
private int positionLength = 1;

/** Initializes this attribute with position length of 1. */

@@ -20,7 +20,8 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

/** Default implementation of {@link TermFrequencyAttribute}. */
public class TermFrequencyAttributeImpl extends AttributeImpl implements TermFrequencyAttribute {
public class TermFrequencyAttributeImpl extends AttributeImpl
implements TermFrequencyAttribute, Cloneable {
private int termFrequency = 1;

/** Initialize this attribute with term frequency of 1 */

@@ -20,7 +20,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

/** Default implementation of {@link TypeAttribute}. */
public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute {
public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable {
private String type;

/** Initialize this attribute with {@link TypeAttribute#DEFAULT_TYPE} */
@@ -118,6 +118,7 @@ public abstract class VectorWriter implements Closeable {
/** Tracks state of one sub-reader that we are merging */
private static class VectorValuesSub extends DocIDMerger.Sub {

final MergeState.DocMap docMap;
final VectorValues values;
final int segmentIndex;
int count;
@@ -126,6 +127,7 @@ public abstract class VectorWriter implements Closeable {
super(docMap);
this.values = values;
this.segmentIndex = segmentIndex;
this.docMap = docMap;
assert values.docID() == -1;
}

@@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.lucene86;

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@@ -32,7 +33,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;

/** Reads point values previously written with {@link Lucene86PointsWriter} */
public class Lucene86PointsReader extends PointsReader {
public class Lucene86PointsReader extends PointsReader implements Closeable {
final IndexInput indexIn, dataIn;
final SegmentReadState readState;
final Map<Integer, BKDReader> readers = new HashMap<>();

@@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.lucene86;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -38,7 +39,7 @@ import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;

/** Writes dimensional values */
public class Lucene86PointsWriter extends PointsWriter {
public class Lucene86PointsWriter extends PointsWriter implements Closeable {

/** Outputs used to write the BKD tree data files. */
protected final IndexOutput metaOut, indexOut, dataOut;

@@ -103,9 +103,12 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
Throwable priorE = null;
SegmentInfo si = null;
try {
CodecUtil.checkIndexHeader(
input, CODEC_NAME, VERSION_START, VERSION_CURRENT, segmentID, "");
int format =
CodecUtil.checkIndexHeader(
input, CODEC_NAME, VERSION_START, VERSION_CURRENT, segmentID, "");

si = parseSegmentInfo(dir, input, segment, segmentID);

} catch (Throwable exception) {
priorE = exception;
} finally {
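The hunk above breaks off inside the finally block; in Lucene codec readers this shape records any failure in priorE so that CodecUtil.checkFooter can validate the checksum and attach the original exception. A minimal sketch of the idiom, assuming a ChecksumIndexInput named input (the FooterCheckedRead class is illustrative):

    import java.io.IOException;
    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;

    final class FooterCheckedRead {
      static void read(ChecksumIndexInput input) throws IOException {
        Throwable priorE = null;
        try {
          // decode the file body here; failures are remembered instead of rethrown immediately
        } catch (Throwable exception) {
          priorE = exception;
        } finally {
          // verifies the checksum footer and rethrows priorE if anything went wrong above
          CodecUtil.checkFooter(input, priorE);
        }
      }
    }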
@@ -62,7 +62,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.DirectWriter;

/** writer for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesConsumer extends DocValuesConsumer {
final class Lucene90DocValuesConsumer extends DocValuesConsumer implements Closeable {

final Lucene90DocValuesFormat.Mode mode;
IndexOutput data, meta;

@@ -16,6 +16,7 @@
*/
package org.apache.lucene.codecs.lucene90;

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@@ -51,7 +52,7 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.packed.DirectReader;

/** reader for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesProducer extends DocValuesProducer {
final class Lucene90DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();

@@ -125,13 +125,14 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
Throwable priorE = null;
FieldInfo infos[] = null;
try {
CodecUtil.checkIndexHeader(
input,
Lucene90FieldInfosFormat.CODEC_NAME,
Lucene90FieldInfosFormat.FORMAT_START,
Lucene90FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(),
segmentSuffix);
int version =
CodecUtil.checkIndexHeader(
input,
Lucene90FieldInfosFormat.CODEC_NAME,
Lucene90FieldInfosFormat.FORMAT_START,
Lucene90FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(),
segmentSuffix);

final int size = input.readVInt(); // read in the size
infos = new FieldInfo[size];

@@ -21,6 +21,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@@ -323,6 +324,7 @@ public final class Lucene90VectorReader extends VectorReader {

final BytesRef binaryValue;
final ByteBuffer byteBuffer;
final FloatBuffer floatBuffer;
final int byteSize;
final float[] value;
@@ -334,6 +336,7 @@ public final class Lucene90VectorReader extends VectorReader {
this.dataIn = dataIn;
byteSize = Float.BYTES * fieldEntry.dimension;
byteBuffer = ByteBuffer.allocate(byteSize);
floatBuffer = byteBuffer.asFloatBuffer();
value = new float[fieldEntry.dimension];
binaryValue = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
}

@@ -97,12 +97,14 @@ final class IntersectTermsEnumFrame {
int suffix;

private final IntersectTermsEnum ite;
private final int version;

public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
this.ite = ite;
this.ord = ord;
this.termState = ite.fr.parent.postingsReader.newTermState();
this.termState.totalTermFreq = -1;
this.version = ite.fr.parent.version;
suffixLengthBytes = new byte[32];
suffixLengthsReader = new ByteArrayDataInput();
}

@@ -271,6 +271,13 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
return bytes;
}

/** Seek {@code input} to the directory offset. */
private static void seekDir(IndexInput input) throws IOException {
input.seek(input.length() - CodecUtil.footerLength() - 8);
long offset = input.readLong();
input.seek(offset);
}

// for debugging
// private static String toHex(int v) {
// return "0x" + Integer.toHexString(v);

@@ -94,12 +94,14 @@ final class SegmentTermsEnumFrame {
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();

private final SegmentTermsEnum ste;
private final int version;

public SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException {
this.ste = ste;
this.ord = ord;
this.state = ste.fr.parent.postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.version = ste.fr.parent.version;
suffixLengthBytes = new byte[32];
suffixLengthsReader = new ByteArrayDataInput();
}

@@ -29,6 +29,7 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
@@ -70,7 +71,8 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader {
public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
implements Closeable {

private final FieldInfos fieldInfos;
final FieldsIndex indexReader;
@@ -64,6 +64,8 @@ import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CompiledAutomaton;

/**
* Basic tool and API to check the health of an index and write a new segments file that removes
@@ -1092,6 +1094,171 @@ public final class CheckIndex implements Closeable {
return status;
}

/**
* Visits all terms in the range minTerm (inclusive) to maxTerm (exclusive), marking all doc IDs
* encountered into allDocsSeen, and returning the total number of terms visited.
*/
private static long getDocsFromTermRange(
String field,
int maxDoc,
TermsEnum termsEnum,
FixedBitSet docsSeen,
BytesRef minTerm,
BytesRef maxTerm,
boolean isIntersect)
throws IOException {
docsSeen.clear(0, docsSeen.length());

long termCount = 0;
PostingsEnum postingsEnum = null;
BytesRefBuilder lastTerm = null;
while (true) {
BytesRef term;

// Kinda messy: for intersect, we must first next(), but for "normal", we are already on our
// first term:
if (isIntersect || termCount != 0) {
term = termsEnum.next();
} else {
term = termsEnum.term();
}

if (term == null) {
if (isIntersect == false) {
throw new RuntimeException("didn't see max term field=" + field + " term=" + maxTerm);
}
// System.out.println(" terms=" + termCount);
return termCount;
}

assert term.isValid();

if (lastTerm == null) {
lastTerm = new BytesRefBuilder();
lastTerm.copyBytes(term);
} else {
if (lastTerm.get().compareTo(term) >= 0) {
throw new RuntimeException(
"terms out of order: lastTerm=" + lastTerm.get() + " term=" + term);
}
lastTerm.copyBytes(term);
}

// System.out.println(" term=" + term);

// Caller already ensured terms enum positioned >= minTerm:
if (term.compareTo(minTerm) < 0) {
throw new RuntimeException("saw term before min term field=" + field + " term=" + minTerm);
}

if (isIntersect == false) {
int cmp = term.compareTo(maxTerm);
if (cmp == 0) {
// Done!
// System.out.println(" terms=" + termCount);
return termCount;
} else if (cmp > 0) {
throw new RuntimeException("didn't see end term field=" + field + " term=" + maxTerm);
}
}

postingsEnum = termsEnum.postings(postingsEnum, 0);

int lastDoc = -1;
while (true) {
int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (doc <= lastDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
}
if (doc >= maxDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
}

// System.out.println(" doc=" + doc);
docsSeen.set(doc);

lastDoc = doc;
}

termCount++;
}
}

/**
* Test Terms.intersect on this range, and validates that it returns the same doc ids as using
* non-intersect TermsEnum. Returns true if any fake terms were seen.
*/
private static boolean checkSingleTermRange(
String field,
int maxDoc,
Terms terms,
BytesRef minTerm,
BytesRef maxTerm,
FixedBitSet normalDocs,
FixedBitSet intersectDocs)
throws IOException {
// System.out.println(" check minTerm=" + minTerm.utf8ToString() + " maxTerm=" +
// maxTerm.utf8ToString());
assert minTerm.compareTo(maxTerm) <= 0;

TermsEnum termsEnum = terms.iterator();
TermsEnum.SeekStatus status = termsEnum.seekCeil(minTerm);
if (status != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException(
"failed to seek to existing term field=" + field + " term=" + minTerm);
}

// Do "dumb" iteration to visit all terms in the range:
long normalTermCount =
getDocsFromTermRange(field, maxDoc, termsEnum, normalDocs, minTerm, maxTerm, false);

// Now do the same operation using intersect:
long intersectTermCount =
getDocsFromTermRange(
field,
maxDoc,
terms.intersect(
new CompiledAutomaton(
Automata.makeBinaryInterval(minTerm, true, maxTerm, false),
true,
false,
Integer.MAX_VALUE,
true),
null),
intersectDocs,
minTerm,
maxTerm,
true);

if (intersectTermCount > normalTermCount) {
throw new RuntimeException(
"intersect returned too many terms: field="
+ field
+ " intersectTermCount="
+ intersectTermCount
+ " normalTermCount="
+ normalTermCount);
}

if (normalDocs.equals(intersectDocs) == false) {
throw new RuntimeException(
"intersect visited different docs than straight terms enum: "
+ normalDocs.cardinality()
+ " for straight enum, vs "
+ intersectDocs.cardinality()
+ " for intersect, minTerm="
+ minTerm
+ " maxTerm="
+ maxTerm);
}
// System.out.println(" docs=" + normalTermCount);
// System.out.println(" " + intersectTermCount + " vs " + normalTermCount);
return intersectTermCount != normalTermCount;
}

/**
* checks Fields api is consistent with itself. searcher is optional, to verify with queries. Can
* be null.
@@ -2386,6 +2553,7 @@ public final class CheckIndex implements Closeable {
public static class VerifyPointsVisitor implements PointValues.IntersectVisitor {
private long pointCountSeen;
private int lastDocID = -1;
private final int maxDoc;
private final FixedBitSet docsSeen;
private final byte[] lastMinPackedValue;
private final byte[] lastMaxPackedValue;
@@ -2402,6 +2570,7 @@ public final class CheckIndex implements Closeable {
/** Sole constructor */
public VerifyPointsVisitor(String fieldName, int maxDoc, PointValues values)
throws IOException {
this.maxDoc = maxDoc;
this.fieldName = fieldName;
numDataDims = values.getNumDimensions();
numIndexDims = values.getNumIndexDimensions();
@@ -49,6 +49,7 @@ public class OrdinalMap implements Accountable {
// TODO: use more efficient packed ints structures?

private static class TermsEnumIndex {
public static final TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;

@@ -35,6 +35,8 @@ public class SpanScorer extends Scorer {

/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
private float freq;
/** number of matches (computed in setFreqCurrentDoc) */
private int numMatches;

private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
@@ -75,12 +77,13 @@ public class SpanScorer extends Scorer {
}

/**
* Sets {@link #freq} for the current document.
* Sets {@link #freq} and {@link #numMatches} for the current document.
*
* <p>This will be called at most once per document.
*/
protected final void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;

spans.doStartCurrentDoc();
@@ -99,6 +102,7 @@ public class SpanScorer extends Scorer {
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased
// endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos=" + endPos;
numMatches++;
if (docScorer == null) { // scores not required, break out here
freq = 1;
return;
@@ -37,7 +37,7 @@ import java.io.IOException;
*
* @see Directory
*/
public abstract class IndexInput extends DataInput implements Closeable {
public abstract class IndexInput extends DataInput implements Cloneable, Closeable {

private final String resourceDescription;

@@ -28,7 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
*
* @lucene.internal
*/
public final class FixedBitSet extends BitSet {
public final class FixedBitSet extends BitSet implements Bits, Accountable {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
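FixedBitSet, used for the docsSeen sets in the CheckIndex code above, is a fixed-capacity bit set; a minimal usage sketch against its public API (the FixedBitSetSketch class name is illustrative):

    import org.apache.lucene.util.FixedBitSet;

    public class FixedBitSetSketch {
      public static void main(String[] args) {
        FixedBitSet bits = new FixedBitSet(128);   // capacity is fixed at construction, all bits clear
        bits.set(5);
        bits.set(64);
        System.out.println(bits.get(5));           // true
        System.out.println(bits.cardinality());    // 2
        bits.clear(0, bits.length());              // clear the whole range [0, length)
        System.out.println(bits.cardinality());    // 0
      }
    }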
@@ -374,6 +374,7 @@ public class OfflineSorter {

/** Merge the most recent {@code maxTempFile} partitions into a new partition. */
void mergePartitions(Directory trackingDir, List<Future<Partition>> segments) throws IOException {
long start = System.currentTimeMillis();
List<Future<Partition>> segmentsToMerge;
if (segments.size() > maxTempFiles) {
segmentsToMerge = segments.subList(segments.size() - maxTempFiles, segments.size());
@@ -428,6 +429,7 @@
long start = System.currentTimeMillis();
SortableBytesRefArray buffer;
boolean exhausted = false;
int count;
if (valueLength != -1) {
// fixed length case
buffer = new FixedLengthBytesRefArray(valueLength);

@@ -33,7 +33,7 @@ import org.apache.lucene.search.DocIdSetIterator;
*
* @lucene.internal
*/
public class SparseFixedBitSet extends BitSet {
public class SparseFixedBitSet extends BitSet implements Bits, Accountable {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class);

@@ -71,6 +71,8 @@ public final class FST<T> implements Accountable {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FST.class);
private static final long ARC_SHALLOW_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(Arc.class);

private static final int BIT_FINAL_ARC = 1 << 0;
static final int BIT_LAST_ARC = 1 << 1;

@@ -59,6 +59,7 @@ import org.apache.lucene.util.SparseFixedBitSet;
public final class HnswGraph extends KnnGraphValues {

private final int maxConn;
private final VectorValues.SearchStrategy searchStrategy;

// Each entry lists the top maxConn neighbors of a node. The nodes correspond to vectors added to
// HnswBuilder, and the
@@ -69,12 +70,13 @@ public final class HnswGraph extends KnnGraphValues {
private int upto;
private NeighborArray cur;

HnswGraph(int maxConn) {
HnswGraph(int maxConn, VectorValues.SearchStrategy searchStrategy) {
graph = new ArrayList<>();
// Typically with diversity criteria we see nodes not fully occupied; average fanout seems to be
// about 1/2 maxConn. There is some indexing time penalty for under-allocating, but saves RAM
graph.add(new NeighborArray(Math.max(32, maxConn / 4)));
this.maxConn = maxConn;
this.searchStrategy = searchStrategy;
}

/**

@@ -99,7 +99,7 @@ public final class HnswGraphBuilder {
}
this.maxConn = maxConn;
this.beamWidth = beamWidth;
this.hnsw = new HnswGraph(maxConn);
this.hnsw = new HnswGraph(maxConn, searchStrategy);
bound = BoundsChecker.create(searchStrategy.reversed);
random = new Random(seed);
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));

@@ -28,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil;
public class NeighborArray {

private int size;
private int upto;

float[] score;
int[] node;