GITHUB#12655: gradle tidy after google java format update for jdk 21 and regen

* tidy whitespace changes from googleJavaFormat upgrade
* generateForUtil fixed and regenerated (Python removed fractions.gcd in favor of math.gcd; see https://bugs.python.org/issue39350)
* generateAntlr
* generateClassicTokenizer
* generateWikipediaTokenizer
Kevin Risden 2023-10-10 16:15:44 -04:00
parent 2c42b8941a
commit de3b294be4
286 changed files with 928 additions and 10 deletions

View File

@ -1,5 +1,5 @@
{
"gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "381a9627fd7da6402216e3279cf81a09af222aaf",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "1f7a446f3483326385eef257cea8366c27da0850",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex": "f52109bb7d5701979fde90aeeeda726246a8d5fd"
}

View File

@ -1,5 +1,5 @@
{
"gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "d36e38342f984050b3a314f153b7a001a2d2be82",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "ac298e08bc5b96202efca0c01f9f0376fda976bd",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex": "0b8c7774b98e8237702013e82c352d4711509bd0"
}

View File

@ -53,15 +53,19 @@ public final class CJKBigramFilter extends TokenFilter {
// configuration
/** bigram flag for Han Ideographs */
public static final int HAN = 1;
/** bigram flag for Hiragana */
public static final int HIRAGANA = 2;
/** bigram flag for Katakana */
public static final int KATAKANA = 4;
/** bigram flag for Hangul */
public static final int HANGUL = 8;
/** when we emit a bigram, it's then marked as this type */
public static final String DOUBLE_TYPE = "<DOUBLE>";
/** when we emit a unigram, it's then marked as this type */
public static final String SINGLE_TYPE = "<SINGLE>";
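
Because these flags are distinct powers of two, they can be OR'ed together to choose which scripts get bigrammed. A minimal usage sketch (editor's illustration, not part of this commit):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

// Bigram only Han and Hiragana runs; Katakana and Hangul pass through as unigrams.
StandardTokenizer source = new StandardTokenizer();
source.setReader(new StringReader("多くの学生が試験に落ちた"));
TokenStream bigrams =
    new CJKBigramFilter(source, CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA);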

View File

@ -297,8 +297,10 @@ class ClassicTokenizerImpl {
/** Error code for "Unknown internal scanner error". */
private static final int ZZ_UNKNOWN_ERROR = 0;
/** Error code for "could not match input". */
private static final int ZZ_NO_MATCH = 1;
/** Error code for "pushback value was too large". */
private static final int ZZ_PUSHBACK_2BIG = 2;

View File

@ -15,6 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.de;
// This file is encoded in UTF-8
import java.io.IOException;

View File

@ -38,22 +38,31 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
/** Alpha/numeric token type */
public static final int ALPHANUM = 0;
/** Numeric token type */
public static final int NUM = 1;
/** Southeast Asian token type */
public static final int SOUTHEAST_ASIAN = 2;
/** Ideographic token type */
public static final int IDEOGRAPHIC = 3;
/** Hiragana token type */
public static final int HIRAGANA = 4;
/** Katakana token type */
public static final int KATAKANA = 5;
/** Hangul token type */
public static final int HANGUL = 6;
/** URL token type */
public static final int URL = 7;
/** Email token type */
public static final int EMAIL = 8;
/** Emoji token type. */
public static final int EMOJI = 9;

View File

@ -42,6 +42,7 @@ package org.apache.lucene.analysis.en;
/** A list of words used by Kstem */
class KStemData1 {
private KStemData1() {}
// KStemData1 ... KStemData8 are created from "head_word_list.txt"
static String[] data = {
"aback", "abacus", "abandon", "abandoned", "abase",

View File

@ -56,6 +56,7 @@ abstract class WordStorage {
private static final int MAX_STORED_LENGTH = SUGGESTIBLE_MASK - 1;
private final int maxEntryLength;
private final boolean hasCustomMorphData;
/**
* A map from word's hash (modulo array's length) into an int containing:
*

View File

@ -40,6 +40,7 @@ public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount) {
this(delegate, maxTokenCount, false);
}
/**
* Build an analyzer that limits the maximum number of tokens per field.
*
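
A short usage sketch (editor's illustration, not from this diff): the two-arg constructor above delegates to the three-arg form with consumeAllTokens=false, so wrapping is a one-liner.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// Index at most 10,000 tokens per field, silently dropping the rest.
Analyzer limited = new LimitTokenCountAnalyzer(new StandardAnalyzer(), 10_000);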

View File

@ -151,6 +151,7 @@ public final class StemmerOverrideFilter extends TokenFilter {
return matchOutput;
}
}
/** This builder builds an {@link FST} for the {@link StemmerOverrideFilter} */
public static class Builder {
private final BytesRefHash hash = new BytesRefHash();
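
A hedged usage sketch (editor's illustration; `in` stands for an existing upstream TokenStream):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;

// The Builder collects input->stem pairs in its BytesRefHash and compiles
// them into the FST that the filter consults before any stemmer runs.
StemmerOverrideFilter.Builder b = new StemmerOverrideFilter.Builder(true); // true = ignoreCase
b.add("running", "run");
StemmerOverrideMap map = b.build(); // build() declares IOException
TokenStream overridden = new StemmerOverrideFilter(in, map);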

View File

@ -47,6 +47,7 @@ public final class TypeAsSynonymFilter extends TokenFilter {
public TypeAsSynonymFilter(TokenStream input) {
this(input, null, null, ~0);
}
/**
* @param input input tokenstream
* @param prefix Prepend this string to every token type emitted as token text. If null, nothing

View File

@ -45,11 +45,13 @@ public final class WordDelimiterIterator {
/** start position of text, excluding leading delimiters */
int startBounds;
/** end position of text, excluding trailing delimiters */
int endBounds;
/** Beginning of subword */
int current;
/** End of subword */
int end;

View File

@ -63,6 +63,7 @@ import static org.apache.lucene.analysis.util.StemmerUtil.*;
class NorwegianLightStemmer {
/** Constant to remove Bokmål-specific endings */
static final int BOKMAAL = 1;
/** Constant to remove Nynorsk-specific endings */
static final int NYNORSK = 2;

View File

@ -43,6 +43,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
/** File containing default Romanian stopwords. */
public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";
/** The comment character in the stopwords file. All lines prefixed with this will be ignored. */
private static final String STOPWORDS_COMMENT = "#";

View File

@ -48,10 +48,13 @@ import org.apache.lucene.util.fst.Util;
public class SynonymMap {
/** for multiword support, you must separate words with this separator */
public static final char WORD_SEPARATOR = 0;
/** map&lt;input word, list&lt;ord&gt;&gt; */
public final FST<BytesRef> fst;
/** map&lt;ord, outputword&gt; */
public final BytesRefHash words;
/** maxHorizontalContext: maximum context we need on the tokenstream */
public final int maxHorizontalContext;
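
Sketch of how WORD_SEPARATOR supports multiword entries (editor's illustration, not part of this commit):

import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

// join() glues the words together with WORD_SEPARATOR (the char 0),
// which is how a multiword input is stored as a single FST key.
SynonymMap.Builder builder = new SynonymMap.Builder(true); // true = dedup
CharsRef input =
    SynonymMap.Builder.join(new String[] {"united", "states"}, new CharsRefBuilder());
builder.add(input, new CharsRef("usa"), true); // true = keep original
SynonymMap map = builder.build(); // build() declares IOException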

View File

@ -22,6 +22,7 @@ import org.apache.lucene.util.BytesRef;
public class TermAndBoost {
/** the term */
public final BytesRef term;
/** the boost */
public final float boost;

View File

@ -39,6 +39,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
/** File containing default Thai stopwords. */
public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";
/** The comment character in the stopwords file. All lines prefixed with this will be ignored. */
private static final String STOPWORDS_COMMENT = "#";

View File

@ -42,6 +42,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
/** File containing default Turkish stopwords. */
public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";
/** The comment character in the stopwords file. All lines prefixed with this will be ignored. */
private static final String STOPWORDS_COMMENT = "#";

View File

@ -38,10 +38,13 @@ import org.apache.lucene.util.AttributeFactory;
public abstract class SegmentingTokenizerBase extends Tokenizer {
protected static final int BUFFERMAX = 1024;
protected final char[] buffer = new char[BUFFERMAX];
/** true length of text in the buffer */
private int length = 0;
/** length in buffer that can be evaluated safely, up to a safe end point */
private int usableLength = 0;
/** accumulated offset of previous buffers for this reader, for offsetAtt */
protected int offset = 0;

View File

@ -97,18 +97,22 @@ public final class WikipediaTokenizer extends Tokenizer {
/** Only output tokens */
public static final int TOKENS_ONLY = 0;
/**
* Only output untokenized tokens, which are tokens that would normally be split into several
* tokens
*/
public static final int UNTOKENIZED_ONLY = 1;
/** Output both the untokenized token and the splits */
public static final int BOTH = 2;
/**
* This flag is used to indicate that the produced "Token" would, if {@link #TOKENS_ONLY} was
* used, produce multiple tokens.
*/
public static final int UNTOKENIZED_TOKEN_FLAG = 1;
/** A private instance of the JFlex-constructed scanner */
private final WikipediaTokenizerImpl scanner;
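
A hedged construction sketch (assumes the two-arg constructor; not part of this commit):

import java.util.Collections;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;

// BOTH emits each untokenized token plus its splits; the split tokens are
// marked with UNTOKENIZED_TOKEN_FLAG in their FlagsAttribute.
WikipediaTokenizer tok =
    new WikipediaTokenizer(WikipediaTokenizer.BOTH, Collections.emptySet());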

View File

@ -402,8 +402,10 @@ class WikipediaTokenizerImpl {
/** Error code for "Unknown internal scanner error". */
private static final int ZZ_UNKNOWN_ERROR = 0;
/** Error code for "could not match input". */
private static final int ZZ_NO_MATCH = 1;
/** Error code for "pushback value was too large". */
private static final int ZZ_PUSHBACK_2BIG = 2;

View File

@ -33,6 +33,7 @@ public class TestBengaliAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "বারী", "বার");
a.close();
}
/** test Digits */
public void testDigits() throws Exception {
BengaliAnalyzer a = new BengaliAnalyzer();

View File

@ -611,6 +611,7 @@ public class TestFlattenGraphFilter extends BaseTokenStreamTestCase {
new int[] {1, 1, 3, 1, 2, 1, 1, 1},
7);
}
// This graph can create a disconnected input node that is farther ahead in the output than its
// subsequent input node.
// Exceptions: Free too early or dropped tokens.

View File

@ -32,6 +32,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
assumeTrue(
"JRE does not support Thai dictionary-based BreakIterator", ThaiTokenizer.DBBI_AVAILABLE);
}
/*
* testcase for offsets
*/

View File

@ -41,19 +41,25 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
/** Token type for words containing ideographic characters */
public static final String WORD_IDEO =
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
/** Token type for words containing Japanese hiragana */
public static final String WORD_HIRAGANA =
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
/** Token type for words containing Japanese katakana */
public static final String WORD_KATAKANA =
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
/** Token type for words containing Korean hangul */
public static final String WORD_HANGUL = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
/** Token type for words that contain letters */
public static final String WORD_LETTER =
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
/** Token type for words that appear to be numbers */
public static final String WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
/** Token type for words that appear to be emoji sequences */
public static final String WORD_EMOJI = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI];

View File

@ -40,10 +40,13 @@ import org.apache.lucene.util.AttributeFactory;
public final class ICUTokenizer extends Tokenizer {
private static final int IOBUFFER = 4096;
private final char[] buffer = new char[IOBUFFER];
/** true length of text in the buffer */
private int length = 0;
/** length in buffer that can be evaluated safely, up to a safe end point */
private int usableLength = 0;
/** accumulated offset of previous buffers for this reader, for offsetAtt */
private int offset = 0;

View File

@ -29,10 +29,13 @@ public abstract class ICUTokenizerConfig {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
public ICUTokenizerConfig() {}
/** Return a breakiterator capable of processing a given script. */
public abstract RuleBasedBreakIterator getBreakIterator(int script);
/** Return a token type value for a given script and BreakIterator rule status. */
public abstract String getType(int script, int ruleStatus);
/** true if Han, Hiragana, and Katakana scripts should all be returned as Japanese */
public abstract boolean combineCJ();
}
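
Since the hunk above shows the full abstract surface (three methods), a minimal subclass sketch looks like this (editor's illustration; assumes ICU4J's root-locale word iterator is acceptable for every script):

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerConfig;

ICUTokenizerConfig config =
    new ICUTokenizerConfig() {
      @Override
      public RuleBasedBreakIterator getBreakIterator(int script) {
        // Same word iterator regardless of script, for brevity.
        return (RuleBasedBreakIterator) BreakIterator.getWordInstance(ULocale.ROOT);
      }

      @Override
      public String getType(int script, int ruleStatus) {
        return "<ALPHANUM>"; // one token type for every rule status
      }

      @Override
      public boolean combineCJ() {
        return false;
      }
    };

Such a config can then be handed to an ICUTokenizer constructor that accepts one.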

View File

@ -31,18 +31,21 @@ public interface ScriptAttribute extends Attribute {
* @return numeric code
*/
public int getCode();
/**
* Set the numeric code for this script value. This is the constant value from {@link UScript}.
*
* @param code numeric code
*/
public void setCode(int code);
/**
* Get the full name.
*
* @return UTR #24 full name.
*/
public String getName();
/**
* Get the abbreviated name.
*

View File

@ -20,14 +20,19 @@ package org.apache.lucene.analysis.ja.dict;
final class DictionaryConstants {
/** Codec header of the dictionary file. */
public static final String DICT_HEADER = "kuromoji_dict";
/** Codec header of the dictionary mapping file. */
public static final String TARGETMAP_HEADER = "kuromoji_dict_map";
/** Codec header of the POS dictionary file. */
public static final String POSDICT_HEADER = "kuromoji_dict_pos";
/** Codec header of the connection costs. */
public static final String CONN_COSTS_HEADER = "kuromoji_cc";
/** Codec header of the character definition file. */
public static final String CHARDEF_HEADER = "kuromoji_cd";
/** Codec version of the binary dictionary */
public static final int VERSION = 1;
}

View File

@ -205,10 +205,12 @@ class TokenInfoMorphData implements JaMorphData {
/** flag that the entry has baseform data. otherwise it's not inflected (same as surface form) */
public static final int HAS_BASEFORM = 1;
/**
* flag that the entry has reading data. otherwise reading is surface form converted to katakana
*/
public static final int HAS_READING = 2;
/** flag that the entry has pronunciation data. otherwise pronunciation is the reading */
public static final int HAS_PRONUNCIATION = 4;
}

View File

@ -20,14 +20,19 @@ package org.apache.lucene.analysis.ko.dict;
final class DictionaryConstants {
/** Codec header of the dictionary file. */
public static final String DICT_HEADER = "ko_dict";
/** Codec header of the dictionary mapping file. */
public static final String TARGETMAP_HEADER = "ko_dict_map";
/** Codec header of the POS dictionary file. */
public static final String POSDICT_HEADER = "ko_dict_pos";
/** Codec header of the connection costs file. */
public static final String CONN_COSTS_HEADER = "ko_cc";
/** Codec header of the character definition file */
public static final String CHARDEF_HEADER = "ko_cd";
/** Codec version of the binary dictionary */
public static final int VERSION = 1;
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public final class DaitchMokotoffSoundexFilter extends TokenFilter {
/** true if encoded tokens should be added as synonyms */
protected boolean inject = true;
/** phonetic encoder */
protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();

View File

@ -41,8 +41,10 @@ public class DoubleMetaphoneFilterFactory extends TokenFilterFactory {
/** parameter name: true if encoded tokens should be added as synonyms */
public static final String INJECT = "inject";
/** parameter name: restricts the length of the phonetic code */
public static final String MAX_CODE_LENGTH = "maxCodeLength";
/** default maxCodeLength if not specified */
public static final int DEFAULT_MAX_CODE_LENGTH = 4;

View File

@ -33,8 +33,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public final class PhoneticFilter extends TokenFilter {
/** true if encoded tokens should be added as synonyms */
protected boolean inject = true;
/** phonetic encoder */
protected Encoder encoder = null;
/** captured state, non-null when <code>inject=true</code> and a token is buffered */
protected State save = null;

View File

@ -75,8 +75,10 @@ public class PhoneticFilterFactory extends TokenFilterFactory implements Resourc
/** parameter name: either a short name or a full class name */
public static final String ENCODER = "encoder";
/** parameter name: true if encoded tokens should be added as synonyms */
public static final String INJECT = "inject"; // boolean
/** parameter name: restricts the length of the phonetic code */
public static final String MAX_CODE_LENGTH = "maxCodeLength";
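
These parameter names are what a factory-based configuration passes through, e.g. via CustomAnalyzer. A hedged sketch ("phonetic" is the factory's SPI name; DoubleMetaphone is one of the encoder short names the factory accepts):

import org.apache.lucene.analysis.custom.CustomAnalyzer;

// build() declares IOException.
CustomAnalyzer analyzer =
    CustomAnalyzer.builder()
        .withTokenizer("standard")
        .addTokenFilter("phonetic",
            "encoder", "DoubleMetaphone",
            "inject", "true",
            "maxCodeLength", "4")
        .build();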

View File

@ -58,10 +58,13 @@ package org.egothor.stemmer;
class Cell {
/** next row id in this way */
int ref = -1;
/** command of the cell */
int cmd = -1;
/** how many cmd-s were in the subtrie before pack() */
int cnt = 0;
/** how many chars would be discarded from input key in this way */
int skip = 0;

View File

@ -1,4 +1,4 @@
{
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/ForUtil.java": "e91aafa414018b34a39c8f0947ff58c1f1dde78d",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/gen_ForUtil.py": "7be3f1e17c9055d68a8ad6b0d6321481dcc4d711"
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/ForUtil.java": "c3bff5677f7d98fbb362018a4c1dbad4d670610f",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/gen_ForUtil.py": "e3c0b1d1d9fcc9f04ae8870e40aab44133321272"
}

View File

@ -50,6 +50,7 @@ public final class FieldReader extends Terms {
final Lucene40BlockTreeTermsReader parent;
final FST<BytesRef> index;
// private boolean DEBUG;
FieldReader(

View File

@ -64,6 +64,7 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
/** Extension of compound file entries */
static final String ENTRIES_EXTENSION = "cfe";

View File

@ -405,20 +405,25 @@ public class Lucene50PostingsFormat extends PostingsFormat {
public static final class IntBlockTermState extends BlockTermState {
/** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
public long docStartFP;
/** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
public long posStartFP;
/** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
public long payStartFP;
/**
* file offset for the start of the skip list, relative to docStartFP, if there are more than
* {@link #BLOCK_SIZE} docs; otherwise -1
*/
public long skipOffset;
/**
* file offset for the last position in the last block, if there are more than {@link
* #BLOCK_SIZE} positions; otherwise -1
*/
public long lastPosBlockOffset;
/**
* docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
* totalTermFreq in this case.

View File

@ -45,14 +45,19 @@ public class Lucene50CompressingStoredFieldsFormat extends StoredFieldsFormat {
/** format name */
protected final String formatName;
/** segment suffix */
protected final String segmentSuffix;
/** compression mode */
protected final CompressionMode compressionMode;
/** chunk size */
protected final int chunkSize;
/** max docs per chunk */
protected final int maxDocsPerChunk;
/** block shift */
protected final int blockShift;

View File

@ -54,10 +54,13 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";
/** Extension of stored fields index */
public static final String INDEX_EXTENSION = "fdx";
/** Extension of stored fields meta */
public static final String META_EXTENSION = "fdm";
/** Codec name for the index. */
public static final String INDEX_CODEC_NAME = "Lucene85FieldsIndex";
@ -73,8 +76,10 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea
static final int VERSION_START = 1;
static final int VERSION_OFFHEAP_INDEX = 2;
/** Version where all metadata were moved to the meta file. */
static final int VERSION_META = 3;
/**
* Version where numChunks is explicitly recorded in meta file and a dirty chunk bit is recorded
* in each chunk

View File

@ -38,14 +38,19 @@ public class Lucene50CompressingTermVectorsFormat extends TermVectorsFormat {
/** format name */
protected final String formatName;
/** segment suffix */
protected final String segmentSuffix;
/** compression mode */
protected final CompressionMode compressionMode;
/** chunk size */
protected final int chunkSize;
/** block size */
protected final int blockSize;
/** max docs per chunk */
protected final int maxDocsPerChunk;

View File

@ -64,8 +64,10 @@ public final class Lucene50CompressingTermVectorsReader extends TermVectorsReade
static final int VERSION_START = 1;
static final int VERSION_OFFHEAP_INDEX = 2;
/** Version where all metadata were moved to the meta file. */
static final int VERSION_META = 3;
/** Version where numChunks is explicitly recorded in meta file */
static final int VERSION_NUM_CHUNKS = 4;

View File

@ -368,6 +368,7 @@ final class ForUtil {
MASKS32[i] = mask32(i);
}
}
// mark values in the array as final longs to avoid the cost of reading the array; arrays
// should only be used when the idx is a variable
private static final long MASK8_1 = MASKS8[1];

View File

@ -405,20 +405,25 @@ public class Lucene84PostingsFormat extends PostingsFormat {
public static final class IntBlockTermState extends BlockTermState {
/** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
public long docStartFP;
/** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
public long posStartFP;
/** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
public long payStartFP;
/**
* file offset for the start of the skip list, relative to docStartFP, if there are more than
* {@link ForUtil#BLOCK_SIZE} docs; otherwise -1
*/
public long skipOffset;
/**
* file offset for the last position in the last block, if there are more than {@link
* ForUtil#BLOCK_SIZE} positions; otherwise -1
*/
public long lastPosBlockOffset;
/**
* docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
* totalTermFreq in this case.

View File

@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from fractions import gcd
from math import gcd
"""Code generation for ForUtil.java"""

View File

@ -64,6 +64,7 @@ public class Lucene87Codec extends Codec {
/** compression mode for stored fields */
protected final Lucene87StoredFieldsFormat.Mode storedMode;
/** compression mode for doc value fields */
protected final Lucene80DocValuesFormat.Mode dvMode;

View File

@ -37,6 +37,7 @@ public final class Lucene90HnswGraphBuilder {
/** Default random seed for level generation * */
private static final long DEFAULT_RAND_SEED = 42;
/** A name for the HNSW component for the info-stream * */
public static final String HNSW_COMPONENT = "HNSW";

View File

@ -79,6 +79,7 @@ public class Lucene90HnswVectorsFormat extends KnnVectorsFormat {
/** Default number of maximum connections per node */
public static final int DEFAULT_MAX_CONN = 16;
/**
* Default size of the queue maintained while searching, and the number of random entry
* points to sample during graph construction.

View File

@ -96,6 +96,7 @@ public class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
/** Default number of maximum connections per node */
public static final int DEFAULT_MAX_CONN = 16;
/**
* Default size of the queue maintained while searching during graph construction.
*/

View File

@ -110,6 +110,7 @@ public class Lucene94HnswVectorsFormat extends KnnVectorsFormat {
/** Default number of maximum connections per node */
public static final int DEFAULT_MAX_CONN = 16;
/**
* Default size of the queue maintained while searching during graph construction.
*/

View File

@ -32,6 +32,7 @@ public final class LegacyDirectMonotonicWriter {
/** min block shift */
public static final int MIN_BLOCK_SHIFT = 2;
/** max block shift */
public static final int MAX_BLOCK_SHIFT = 22;

View File

@ -31,12 +31,16 @@ class LegacyPacked64 extends PackedInts.Reader {
/** Values are stores contiguously in the blocks array. */
private final long[] blocks;
/** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
private final long maskRight;
/** Optimization: Saves one lookup in {@link #get(int)}. */
private final int bpvMinusBlockSize;
/** number of values */
protected final int valueCount;
/** bits per value. */
protected final int bitsPerValue;

View File

@ -68,6 +68,7 @@ public final class Lucene50RWCompoundFormat extends CompoundFormat {
/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
/** Extension of compound file entries */
static final String ENTRIES_EXTENSION = "cfe";

View File

@ -329,10 +329,13 @@ public class BKDWriter60 implements Closeable {
private final int packedBytesLength;
private final MergeState.DocMap docMap;
private final MergeIntersectsVisitor mergeIntersectsVisitor;
/** Which doc in this block we are up to */
private int docBlockUpto;
/** Current doc ID */
public int docID;
/** Current packed value */
public final byte[] packedValue;

View File

@ -42,6 +42,7 @@ public final class Lucene91HnswGraphBuilder {
/** Default random seed for level generation * */
private static final long DEFAULT_RAND_SEED = 42;
/** A name for the HNSW component for the info-stream * */
public static final String HNSW_COMPONENT = "HNSW";

View File

@ -53,6 +53,7 @@ public class LineDocSource extends ContentSource {
/** Reader of a single input line into {@link DocData}. */
public abstract static class LineParser {
protected final String[] header;
/**
* Construct with the header
*
@ -61,6 +62,7 @@ public class LineDocSource extends ContentSource {
public LineParser(String[] header) {
this.header = header;
}
/** parse an input line and fill doc data appropriately */
public abstract void parseLine(DocData docData, String line);
}

View File

@ -46,10 +46,12 @@ public class QualityStats {
this.rank = rank;
this.recall = recall;
}
/** Returns the rank: where on the list of returned docs this relevant doc appeared. */
public int getRank() {
return rank;
}
/** Returns the recall: how many relevant docs were returned up to this point, inclusive. */
public double getRecall() {
return recall;

View File

@ -74,6 +74,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
* indexed
*/
public abstract boolean isIndexTerm(BytesRef term, TermStats stats);
/** Called when a new field is started. */
public abstract void newField(FieldInfo fieldInfo);
}

View File

@ -43,6 +43,7 @@ final class OrdsFieldReader extends Terms {
final OrdsBlockTreeTermsReader parent;
final FST<Output> index;
// private boolean DEBUG;
OrdsFieldReader(

View File

@ -62,8 +62,10 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
protected BlockHeader.Serializer blockHeaderReader;
protected BlockLine.Serializer blockLineReader;
/** In-memory read buffer for the current block. */
protected ByteArrayDataInput blockReadBuffer;
/**
* In-memory read buffer for the details region of the current block. It shares the same byte
* array as {@link #blockReadBuffer}, with a different position.
@ -74,6 +76,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
/** {@link IndexDictionary.Browser} supplier for lazy loading. */
protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier;
/** Holds the {@link IndexDictionary.Browser} once loaded. */
protected IndexDictionary.Browser dictionaryBrowser;
@ -82,19 +85,25 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
* UniformSplitPostingsFormat#TERMS_BLOCKS_EXTENSION block file}.
*/
protected long blockStartFP;
/** Current block header. */
protected BlockHeader blockHeader;
/** Current block line. */
protected BlockLine blockLine;
/** Current block line details. */
protected BlockTermState termState;
/**
* Offset of the start of the first line of the current block (just after the header), relative to
* the block start.
*/
protected int blockFirstLineStart;
/** Current line index in the block. */
protected int lineIndexInBlock;
/**
* Whether the current {@link TermState} has been forced with a call to {@link
* #seekExact(BytesRef, TermState)}.
@ -102,6 +111,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
* @see #forcedTerm
*/
protected boolean termStateForced;
/**
* Set when {@link #seekExact(BytesRef, TermState)} is called.
*

View File

@ -81,15 +81,19 @@ public class IntersectBlockReader extends BlockReader {
/** Set this when our current mode is seeking to this term. Set to null after. */
protected BytesRef seekTerm;
/** Number of bytes accepted by the automaton when validating the current term. */
protected int numMatchedBytes;
/**
* Automaton states reached when validating the current term, from 0 to {@link #numMatchedBytes} -
* 1.
*/
protected int[] states;
/** Block iteration order determined when scanning the terms in the current block. */
protected BlockIteration blockIteration;
/**
* Counter of the number of consecutively rejected terms. Depending on {@link
* #NUM_CONSECUTIVELY_REJECTED_TERMS_THRESHOLD}, this may trigger a jump to a block away.

View File

@ -39,6 +39,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
/** Extension of the file containing the terms dictionary (the FST "trie"). */
public static final String TERMS_DICTIONARY_EXTENSION = "ustd";
/** Extension of the file containing the terms blocks for each field and the fields metadata. */
public static final String TERMS_BLOCKS_EXTENSION = "ustb";

View File

@ -100,11 +100,13 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
/** Default value for the target block size (number of terms per block). */
public static final int DEFAULT_TARGET_NUM_BLOCK_LINES = 32;
/**
* Default value for the maximum allowed delta variation of the block size (delta of the number of
* terms per block). The block size will be [target block size]+-[allowed delta].
*/
public static final int DEFAULT_DELTA_NUM_LINES = (int) (DEFAULT_TARGET_NUM_BLOCK_LINES * 0.1);
/** Upper limit of the block size (maximum number of terms per block). */
protected static final int MAX_NUM_BLOCK_LINES = 1_000;

View File

@ -44,6 +44,7 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
/** Extension of the file containing the terms dictionary (the FST "trie"). */
public static final String TERMS_DICTIONARY_EXTENSION = "stustd";
/** Extension of the file containing the terms blocks for each field and the fields metadata. */
public static final String TERMS_BLOCKS_EXTENSION = "stustb";

View File

@ -1,4 +1,4 @@
{
"lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java": "f2091e2b7284b70c740052a9b0ee389e67eb48a9",
"lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py": "7a137c58f88d68247be4368122780fa9cc8dce3e"
"lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java": "a67f47001bad680d75c57a864a1552a78fc85600",
"lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py": "3219c6d0289cc64d80dbdbc5db4bd05b8774e797"
}

View File

@ -57,6 +57,7 @@ module org.apache.lucene.core {
// Only export internal packages to the test framework.
exports org.apache.lucene.internal.tests to
org.apache.lucene.test_framework;
// Open certain packages for the test framework (ram usage tester).
opens org.apache.lucene.document to
org.apache.lucene.test_framework;

View File

@ -59,6 +59,7 @@ public abstract class AbstractAnalysisFactory {
/** the luceneVersion arg */
protected final Version luceneMatchVersion;
/** whether the luceneMatchVersion arg is explicitly specified in the serialized schema */
private boolean isExplicitLuceneMatchVersion = false;

View File

@ -345,6 +345,7 @@ public abstract class Analyzer implements Closeable {
public static final class TokenStreamComponents {
/** Original source of the tokens. */
protected final Consumer<Reader> source;
/**
* Sink tokenstream, such as the outer tokenfilter decorating the chain. This can be the source
* if there are no filters.

View File

@ -41,18 +41,25 @@ public final class StandardTokenizer extends Tokenizer {
/** Alpha/numeric token type */
public static final int ALPHANUM = 0;
/** Numeric token type */
public static final int NUM = 1;
/** Southeast Asian token type */
public static final int SOUTHEAST_ASIAN = 2;
/** Ideographic token type */
public static final int IDEOGRAPHIC = 3;
/** Hiragana token type */
public static final int HIRAGANA = 4;
/** Katakana token type */
public static final int KATAKANA = 5;
/** Hangul token type */
public static final int HANGUL = 6;
/** Emoji token type. */
public static final int EMOJI = 7;
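
These constants index into StandardTokenizer.TOKEN_TYPES, which supplies the string set on each token's TypeAttribute. A short sketch (editor's illustration; assumes a context where IOException may propagate):

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

StandardTokenizer tok = new StandardTokenizer();
tok.setReader(new StringReader("3 apples"));
CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
TypeAttribute type = tok.addAttribute(TypeAttribute.class);
tok.reset();
while (tok.incrementToken()) {
  // "3" comes back typed TOKEN_TYPES[NUM], "apples" as TOKEN_TYPES[ALPHANUM]
  System.out.println(term + " -> " + type.type());
}
tok.end();
tok.close();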

View File

@ -28,11 +28,13 @@ import org.apache.lucene.index.TermState;
public class BlockTermState extends OrdTermState {
/** how many docs have this term */
public int docFreq;
/** total number of occurrences of this term */
public long totalTermFreq;
/** the term's ord in the current block */
public int termBlockOrd;
/** fp into the terms dict primary file (_X.tim) that holds this term */
// TODO: update BTR to nuke this
public long blockFilePointer;

View File

@ -44,6 +44,7 @@ public final class CodecUtil {
/** Constant to identify the start of a codec header. */
public static final int CODEC_MAGIC = 0x3fd76c17;
/** Constant to identify the start of a codec footer. */
public static final int FOOTER_MAGIC = ~CODEC_MAGIC;

View File

@ -269,6 +269,7 @@ final class ForUtil {
MASKS32[i] = mask32(i);
}
}
// mark values in the array as final longs to avoid the cost of reading the array; arrays
// should only be used when the idx is a variable
private static final long MASK8_1 = MASKS8[1];

View File

@ -69,6 +69,7 @@ public final class Lucene90CompoundFormat extends CompoundFormat {
/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
/** Extension of compound file entries */
static final String ENTRIES_EXTENSION = "cfe";

View File

@ -443,15 +443,19 @@ public final class Lucene90PostingsFormat extends PostingsFormat {
public static final class IntBlockTermState extends BlockTermState {
/** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
public long docStartFP;
/** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
public long posStartFP;
/** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
public long payStartFP;
/**
* file offset for the start of the skip list, relative to docStartFP, if there are more than
* {@link ForUtil#BLOCK_SIZE} docs; otherwise -1
*/
public long skipOffset;
/**
* file offset for the last position in the last block, if there are more than {@link
* ForUtil#BLOCK_SIZE} positions; otherwise -1
@ -463,6 +467,7 @@ public final class Lucene90PostingsFormat extends PostingsFormat {
* positions to skip for that block, without telling us how many positions it has skipped.
*/
public long lastPosBlockOffset;
/**
* docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
* totalTermFreq in this case.

View File

@ -53,6 +53,7 @@ public final class FieldReader extends Terms {
final Lucene90BlockTreeTermsReader parent;
final FST<BytesRef> index;
// private boolean DEBUG;
FieldReader(

View File

@ -57,10 +57,13 @@ public final class Lucene90CompressingStoredFieldsWriter extends StoredFieldsWri
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";
/** Extension of stored fields index */
public static final String INDEX_EXTENSION = "fdx";
/** Extension of stored fields meta */
public static final String META_EXTENSION = "fdm";
/** Codec name for the index. */
public static final String INDEX_CODEC_NAME = "Lucene90FieldsIndex";

View File

@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from fractions import gcd
from math import gcd
"""Code generation for ForUtil.java"""

View File

@ -115,6 +115,7 @@ public final class Lucene95HnswVectorsFormat extends KnnVectorsFormat {
* numbers here will use an inordinate amount of heap
*/
private static final int MAXIMUM_MAX_CONN = 512;
/** Default number of maximum connections per node */
public static final int DEFAULT_MAX_CONN = 16;
@ -124,6 +125,7 @@ public final class Lucene95HnswVectorsFormat extends KnnVectorsFormat {
* 3200`
*/
private static final int MAXIMUM_BEAM_WIDTH = 3200;
/**
* Default size of the queue maintained while searching during graph construction.
*/
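
A hedged construction sketch (assumes the public two-int constructor and that the default beam width is 100; values above MAXIMUM_MAX_CONN or MAXIMUM_BEAM_WIDTH are rejected):

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;

// Equivalent to new Lucene95HnswVectorsFormat() if the defaults are 16/100.
KnnVectorsFormat format = new Lucene95HnswVectorsFormat(16, 100);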

View File

@ -83,6 +83,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
static class FieldsGroup {
final List<String> fields;
final int suffix;
/**
* Custom SegmentWriteState for this group of fields, with the segmentSuffix uniqueified for
* this PostingsFormat

View File

@ -70,6 +70,7 @@ public class InetAddressPoint extends Field {
/** The minimum value that an ip address can hold. */
public static final InetAddress MIN_VALUE;
/** The maximum value that an ip address can hold. */
public static final InetAddress MAX_VALUE;
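
A hedged query sketch showing how the sentinels make open-ended ranges easy:

import java.net.InetAddress;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.search.Query;

// "Everything up to 10.0.0.255"; getByName declares UnknownHostException.
Query q = InetAddressPoint.newRangeQuery(
    "ip", InetAddressPoint.MIN_VALUE, InetAddress.getByName("10.0.0.255"));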

View File

@ -91,6 +91,7 @@ import org.apache.lucene.util.SloppyMath;
public class LatLonPoint extends Field {
/** LatLonPoint is encoded as integer values so number of bytes is 4 */
public static final int BYTES = Integer.BYTES;
/**
* Type for an indexed LatLonPoint
*

View File

@ -35,8 +35,10 @@ final class LongHashSet implements Accountable {
final int mask;
final boolean hasMissingValue;
final int size;
/** minimum value in the set, or Long.MAX_VALUE for an empty set */
final long minValue;
/** maximum value in the set, or Long.MIN_VALUE for an empty set */
final long maxValue;

View File

@ -47,17 +47,22 @@ import org.apache.lucene.util.DocIdSetBuilder;
public abstract class RangeFieldQuery extends Query {
/** field name */
final String field;
/**
* query relation intersects: {@code CELL_CROSSES_QUERY}, contains: {@code CELL_CONTAINS_QUERY},
* within: {@code CELL_WITHIN_QUERY}
*/
final QueryType queryType;
/** number of dimensions - max 4 */
final int numDims;
/** ranges encoded as a sortable byte array */
final byte[] ranges;
/** number of bytes per dimension */
final int bytesPerDim;
/** ByteArrayComparator selected by bytesPerDim */
final ByteArrayComparator comparator;

View File

@ -51,12 +51,16 @@ import org.apache.lucene.util.BytesRef;
abstract class ShapeDocValues {
/** doc value format version; used to support bwc for any encoding changes */
protected static final byte VERSION = 0;
/** the binary doc value */
private final BytesRef data;
/** the geometry comparator used to check relations */
protected final ShapeComparator shapeComparator;
/** the centroid of the shape docvalue */
protected final Geometry centroid;
/** the bounding box of the shape docvalue */
protected final Geometry boundingBox;

View File

@ -412,24 +412,34 @@ public final class ShapeField {
/** all coordinates are different */
TRIANGLE
}
/** x coordinate, vertex one */
public int aX;
/** y coordinate, vertex one */
public int aY;
/** x coordinate, vertex two */
public int bX;
/** y coordinate, vertex two */
public int bY;
/** x coordinate, vertex three */
public int cX;
/** y coordinate, vertex three */
public int cY;
/** whether edge ab belongs to the original shape */
public boolean ab;
/** whether edge bc belongs to the original shape */
public boolean bc;
/** whether edge ca belongs to the original shape */
public boolean ca;
/** triangle type */
public TYPE type;

View File

@ -59,6 +59,7 @@ import org.apache.lucene.util.FixedBitSet;
abstract class SpatialQuery extends Query {
/** field name */
final String field;
/**
* query relation disjoint: {@link QueryRelation#DISJOINT}, intersects: {@link
* QueryRelation#INTERSECTS}, within: {@link QueryRelation#WITHIN}, contains: {@link

View File

@ -55,6 +55,7 @@ import org.apache.lucene.util.NumericUtils;
public class XYPointField extends Field {
/** XYPoint is encoded as integer values so number of bytes is 4 */
public static final int BYTES = Integer.BYTES;
/**
* Type for an indexed XYPoint
*

View File

@ -33,8 +33,10 @@ package org.apache.lucene.geo;
public final class Circle extends LatLonGeometry {
/** Center latitude */
private final double lat;
/** Center longitude */
private final double lon;
/** radius in meters */
private final double radiusMeters;

View File

@ -259,24 +259,34 @@ class Circle2D implements Component2D {
/** check if the point is within a distance */
boolean contains(double x, double y);
/** check if the line is within a distance */
boolean intersectsLine(double aX, double aY, double bX, double bY);
/** Relates this calculator to the provided bounding box */
Relation relate(double minX, double maxX, double minY, double maxY);
/** check if the bounding box is disjoint with this calculator's bounding box */
boolean disjoint(double minX, double maxX, double minY, double maxY);
/** check if the bounding box contains this calculator's bounding box */
boolean within(double minX, double maxX, double minY, double maxY);
/** get min X of this calculator */
double getMinX();
/** get max X of this calculator */
double getMaxX();
/** get min Y of this calculator */
double getMinY();
/** get max Y of this calculator */
double getMaxY();
/** get center X */
double geX();
/** get center Y */
double getY();
}

View File

@ -28,20 +28,26 @@ import org.apache.lucene.util.ArrayUtil;
final class ComponentTree implements Component2D {
/** minimum Y of this geometry's bounding box area */
private double minY;
/** maximum Y of this geometry's bounding box area */
private double maxY;
/** minimum X of this geometry's bounding box area */
private double minX;
/** maximum X of this geometry's bounding box area */
private double maxX;
// child components, or null. Note internal nodes might not have
// a consistent bounding box. Internal nodes should not be accessed
// outside of this class.
private Component2D left;
private Component2D right;
/** which dimension was this node split on */
// TODO: its implicit based on level, but boolean keeps code simple
private final boolean splitX;
/** root node of edge tree */
private final Component2D component;

View File

@ -34,14 +34,19 @@ final class EdgeTree {
// X-Y pair (in original order) of the two vertices
final double y1, y2;
final double x1, x2;
/** min Y of this edge */
final double low;
/** max Y of this edge or any children */
double max;
/** left child edge, or null */
EdgeTree left;
/** right child edge, or null */
EdgeTree right;
/** helper bytes signaling whether a point is on an edge, within the edge tree, or disjoint */
private static final byte FALSE = 0x00;

View File

@ -43,10 +43,13 @@ public final class GeoUtils {
/** min longitude value in radians */
public static final double MIN_LON_RADIANS = Math.toRadians(MIN_LON_INCL);
/** min latitude value in radians */
public static final double MIN_LAT_RADIANS = Math.toRadians(MIN_LAT_INCL);
/** max longitude value in radians */
public static final double MAX_LON_RADIANS = Math.toRadians(MAX_LON_INCL);
/** max latitude value in radians */
public static final double MAX_LAT_RADIANS = Math.toRadians(MAX_LAT_INCL);

View File

@ -33,15 +33,19 @@ import java.util.Arrays;
public class Line extends LatLonGeometry {
/** array of latitude coordinates */
private final double[] lats;
/** array of longitude coordinates */
private final double[] lons;
/** minimum latitude of this line's bounding box */
public final double minLat;
/** maximum latitude of this line's bounding box */
public final double maxLat;
/** minimum longitude of this line's bounding box */
public final double minLon;
/** maximum longitude of this line's bounding box */
public final double maxLon;

View File

@ -29,12 +29,16 @@ final class Line2D implements Component2D {
/** minimum Y of this geometry's bounding box area */
private final double minY;
/** maximum Y of this geometry's bounding box area */
private final double maxY;
/** minimum X of this geometry's bounding box area */
private final double minX;
/** maximum X of this geometry's bounding box area */
private final double maxX;
/** lines represented as a 2-d interval tree. */
private final EdgeTree tree;

View File

@ -33,6 +33,7 @@ public final class Point extends LatLonGeometry {
/** latitude coordinate */
private final double lat;
/** longitude coordinate */
private final double lon;

View File

@ -47,12 +47,16 @@ public final class Polygon extends LatLonGeometry {
/** minimum latitude of this polygon's bounding box area */
public final double minLat;
/** maximum latitude of this polygon's bounding box area */
public final double maxLat;
/** minimum longitude of this polygon's bounding box area */
public final double minLon;
/** maximum longitude of this polygon's bounding box area */
public final double maxLon;
/** winding order of the vertices */
private final WindingOrder windingOrder;

View File

@ -28,14 +28,19 @@ import org.apache.lucene.index.PointValues.Relation;
final class Polygon2D implements Component2D {
/** minimum Y of this geometry's bounding box area */
private final double minY;
/** maximum Y of this geometry's bounding box area */
private final double maxY;
/** minimum X of this geometry's bounding box area */
private final double minX;
/** maximum X of this geometry's bounding box area */
private final double maxX;
/** tree of holes, or null */
protected final Component2D holes;
/** Edges of the polygon represented as a 2-d interval tree. */
final EdgeTree tree;

View File

@ -36,10 +36,13 @@ import static org.apache.lucene.util.SloppyMath.cos;
public class Rectangle extends LatLonGeometry {
/** minimum latitude value (in degrees) */
public final double minLat;
/** minimum longitude value (in degrees) */
public final double minLon;
/** maximum latitude value (in degrees) */
public final double maxLat;
/** maximum longitude value (in degrees) */
public final double maxLon;

Some files were not shown because too many files have changed in this diff.