mirror of https://github.com/apache/lucene.git
GITHUB#12655: gradle tidy after google java format update for jdk 21 and regen
* tidy whitespace changes from googleJavaFormat upgrade
* generateForUtil fixed and regened (https://bugs.python.org/issue39350)
* generateAntlr
* generateClassicTokenizer
* generateWikipediaTokenizer
This commit is contained in:
parent 2c42b8941a
commit de3b294be4
@@ -1,5 +1,5 @@
 {
   "gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
-  "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "381a9627fd7da6402216e3279cf81a09af222aaf",
+  "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "1f7a446f3483326385eef257cea8366c27da0850",
   "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex": "f52109bb7d5701979fde90aeeeda726246a8d5fd"
 }
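Each entry in these checksum files pairs a generated file with a 40-hex-char digest (SHA-1 sized); the regeneration tasks use them to spot stale output, and this hunk swaps in the digest of the regenerated ClassicTokenizerImpl.java. A minimal sketch of that kind of check, in Python rather than the actual gradle build logic (the helper name is illustrative):

```python
import hashlib
import json
import pathlib

def stale_entries(checksum_file):
    """Yield (path, recorded, actual) for files whose digest no longer matches."""
    recorded = json.loads(pathlib.Path(checksum_file).read_text())
    for path, expected in recorded.items():
        actual = hashlib.sha1(pathlib.Path(path).read_bytes()).hexdigest()
        if actual != expected:
            yield path, expected, actual
```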
@@ -1,5 +1,5 @@
 {
   "gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
-  "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "d36e38342f984050b3a314f153b7a001a2d2be82",
+  "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "ac298e08bc5b96202efca0c01f9f0376fda976bd",
   "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex": "0b8c7774b98e8237702013e82c352d4711509bd0"
 }
@@ -53,15 +53,19 @@ public final class CJKBigramFilter extends TokenFilter {
  // configuration
  /** bigram flag for Han Ideographs */
  public static final int HAN = 1;

  /** bigram flag for Hiragana */
  public static final int HIRAGANA = 2;

  /** bigram flag for Katakana */
  public static final int KATAKANA = 4;

  /** bigram flag for Hangul */
  public static final int HANGUL = 8;

  /** when we emit a bigram, it's then marked as this type */
  public static final String DOUBLE_TYPE = "<DOUBLE>";

  /** when we emit a unigram, it's then marked as this type */
  public static final String SINGLE_TYPE = "<SINGLE>";

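HAN, HIRAGANA, KATAKANA and HANGUL are powers of two, so callers OR them together into the single int flags argument of the filter's constructors. A quick sketch of the bit arithmetic (plain Python, just to illustrate; the real API is the Java constructor):

```python
# Same values as the constants in the hunk above.
HAN, HIRAGANA, KATAKANA, HANGUL = 1, 2, 4, 8

flags = HAN | HIRAGANA         # request bigrams for Han and Hiragana only
assert flags & HAN             # Han is enabled
assert not (flags & KATAKANA)  # Katakana is not
```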
@@ -297,8 +297,10 @@ class ClassicTokenizerImpl {

  /** Error code for "Unknown internal scanner error". */
  private static final int ZZ_UNKNOWN_ERROR = 0;

  /** Error code for "could not match input". */
  private static final int ZZ_NO_MATCH = 1;

  /** Error code for "pushback value was too large". */
  private static final int ZZ_PUSHBACK_2BIG = 2;

@@ -15,6 +15,7 @@
 * limitations under the License.
 */
package org.apache.lucene.analysis.de;

// This file is encoded in UTF-8

import java.io.IOException;

@@ -38,22 +38,31 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {

  /** Alpha/numeric token type */
  public static final int ALPHANUM = 0;

  /** Numeric token type */
  public static final int NUM = 1;

  /** Southeast Asian token type */
  public static final int SOUTHEAST_ASIAN = 2;

  /** Ideographic token type */
  public static final int IDEOGRAPHIC = 3;

  /** Hiragana token type */
  public static final int HIRAGANA = 4;

  /** Katakana token type */
  public static final int KATAKANA = 5;

  /** Hangul token type */
  public static final int HANGUL = 6;

  /** URL token type */
  public static final int URL = 7;

  /** Email token type */
  public static final int EMAIL = 8;

  /** Emoji token type. */
  public static final int EMOJI = 9;

@@ -42,6 +42,7 @@ package org.apache.lucene.analysis.en;
/** A list of words used by Kstem */
class KStemData1 {
  private KStemData1() {}

  // KStemData1 ... KStemData8 are created from "head_word_list.txt"
  static String[] data = {
    "aback", "abacus", "abandon", "abandoned", "abase",

@@ -56,6 +56,7 @@ abstract class WordStorage {
  private static final int MAX_STORED_LENGTH = SUGGESTIBLE_MASK - 1;
  private final int maxEntryLength;
  private final boolean hasCustomMorphData;

  /**
   * A map from word's hash (modulo array's length) into an int containing:
   *

@@ -40,6 +40,7 @@ public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
  public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount) {
    this(delegate, maxTokenCount, false);
  }

  /**
   * Build an analyzer that limits the maximum number of tokens per field.
   *

@@ -151,6 +151,7 @@ public final class StemmerOverrideFilter extends TokenFilter {
      return matchOutput;
    }
  }

  /** This builder builds an {@link FST} for the {@link StemmerOverrideFilter} */
  public static class Builder {
    private final BytesRefHash hash = new BytesRefHash();

@@ -47,6 +47,7 @@ public final class TypeAsSynonymFilter extends TokenFilter {
  public TypeAsSynonymFilter(TokenStream input) {
    this(input, null, null, ~0);
  }

  /**
   * @param input input tokenstream
   * @param prefix Prepend this string to every token type emitted as token text. If null, nothing

@@ -45,11 +45,13 @@ public final class WordDelimiterIterator {

  /** start position of text, excluding leading delimiters */
  int startBounds;

  /** end position of text, excluding trailing delimiters */
  int endBounds;

  /** Beginning of subword */
  int current;

  /** End of subword */
  int end;

@@ -63,6 +63,7 @@ import static org.apache.lucene.analysis.util.StemmerUtil.*;
class NorwegianLightStemmer {
  /** Constant to remove Bokmål-specific endings */
  static final int BOKMAAL = 1;

  /** Constant to remove Nynorsk-specific endings */
  static final int NYNORSK = 2;

@@ -43,6 +43,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {

  /** File containing default Romanian stopwords. */
  public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /** The comment character in the stopwords file. All lines prefixed with this will be ignored. */
  private static final String STOPWORDS_COMMENT = "#";

@@ -48,10 +48,13 @@ import org.apache.lucene.util.fst.Util;
public class SynonymMap {
  /** for multiword support, you must separate words with this separator */
  public static final char WORD_SEPARATOR = 0;

  /** map<input word, list<ord>> */
  public final FST<BytesRef> fst;

  /** map<ord, outputword> */
  public final BytesRefHash words;

  /** maxHorizontalContext: maximum context we need on the tokenstream */
  public final int maxHorizontalContext;

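WORD_SEPARATOR is char 0, used to join the individual words of a multi-word synonym into one FST input key. A sketch of that joining convention (illustrative Python, not the Java builder code):

```python
WORD_SEPARATOR = "\u0000"  # same role as SynonymMap.WORD_SEPARATOR (char 0)

# A multi-word input like "new york" becomes a single key in the map.
key = WORD_SEPARATOR.join(["new", "york"])
assert key.split(WORD_SEPARATOR) == ["new", "york"]
```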
@@ -22,6 +22,7 @@ import org.apache.lucene.util.BytesRef;
public class TermAndBoost {
  /** the term */
  public final BytesRef term;

  /** the boost */
  public final float boost;

@@ -39,6 +39,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {

  /** File containing default Thai stopwords. */
  public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /** The comment character in the stopwords file. All lines prefixed with this will be ignored. */
  private static final String STOPWORDS_COMMENT = "#";

@@ -42,6 +42,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {

  /** File containing default Turkish stopwords. */
  public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /** The comment character in the stopwords file. All lines prefixed with this will be ignored. */
  private static final String STOPWORDS_COMMENT = "#";

@@ -38,10 +38,13 @@ import org.apache.lucene.util.AttributeFactory;
public abstract class SegmentingTokenizerBase extends Tokenizer {
  protected static final int BUFFERMAX = 1024;
  protected final char[] buffer = new char[BUFFERMAX];

  /** true length of text in the buffer */
  private int length = 0;

  /** length in buffer that can be evaluated safely, up to a safe end point */
  private int usableLength = 0;

  /** accumulated offset of previous buffers for this reader, for offsetAtt */
  protected int offset = 0;

@@ -97,18 +97,22 @@ public final class WikipediaTokenizer extends Tokenizer {

  /** Only output tokens */
  public static final int TOKENS_ONLY = 0;

  /**
   * Only output untokenized tokens, which are tokens that would normally be split into several
   * tokens
   */
  public static final int UNTOKENIZED_ONLY = 1;

  /** Output the both the untokenized token and the splits */
  public static final int BOTH = 2;

  /**
   * This flag is used to indicate that the produced "Token" would, if {@link #TOKENS_ONLY} was
   * used, produce multiple tokens.
   */
  public static final int UNTOKENIZED_TOKEN_FLAG = 1;

  /** A private instance of the JFlex-constructed scanner */
  private final WikipediaTokenizerImpl scanner;

@@ -402,8 +402,10 @@ class WikipediaTokenizerImpl {

  /** Error code for "Unknown internal scanner error". */
  private static final int ZZ_UNKNOWN_ERROR = 0;

  /** Error code for "could not match input". */
  private static final int ZZ_NO_MATCH = 1;

  /** Error code for "pushback value was too large". */
  private static final int ZZ_PUSHBACK_2BIG = 2;

@@ -33,6 +33,7 @@ public class TestBengaliAnalyzer extends BaseTokenStreamTestCase {
    checkOneTerm(a, "বারী", "বার");
    a.close();
  }

  /** test Digits */
  public void testDigits() throws Exception {
    BengaliAnalyzer a = new BengaliAnalyzer();

@@ -611,6 +611,7 @@ public class TestFlattenGraphFilter extends BaseTokenStreamTestCase {
        new int[] {1, 1, 3, 1, 2, 1, 1, 1},
        7);
  }

  // This graph can create a disconnected input node that is farther ahead in the output than its
  // subsequent input node.
  // Exceptions: Free too early or dropped tokens.

@@ -32,6 +32,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    assumeTrue(
        "JRE does not support Thai dictionary-based BreakIterator", ThaiTokenizer.DBBI_AVAILABLE);
  }

  /*
   * testcase for offsets
   */

@@ -41,19 +41,25 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
  /** Token type for words containing ideographic characters */
  public static final String WORD_IDEO =
      StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];

  /** Token type for words containing Japanese hiragana */
  public static final String WORD_HIRAGANA =
      StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];

  /** Token type for words containing Japanese katakana */
  public static final String WORD_KATAKANA =
      StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];

  /** Token type for words containing Korean hangul */
  public static final String WORD_HANGUL = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];

  /** Token type for words that contain letters */
  public static final String WORD_LETTER =
      StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];

  /** Token type for words that appear to be numbers */
  public static final String WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];

  /** Token type for words that appear to be emoji sequences */
  public static final String WORD_EMOJI = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI];

@@ -40,10 +40,13 @@ import org.apache.lucene.util.AttributeFactory;
public final class ICUTokenizer extends Tokenizer {
  private static final int IOBUFFER = 4096;
  private final char[] buffer = new char[IOBUFFER];

  /** true length of text in the buffer */
  private int length = 0;

  /** length in buffer that can be evaluated safely, up to a safe end point */
  private int usableLength = 0;

  /** accumulated offset of previous buffers for this reader, for offsetAtt */
  private int offset = 0;

@@ -29,10 +29,13 @@ public abstract class ICUTokenizerConfig {

  /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
  public ICUTokenizerConfig() {}

  /** Return a breakiterator capable of processing a given script. */
  public abstract RuleBasedBreakIterator getBreakIterator(int script);

  /** Return a token type value for a given script and BreakIterator rule status. */
  public abstract String getType(int script, int ruleStatus);

  /** true if Han, Hiragana, and Katakana scripts should all be returned as Japanese */
  public abstract boolean combineCJ();
}

@@ -31,18 +31,21 @@ public interface ScriptAttribute extends Attribute {
   * @return numeric code
   */
  public int getCode();

  /**
   * Set the numeric code for this script value. This is the constant value from {@link UScript}.
   *
   * @param code numeric code
   */
  public void setCode(int code);

  /**
   * Get the full name.
   *
   * @return UTR #24 full name.
   */
  public String getName();

  /**
   * Get the abbreviated name.
   *

@@ -20,14 +20,19 @@ package org.apache.lucene.analysis.ja.dict;
final class DictionaryConstants {
  /** Codec header of the dictionary file. */
  public static final String DICT_HEADER = "kuromoji_dict";

  /** Codec header of the dictionary mapping file. */
  public static final String TARGETMAP_HEADER = "kuromoji_dict_map";

  /** Codec header of the POS dictionary file. */
  public static final String POSDICT_HEADER = "kuromoji_dict_pos";

  /** Codec header of the connection costs. */
  public static final String CONN_COSTS_HEADER = "kuromoji_cc";

  /** Codec header of the character definition file. */
  public static final String CHARDEF_HEADER = "kuromoji_cd";

  /** Codec version of the binary dictionary */
  public static final int VERSION = 1;
}

@@ -205,10 +205,12 @@ class TokenInfoMorphData implements JaMorphData {

  /** flag that the entry has baseform data. otherwise it's not inflected (same as surface form) */
  public static final int HAS_BASEFORM = 1;

  /**
   * flag that the entry has reading data. otherwise reading is surface form converted to katakana
   */
  public static final int HAS_READING = 2;

  /** flag that the entry has pronunciation data. otherwise pronunciation is the reading */
  public static final int HAS_PRONUNCIATION = 4;
}

@@ -20,14 +20,19 @@ package org.apache.lucene.analysis.ko.dict;
final class DictionaryConstants {
  /** Codec header of the dictionary file. */
  public static final String DICT_HEADER = "ko_dict";

  /** Codec header of the dictionary mapping file. */
  public static final String TARGETMAP_HEADER = "ko_dict_map";

  /** Codec header of the POS dictionary file. */
  public static final String POSDICT_HEADER = "ko_dict_pos";

  /** Codec header of the connection costs file. */
  public static final String CONN_COSTS_HEADER = "ko_cc";

  /** Codec header of the character definition file */
  public static final String CHARDEF_HEADER = "ko_cd";

  /** Codec version of the binary dictionary */
  public static final int VERSION = 1;
}

@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public final class DaitchMokotoffSoundexFilter extends TokenFilter {
  /** true if encoded tokens should be added as synonyms */
  protected boolean inject = true;

  /** phonetic encoder */
  protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();

@@ -41,8 +41,10 @@ public class DoubleMetaphoneFilterFactory extends TokenFilterFactory {

  /** parameter name: true if encoded tokens should be added as synonyms */
  public static final String INJECT = "inject";

  /** parameter name: restricts the length of the phonetic code */
  public static final String MAX_CODE_LENGTH = "maxCodeLength";

  /** default maxCodeLength if not specified */
  public static final int DEFAULT_MAX_CODE_LENGTH = 4;

@@ -33,8 +33,10 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public final class PhoneticFilter extends TokenFilter {
  /** true if encoded tokens should be added as synonyms */
  protected boolean inject = true;

  /** phonetic encoder */
  protected Encoder encoder = null;

  /** captured state, non-null when <code>inject=true</code> and a token is buffered */
  protected State save = null;

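With inject=true the filter keeps the original token and emits the phonetic code as an extra token at the same position (which is what the buffered State above supports), so downstream it behaves like a synonym. A toy sketch of that stream shape, in Python with a stand-in encoder; nothing here is the actual Lucene or commons-codec API:

```python
def inject_phonetic(terms, encode):
    """Yield (term, position_increment): original advances by 1, code stacks at increment 0."""
    for term in terms:
        yield term, 1
        code = encode(term)
        if code and code != term:
            yield code, 0  # injected "synonym" shares the original's position

# Stand-in encoder; a real one would be e.g. a Metaphone implementation.
print(list(inject_phonetic(["smith"], lambda t: t[:2].upper())))
# [('smith', 1), ('SM', 0)]
```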
@@ -75,8 +75,10 @@ public class PhoneticFilterFactory extends TokenFilterFactory implements Resourc

  /** parameter name: either a short name or a full class name */
  public static final String ENCODER = "encoder";

  /** parameter name: true if encoded tokens should be added as synonyms */
  public static final String INJECT = "inject"; // boolean

  /** parameter name: restricts the length of the phonetic code */
  public static final String MAX_CODE_LENGTH = "maxCodeLength";

@@ -58,10 +58,13 @@ package org.egothor.stemmer;
class Cell {
  /** next row id in this way */
  int ref = -1;

  /** command of the cell */
  int cmd = -1;

  /** how many cmd-s was in subtrie before pack() */
  int cnt = 0;

  /** how many chars would be discarded from input key in this way */
  int skip = 0;

@@ -1,4 +1,4 @@
 {
-  "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/ForUtil.java": "e91aafa414018b34a39c8f0947ff58c1f1dde78d",
-  "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/gen_ForUtil.py": "7be3f1e17c9055d68a8ad6b0d6321481dcc4d711"
+  "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/ForUtil.java": "c3bff5677f7d98fbb362018a4c1dbad4d670610f",
+  "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/gen_ForUtil.py": "e3c0b1d1d9fcc9f04ae8870e40aab44133321272"
 }
@@ -50,6 +50,7 @@ public final class FieldReader extends Terms {
  final Lucene40BlockTreeTermsReader parent;

  final FST<BytesRef> index;

  // private boolean DEBUG;

  FieldReader(

@@ -64,6 +64,7 @@ public final class Lucene50CompoundFormat extends CompoundFormat {

  /** Extension of compound file */
  static final String DATA_EXTENSION = "cfs";

  /** Extension of compound file entries */
  static final String ENTRIES_EXTENSION = "cfe";

@@ -405,20 +405,25 @@ public class Lucene50PostingsFormat extends PostingsFormat {
  public static final class IntBlockTermState extends BlockTermState {
    /** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
    public long docStartFP;

    /** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
    public long posStartFP;

    /** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
    public long payStartFP;

    /**
     * file offset for the start of the skip list, relative to docStartFP, if there are more than
     * {@link #BLOCK_SIZE} docs; otherwise -1
     */
    public long skipOffset;

    /**
     * file offset for the last position in the last block, if there are more than {@link
     * #BLOCK_SIZE} positions; otherwise -1
     */
    public long lastPosBlockOffset;

    /**
     * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
     * totalTermFreq in this case.

@@ -45,14 +45,19 @@ public class Lucene50CompressingStoredFieldsFormat extends StoredFieldsFormat {

  /** format name */
  protected final String formatName;

  /** segment suffix */
  protected final String segmentSuffix;

  /** compression mode */
  protected final CompressionMode compressionMode;

  /** chunk size */
  protected final int chunkSize;

  /** max docs per chunk */
  protected final int maxDocsPerChunk;

  /** block shift */
  protected final int blockShift;

@@ -54,10 +54,13 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea

  /** Extension of stored fields file */
  public static final String FIELDS_EXTENSION = "fdt";

  /** Extension of stored fields index */
  public static final String INDEX_EXTENSION = "fdx";

  /** Extension of stored fields meta */
  public static final String META_EXTENSION = "fdm";

  /** Codec name for the index. */
  public static final String INDEX_CODEC_NAME = "Lucene85FieldsIndex";

@@ -73,8 +76,10 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea

  static final int VERSION_START = 1;
  static final int VERSION_OFFHEAP_INDEX = 2;

  /** Version where all metadata were moved to the meta file. */
  static final int VERSION_META = 3;

  /**
   * Version where numChunks is explicitly recorded in meta file and a dirty chunk bit is recorded
   * in each chunk

@@ -38,14 +38,19 @@ public class Lucene50CompressingTermVectorsFormat extends TermVectorsFormat {

  /** format name */
  protected final String formatName;

  /** segment suffix */
  protected final String segmentSuffix;

  /** compression mode */
  protected final CompressionMode compressionMode;

  /** chunk size */
  protected final int chunkSize;

  /** block size */
  protected final int blockSize;

  /** max docs per chunk */
  protected final int maxDocsPerChunk;

@@ -64,8 +64,10 @@ public final class Lucene50CompressingTermVectorsReader extends TermVectorsReade

  static final int VERSION_START = 1;
  static final int VERSION_OFFHEAP_INDEX = 2;

  /** Version where all metadata were moved to the meta file. */
  static final int VERSION_META = 3;

  /** Version where numChunks is explicitly recorded in meta file */
  static final int VERSION_NUM_CHUNKS = 4;

@@ -368,6 +368,7 @@ final class ForUtil {
      MASKS32[i] = mask32(i);
    }
  }

  // mark values in array as final longs to avoid the cost of reading array, arrays should only be
  // used when the idx is a variable
  private static final long MASK8_1 = MASKS8[1];

@@ -405,20 +405,25 @@ public class Lucene84PostingsFormat extends PostingsFormat {
  public static final class IntBlockTermState extends BlockTermState {
    /** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
    public long docStartFP;

    /** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
    public long posStartFP;

    /** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
    public long payStartFP;

    /**
     * file offset for the start of the skip list, relative to docStartFP, if there are more than
     * {@link ForUtil#BLOCK_SIZE} docs; otherwise -1
     */
    public long skipOffset;

    /**
     * file offset for the last position in the last block, if there are more than {@link
     * ForUtil#BLOCK_SIZE} positions; otherwise -1
     */
    public long lastPosBlockOffset;

    /**
     * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
     * totalTermFreq in this case.
@@ -15,7 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from fractions import gcd
+from math import gcd

 """Code generation for ForUtil.java"""

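This gen_ForUtil.py change is the bugs.python.org/issue39350 fix named in the commit message: fractions.gcd was removed in Python 3.9, and math.gcd (available since Python 3.5) is a drop-in replacement for the script's non-negative integer use:

```python
from math import gcd  # fractions.gcd is gone as of Python 3.9 (bpo-39350)

# Identical results for the non-negative ints the generator works with.
assert gcd(8, 64) == 8
assert gcd(3, 64) == 1
```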
@@ -64,6 +64,7 @@ public class Lucene87Codec extends Codec {

  /** compression mode for stored fields */
  protected final Lucene87StoredFieldsFormat.Mode storedMode;

  /** compression mode for doc value fields */
  protected final Lucene80DocValuesFormat.Mode dvMode;

@@ -37,6 +37,7 @@ public final class Lucene90HnswGraphBuilder {

  /** Default random seed for level generation * */
  private static final long DEFAULT_RAND_SEED = 42;

  /** A name for the HNSW component for the info-stream * */
  public static final String HNSW_COMPONENT = "HNSW";

@@ -79,6 +79,7 @@ public class Lucene90HnswVectorsFormat extends KnnVectorsFormat {

  /** Default number of maximum connections per node */
  public static final int DEFAULT_MAX_CONN = 16;

  /**
   * Default number of the size of the queue maintained while searching and the number of random
   * entry points to sample during a graph construction.

@@ -96,6 +96,7 @@ public class Lucene91HnswVectorsFormat extends KnnVectorsFormat {

  /** Default number of maximum connections per node */
  public static final int DEFAULT_MAX_CONN = 16;

  /**
   * Default number of the size of the queue maintained while searching during a graph construction.
   */

@@ -110,6 +110,7 @@ public class Lucene94HnswVectorsFormat extends KnnVectorsFormat {

  /** Default number of maximum connections per node */
  public static final int DEFAULT_MAX_CONN = 16;

  /**
   * Default number of the size of the queue maintained while searching during a graph construction.
   */

@@ -32,6 +32,7 @@ public final class LegacyDirectMonotonicWriter {

  /** min block shift */
  public static final int MIN_BLOCK_SHIFT = 2;

  /** max block shift */
  public static final int MAX_BLOCK_SHIFT = 22;

@@ -31,12 +31,16 @@ class LegacyPacked64 extends PackedInts.Reader {

  /** Values are stores contiguously in the blocks array. */
  private final long[] blocks;

  /** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
  private final long maskRight;

  /** Optimization: Saves one lookup in {@link #get(int)}. */
  private final int bpvMinusBlockSize;

  /** number of values */
  protected final int valueCount;

  /** bits per value. */
  protected final int bitsPerValue;

@@ -68,6 +68,7 @@ public final class Lucene50RWCompoundFormat extends CompoundFormat {

  /** Extension of compound file */
  static final String DATA_EXTENSION = "cfs";

  /** Extension of compound file entries */
  static final String ENTRIES_EXTENSION = "cfe";

@@ -329,10 +329,13 @@ public class BKDWriter60 implements Closeable {
    private final int packedBytesLength;
    private final MergeState.DocMap docMap;
    private final MergeIntersectsVisitor mergeIntersectsVisitor;

    /** Which doc in this block we are up to */
    private int docBlockUpto;

    /** Current doc ID */
    public int docID;

    /** Current packed value */
    public final byte[] packedValue;

@@ -42,6 +42,7 @@ public final class Lucene91HnswGraphBuilder {

  /** Default random seed for level generation * */
  private static final long DEFAULT_RAND_SEED = 42;

  /** A name for the HNSW component for the info-stream * */
  public static final String HNSW_COMPONENT = "HNSW";

@@ -53,6 +53,7 @@ public class LineDocSource extends ContentSource {
  /** Reader of a single input line into {@link DocData}. */
  public abstract static class LineParser {
    protected final String[] header;

    /**
     * Construct with the header
     *

@@ -61,6 +62,7 @@ public class LineDocSource extends ContentSource {
    public LineParser(String[] header) {
      this.header = header;
    }

    /** parse an input line and fill doc data appropriately */
    public abstract void parseLine(DocData docData, String line);
  }

@@ -46,10 +46,12 @@ public class QualityStats {
      this.rank = rank;
      this.recall = recall;
    }

    /** Returns the rank: where on the list of returned docs this relevant doc appeared. */
    public int getRank() {
      return rank;
    }

    /** Returns the recall: how many relevant docs were returned up to this point, inclusive. */
    public double getRecall() {
      return recall;
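Per the javadoc above, "recall" here is a running count of relevant docs seen so far, not a fraction. A small illustration of the (rank, recall) pairs these getters would report (hypothetical doc ids):

```python
relevant = {"d2", "d5"}
ranked = ["d1", "d2", "d3", "d5"]

seen = 0
stats = []
for rank, doc in enumerate(ranked, start=1):
    if doc in relevant:
        seen += 1
        stats.append((rank, seen))  # (getRank(), getRecall()) pairs
assert stats == [(2, 1), (4, 2)]
```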
@@ -74,6 +74,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
     * indexed
     */
    public abstract boolean isIndexTerm(BytesRef term, TermStats stats);

    /** Called when a new field is started. */
    public abstract void newField(FieldInfo fieldInfo);
  }

@@ -43,6 +43,7 @@ final class OrdsFieldReader extends Terms {
  final OrdsBlockTreeTermsReader parent;

  final FST<Output> index;

  // private boolean DEBUG;

  OrdsFieldReader(

@@ -62,8 +62,10 @@ public class BlockReader extends BaseTermsEnum implements Accountable {

  protected BlockHeader.Serializer blockHeaderReader;
  protected BlockLine.Serializer blockLineReader;

  /** In-memory read buffer for the current block. */
  protected ByteArrayDataInput blockReadBuffer;

  /**
   * In-memory read buffer for the details region of the current block. It shares the same byte
   * array as {@link #blockReadBuffer}, with a different position.

@@ -74,6 +76,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {

  /** {@link IndexDictionary.Browser} supplier for lazy loading. */
  protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier;

  /** Holds the {@link IndexDictionary.Browser} once loaded. */
  protected IndexDictionary.Browser dictionaryBrowser;

@@ -82,19 +85,25 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
   * UniformSplitPostingsFormat#TERMS_BLOCKS_EXTENSION block file}.
   */
  protected long blockStartFP;

  /** Current block header. */
  protected BlockHeader blockHeader;

  /** Current block line. */
  protected BlockLine blockLine;

  /** Current block line details. */
  protected BlockTermState termState;

  /**
   * Offset of the start of the first line of the current block (just after the header), relative to
   * the block start.
   */
  protected int blockFirstLineStart;

  /** Current line index in the block. */
  protected int lineIndexInBlock;

  /**
   * Whether the current {@link TermState} has been forced with a call to {@link
   * #seekExact(BytesRef, TermState)}.

@@ -102,6 +111,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
   * @see #forcedTerm
   */
  protected boolean termStateForced;

  /**
   * Set when {@link #seekExact(BytesRef, TermState)} is called.
   *

@@ -81,15 +81,19 @@ public class IntersectBlockReader extends BlockReader {

  /** Set this when our current mode is seeking to this term. Set to null after. */
  protected BytesRef seekTerm;

  /** Number of bytes accepted by the automaton when validating the current term. */
  protected int numMatchedBytes;

  /**
   * Automaton states reached when validating the current term, from 0 to {@link #numMatchedBytes} -
   * 1.
   */
  protected int[] states;

  /** Block iteration order determined when scanning the terms in the current block. */
  protected BlockIteration blockIteration;

  /**
   * Counter of the number of consecutively rejected terms. Depending on {@link
   * #NUM_CONSECUTIVELY_REJECTED_TERMS_THRESHOLD}, this may trigger a jump to a block away.

@@ -39,6 +39,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {

  /** Extension of the file containing the terms dictionary (the FST "trie"). */
  public static final String TERMS_DICTIONARY_EXTENSION = "ustd";

  /** Extension of the file containing the terms blocks for each field and the fields metadata. */
  public static final String TERMS_BLOCKS_EXTENSION = "ustb";

@@ -100,11 +100,13 @@ public class UniformSplitTermsWriter extends FieldsConsumer {

  /** Default value for the target block size (number of terms per block). */
  public static final int DEFAULT_TARGET_NUM_BLOCK_LINES = 32;

  /**
   * Default value for the maximum allowed delta variation of the block size (delta of the number of
   * terms per block). The block size will be [target block size]+-[allowed delta].
   */
  public static final int DEFAULT_DELTA_NUM_LINES = (int) (DEFAULT_TARGET_NUM_BLOCK_LINES * 0.1);

  /** Upper limit of the block size (maximum number of terms per block). */
  protected static final int MAX_NUM_BLOCK_LINES = 1_000;

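With the defaults above, the delta works out to (int) (32 * 0.1) = 3, so blocks hold 29 to 35 terms. The arithmetic as a quick check:

```python
DEFAULT_TARGET_NUM_BLOCK_LINES = 32
DEFAULT_DELTA_NUM_LINES = int(DEFAULT_TARGET_NUM_BLOCK_LINES * 0.1)
assert DEFAULT_DELTA_NUM_LINES == 3

# Block size range: target +- delta.
assert (DEFAULT_TARGET_NUM_BLOCK_LINES - DEFAULT_DELTA_NUM_LINES,
        DEFAULT_TARGET_NUM_BLOCK_LINES + DEFAULT_DELTA_NUM_LINES) == (29, 35)
```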
@@ -44,6 +44,7 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {

  /** Extension of the file containing the terms dictionary (the FST "trie"). */
  public static final String TERMS_DICTIONARY_EXTENSION = "stustd";

  /** Extension of the file containing the terms blocks for each field and the fields metadata. */
  public static final String TERMS_BLOCKS_EXTENSION = "stustb";

@@ -1,4 +1,4 @@
 {
-  "lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java": "f2091e2b7284b70c740052a9b0ee389e67eb48a9",
-  "lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py": "7a137c58f88d68247be4368122780fa9cc8dce3e"
+  "lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java": "a67f47001bad680d75c57a864a1552a78fc85600",
+  "lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py": "3219c6d0289cc64d80dbdbc5db4bd05b8774e797"
 }
@@ -57,6 +57,7 @@ module org.apache.lucene.core {
  // Only export internal packages to the test framework.
  exports org.apache.lucene.internal.tests to
      org.apache.lucene.test_framework;

  // Open certain packages for the test framework (ram usage tester).
  opens org.apache.lucene.document to
      org.apache.lucene.test_framework;

@@ -59,6 +59,7 @@ public abstract class AbstractAnalysisFactory {

  /** the luceneVersion arg */
  protected final Version luceneMatchVersion;

  /** whether the luceneMatchVersion arg is explicitly specified in the serialized schema */
  private boolean isExplicitLuceneMatchVersion = false;

@@ -345,6 +345,7 @@ public abstract class Analyzer implements Closeable {
  public static final class TokenStreamComponents {
    /** Original source of the tokens. */
    protected final Consumer<Reader> source;

    /**
     * Sink tokenstream, such as the outer tokenfilter decorating the chain. This can be the source
     * if there are no filters.

@@ -41,18 +41,25 @@ public final class StandardTokenizer extends Tokenizer {

  /** Alpha/numeric token type */
  public static final int ALPHANUM = 0;

  /** Numeric token type */
  public static final int NUM = 1;

  /** Southeast Asian token type */
  public static final int SOUTHEAST_ASIAN = 2;

  /** Ideographic token type */
  public static final int IDEOGRAPHIC = 3;

  /** Hiragana token type */
  public static final int HIRAGANA = 4;

  /** Katakana token type */
  public static final int KATAKANA = 5;

  /** Hangul token type */
  public static final int HANGUL = 6;

  /** Emoji token type. */
  public static final int EMOJI = 7;

@@ -28,11 +28,13 @@ import org.apache.lucene.index.TermState;
public class BlockTermState extends OrdTermState {
  /** how many docs have this term */
  public int docFreq;

  /** total number of occurrences of this term */
  public long totalTermFreq;

  /** the term's ord in the current block */
  public int termBlockOrd;

  /** fp into the terms dict primary file (_X.tim) that holds this term */
  // TODO: update BTR to nuke this
  public long blockFilePointer;

@@ -44,6 +44,7 @@ public final class CodecUtil {

  /** Constant to identify the start of a codec header. */
  public static final int CODEC_MAGIC = 0x3fd76c17;

  /** Constant to identify the start of a codec footer. */
  public static final int FOOTER_MAGIC = ~CODEC_MAGIC;

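FOOTER_MAGIC is the bitwise complement of CODEC_MAGIC, so the two markers can never be confused. Viewed as an unsigned 32-bit value it comes out as follows (a quick check in Python, not Lucene code):

```python
CODEC_MAGIC = 0x3FD76C17
FOOTER_MAGIC = ~CODEC_MAGIC & 0xFFFFFFFF  # Java's int ~ is two's complement
assert FOOTER_MAGIC == 0xC02893E8
```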
@@ -269,6 +269,7 @@ final class ForUtil {
      MASKS32[i] = mask32(i);
    }
  }

  // mark values in array as final longs to avoid the cost of reading array, arrays should only be
  // used when the idx is a variable
  private static final long MASK8_1 = MASKS8[1];

@@ -69,6 +69,7 @@ public final class Lucene90CompoundFormat extends CompoundFormat {

  /** Extension of compound file */
  static final String DATA_EXTENSION = "cfs";

  /** Extension of compound file entries */
  static final String ENTRIES_EXTENSION = "cfe";

@@ -443,15 +443,19 @@ public final class Lucene90PostingsFormat extends PostingsFormat {
  public static final class IntBlockTermState extends BlockTermState {
    /** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
    public long docStartFP;

    /** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
    public long posStartFP;

    /** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
    public long payStartFP;

    /**
     * file offset for the start of the skip list, relative to docStartFP, if there are more than
     * {@link ForUtil#BLOCK_SIZE} docs; otherwise -1
     */
    public long skipOffset;

    /**
     * file offset for the last position in the last block, if there are more than {@link
     * ForUtil#BLOCK_SIZE} positions; otherwise -1

@@ -463,6 +467,7 @@ public final class Lucene90PostingsFormat extends PostingsFormat {
     * positions to skip for that block, without telling us how many positions it has skipped.
     */
    public long lastPosBlockOffset;

    /**
     * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
     * totalTermFreq in this case.

@@ -53,6 +53,7 @@ public final class FieldReader extends Terms {
  final Lucene90BlockTreeTermsReader parent;

  final FST<BytesRef> index;

  // private boolean DEBUG;

  FieldReader(

@@ -57,10 +57,13 @@ public final class Lucene90CompressingStoredFieldsWriter extends StoredFieldsWri

  /** Extension of stored fields file */
  public static final String FIELDS_EXTENSION = "fdt";

  /** Extension of stored fields index */
  public static final String INDEX_EXTENSION = "fdx";

  /** Extension of stored fields meta */
  public static final String META_EXTENSION = "fdm";

  /** Codec name for the index. */
  public static final String INDEX_CODEC_NAME = "Lucene90FieldsIndex";

@@ -15,7 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from fractions import gcd
+from math import gcd

 """Code generation for ForUtil.java"""

@@ -115,6 +115,7 @@ public final class Lucene95HnswVectorsFormat extends KnnVectorsFormat {
   * numbers here will use an inordinate amount of heap
   */
  private static final int MAXIMUM_MAX_CONN = 512;

  /** Default number of maximum connections per node */
  public static final int DEFAULT_MAX_CONN = 16;

@@ -124,6 +125,7 @@ public final class Lucene95HnswVectorsFormat extends KnnVectorsFormat {
   * 3200`
   */
  private static final int MAXIMUM_BEAM_WIDTH = 3200;

  /**
   * Default number of the size of the queue maintained while searching during a graph construction.
   */

@@ -83,6 +83,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
  static class FieldsGroup {
    final List<String> fields;
    final int suffix;

    /**
     * Custom SegmentWriteState for this group of fields, with the segmentSuffix uniqueified for
     * this PostingsFormat

@@ -70,6 +70,7 @@ public class InetAddressPoint extends Field {

  /** The minimum value that an ip address can hold. */
  public static final InetAddress MIN_VALUE;

  /** The maximum value that an ip address can hold. */
  public static final InetAddress MAX_VALUE;

@@ -91,6 +91,7 @@ import org.apache.lucene.util.SloppyMath;
public class LatLonPoint extends Field {
  /** LatLonPoint is encoded as integer values so number of bytes is 4 */
  public static final int BYTES = Integer.BYTES;

  /**
   * Type for an indexed LatLonPoint
   *

@@ -35,8 +35,10 @@ final class LongHashSet implements Accountable {
  final int mask;
  final boolean hasMissingValue;
  final int size;

  /** minimum value in the set, or Long.MAX_VALUE for an empty set */
  final long minValue;

  /** maximum value in the set, or Long.MIN_VALUE for an empty set */
  final long maxValue;

@@ -47,17 +47,22 @@ import org.apache.lucene.util.DocIdSetBuilder;
public abstract class RangeFieldQuery extends Query {
  /** field name */
  final String field;

  /**
   * query relation intersects: {@code CELL_CROSSES_QUERY}, contains: {@code CELL_CONTAINS_QUERY},
   * within: {@code CELL_WITHIN_QUERY}
   */
  final QueryType queryType;

  /** number of dimensions - max 4 */
  final int numDims;

  /** ranges encoded as a sortable byte array */
  final byte[] ranges;

  /** number of bytes per dimension */
  final int bytesPerDim;

  /** ByteArrayComparator selected by bytesPerDim */
  final ByteArrayComparator comparator;

@@ -51,12 +51,16 @@ import org.apache.lucene.util.BytesRef;
abstract class ShapeDocValues {
  /** doc value format version; used to support bwc for any encoding changes */
  protected static final byte VERSION = 0;

  /** the binary doc value */
  private final BytesRef data;

  /** the geometry comparator used to check relations */
  protected final ShapeComparator shapeComparator;

  /** the centroid of the shape docvalue */
  protected final Geometry centroid;

  /** the bounding box of the shape docvalue */
  protected final Geometry boundingBox;

@@ -412,24 +412,34 @@ public final class ShapeField {
      /** all coordinates are different */
      TRIANGLE
    }

    /** x coordinate, vertex one */
    public int aX;

    /** y coordinate, vertex one */
    public int aY;

    /** x coordinate, vertex two */
    public int bX;

    /** y coordinate, vertex two */
    public int bY;

    /** x coordinate, vertex three */
    public int cX;

    /** y coordinate, vertex three */
    public int cY;

    /** represent if edge ab belongs to original shape */
    public boolean ab;

    /** represent if edge bc belongs to original shape */
    public boolean bc;

    /** represent if edge ca belongs to original shape */
    public boolean ca;

    /** triangle type */
    public TYPE type;

@@ -59,6 +59,7 @@ import org.apache.lucene.util.FixedBitSet;
abstract class SpatialQuery extends Query {
  /** field name */
  final String field;

  /**
   * query relation disjoint: {@link QueryRelation#DISJOINT}, intersects: {@link
   * QueryRelation#INTERSECTS}, within: {@link QueryRelation#DISJOINT}, contains: {@link

@@ -55,6 +55,7 @@ import org.apache.lucene.util.NumericUtils;
public class XYPointField extends Field {
  /** XYPoint is encoded as integer values so number of bytes is 4 */
  public static final int BYTES = Integer.BYTES;

  /**
   * Type for an indexed XYPoint
   *
@@ -33,8 +33,10 @@ package org.apache.lucene.geo;
public final class Circle extends LatLonGeometry {
  /** Center latitude */
  private final double lat;

  /** Center longitude */
  private final double lon;

  /** radius in meters */
  private final double radiusMeters;

@@ -259,24 +259,34 @@ class Circle2D implements Component2D {

    /** check if the point is within a distance */
    boolean contains(double x, double y);

    /** check if the line is within a distance */
    boolean intersectsLine(double aX, double aY, double bX, double bY);

    /** Relates this calculator to the provided bounding box */
    Relation relate(double minX, double maxX, double minY, double maxY);

    /** check if the bounding box is disjoint with this calculator bounding box */
    boolean disjoint(double minX, double maxX, double minY, double maxY);

    /** check if the bounding box is contains this calculator bounding box */
    boolean within(double minX, double maxX, double minY, double maxY);

    /** get min X of this calculator */
    double getMinX();

    /** get max X of this calculator */
    double getMaxX();

    /** get min Y of this calculator */
    double getMinY();

    /** get max Y of this calculator */
    double getMaxY();

    /** get center X */
    double geX();

    /** get center Y */
    double getY();
  }

@@ -28,20 +28,26 @@ import org.apache.lucene.util.ArrayUtil;
final class ComponentTree implements Component2D {
  /** minimum Y of this geometry's bounding box area */
  private double minY;

  /** maximum Y of this geometry's bounding box area */
  private double maxY;

  /** minimum X of this geometry's bounding box area */
  private double minX;

  /** maximum X of this geometry's bounding box area */
  private double maxX;

  // child components, or null. Note internal nodes might mot have
  // a consistent bounding box. Internal nodes should not be accessed
  // outside if this class.
  private Component2D left;
  private Component2D right;

  /** which dimension was this node split on */
  // TODO: its implicit based on level, but boolean keeps code simple
  private final boolean splitX;

  /** root node of edge tree */
  private final Component2D component;

@@ -34,14 +34,19 @@ final class EdgeTree {
  // X-Y pair (in original order) of the two vertices
  final double y1, y2;
  final double x1, x2;

  /** min Y of this edge */
  final double low;

  /** max Y of this edge or any children */
  double max;

  /** left child edge, or null */
  EdgeTree left;

  /** right child edge, or null */
  EdgeTree right;

  /** helper bytes to signal if a point is on an edge, it is within the edge tree or disjoint */
  private static final byte FALSE = 0x00;

@@ -43,10 +43,13 @@ public final class GeoUtils {

  /** min longitude value in radians */
  public static final double MIN_LON_RADIANS = Math.toRadians(MIN_LON_INCL);

  /** min latitude value in radians */
  public static final double MIN_LAT_RADIANS = Math.toRadians(MIN_LAT_INCL);

  /** max longitude value in radians */
  public static final double MAX_LON_RADIANS = Math.toRadians(MAX_LON_INCL);

  /** max latitude value in radians */
  public static final double MAX_LAT_RADIANS = Math.toRadians(MAX_LAT_INCL);

@@ -33,15 +33,19 @@ import java.util.Arrays;
public class Line extends LatLonGeometry {
  /** array of latitude coordinates */
  private final double[] lats;

  /** array of longitude coordinates */
  private final double[] lons;

  /** minimum latitude of this line's bounding box */
  public final double minLat;

  /** maximum latitude of this line's bounding box */
  public final double maxLat;

  /** minimum longitude of this line's bounding box */
  public final double minLon;

  /** maximum longitude of this line's bounding box */
  public final double maxLon;

@@ -29,12 +29,16 @@ final class Line2D implements Component2D {

  /** minimum Y of this geometry's bounding box area */
  private final double minY;

  /** maximum Y of this geometry's bounding box area */
  private final double maxY;

  /** minimum X of this geometry's bounding box area */
  private final double minX;

  /** maximum X of this geometry's bounding box area */
  private final double maxX;

  /** lines represented as a 2-d interval tree. */
  private final EdgeTree tree;

@@ -33,6 +33,7 @@ public final class Point extends LatLonGeometry {

  /** latitude coordinate */
  private final double lat;

  /** longitude coordinate */
  private final double lon;

@@ -47,12 +47,16 @@ public final class Polygon extends LatLonGeometry {

  /** minimum latitude of this polygon's bounding box area */
  public final double minLat;

  /** maximum latitude of this polygon's bounding box area */
  public final double maxLat;

  /** minimum longitude of this polygon's bounding box area */
  public final double minLon;

  /** maximum longitude of this polygon's bounding box area */
  public final double maxLon;

  /** winding order of the vertices */
  private final WindingOrder windingOrder;

@@ -28,14 +28,19 @@ import org.apache.lucene.index.PointValues.Relation;
final class Polygon2D implements Component2D {
  /** minimum Y of this geometry's bounding box area */
  private final double minY;

  /** maximum Y of this geometry's bounding box area */
  private final double maxY;

  /** minimum X of this geometry's bounding box area */
  private final double minX;

  /** maximum X of this geometry's bounding box area */
  private final double maxX;

  /** tree of holes, or null */
  protected final Component2D holes;

  /** Edges of the polygon represented as a 2-d interval tree. */
  final EdgeTree tree;

@@ -36,10 +36,13 @@ import static org.apache.lucene.util.SloppyMath.cos;
public class Rectangle extends LatLonGeometry {
  /** maximum longitude value (in degrees) */
  public final double minLat;

  /** minimum longitude value (in degrees) */
  public final double minLon;

  /** maximum latitude value (in degrees) */
  public final double maxLat;

  /** minimum latitude value (in degrees) */
  public final double maxLon;

Some files were not shown because too many files have changed in this diff.