mirror of https://github.com/apache/lucene.git
docs: get minimal doclint options passing
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1642250 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
72adebaf7d
commit
bab0b46337
|
@ -232,8 +232,8 @@ public class BrazilianStemmer {
|
|||
/**
|
||||
* 1) Turn to lowercase
|
||||
* 2) Remove accents
|
||||
* 3) ã -> a ; õ -> o
|
||||
* 4) ç -> c
|
||||
* 3) ã -> a ; õ -> o
|
||||
* 4) ç -> c
|
||||
*
|
||||
* @return null or a string transformed
|
||||
*/
|
||||
|
|
|
@ -294,7 +294,7 @@ public final class CJKBigramFilter extends TokenFilter {
|
|||
|
||||
/**
|
||||
* Flushes a bigram token to output from our buffer
|
||||
* This is the normal case, e.g. ABC -> AB BC
|
||||
* This is the normal case, e.g. ABC -> AB BC
|
||||
*/
|
||||
private void flushBigram() {
|
||||
clearAttributes();
|
||||
|
|
|
@ -108,7 +108,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
|
|||
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
|
||||
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
|
||||
* a stem exclusion set is provided via
|
||||
* {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a
|
||||
* {@link SetKeywordMarkerFilter} is added before
|
||||
|
|
|
@ -161,12 +161,12 @@ public class GermanStemmer
|
|||
/**
|
||||
* Do some substitutions for the term to reduce overstemming:
|
||||
*
|
||||
* - Substitute Umlauts with their corresponding vowel: äöü -> aou,
|
||||
* - Substitute Umlauts with their corresponding vowel:{@code äöü -> aou},
|
||||
* "ß" is substituted by "ss"
|
||||
* - Substitute a second char of a pair of equal characters with
|
||||
* an asterisk: ?? -> ?*
|
||||
* an asterisk: {@code ?? -> ?*}
|
||||
* - Substitute some common character combinations with a token:
|
||||
* sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
|
||||
* {@code sch/ch/ei/ie/ig/st -> $/§/%/&/#/!}
|
||||
*/
|
||||
private void substitute( StringBuilder buffer )
|
||||
{
|
||||
|
|
|
@ -248,7 +248,7 @@ public class KStemmer {
|
|||
* word, use the method wordLength, which returns (k+1).
|
||||
*/
|
||||
|
||||
/***
|
||||
/*
|
||||
* private void initializeStemHash() { if (maxCacheSize > 0) cache = new
|
||||
* CharArrayMap<String>(maxCacheSize,false); }
|
||||
***/
|
||||
|
|
|
@ -447,7 +447,7 @@ public class Dictionary {
|
|||
* @param reader BufferedReader to read the content of the rule from
|
||||
* @param conditionPattern {@link String#format(String, Object...)} pattern to be used to generate the condition regex
|
||||
* pattern
|
||||
* @param seenPatterns map from condition -> index of patterns, for deduplication.
|
||||
* @param seenPatterns map from condition -> index of patterns, for deduplication.
|
||||
* @throws IOException Can be thrown while reading the rule
|
||||
*/
|
||||
private void parseAffix(TreeMap<String,List<Integer>> affixes,
|
||||
|
|
|
@ -87,10 +87,10 @@ public class LatvianStemmer {
|
|||
/**
|
||||
* Most cases are handled except for the ambiguous ones:
|
||||
* <ul>
|
||||
* <li> s -> š
|
||||
* <li> t -> š
|
||||
* <li> d -> ž
|
||||
* <li> z -> ž
|
||||
* <li> s -> š
|
||||
* <li> t -> š
|
||||
* <li> d -> ž
|
||||
* <li> z -> ž
|
||||
* </ul>
|
||||
*/
|
||||
private int unpalatalize(char s[], int len) {
|
||||
|
|
|
@ -153,7 +153,7 @@ public final class ASCIIFoldingFilter extends TokenFilter {
|
|||
* accents are removed from accented characters.
|
||||
* @param input The characters to fold
|
||||
* @param inputPos Index of the first character to fold
|
||||
* @param output The result of the folding. Should be of size >= {@code length * 4}.
|
||||
* @param output The result of the folding. Should be of size >= {@code length * 4}.
|
||||
* @param outputPos Index of output where to put the result of the folding
|
||||
* @param length The number of characters to fold
|
||||
* @return length of output
|
||||
|
|
|
@ -65,7 +65,7 @@ public final class CapitalizationFilter extends TokenFilter {
|
|||
* @param forceFirstLetter Force the first letter to be capitalized even if it is in the keep list.
|
||||
* @param okPrefix do not change word capitalization if a word begins with something in this list.
|
||||
* @param minWordLength how long the word needs to be to get capitalization applied. If the
|
||||
* minWordLength is 3, "and" > "And" but "or" stays "or".
|
||||
* minWordLength is 3, "and" > "And" but "or" stays "or".
|
||||
* @param maxWordCount if the token contains more then maxWordCount words, the capitalization is
|
||||
* assumed to be correct.
|
||||
* @param maxTokenLength ???
|
||||
|
|
|
@ -39,7 +39,7 @@ import java.util.Set;
|
|||
* for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
|
||||
* "Mckinley"<br/>
|
||||
* "minWordLength" - how long the word needs to be to get capitalization applied. If the
|
||||
* minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
|
||||
* minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
|
||||
* "maxWordCount" - if the token contains more then maxWordCount words, the capitalization is
|
||||
* assumed to be correct.<br/>
|
||||
*
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.StemmerUtil;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
|
||||
* This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
|
||||
* It also discriminate against use of double vowels aa, ae, ao, oe and oo, leaving just the first one.
|
||||
* <p/>
|
||||
* It's is a semantically more destructive solution than {@link ScandinavianNormalizationFilter} but
|
||||
|
|
|
@ -66,7 +66,7 @@ import java.util.Arrays;
|
|||
* <ul>
|
||||
* <li><code>"PowerShot"</code> →
|
||||
* <code>0:"Power", 1:"Shot" 1:"PowerShot"</code></li>
|
||||
* <li><code>"A's+B's&C's"</code> -gt; <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
|
||||
* <li><code>"A's+B's&C's"</code> > <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
|
||||
* </li>
|
||||
* <li><code>"Super-Duper-XL500-42-AutoCoder!"</code> →
|
||||
* <code>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</code>
|
||||
|
@ -97,42 +97,42 @@ public final class WordDelimiterFilter extends TokenFilter {
|
|||
/**
|
||||
* Causes parts of words to be generated:
|
||||
* <p/>
|
||||
* "PowerShot" => "Power" "Shot"
|
||||
* "PowerShot" => "Power" "Shot"
|
||||
*/
|
||||
public static final int GENERATE_WORD_PARTS = 1;
|
||||
|
||||
/**
|
||||
* Causes number subwords to be generated:
|
||||
* <p/>
|
||||
* "500-42" => "500" "42"
|
||||
* "500-42" => "500" "42"
|
||||
*/
|
||||
public static final int GENERATE_NUMBER_PARTS = 2;
|
||||
|
||||
/**
|
||||
* Causes maximum runs of word parts to be catenated:
|
||||
* <p/>
|
||||
* "wi-fi" => "wifi"
|
||||
* "wi-fi" => "wifi"
|
||||
*/
|
||||
public static final int CATENATE_WORDS = 4;
|
||||
|
||||
/**
|
||||
* Causes maximum runs of word parts to be catenated:
|
||||
* <p/>
|
||||
* "wi-fi" => "wifi"
|
||||
* "wi-fi" => "wifi"
|
||||
*/
|
||||
public static final int CATENATE_NUMBERS = 8;
|
||||
|
||||
/**
|
||||
* Causes all subword parts to be catenated:
|
||||
* <p/>
|
||||
* "wi-fi-4000" => "wifi4000"
|
||||
* "wi-fi-4000" => "wifi4000"
|
||||
*/
|
||||
public static final int CATENATE_ALL = 16;
|
||||
|
||||
/**
|
||||
* Causes original words are preserved and added to the subword list (Defaults to false)
|
||||
* <p/>
|
||||
* "500-42" => "500" "42" "500-42"
|
||||
* "500-42" => "500" "42" "500-42"
|
||||
*/
|
||||
public static final int PRESERVE_ORIGINAL = 32;
|
||||
|
||||
|
@ -151,7 +151,7 @@ public final class WordDelimiterFilter extends TokenFilter {
|
|||
/**
|
||||
* Causes trailing "'s" to be removed for each subword
|
||||
* <p/>
|
||||
* "O'Neil's" => "O", "Neil"
|
||||
* "O'Neil's" => "O", "Neil"
|
||||
*/
|
||||
public static final int STEM_ENGLISH_POSSESSIVE = 256;
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ public final class WordDelimiterIterator {
|
|||
/**
|
||||
* If true, causes trailing "'s" to be removed for each subword. (Defaults to true)
|
||||
* <p/>
|
||||
* "O'Neil's" => "O", "Neil"
|
||||
* "O'Neil's" => "O", "Neil"
|
||||
*/
|
||||
final boolean stemEnglishPossessive;
|
||||
|
||||
|
@ -99,7 +99,7 @@ public final class WordDelimiterIterator {
|
|||
* @param charTypeTable table containing character types
|
||||
* @param splitOnCaseChange if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
|
||||
* @param splitOnNumerics if true, causes "j2se" to be three tokens; "j" "2" "se"
|
||||
* @param stemEnglishPossessive if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
|
||||
* @param stemEnglishPossessive if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
|
||||
*/
|
||||
WordDelimiterIterator(byte[] charTypeTable, boolean splitOnCaseChange, boolean splitOnNumerics, boolean stemEnglishPossessive) {
|
||||
this.charTypeTable = charTypeTable;
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* that characters between startOffset and endOffset in the original stream are
|
||||
* the same as the term chars.
|
||||
* <p>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
|
||||
* <table>
|
||||
* <table summary="ngram tokens example">
|
||||
* <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
|
||||
* <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
|
||||
* <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
|
||||
|
|
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* {@link String#split(java.lang.String)}
|
||||
* </p>
|
||||
* <p>
|
||||
* Using group >= 0 selects the matching group as the token. For example, if you have:<br/>
|
||||
* Using group >= 0 selects the matching group as the token. For example, if you have:<br/>
|
||||
* <pre>
|
||||
* pattern = \'([^\']+)\'
|
||||
* group = 0
|
||||
|
|
|
@ -38,7 +38,7 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* {@link String#split(java.lang.String)}
|
||||
* </p>
|
||||
* <p>
|
||||
* Using group >= 0 selects the matching group as the token. For example, if you have:<br/>
|
||||
* Using group >= 0 selects the matching group as the token. For example, if you have:<br/>
|
||||
* <pre>
|
||||
* pattern = \'([^\']+)\'
|
||||
* group = 0
|
||||
|
|
|
@ -24,10 +24,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Reverse token string, for example "country" => "yrtnuoc".
|
||||
* Reverse token string, for example "country" => "yrtnuoc".
|
||||
* <p>
|
||||
* If <code>marker</code> is supplied, then tokens will be also prepended by
|
||||
* that character. For example, with a marker of \u0001, "country" =>
|
||||
* that character. For example, with a marker of \u0001, "country" =>
|
||||
* "\u0001yrtnuoc". This is useful when implementing efficient leading
|
||||
* wildcards search.
|
||||
*/
|
||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeSource;
|
|||
* might be tokenized into shingles "please divide", "divide this",
|
||||
* "this sentence", "sentence into", and "into shingles".
|
||||
*
|
||||
* <p>This filter handles position increments > 1 by inserting filler tokens
|
||||
* <p>This filter handles position increments > 1 by inserting filler tokens
|
||||
* (tokens with termtext "_"). It does not handle a position increment of 0.
|
||||
*/
|
||||
public final class ShingleFilter extends TokenFilter {
|
||||
|
@ -356,7 +356,7 @@ public final class ShingleFilter extends TokenFilter {
|
|||
|
||||
/**
|
||||
* <p>Get the next token from the input stream.
|
||||
* <p>If the next token has <code>positionIncrement > 1</code>,
|
||||
* <p>If the next token has <code>positionIncrement > 1</code>,
|
||||
* <code>positionIncrement - 1</code> {@link #fillerToken}s are
|
||||
* inserted first.
|
||||
* @param target Where to put the new token; if null, a new instance is created.
|
||||
|
|
|
@ -32,11 +32,11 @@ import org.apache.lucene.util.CharsRefBuilder;
|
|||
* Parser for the Solr synonyms format.
|
||||
* <ol>
|
||||
* <li> Blank lines and lines starting with '#' are comments.
|
||||
* <li> Explicit mappings match any token sequence on the LHS of "=>"
|
||||
* <li> Explicit mappings match any token sequence on the LHS of "=>"
|
||||
* and replace with all alternatives on the RHS. These types of mappings
|
||||
* ignore the expand parameter in the constructor.
|
||||
* Example:
|
||||
* <blockquote>i-pod, i pod => ipod</blockquote>
|
||||
* <blockquote>i-pod, i pod => ipod</blockquote>
|
||||
* <li> Equivalent synonyms may be separated with commas and give
|
||||
* no explicit mapping. In this case the mapping behavior will
|
||||
* be taken from the expand parameter in the constructor. This allows
|
||||
|
@ -47,10 +47,10 @@ import org.apache.lucene.util.CharsRefBuilder;
|
|||
* <li> Multiple synonym mapping entries are merged.
|
||||
* Example:
|
||||
* <blockquote>
|
||||
* foo => foo bar<br>
|
||||
* foo => baz<br><br>
|
||||
* foo => foo bar<br>
|
||||
* foo => baz<br><br>
|
||||
* is equivalent to<br><br>
|
||||
* foo => foo bar, baz
|
||||
* foo => foo bar, baz
|
||||
* </blockquote>
|
||||
* </ol>
|
||||
* @lucene.experimental
|
||||
|
|
|
@ -50,9 +50,9 @@ import org.apache.lucene.util.fst.FST;
|
|||
* For example if you have these rules:
|
||||
*
|
||||
* <pre>
|
||||
* a -> x
|
||||
* a b -> y
|
||||
* b c d -> z
|
||||
* a -> x
|
||||
* a b -> y
|
||||
* b c d -> z
|
||||
* </pre>
|
||||
*
|
||||
* Then input <code>a b c d e</code> parses to <code>y b c
|
||||
|
|
|
@ -192,7 +192,7 @@ public class SynonymMap {
|
|||
}
|
||||
|
||||
/**
|
||||
* Add a phrase->phrase synonym mapping.
|
||||
* Add a phrase->phrase synonym mapping.
|
||||
* Phrases are character sequences where words are
|
||||
* separated with character zero (U+0000). Empty words
|
||||
* (two U+0000s in a row) are not allowed in the input nor
|
||||
|
|
|
@ -84,7 +84,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
* unigram or a bigram It also will not return a token for the final position
|
||||
* if the final word is already in the preceding bigram Example:(three
|
||||
* tokens/positions in)
|
||||
* "foo bar the"=>"foo:1|bar:2,bar-the:2|the:3=> "foo" "bar-the" (2 tokens
|
||||
* "foo bar the"=>"foo:1|bar:2,bar-the:2|the:3=> "foo" "bar-the" (2 tokens
|
||||
* out)
|
||||
*
|
||||
*/
|
||||
|
|
|
@ -27,7 +27,7 @@ import java.util.List;
|
|||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/** Tests that > 64k affixes actually works and doesnt overflow some internal int */
|
||||
/** Tests that > 64k affixes actually works and doesnt overflow some internal int */
|
||||
public class Test64kAffixes extends LuceneTestCase {
|
||||
|
||||
public void test() throws Exception {
|
||||
|
|
|
@ -55,7 +55,7 @@ public class TestLatvianStemmer extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* decl II nouns with (s,t) -> š and (d,z) -> ž
|
||||
* decl II nouns with (s,t) -> š and (d,z) -> ž
|
||||
* palatalization will generally conflate to two stems
|
||||
* due to the ambiguity (plural and singular).
|
||||
*/
|
||||
|
@ -151,7 +151,7 @@ public class TestLatvianStemmer extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Genitive plural forms with (s,t) -> š and (d,z) -> ž
|
||||
* Genitive plural forms with (s,t) -> š and (d,z) -> ž
|
||||
* will not conflate due to ambiguity.
|
||||
*/
|
||||
public void testNouns5() throws IOException {
|
||||
|
@ -240,7 +240,7 @@ public class TestLatvianStemmer extends BaseTokenStreamTestCase {
|
|||
|
||||
/**
|
||||
* Note: we intentionally don't handle the ambiguous
|
||||
* (s,t) -> š and (d,z) -> ž
|
||||
* (s,t) -> š and (d,z) -> ž
|
||||
*/
|
||||
public void testPalatalization() throws IOException {
|
||||
checkOneTerm(a, "krāsns", "krāsn"); // nom. sing.
|
||||
|
|
|
@ -39,7 +39,7 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.DEF
|
|||
*/
|
||||
public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
/***
|
||||
/*
|
||||
public void testPerformance() throws IOException {
|
||||
String s = "now is the time-for all good men to come to-the aid of their country.";
|
||||
Token tok = new Token();
|
||||
|
|
|
@ -281,7 +281,7 @@ public class TestPatternReplaceCharFilter extends BaseTokenStreamTestCase {
|
|||
* A demonstration of how backtracking regular expressions can lead to relatively
|
||||
* easy DoS attacks.
|
||||
*
|
||||
* @see "http://swtch.com/~rsc/regexp/regexp1.html"
|
||||
* @see <a href="http://swtch.com/~rsc/regexp/regexp1.html">"http://swtch.com/~rsc/regexp/regexp1.html"</a>
|
||||
*/
|
||||
@Ignore
|
||||
public void testNastyPattern() throws Exception {
|
||||
|
|
|
@ -28,8 +28,8 @@ import java.io.Reader;
|
|||
* <p>
|
||||
* Sequences of iteration marks are supported. In case an illegal sequence of iteration
|
||||
* marks is encountered, the implementation emits the illegal source character as-is
|
||||
* without considering its script. For example, with input "?ゝ", we get
|
||||
* "??" even though "?" isn't hiragana.
|
||||
* without considering its script. For example, with input "?ゝ", we get
|
||||
* "??" even though "?" isn't hiragana.
|
||||
* </p>
|
||||
* <p>
|
||||
* Note that a full stop punctuation character "。" (U+3002) can not be iterated
|
||||
|
|
|
@ -62,8 +62,8 @@ import org.apache.lucene.util.fst.FST;
|
|||
* <p>
|
||||
* This tokenizer uses a rolling Viterbi search to find the
|
||||
* least cost segmentation (path) of the incoming characters.
|
||||
* For tokens that appear to be compound (> length 2 for all
|
||||
* Kanji, or > length 7 for non-Kanji), we see if there is a
|
||||
* For tokens that appear to be compound (> length 2 for all
|
||||
* Kanji, or > length 7 for non-Kanji), we see if there is a
|
||||
* 2nd best segmentation of that token after applying
|
||||
* penalties to the long tokens. If so, and the Mode is
|
||||
* {@link Mode#SEARCH}, we output the alternate segmentation
|
||||
|
|
|
@ -158,7 +158,7 @@ public class Token {
|
|||
|
||||
/**
|
||||
* Set the position length (in tokens) of this token. For normal
|
||||
* tokens this is 1; for compound tokens it's > 1.
|
||||
* tokens this is 1; for compound tokens it's > 1.
|
||||
*/
|
||||
public void setPositionLength(int positionLength) {
|
||||
this.positionLength = positionLength;
|
||||
|
@ -166,7 +166,7 @@ public class Token {
|
|||
|
||||
/**
|
||||
* Get the length (in tokens) of this token. For normal
|
||||
* tokens this is 1; for compound tokens it's > 1.
|
||||
* tokens this is 1; for compound tokens it's > 1.
|
||||
* @return position length of token
|
||||
*/
|
||||
public int getPositionLength() {
|
||||
|
|
|
@ -40,7 +40,7 @@ public class MorfologikAnalyzer extends Analyzer {
|
|||
* and have an associated <code>.info</code> metadata file. See the Morfologik project
|
||||
* for details.
|
||||
*
|
||||
* @see "http://morfologik.blogspot.com/"
|
||||
* @see <a href="http://morfologik.blogspot.com/">http://morfologik.blogspot.com/</a>
|
||||
*/
|
||||
public MorfologikAnalyzer(final String dictionaryResource) {
|
||||
this.dictionary = dictionaryResource;
|
||||
|
|
|
@ -58,7 +58,7 @@ public class Utility {
|
|||
* @param lstartIndex start offset into larray
|
||||
* @param rarray right array
|
||||
* @param rstartIndex start offset into rarray
|
||||
* @return 0 if the arrays are equal,1 if larray > rarray, -1 if larray < rarray
|
||||
* @return 0 if the arrays are equal,1 if larray > rarray, -1 if larray < rarray
|
||||
*/
|
||||
public static int compareArray(char[] larray, int lstartIndex, char[] rarray,
|
||||
int rstartIndex) {
|
||||
|
|
|
@ -30,7 +30,7 @@ public interface QueryMaker {
|
|||
/**
|
||||
* Create the next query, of the given size.
|
||||
* @param size the size of the query - number of terms, etc.
|
||||
* @exception Exception if cannot make the query, or if size>0 was specified but this feature is not supported.
|
||||
* @exception Exception if cannot make the query, or if size > 0 was specified but this feature is not supported.
|
||||
*/
|
||||
public Query makeQuery (int size) throws Exception;
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ import java.util.Properties;
|
|||
* It's parsed by {@link com.spatial4j.core.context.SpatialContext#readShapeFromWkt(String)} (String)} and then
|
||||
* further manipulated via a configurable {@link SpatialDocMaker.ShapeConverter}. When using point
|
||||
* data, it's likely you'll want to configure the shape converter so that the query shapes actually
|
||||
* cover a region. The queries are all created & cached in advance. This query maker works in
|
||||
* cover a region. The queries are all created and cached in advance. This query maker works in
|
||||
* conjunction with {@link SpatialDocMaker}. See spatial.alg for a listing of options, in
|
||||
* particular the options starting with "query.".
|
||||
*/
|
||||
|
|
|
@ -393,7 +393,7 @@ public class AnalyzerFactoryTask extends PerfTask {
|
|||
/**
|
||||
* This method looks up a class with its fully qualified name (FQN), or a short-name
|
||||
* class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
|
||||
* as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
|
||||
* as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
|
||||
* "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
|
||||
*
|
||||
* If className contains a period, the class is first looked up as-is, assuming that it
|
||||
|
|
|
@ -99,7 +99,7 @@ public class NewAnalyzerTask extends PerfTask {
|
|||
* <p/>
|
||||
* Analyzer names may also refer to previously defined AnalyzerFactory's.
|
||||
* <p/>
|
||||
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
|
||||
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
|
||||
* <p/>
|
||||
* Example AnalyzerFactory usage:
|
||||
* <pre>
|
||||
|
|
|
@ -268,7 +268,7 @@ public abstract class ReadTask extends PerfTask {
|
|||
}
|
||||
|
||||
/**
|
||||
* Return true if, with search & results traversing, docs should be retrieved.
|
||||
* Return true if, with search and results traversing, docs should be retrieved.
|
||||
*/
|
||||
public abstract boolean withRetrieve();
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ import java.util.Collections;
|
|||
* <li>fields - The fields to highlight. If not specified all fields will be highlighted (or at least attempted)</li>
|
||||
* </ul>
|
||||
* Example:
|
||||
* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
|
||||
* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
|
||||
* </pre>
|
||||
*
|
||||
* Documents must be stored in order for this task to work. Additionally, term vector positions can be used as well.
|
||||
|
|
|
@ -47,7 +47,7 @@ import java.util.Collections;
|
|||
* <li>fields - The fields to highlight. If not specified all fields will be highlighted (or at least attempted)</li>
|
||||
* </ul>
|
||||
* Example:
|
||||
* <pre>"SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(size[10],highlight[10],maxFrags[3],fields[body]) > : 1000
|
||||
* <pre>"SearchVecHlgtSameRdr" SearchTravRetVectorHighlight(size[10],highlight[10],maxFrags[3],fields[body]) > : 1000
|
||||
* </pre>
|
||||
*
|
||||
* Fields must be stored and term vector offsets and positions in order must be true for this task to work.
|
||||
|
|
|
@ -55,7 +55,7 @@ public abstract class TermsIndexReaderBase implements Closeable, Accountable {
|
|||
*/
|
||||
public static abstract class FieldIndexEnum {
|
||||
|
||||
/** Seeks to "largest" indexed term that's <=
|
||||
/** Seeks to "largest" indexed term that's <=
|
||||
* term; returns file pointer index (into the main
|
||||
* terms index file) for that term */
|
||||
public abstract long seek(BytesRef term) throws IOException;
|
||||
|
|
|
@ -109,7 +109,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
}
|
||||
}
|
||||
|
||||
/** Sets an index term when docFreq >= docFreqThresh, or
|
||||
/** Sets an index term when docFreq >= docFreqThresh, or
|
||||
* every interval terms. This should reduce seek time
|
||||
* to high docFreq terms. */
|
||||
public static final class EveryNOrDocFreqTermSelector extends IndexTermSelector {
|
||||
|
|
|
@ -82,7 +82,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
|
||||
/**
|
||||
* This is just like {@link BlockTreeTermsWriter}, except it also stores a version per term, and adds a method to its TermsEnum
|
||||
* implementation to seekExact only if the version is >= the specified version. The version is added to the terms index to avoid seeking if
|
||||
* implementation to seekExact only if the version is >= the specified version. The version is added to the terms index to avoid seeking if
|
||||
* no term in the block has a high enough version. The term blocks file is .tiv and the terms index extension is .tipv.
|
||||
*
|
||||
* @lucene.experimental
|
||||
|
|
|
@ -56,7 +56,7 @@ import org.apache.lucene.util.automaton.Transition;
|
|||
|
||||
/** Wraps {@link Lucene50PostingsFormat} format for on-disk
|
||||
* storage, but then at read time loads and stores all
|
||||
* terms & postings directly in RAM as byte[], int[].
|
||||
* terms and postings directly in RAM as byte[], int[].
|
||||
*
|
||||
* <p><b><font color=red>WARNING</font></b>: This is
|
||||
* exceptionally RAM intensive: it makes no effort to
|
||||
|
@ -91,7 +91,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
|
||||
/** minSkipCount is how many terms in a row must have the
|
||||
* same prefix before we put a skip pointer down. Terms
|
||||
* with docFreq <= lowFreqCutoff will use a single int[]
|
||||
* with docFreq <= lowFreqCutoff will use a single int[]
|
||||
* to hold all docs, freqs, position and offsets; terms
|
||||
* with higher docFreq will use separate arrays. */
|
||||
public DirectPostingsFormat(int minSkipCount, int lowFreqCutoff) {
|
||||
|
|
|
@ -753,7 +753,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
/** Load frame for target arc(node) on fst, so that
|
||||
* arc.label >= label and !fsa.reject(arc.label) */
|
||||
* arc.label >= label and !fsa.reject(arc.label) */
|
||||
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
|
||||
FST.Arc<Long> arc = frame.arc;
|
||||
arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader);
|
||||
|
|
|
@ -650,7 +650,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
/** Load frame for target arc(node) on fst, so that
|
||||
* arc.label >= label and !fsa.reject(arc.label) */
|
||||
* arc.label >= label and !fsa.reject(arc.label) */
|
||||
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
|
||||
FST.Arc<FSTTermOutputs.TermData> arc = frame.fstArc;
|
||||
arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader);
|
||||
|
|
|
@ -69,7 +69,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
// it pulls the FST directly from what you wrote w/o going
|
||||
// to disk.
|
||||
|
||||
/** Stores terms & postings (docs, positions, payloads) in
|
||||
/** Stores terms and postings (docs, positions, payloads) in
|
||||
* RAM, using an FST.
|
||||
*
|
||||
* <p>Note that this codec implements advance as a linear
|
||||
|
|
|
@ -164,7 +164,7 @@
|
|||
<property name="javac.debug" value="on"/>
|
||||
<property name="javac.source" value="1.8"/>
|
||||
<property name="javac.target" value="1.8"/>
|
||||
<property name="javac.args" value="-Xlint -Xlint:-deprecation -Xlint:-serial -Xlint:-options"/>
|
||||
<property name="javac.args" value="-Xlint -Xlint:-deprecation -Xlint:-serial -Xlint:-options -Xdoclint:all/protected -Xdoclint:-html -Xdoclint:-missing"/>
|
||||
<property name="javadoc.link" value="http://download.oracle.com/javase/8/docs/api/"/>
|
||||
<property name="javadoc.link.junit" value="http://junit.sourceforge.net/javadoc/"/>
|
||||
<property name="javadoc.packagelist.dir" location="${common.dir}/tools/javadoc"/>
|
||||
|
|
|
@ -79,8 +79,8 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
|
|||
public Token() {
|
||||
}
|
||||
|
||||
/** Constructs a Token with the given term text, and start
|
||||
* & end offsets. The type defaults to "word."
|
||||
/** Constructs a Token with the given term text, start
|
||||
* and end offsets. The type defaults to "word."
|
||||
* <b>NOTE:</b> for better indexing speed you should
|
||||
* instead use the char[] termBuffer methods to set the
|
||||
* term text.
|
||||
|
|
|
@ -78,7 +78,7 @@ public class TokenStreamToAutomaton {
|
|||
}
|
||||
}
|
||||
|
||||
/** Subclass & implement this if you need to change the
|
||||
/** Subclass and implement this if you need to change the
|
||||
* token (such as escaping certain bytes) before it's
|
||||
* turned into a graph. */
|
||||
protected BytesRef changeToken(BytesRef in) {
|
||||
|
|
|
@ -48,7 +48,7 @@ public interface CharTermAttribute extends Attribute, CharSequence, Appendable {
|
|||
/** Grows the termBuffer to at least size newSize, preserving the
|
||||
* existing content.
|
||||
* @param newSize minimum size of the new termBuffer
|
||||
* @return newly created termBuffer with length >= newSize
|
||||
* @return newly created termBuffer with {@code length >= newSize}
|
||||
*/
|
||||
public char[] resizeBuffer(int newSize);
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ public final class CodecUtil {
|
|||
* @param maxVersion The maximum supported expected version number.
|
||||
* @return The actual version found, when a valid header is found
|
||||
* that matches <code>codec</code>, with an actual version
|
||||
* where <code>minVersion <= actual <= maxVersion</code>.
|
||||
* where {@code minVersion <= actual <= maxVersion}.
|
||||
* Otherwise an exception is thrown.
|
||||
* @throws CorruptIndexException If the first four bytes are not
|
||||
* {@link #CODEC_MAGIC}, or if the actual codec found is
|
||||
|
@ -238,7 +238,7 @@ public final class CodecUtil {
|
|||
* @param expectedSuffix The expected auxiliary suffix for this file.
|
||||
* @return The actual version found, when a valid header is found
|
||||
* that matches <code>codec</code>, with an actual version
|
||||
* where <code>minVersion <= actual <= maxVersion</code>,
|
||||
* where {@code minVersion <= actual <= maxVersion},
|
||||
* and matching <code>expectedID</code> and <code>expectedSuffix</code>
|
||||
* Otherwise an exception is thrown.
|
||||
* @throws CorruptIndexException If the first four bytes are not
|
||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.util.MathUtil;
|
|||
* Skip level i contains every skipInterval-th entry from skip level i-1.
|
||||
* Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))).
|
||||
*
|
||||
* Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
|
||||
* Each skip entry on a level {@code i>0} contains a pointer to the corresponding skip entry in list i-1.
|
||||
* This guarantees a logarithmic amount of skips to find the target document.
|
||||
*
|
||||
* While this class takes care of writing the different skip levels,
|
||||
|
|
|
@ -183,7 +183,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
|
|||
* for the field. */
|
||||
public abstract void startDoc(int docID, int freq) throws IOException;
|
||||
|
||||
/** Add a new position & payload, and start/end offset. A
|
||||
/** Add a new position and payload, and start/end offset. A
|
||||
* null payload means no payload; a non-null payload with
|
||||
* zero length also means no payload. Caller may reuse
|
||||
* the {@link BytesRef} for the payload between calls
|
||||
|
@ -191,7 +191,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
|
|||
* and <code>endOffset</code> will be -1 when offsets are not indexed. */
|
||||
public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
|
||||
|
||||
/** Called when we are done adding positions & payloads
|
||||
/** Called when we are done adding positions and payloads
|
||||
* for each doc. */
|
||||
public abstract void finishDoc() throws IOException;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.index.MergeState;
|
|||
import org.apache.lucene.index.SegmentReader;
|
||||
|
||||
/**
|
||||
* Computes which segments have identical field name->number mappings,
|
||||
* Computes which segments have identical field name to number mappings,
|
||||
* which allows stored fields and term vectors in this codec to be bulk-merged.
|
||||
*/
|
||||
class MatchingReaders {
|
||||
|
|
|
@ -50,7 +50,7 @@ final class ForUtil {
|
|||
* Upper limit of the number of values that might be decoded in a single call to
|
||||
* {@link #readBlock(IndexInput, byte[], int[])}. Although values after
|
||||
* <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
|
||||
* whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
|
||||
* whose size is {@code >= MAX_DATA_SIZE} to avoid {@link ArrayIndexOutOfBoundsException}s.
|
||||
*/
|
||||
static final int MAX_DATA_SIZE;
|
||||
static {
|
||||
|
|
|
@ -81,10 +81,10 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
|||
public static final int BINARY_PREFIX_COMPRESSED = 2;
|
||||
|
||||
/** Standard storage for sorted set values with 1 level of indirection:
|
||||
* docId -> address -> ord. */
|
||||
* {@code docId -> address -> ord}. */
|
||||
public static final int SORTED_WITH_ADDRESSES = 0;
|
||||
/** Single-valued sorted set values, encoded as sorted values, so no level
|
||||
* of indirection: docId -> ord. */
|
||||
* of indirection: {@code docId -> ord}. */
|
||||
public static final int SORTED_SINGLE_VALUED = 1;
|
||||
|
||||
/** placeholder for missing offset that means there are no missing values */
|
||||
|
|
|
@ -131,7 +131,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
|||
* <ul>
|
||||
* <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
|
||||
* from the minimum value within the block.
|
||||
* <li>1 -->, gcd-compressed. When all integers share a common divisor, only quotients are stored
|
||||
* <li>1 --> gcd-compressed. When all integers share a common divisor, only quotients are stored
|
||||
* using blocks of delta-encoded ints.
|
||||
* <li>2 --> table-compressed. When the number of unique numeric values is small and it would save space,
|
||||
* a lookup table of unique values is written, followed by the ordinal for each document.
|
||||
|
@ -141,7 +141,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
|||
* <p>BinaryType indicates how Binary values will be stored:
|
||||
* <ul>
|
||||
* <li>0 --> fixed-width. All values have the same length, addressing by multiplication.
|
||||
* <li>1 -->, variable-width. An address for each value is stored.
|
||||
* <li>1 --> variable-width. An address for each value is stored.
|
||||
* <li>2 --> prefix-compressed. An address to the start of every interval'th value is stored.
|
||||
* </ul>
|
||||
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
|
||||
|
|
|
@ -305,7 +305,7 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
* can match.
|
||||
*
|
||||
* @param position current position in the input String
|
||||
* @return position >=0 if more possible solutions exist for the DFA
|
||||
* @return {@code position >= 0} if more possible solutions exist for the DFA
|
||||
*/
|
||||
private int backtrack(int position) {
|
||||
while (position-- > 0) {
|
||||
|
|
|
@ -162,7 +162,7 @@ public class ConcurrentMergeScheduler extends MergeScheduler {
|
|||
};
|
||||
|
||||
/**
|
||||
* Called whenever the running merges have changed, to pause & unpause
|
||||
* Called whenever the running merges have changed, to pause and unpause
|
||||
* threads. This method sorts the merge threads by their merge size in
|
||||
* descending order and then pauses/unpauses threads from first to last --
|
||||
* that way, smaller merges are guaranteed to run before larger ones.
|
||||
|
|
|
@ -40,7 +40,7 @@ public enum DocValuesType {
|
|||
* A pre-sorted byte[]. Fields with this type only store distinct byte values
|
||||
* and store an additional offset pointer per document to dereference the shared
|
||||
* byte[]. The stored byte[] is presorted and allows access via document id,
|
||||
* ordinal and by-value. Values must be <= 32766 bytes.
|
||||
* ordinal and by-value. Values must be {@code <= 32766} bytes.
|
||||
*/
|
||||
SORTED,
|
||||
/**
|
||||
|
@ -52,7 +52,7 @@ public enum DocValuesType {
|
|||
* A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values
|
||||
* and store additional offset pointers per document to dereference the shared
|
||||
* byte[]s. The stored byte[] is presorted and allows access via document id,
|
||||
* ordinal and by-value. Values must be <= 32766 bytes.
|
||||
* ordinal and by-value. Values must be {@code <= 32766} bytes.
|
||||
*/
|
||||
SORTED_SET,
|
||||
}
|
||||
|
|
|
@ -178,7 +178,7 @@ class DocumentsWriterFlushQueue {
|
|||
/**
|
||||
* Publishes the flushed segment, segment private deletes (if any) and its
|
||||
* associated global delete (if present) to IndexWriter. The actual
|
||||
* publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s
|
||||
* publishing operation is synced on {@code IW -> BDS} so that the {@link SegmentInfo}'s
|
||||
* delete generation is always GlobalPacket_deleteGeneration + 1
|
||||
*/
|
||||
protected final void publishFlushedSegment(IndexWriter indexWriter, FlushedSegment newSegment, FrozenBufferedUpdates globalPacket)
|
||||
|
|
|
@ -53,7 +53,7 @@ import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
|
|||
* <p>
|
||||
* If {@link IndexWriterConfig#setRAMBufferSizeMB(double)} is enabled, the
|
||||
* largest ram consuming {@link DocumentsWriterPerThread} will be marked as
|
||||
* pending iff the global active RAM consumption is >= the configured max RAM
|
||||
* pending iff the global active RAM consumption is {@code >=} the configured max RAM
|
||||
* buffer.
|
||||
*/
|
||||
class FlushByRamOrCountsPolicy extends FlushPolicy {
|
||||
|
|
|
@ -108,7 +108,7 @@ public abstract class IndexCommit implements Comparable<IndexCommit> {
|
|||
|
||||
/** Returns userData, previously passed to {@link
|
||||
* IndexWriter#setCommitData(Map)} for this commit. Map is
|
||||
* String -> String. */
|
||||
* {@code String -> String}. */
|
||||
public abstract Map<String,String> getUserData() throws IOException;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -56,7 +56,7 @@ public final class IndexFileNames {
|
|||
/**
|
||||
* Computes the full file name from base, extension and generation. If the
|
||||
* generation is -1, the file name is null. If it's 0, the file name is
|
||||
* <base>.<ext>. If it's > 0, the file name is
|
||||
* <base>.<ext>. If it's > 0, the file name is
|
||||
* <base>_<gen>.<ext>.<br>
|
||||
* <b>NOTE:</b> .<ext> is added to the name only if <code>ext</code> is
|
||||
* not an empty string.
|
||||
|
|
|
@ -1575,8 +1575,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
final InfoStream infoStream;
|
||||
|
||||
/**
|
||||
* Forces merge policy to merge segments until there are <=
|
||||
* maxNumSegments. The actual merges to be
|
||||
* Forces merge policy to merge segments until there are
|
||||
* {@code <= maxNumSegments}. The actual merges to be
|
||||
* executed are determined by the {@link MergePolicy}.
|
||||
*
|
||||
* <p>This is a horribly costly operation, especially when
|
||||
|
@ -1595,7 +1595,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
* afterwards, to allow IndexWriter to free up disk space.</p>
|
||||
*
|
||||
* <p>If some but not all readers re-open while merging
|
||||
* is underway, this will cause > 2X temporary
|
||||
* is underway, this will cause {@code > 2X} temporary
|
||||
* space to be consumed as those new readers will then
|
||||
* hold open the temporary segments at that time. It is
|
||||
* best not to re-open readers while merging is running.</p>
|
||||
|
@ -2818,7 +2818,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
private final Object commitLock = new Object();
|
||||
|
||||
/**
|
||||
* <p>Commits all pending changes (added & deleted
|
||||
* <p>Commits all pending changes (added and deleted
|
||||
* documents, segment merges, added
|
||||
* indexes, etc.) to the index, and syncs all referenced
|
||||
* index files, such that a reader will see the changes
|
||||
|
@ -2830,7 +2830,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
*
|
||||
* <p> Note that this operation calls Directory.sync on
|
||||
* the index files. That call should not return until the
|
||||
* file contents & metadata are on stable storage. For
|
||||
* file contents and metadata are on stable storage. For
|
||||
* FSDirectory, this calls the OS's fsync. But, beware:
|
||||
* some hardware devices may in fact cache writes even
|
||||
* during fsync, and return before the bits are actually
|
||||
|
|
|
@ -60,7 +60,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
* or larger will never be merged. @see setMaxMergeDocs */
|
||||
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
|
||||
|
||||
/** Default noCFSRatio. If a merge's size is >= 10% of
|
||||
/** Default noCFSRatio. If a merge's size is {@code >= 10%} of
|
||||
* the index, then we disable compound file for it.
|
||||
* @see MergePolicy#setNoCFSRatio */
|
||||
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
|
||||
|
@ -124,8 +124,8 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
* faster, but indexing speed is slower. With larger
|
||||
* values, more RAM is used during indexing, and while
|
||||
* searches is slower, indexing is
|
||||
* faster. Thus larger values (> 10) are best for batch
|
||||
* index creation, and smaller values (< 10) for indices
|
||||
* faster. Thus larger values ({@code > 10}) are best for batch
|
||||
* index creation, and smaller values ({@code < 10}) for indices
|
||||
* that are interactively maintained. */
|
||||
public void setMergeFactor(int mergeFactor) {
|
||||
if (mergeFactor < 2)
|
||||
|
|
|
@ -420,7 +420,7 @@ public abstract class MergePolicy {
|
|||
|
||||
/**
|
||||
* Determine what set of merge operations is necessary in
|
||||
* order to merge to <= the specified segment count. {@link IndexWriter} calls this when its
|
||||
* order to merge to {@code <=} the specified segment count. {@link IndexWriter} calls this when its
|
||||
* {@link IndexWriter#forceMerge} method is called. This call is always
|
||||
* synchronized on the {@link IndexWriter} instance so only one thread at a
|
||||
* time will call this method.
|
||||
|
|
|
@ -120,14 +120,14 @@ public final class MultiFields extends Fields {
|
|||
return getFields(r).terms(field);
|
||||
}
|
||||
|
||||
/** Returns {@link DocsEnum} for the specified field &
|
||||
/** Returns {@link DocsEnum} for the specified field and
|
||||
* term. This will return null if the field or term does
|
||||
* not exist. */
|
||||
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
|
||||
return getTermDocsEnum(r, liveDocs, field, term, DocsEnum.FLAG_FREQS);
|
||||
}
|
||||
|
||||
/** Returns {@link DocsEnum} for the specified field &
|
||||
/** Returns {@link DocsEnum} for the specified field and
|
||||
* term, with control over whether freqs are required.
|
||||
* Some codecs may be able to optimize their
|
||||
* implementation when freqs are not required. This will
|
||||
|
@ -147,7 +147,7 @@ public final class MultiFields extends Fields {
|
|||
}
|
||||
|
||||
/** Returns {@link DocsAndPositionsEnum} for the specified
|
||||
* field & term. This will return null if the field or
|
||||
* field and term. This will return null if the field or
|
||||
* term does not exist or positions were not indexed.
|
||||
* @see #getTermPositionsEnum(IndexReader, Bits, String, BytesRef, int) */
|
||||
public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
|
||||
|
@ -155,7 +155,7 @@ public final class MultiFields extends Fields {
|
|||
}
|
||||
|
||||
/** Returns {@link DocsAndPositionsEnum} for the specified
|
||||
* field & term, with control over whether offsets and payloads are
|
||||
* field and term, with control over whether offsets and payloads are
|
||||
* required. Some codecs may be able to optimize
|
||||
* their implementation when offsets and/or payloads are not
|
||||
* required. This will return null if the field or term does not
|
||||
|
|
|
@ -676,7 +676,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
return dest;
|
||||
}
|
||||
|
||||
/** Writes & syncs to the Directory dir, taking care to
|
||||
/** Writes and syncs to the Directory dir, taking care to
|
||||
* remove the segments file on exception
|
||||
* <p>
|
||||
* Note: {@link #changed()} should be called prior to this
|
||||
|
|
|
@ -46,7 +46,7 @@ public abstract class SortedDocValues extends BinaryDocValues {
|
|||
* {@link BytesRef} may be re-used across calls to {@link #lookupOrd(int)}
|
||||
* so make sure to {@link BytesRef#deepCopyOf(BytesRef) copy it} if you want
|
||||
* to keep it around.
|
||||
* @param ord ordinal to lookup (must be >= 0 and < {@link #getValueCount()})
|
||||
* @param ord ordinal to lookup (must be >= 0 and < {@link #getValueCount()})
|
||||
* @see #getOrd(int)
|
||||
*/
|
||||
public abstract BytesRef lookupOrd(int ord);
|
||||
|
|
|
@ -46,7 +46,7 @@ public abstract class Terms {
|
|||
* are accepted by the provided {@link
|
||||
* CompiledAutomaton}. If the <code>startTerm</code> is
|
||||
* provided then the returned enum will only accept terms
|
||||
* > <code>startTerm</code>, but you still must call
|
||||
* {@code > startTerm}, but you still must call
|
||||
* next() first to get to the first term. Note that the
|
||||
* provided <code>startTerm</code> must be accepted by
|
||||
* the automaton.
|
||||
|
|
|
@ -91,7 +91,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
|||
|
||||
int[] sortedTermIDs;
|
||||
|
||||
/** Collapse the hash table & sort in-place; also sets
|
||||
/** Collapse the hash table and sort in-place; also sets
|
||||
* this.sortedTermIDs to the results */
|
||||
public int[] sortPostings() {
|
||||
sortedTermIDs = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
|
|
|
@ -73,7 +73,7 @@ import java.util.Map;
|
|||
// maybe CMS should do so)
|
||||
|
||||
public class TieredMergePolicy extends MergePolicy {
|
||||
/** Default noCFSRatio. If a merge's size is >= 10% of
|
||||
/** Default noCFSRatio. If a merge's size is {@code >= 10%} of
|
||||
* the index, then we disable compound file for it.
|
||||
* @see MergePolicy#setNoCFSRatio */
|
||||
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
|
||||
|
@ -215,7 +215,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
/** Sets the allowed number of segments per tier. Smaller
|
||||
* values mean more merging but fewer segments.
|
||||
*
|
||||
* <p><b>NOTE</b>: this value should be >= the {@link
|
||||
* <p><b>NOTE</b>: this value should be {@code >=} the {@link
|
||||
* #setMaxMergeAtOnce} otherwise you'll force too much
|
||||
* merging to occur.</p>
|
||||
*
|
||||
|
|
|
@ -98,9 +98,9 @@ public abstract class FieldComparator<T> {
|
|||
*
|
||||
* @param slot1 first slot to compare
|
||||
* @param slot2 second slot to compare
|
||||
* @return any N < 0 if slot2's value is sorted after
|
||||
* slot1, any N > 0 if the slot2's value is sorted before
|
||||
* slot1 and 0 if they are equal
|
||||
* @return any {@code N < 0} if slot2's value is sorted after
|
||||
* slot1, any {@code N > 0} if the slot2's value is sorted before
|
||||
* slot1 and {@code 0} if they are equal
|
||||
*/
|
||||
public abstract int compare(int slot1, int slot2);
|
||||
|
||||
|
@ -134,9 +134,9 @@ public abstract class FieldComparator<T> {
|
|||
* frequently).</p>
|
||||
*
|
||||
* @param doc that was hit
|
||||
* @return any N < 0 if the doc's value is sorted after
|
||||
* the bottom entry (not competitive), any N > 0 if the
|
||||
* doc's value is sorted before the bottom entry and 0 if
|
||||
* @return any {@code N < 0} if the doc's value is sorted after
|
||||
* the bottom entry (not competitive), any {@code N > 0} if the
|
||||
* doc's value is sorted before the bottom entry and {@code 0} if
|
||||
* they are equal.
|
||||
*/
|
||||
public abstract int compareBottom(int doc) throws IOException;
|
||||
|
@ -150,9 +150,9 @@ public abstract class FieldComparator<T> {
|
|||
* use searchAfter (deep paging).
|
||||
*
|
||||
* @param doc that was hit
|
||||
* @return any N < 0 if the doc's value is sorted after
|
||||
* the bottom entry (not competitive), any N > 0 if the
|
||||
* doc's value is sorted before the bottom entry and 0 if
|
||||
* @return any {@code N < 0} if the doc's value is sorted after
|
||||
* the bottom entry (not competitive), any {@code N > 0} if the
|
||||
* doc's value is sorted before the bottom entry and {@code 0} if
|
||||
* they are equal.
|
||||
*/
|
||||
public abstract int compareTop(int doc) throws IOException;
|
||||
|
|
|
@ -71,7 +71,7 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
* of that length is also required.
|
||||
*
|
||||
* @param term the term to search for
|
||||
* @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
|
||||
* @param maxEdits must be {@code >= 0} and {@code <=} {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
|
||||
* @param prefixLength length of common (non-fuzzy) prefix
|
||||
* @param maxExpansions the maximum number of terms to match. If this number is
|
||||
* greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
|
||||
|
|
|
@ -126,7 +126,7 @@ public abstract class LiveFieldValues<S,T> implements ReferenceManager.RefreshLi
|
|||
}
|
||||
}
|
||||
|
||||
/** This is called when the id/value was already flushed & opened
|
||||
/** This is called when the id/value was already flushed and opened
|
||||
* in an NRT IndexSearcher. You must implement this to
|
||||
* go look up the value (eg, via doc values, field cache,
|
||||
* stored fields, etc.). */
|
||||
|
|
|
@ -94,8 +94,8 @@ public abstract class TopDocsCollector<T extends ScoreDoc> extends SimpleCollect
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the documents in the rage [start .. pq.size()) that were collected
|
||||
* by this collector. Note that if start >= pq.size(), an empty TopDocs is
|
||||
* Returns the documents in the range [start .. pq.size()) that were collected
|
||||
* by this collector. Note that if {@code start >= pq.size()}, an empty TopDocs is
|
||||
* returned.<br>
|
||||
* This method is convenient to call if the application always asks for the
|
||||
* last results, starting from the last 'page'.<br>
|
||||
|
@ -113,8 +113,8 @@ public abstract class TopDocsCollector<T extends ScoreDoc> extends SimpleCollect
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the documents in the rage [start .. start+howMany) that were
|
||||
* collected by this collector. Note that if start >= pq.size(), an empty
|
||||
* Returns the documents in the range [start .. start+howMany) that were
|
||||
* collected by this collector. Note that if {@code start >= pq.size()}, an empty
|
||||
* TopDocs is returned, and if pq.size() - start < howMany, then only the
|
||||
* available documents in [start .. pq.size()) are returned.<br>
|
||||
* This method is useful to call in case pagination of search results is
|
||||
|
|
|
@ -216,7 +216,7 @@ public abstract class SimilarityBase extends Similarity {
|
|||
|
||||
// ------------------------------ Norm handling ------------------------------
|
||||
|
||||
/** Norm -> document length map. */
|
||||
/** Norm to document length map. */
|
||||
private static final float[] NORM_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
|
|
|
@ -80,17 +80,17 @@ import org.apache.lucene.util.BytesRef;
|
|||
* of the weighted query vectors <i>V(q)</i> and <i>V(d)</i>:
|
||||
*
|
||||
* <br> <br>
|
||||
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto">
|
||||
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto" summary="formatting only">
|
||||
* <tr><td>
|
||||
* <table cellpadding="1" cellspacing="0" border="1" align="center">
|
||||
* <table cellpadding="1" cellspacing="0" border="1" align="center" summary="formatting only">
|
||||
* <tr><td>
|
||||
* <table cellpadding="2" cellspacing="2" border="0" align="center">
|
||||
* <table cellpadding="2" cellspacing="2" border="0" align="center" summary="cosine similarity formula">
|
||||
* <tr>
|
||||
* <td valign="middle" align="right" rowspan="1">
|
||||
* cosine-similarity(q,d) =
|
||||
* </td>
|
||||
* <td valign="middle" align="center">
|
||||
* <table>
|
||||
* <table summary="cosine similarity formula">
|
||||
* <tr><td align="center" style="text-align: center"><small>V(q) · V(d)</small></td></tr>
|
||||
* <tr><td align="center" style="text-align: center">–––––––––</td></tr>
|
||||
* <tr><td align="center" style="text-align: center"><small>|V(q)| |V(d)|</small></td></tr>

@@ -165,11 +165,11 @@ import org.apache.lucene.util.BytesRef;
* we get <i>Lucene's Conceptual scoring formula</i>:
*
* <br> <br>
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto">
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto" summary="formatting only">
* <tr><td>
* <table cellpadding="1" cellspacing="0" border="1" align="center">
* <table cellpadding="1" cellspacing="0" border="1" align="center" summary="formatting only">
* <tr><td>
* <table cellpadding="2" cellspacing="2" border="0" align="center">
* <table cellpadding="2" cellspacing="2" border="0" align="center" summary="formatting only">
* <tr>
* <td valign="middle" align="right" rowspan="1">
* score(q,d) =

@@ -177,7 +177,7 @@ import org.apache.lucene.util.BytesRef;
* <font color="#CCCC00">query-boost(q)</font> ·
* </td>
* <td valign="middle" align="center">
* <table>
* <table summary="Lucene conceptual scoring formula">
* <tr><td align="center" style="text-align: center"><small><font color="#993399">V(q) · V(d)</font></small></td></tr>
* <tr><td align="center" style="text-align: center">–––––––––</td></tr>
* <tr><td align="center" style="text-align: center"><small><font color="#FF33CC">|V(q)|</font></small></td></tr>

@@ -257,11 +257,11 @@ import org.apache.lucene.util.BytesRef;
* to those of the <i>conceptual</i> formula:
*
* <P>
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto">
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto" summary="formatting only">
* <tr><td>
* <table cellpadding="" cellspacing="2" border="2" align="center">
* <table cellpadding="" cellspacing="2" border="2" align="center" summary="formatting only">
* <tr><td>
* <table cellpadding="2" cellspacing="2" border="0" align="center">
* <table cellpadding="2" cellspacing="2" border="0" align="center" summary="Lucene conceptual scoring formula">
* <tr>
* <td valign="middle" align="right" rowspan="1">
* score(q,d) =

@@ -310,7 +310,7 @@ import org.apache.lucene.util.BytesRef;
* {@link org.apache.lucene.search.similarities.DefaultSimilarity#tf(float) DefaultSimilarity} is:
*
* <br> <br>
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto">
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto" summary="term frequency computation">
* <tr>
* <td valign="middle" align="right" rowspan="1">
* {@link org.apache.lucene.search.similarities.DefaultSimilarity#tf(float) tf(t in d)} =

@@ -335,7 +335,7 @@ import org.apache.lucene.util.BytesRef;
* {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(long, long) DefaultSimilarity} is:
*
* <br> <br>
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto">
* <table cellpadding="2" cellspacing="2" border="0" align="center" style="width:auto" summary="inverse document frequency computation">
* <tr>
* <td valign="middle" align="right">
* {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(long, long) idf(t)} =

@@ -344,7 +344,7 @@ import org.apache.lucene.util.BytesRef;
* 1 + log <big>(</big>
* </td>
* <td valign="middle" align="center">
* <table>
* <table summary="inverse document frequency computation">
* <tr><td align="center" style="text-align: center"><small>numDocs</small></td></tr>
* <tr><td align="center" style="text-align: center">–––––––––</td></tr>
* <tr><td align="center" style="text-align: center"><small>docFreq+1</small></td></tr>
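Both factors come down to simple arithmetic: tf is a square root and idf a dampened logarithm. A small sketch mirroring the formulas documented here (the class name is made up; the expressions match DefaultSimilarity's documented tf and idf):

    class TfIdfSketch {
      /** tf(t in d) = sqrt(frequency) */
      static float tf(float freq) {
        return (float) Math.sqrt(freq);
      }

      /** idf(t) = 1 + log(numDocs / (docFreq + 1)) */
      static float idf(long docFreq, long numDocs) {
        return (float) (Math.log(numDocs / (double) (docFreq + 1)) + 1.0);
      }

      public static void main(String[] args) {
        System.out.println(tf(4));          // 2.0  (term occurs 4 times in the document)
        System.out.println(idf(10, 1000));  // ~5.5 (term appears in 10 of 1,000 documents)
      }
    }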

@@ -383,7 +383,7 @@ import org.apache.lucene.util.BytesRef;
* {@link org.apache.lucene.search.similarities.DefaultSimilarity#queryNorm(float) DefaultSimilarity}
* produces a <a href="http://en.wikipedia.org/wiki/Euclidean_norm#Euclidean_norm">Euclidean norm</a>:
* <br> <br>
* <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto">
* <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto" summary="query normalization computation">
* <tr>
* <td valign="middle" align="right" rowspan="1">
* queryNorm(q) =

@@ -391,7 +391,7 @@ import org.apache.lucene.util.BytesRef;
* =
* </td>
* <td valign="middle" align="center" rowspan="1">
* <table>
* <table summary="query normalization computation">
* <tr><td align="center" style="text-align: center"><big>1</big></td></tr>
* <tr><td align="center" style="text-align: center"><big>
* ––––––––––––––
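The fraction shown works out to the reciprocal square root of the sum of squared weights; a one-method sketch of that step (illustrative, not the Weight normalization plumbing itself):

    class QueryNormSketch {
      /** queryNorm(q) = 1 / sqrt(sumOfSquaredWeights) */
      static float queryNorm(float sumOfSquaredWeights) {
        return (float) (1.0 / Math.sqrt(sumOfSquaredWeights));
      }
    }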

@@ -409,7 +409,7 @@ import org.apache.lucene.util.BytesRef;
* computes this value as:
*
* <br> <br>
* <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto">
* <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto" summary="sum of squared weights computation">
* <tr>
* <td valign="middle" align="right" rowspan="1">
* {@link org.apache.lucene.search.Weight#getValueForNormalization() sumOfSquaredWeights} =

@@ -475,7 +475,7 @@ import org.apache.lucene.util.BytesRef;
* If the document has multiple fields with the same name, all their boosts are multiplied together:
*
* <br> <br>
* <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto">
* <table cellpadding="1" cellspacing="0" border="0" align="center" style="width:auto" summary="index-time normalization">
* <tr>
* <td valign="middle" align="right" rowspan="1">
* norm(t,d) =

@@ -34,12 +34,13 @@ public abstract class Spans {
* <code> target ≤ current</code>, or after the iterator has exhausted.
* Both cases may result in unpredicted behavior.
* <p>Returns true iff there is such
* a match. <p>Behaves as if written: <pre class="prettyprint">
* a match. <p>Behaves as if written:
* <pre class="prettyprint">
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;
* } while (target > doc());
* } while (target > doc());
* return true;
* }
* </pre>

@@ -91,7 +91,7 @@ public abstract class DataOutput {
* resulting integer value. Thus values from zero to 127 may be stored in a single
* byte, values from 128 to 16,383 may be stored in two bytes, and so on.</p>
* <p>VByte Encoding Example</p>
* <table cellspacing="0" cellpadding="2" border="0">
* <table cellspacing="0" cellpadding="2" border="0" summary="variable length encoding examples">
* <col width="64*">
* <col width="64*">
* <col width="64*">
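Each encoded byte carries seven payload bits, with the high bit flagging that another byte follows, which is why 0..127 fits in one byte and 128..16,383 in two. A standalone sketch of the encoder loop (illustrative; the real entry points are DataOutput.writeVInt and DataInput.readVInt):

    class VByteSketch {
      /** Writes v into buf at offset, low 7 bits first; returns the number of bytes used. */
      static int writeVInt(byte[] buf, int offset, int v) {
        int count = 0;
        while ((v & ~0x7F) != 0) {                             // more than 7 bits remain
          buf[offset + count++] = (byte) ((v & 0x7F) | 0x80);  // payload plus continuation bit
          v >>>= 7;
        }
        buf[offset + count++] = (byte) v;                      // final byte, high bit clear
        return count;
      }

      public static void main(String[] args) {
        byte[] buf = new byte[5];
        System.out.println(writeVInt(buf, 0, 127));    // 1
        System.out.println(writeVInt(buf, 0, 16383));  // 2
        System.out.println(writeVInt(buf, 0, 16384));  // 3
      }
    }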

@@ -27,7 +27,7 @@ import java.nio.file.Paths;
import java.util.Random;

/**
* Simple standalone tool that forever acquires & releases a
* Simple standalone tool that forever acquires and releases a
* lock using a specific LockFactory. Run without any args
* to see usage.
*

@@ -58,7 +58,7 @@ import org.apache.lucene.util.IOUtils;
* </pre>
*
* <p>This will cache all newly flushed segments, all merges
* whose expected segment size is <= 5 MB, unless the net
* whose expected segment size is {@code <= 5 MB}, unless the net
* cached bytes exceeds 60 MB at which point all writes will
* not be cached (until the net bytes falls below 60 MB).</p>
*

@@ -77,9 +77,9 @@ public class NRTCachingDirectory extends FilterDirectory implements Accountable

/**
* We will cache a newly created output if 1) it's a
* flush or a merge and the estimated size of the merged segment is <=
* maxMergeSizeMB, and 2) the total cached bytes is <=
* maxCachedMB */
* flush or a merge and the estimated size of the merged segment is
* {@code <= maxMergeSizeMB}, and 2) the total cached bytes is
* {@code <= maxCachedMB} */
public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
super(delegate);
maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024);
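Tying the constructor to the javadoc above, a typical setup wraps an FSDirectory with the 5 MB / 60 MB figures used in the example; the index path here is made up:

    import java.nio.file.Paths;

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.NRTCachingDirectory;

    class NRTCachingDirectoryExample {
      public static void main(String[] args) throws Exception {
        Directory fsDir = FSDirectory.open(Paths.get("/tmp/index"));  // illustrative path
        // cache flushed segments and merges expected to be <= 5 MB, up to 60 MB of cached bytes
        Directory dir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
        // hand 'dir' to IndexWriter / DirectoryReader as usual, then close it when done
        dir.close();
      }
    }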

@@ -93,7 +93,7 @@ public abstract class RateLimiter {

/** Pauses, if necessary, to keep the instantaneous IO
* rate at or below the target. Be sure to only call
* this method when bytes > {@link #getMinPauseCheckBytes},
* this method when bytes > {@link #getMinPauseCheckBytes},
* otherwise it will pause way too long!
*
* @return the pause time in nano seconds */
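A sketch of the calling pattern this contract implies, using the SimpleRateLimiter implementation; the 10 MB/sec target and the fake write loop are illustrative:

    import org.apache.lucene.store.RateLimiter;

    class RateLimiterExample {
      public static void main(String[] args) throws Exception {
        RateLimiter limiter = new RateLimiter.SimpleRateLimiter(10.0);  // target: 10 MB/sec
        long bytesSinceLastPause = 0;
        for (int i = 0; i < 1000; i++) {
          bytesSinceLastPause += 8192;  // pretend 8 KB were just written
          // only ask the limiter to pause once enough bytes have accumulated
          if (bytesSinceLastPause > limiter.getMinPauseCheckBytes()) {
            limiter.pause(bytesSinceLastPause);
            bytesSinceLastPause = 0;
          }
        }
      }
    }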

@@ -29,7 +29,7 @@ import java.io.OutputStream;
* external server ({@link LockVerifyServer}) to assert that
* at most one process holds the lock at a time. To use
* this, you should also run {@link LockVerifyServer} on the
* host & port matching what you pass to the constructor.
* host and port matching what you pass to the constructor.
*
* @see LockVerifyServer
* @see LockStressTest

@@ -134,7 +134,7 @@ public final class ArrayUtil {
END APACHE HARMONY CODE
*/

/** Returns an array size >= minTargetSize, generally
/** Returns an array size >= minTargetSize, generally
* over-allocating exponentially to achieve amortized
* linear-time cost as the array grows.
*
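In practice this is usually reached through ArrayUtil.grow, which reallocates to an oversized length only when the buffer is full; a small sketch (the buffer contents are made up):

    import org.apache.lucene.util.ArrayUtil;
    import org.apache.lucene.util.RamUsageEstimator;

    class ArrayUtilGrowExample {
      public static void main(String[] args) {
        int[] buffer = new int[10];
        int used = 0;
        for (int value = 0; value < 1000; value++) {
          if (used == buffer.length) {
            buffer = ArrayUtil.grow(buffer, used + 1);  // exponential over-allocation
          }
          buffer[used++] = value;
        }
        // oversize can also be used directly when sizing a brand new array
        System.out.println(ArrayUtil.oversize(1000, RamUsageEstimator.NUM_BYTES_INT));
      }
    }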

@@ -46,7 +46,7 @@ public final class BitUtil {
// packed inside a 32 bit integer (8 4 bit numbers). That
// should be faster than accessing an array for each index, and
// the total array size is kept smaller (256*sizeof(int))=1K
/***** the python code that generated bitlist
/* the python code that generated bitlist
def bits2int(val):
arr=0
for shift in range(8,0,-1):

@@ -58,7 +58,7 @@ public final class BitUtil {
def int_table():
tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
return ','.join(tbl)
******/
*/
private static final int[] BIT_LISTS = {
0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43,
0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321,

@@ -98,12 +98,12 @@ public final class BitUtil {
}

/** Return the list of bits which are set in b encoded as follows:
* <code>(i >>> (4 * n)) & 0x0F</code> is the offset of the n-th set bit of
* {@code (i >>> (4 * n)) & 0x0F} is the offset of the n-th set bit of
* the given byte plus one, or 0 if there are n or less bits set in the given
* byte. For example <code>bitList(12)</code> returns 0x43:<ul>
* <li><code>0x43 & 0x0F</code> is 3, meaning the first bit set is at offset 3-1 = 2,</li>
* <li><code>(0x43 >>> 4) & 0x0F</code> is 4, meaning there is a second bit set at offset 4-1=3,</li>
* <li><code>(0x43 >>> 8) & 0x0F</code> is 0, meaning there is no more bit set in this byte.</li>
* <li>{@code 0x43 & 0x0F} is 3, meaning the first bit set is at offset 3-1 = 2,</li>
* <li>{@code (0x43 >>> 4) & 0x0F} is 4, meaning there is a second bit set at offset 4-1=3,</li>
* <li>{@code (0x43 >>> 8) & 0x0F} is 0, meaning there is no more bit set in this byte.</li>
* </ul>*/
public static int bitList(byte b) {
return BIT_LISTS[b & 0xFF];
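Decoding the packed value follows directly from this description: consume one nibble at a time until a zero nibble appears. A standalone sketch using the 0x43 value from the example above:

    class BitListDecodeExample {
      public static void main(String[] args) {
        int bitList = 0x43;  // what bitList((byte) 12) returns, per the javadoc example
        while ((bitList & 0x0F) != 0) {
          System.out.println((bitList & 0x0F) - 1);  // stored nibble is offset + 1, so prints 2, then 3
          bitList >>>= 4;                            // move on to the next set bit, if any
        }
      }
    }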

@@ -142,7 +142,7 @@ public final class BitUtil {
return popCount;
}

/** Returns the popcount or cardinality of A & ~B.
/** Returns the popcount or cardinality of {@code A & ~B}.
* Neither array is modified. */
public static long pop_andnot(long[] arr1, long[] arr2, int wordOffset, int numWords) {
long popCount = 0;

@@ -266,7 +266,7 @@ public final class BytesRefHash {
* haven't been hashed before.
*
* @throws MaxBytesLengthExceededException
* if the given bytes are > 2 +
* if the given bytes are {@code > 2 +}
* {@link ByteBlockPool#BYTE_BLOCK_SIZE}
*/
public int add(BytesRef bytes) {

@@ -403,7 +403,7 @@ public final class BytesRefHash {
}

/**
* Called when hash is too small (> 50% occupied) or too large (< 20%
* Called when hash is too small ({@code > 50%} occupied) or too large ({@code < 20%}
* occupied).
*/
private void rehash(final int newSize, boolean hashOnData) {

@@ -186,7 +186,7 @@ public final class PagedBytes implements Accountable {

/** Copy BytesRef in, setting BytesRef out to the result.
* Do not use this if you will use freeze(true).
* This only supports bytes.length <= blockSize */
* This only supports bytes.length <= blockSize */
public void copy(BytesRef bytes, BytesRef out) {
int left = blockSize - upto;
if (bytes.length > left || currentBlock==null) {

@@ -22,7 +22,7 @@ import java.util.Arrays;
/**
* A native int hash-based set where one value is reserved to mean "EMPTY" internally. The space overhead is fairly low
* as there is only one power-of-two sized int[] to hold the values. The set is re-hashed when adding a value that
* would make it >= 75% full. Consider extending and over-riding {@link #hash(int)} if the values might be poor
* would make it >= 75% full. Consider extending and over-riding {@link #hash(int)} if the values might be poor
* hash keys; Lucene docids should be fine.
* The internal fields are exposed publicly to enable more efficient use at the expense of better O-O principles.
* <p/>
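A usage sketch, assuming the (size, emptyVal) constructor and the put/exists methods of org.apache.lucene.util.SentinelIntSet; the sentinel is the one value callers must never insert (here -1, which no docid can be):

    import org.apache.lucene.util.SentinelIntSet;

    class SentinelIntSetExample {
      public static void main(String[] args) {
        SentinelIntSet seenDocs = new SentinelIntSet(64, -1);  // expected size, reserved EMPTY value
        seenDocs.put(42);
        System.out.println(seenDocs.exists(42));  // true
        System.out.println(seenDocs.exists(7));   // false
      }
    }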

@@ -43,11 +43,11 @@ import java.util.Set;
* instance's class, use a <strong>non-static</strong> field:</p>
* <pre class="prettyprint">
* final boolean isDeprecatedMethodOverridden =
* oldMethod.getImplementationDistance(this.getClass()) > newMethod.getImplementationDistance(this.getClass());
* oldMethod.getImplementationDistance(this.getClass()) > newMethod.getImplementationDistance(this.getClass());
*
* <em>// alternatively (more readable):</em>
* final boolean isDeprecatedMethodOverridden =
* VirtualMethod.compareImplementationDistance(this.getClass(), oldMethod, newMethod) > 0
* VirtualMethod.compareImplementationDistance(this.getClass(), oldMethod, newMethod) > 0
* </pre>
* <p>{@link #getImplementationDistance} returns the distance of the subclass that overrides this method.
* The one with the larger distance should be used preferably.

@@ -211,10 +211,10 @@ final public class Automata {
* @param min minimal value of interval
* @param max maximal value of interval (both end points are included in the
* interval)
* @param digits if >0, use fixed number of digits (strings must be prefixed
* @param digits if > 0, use fixed number of digits (strings must be prefixed
* by 0's to obtain the right length) - otherwise, the number of
* digits is not fixed (any number of leading 0s is accepted)
* @exception IllegalArgumentException if min>max or if numbers in the
* @exception IllegalArgumentException if min > max or if numbers in the
* interval cannot be expressed with the given fixed number of
* digits
*/
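A usage sketch for the decimal-interval automaton this javadoc describes; the method name makeDecimalInterval and the CharacterRunAutomaton wrapper are assumptions drawn from the parameters shown here, not spelled out in this hunk:

    import org.apache.lucene.util.automaton.Automata;
    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;

    class DecimalIntervalExample {
      public static void main(String[] args) {
        // digits=2: accept exactly two-digit strings for the values 10..99
        Automaton a = Automata.makeDecimalInterval(10, 99, 2);
        CharacterRunAutomaton run = new CharacterRunAutomaton(a);
        System.out.println(run.run("57"));   // true
        System.out.println(run.run("9"));    // false: wrong number of digits
        System.out.println(run.run("100"));  // false: outside the interval
      }
    }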

@@ -297,7 +297,7 @@ public class CompiledAutomaton {
}

/** Finds largest term accepted by this Automaton, that's
* <= the provided input term. The result is placed in
* <= the provided input term. The result is placed in
* output; it's fine for output and input to point to
* the same bytes. The returned result is either the
* provided output, or null if there is no floor term

@@ -41,7 +41,7 @@ import java.util.Set;
* <p>
* Regular expressions are built from the following abstract syntax:
* <p>
* <table border=0>
* <table border=0 summary="description of regular expression grammar">
* <tr>
* <td><i>regexp</i></td>
* <td>::=</td>

@@ -58,7 +58,7 @@ public final class BytesRefFSTEnum<T> extends FSTEnum<T> {
return setResult();
}

/** Seeks to smallest term that's >= target. */
/** Seeks to smallest term that's >= target. */
public InputOutput<T> seekCeil(BytesRef target) throws IOException {
this.target = target;
targetLength = target.length;

@@ -66,7 +66,7 @@ public final class BytesRefFSTEnum<T> extends FSTEnum<T> {
return setResult();
}

/** Seeks to biggest term that's <= target. */
/** Seeks to biggest term that's <= target. */
public InputOutput<T> seekFloor(BytesRef target) throws IOException {
this.target = target;
targetLength = target.length;

@@ -75,7 +75,7 @@ class BytesStore extends DataOutput implements Accountable {
nextWrite = blocks.get(blocks.size()-1).length;
}

/** Absolute write byte; you must ensure dest is < max
/** Absolute write byte; you must ensure dest is < max
* position written so far. */
public void writeByte(int dest, byte b) {
int blockIndex = dest >> blockBits;

@@ -114,7 +114,7 @@ abstract class FSTEnum<T> {
// TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
// SEEK_END)? saves the eq check above?

/** Seeks to smallest term that's >= target. */
/** Seeks to smallest term that's >= target. */
protected void doSeekCeil() throws IOException {

//System.out.println(" advance len=" + target.length + " curlen=" + current.length);

@@ -256,7 +256,7 @@ abstract class FSTEnum<T> {

// TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
// SEEK_END)? saves the eq check above?
/** Seeks to largest term that's <= target. */
/** Seeks to largest term that's <= target. */
protected void doSeekFloor() throws IOException {

// TODO: possibly caller could/should provide common

@@ -58,7 +58,7 @@ public final class IntsRefFSTEnum<T> extends FSTEnum<T> {
return setResult();
}

/** Seeks to smallest term that's >= target. */
/** Seeks to smallest term that's >= target. */
public InputOutput<T> seekCeil(IntsRef target) throws IOException {
this.target = target;
targetLength = target.length;

@@ -66,7 +66,7 @@ public final class IntsRefFSTEnum<T> extends FSTEnum<T> {
return setResult();
}

/** Seeks to biggest term that's <= target. */
/** Seeks to biggest term that's <= target. */
public InputOutput<T> seekFloor(IntsRef target) throws IOException {
this.target = target;
targetLength = target.length;

@@ -41,13 +41,13 @@ public abstract class Outputs<T> {
// (new object per byte/char/int) if eg used during
// analysis

/** Eg common("foobar", "food") -> "foo" */
/** Eg common("foobar", "food") -> "foo" */
public abstract T common(T output1, T output2);

/** Eg subtract("foobar", "foo") -> "bar" */
/** Eg subtract("foobar", "foo") -> "bar" */
public abstract T subtract(T output, T inc);

/** Eg add("foo", "bar") -> "foobar" */
/** Eg add("foo", "bar") -> "foobar" */
public abstract T add(T prefix, T output);
/** Encode an output value into a {@link DataOutput}. */
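For a concrete feel of these three operations, PositiveIntOutputs treats outputs as non-negative longs: common works out to the minimum, subtract removes a shared prefix, and add puts it back. A small sketch:

    import org.apache.lucene.util.fst.PositiveIntOutputs;

    class OutputsExample {
      public static void main(String[] args) {
        PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
        Long common = outputs.common(17L, 42L);       // 17: the shared prefix output
        Long rest   = outputs.subtract(42L, common);  // 25: what is left after removing the prefix
        Long whole  = outputs.add(common, rest);      // 42: prefix + remainder restores the value
        System.out.println(common + " " + rest + " " + whole);
      }
    }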

@@ -588,7 +588,7 @@ public final class Util {
*
* <p>
* Note: larger FSTs (a few thousand nodes) won't even
* render, don't bother. If the FST is > 2.1 GB in size
* render, don't bother. If the FST is > 2.1 GB in size
* then this method will throw strange exceptions.
*
* @param sameRank

@@ -600,7 +600,7 @@ public final class Util {
* If <code>true</code> states will have labels equal to their offsets in their
* binary format. Expands the graph considerably.
*
* @see "http://www.graphviz.org/"
* @see <a href="http://www.graphviz.org/">graphviz project</a>
*/
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates)
throws IOException {
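Given the signature above, dumping a small FST for GraphViz takes only a few lines; a sketch (the dot command in the comment is the usual way to render the output):

    import java.io.IOException;
    import java.io.PrintWriter;
    import java.io.Writer;

    import org.apache.lucene.util.fst.FST;
    import org.apache.lucene.util.fst.Util;

    class FstDotExample {
      /** Writes a GraphViz description of the given (small!) FST to System.out. */
      static <T> void dumpDot(FST<T> fst) throws IOException {
        Writer out = new PrintWriter(System.out);
        Util.toDot(fst, out, true /* sameRank */, false /* labelStates */);
        out.flush();
        // render the captured output with: dot -Tpng fst.dot -o fst.png
      }
    }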

@@ -152,16 +152,16 @@ abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=2, v=1
* - 24 bits per value -> b=3, v=1
* - 50 bits per value -> b=25, v=4
* - 63 bits per value -> b=63, v=8
* - 16 bits per value -> b=2, v=1
* - 24 bits per value -> b=3, v=1
* - 50 bits per value -> b=25, v=4
* - 63 bits per value -> b=63, v=8
* - ...
*
* A bulk read consists in copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + 8v) bytes of memory.
* throughput): this requires n * (b + 8v) bytes of memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
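Following the formula above, a quick worked example: at 50 bits per value (b=25, v=4) one iteration costs 25 + 8*4 = 57 bytes, so a 1,024-byte budget yields iterations = 1024 / 57 = 17 (integer division).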
Some files were not shown because too many files have changed in this diff.