From 0d339043e378d8333c376bae89411b813de25b10 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 6 Feb 2020 22:20:44 -0500 Subject: [PATCH] LUCENE-9209: fix javadocs to be html5, enable doclint html checks, remove jtidy Current javadocs declare an HTML5 doctype: <!DOCTYPE HTML>. Some HTML5 features are used, but unfortunately some constructs that do not exist in HTML5 are used as well. Because of this, we have no checking of any HTML syntax: jtidy is disabled because it only works with HTML4, and doclint is disabled because it only works with HTML5. Our docs are neither. The javadoc "doclint" feature can efficiently check that the HTML isn't crazy; we just have to fix really ancient removed/deprecated stuff (such as use of the tt tag). This enables the HTML checking in both ant and gradle. The docs are fixed via straightforward transformations. One exception is table cellpadding: for this, some helper CSS classes were added to make the transition easier (since the padding must be applied to the inner th/td, which is not possible inline). I added TODOs; we should clean this up. Most problems look like they were generated by a GUI tool or similar rather than written by a human. 
--- gradle/defaults-javadoc.gradle | 2 +- .../lucene/analysis/br/BrazilianStemmer.java | 4 +- .../charfilter/HTMLStripCharFilter.java | 4 +- .../analysis/compound/package-info.java | 9 +- .../lucene/analysis/de/GermanStemmer.java | 6 +- .../ConcatenateGraphFilterFactory.java | 8 +- .../analysis/ngram/EdgeNGramTokenFilter.java | 2 +- .../analysis/ngram/EdgeNGramTokenizer.java | 2 +- .../lucene/analysis/ngram/NGramTokenizer.java | 5 +- .../analysis/standard/ClassicFilter.java | 2 +- .../standard/ClassicTokenizerImpl.java | 4 +- .../standard/UAX29URLEmailTokenizerImpl.java | 4 +- .../wikipedia/WikipediaTokenizerImpl.java | 4 +- .../charfilter/htmlStripReaderTest.html | 22 ++-- .../icu/segmentation/ICUTokenizerFactory.java | 2 +- lucene/analysis/icu/src/java/overview.html | 28 ++--- .../opennlp/OpenNLPLemmatizerFilter.java | 2 +- .../src/java/org/egothor/stemmer/Diff.java | 4 +- .../src/java/org/egothor/stemmer/Gener.java | 4 +- .../src/java/org/egothor/stemmer/Lift.java | 2 +- .../java/org/egothor/stemmer/MultiTrie.java | 4 +- .../java/org/egothor/stemmer/MultiTrie2.java | 4 +- .../java/org/egothor/stemmer/Optimizer.java | 4 +- .../java/org/egothor/stemmer/Optimizer2.java | 4 +- .../src/java/org/egothor/stemmer/Row.java | 4 +- .../src/java/org/egothor/stemmer/Trie.java | 4 +- .../analysis/stempel/src/java/overview.html | 40 +++--- .../lucene50/Lucene50PostingsFormat.java | 20 +-- .../lucene/benchmark/byTask/package-info.java | 17 +-- .../byTask/tasks/AnalyzerFactoryTask.java | 6 +- lucene/build.xml | 7 -- .../lucene/codecs/memory/FSTTermsWriter.java | 4 +- lucene/common-build.xml | 30 +---- lucene/core/src/data/jflex/skeleton.default | 4 +- .../skeleton.disable.buffer.expansion.txt | 4 +- .../apache/lucene/analysis/package-info.java | 16 ++- .../standard/StandardTokenizerImpl.java | 4 +- .../blocktree/BlockTreeTermsWriter.java | 8 +- .../CompressingStoredFieldsFormat.java | 4 +- .../lucene50/Lucene50CompoundFormat.java | 4 +- .../lucene50/Lucene50FieldInfosFormat.java 
| 2 +- .../lucene50/Lucene50StoredFieldsFormat.java | 16 +-- .../lucene50/Lucene50TermVectorsFormat.java | 8 +- .../lucene60/Lucene60FieldInfosFormat.java | 2 +- .../lucene70/Lucene70SegmentInfoFormat.java | 2 +- .../lucene80/Lucene80DocValuesFormat.java | 8 +- .../codecs/lucene80/Lucene80NormsFormat.java | 8 +- .../lucene84/Lucene84PostingsFormat.java | 20 +-- .../lucene/codecs/lucene84/package-info.java | 27 +++-- .../perfield/PerFieldDocValuesFormat.java | 4 +- .../perfield/PerFieldPostingsFormat.java | 4 +- .../lucene/index/BaseCompositeReader.java | 2 +- .../apache/lucene/index/CompositeReader.java | 2 +- .../apache/lucene/index/DirectoryReader.java | 2 +- .../org/apache/lucene/index/IndexReader.java | 2 +- .../lucene/index/IndexReaderContext.java | 4 +- .../org/apache/lucene/index/IndexWriter.java | 12 +- .../org/apache/lucene/index/LeafReader.java | 2 +- .../org/apache/lucene/index/MergePolicy.java | 2 +- .../org/apache/lucene/index/MultiReader.java | 2 +- .../org/apache/lucene/index/PointValues.java | 3 +- .../org/apache/lucene/index/SegmentInfos.java | 4 +- .../java/org/apache/lucene/index/Sorter.java | 4 +- .../org/apache/lucene/index/package-info.java | 38 +++--- .../lucene/search/CachingCollector.java | 2 +- .../apache/lucene/search/IndexSearcher.java | 2 +- .../apache/lucene/search/LRUQueryCache.java | 2 +- .../lucene/search/QueryCachingPolicy.java | 2 +- .../apache/lucene/search/package-info.java | 28 ++--- .../search/similarities/Similarity.java | 8 +- .../search/similarities/TFIDFSimilarity.java | 114 ++++++++++-------- .../search/similarities/package-info.java | 14 +-- .../org/apache/lucene/store/DataOutput.java | 11 +- .../org/apache/lucene/store/FSDirectory.java | 2 +- .../lucene/store/OutputStreamIndexOutput.java | 2 +- .../java/org/apache/lucene/util/BitUtil.java | 4 +- .../org/apache/lucene/util/ByteBlockPool.java | 6 +- .../org/apache/lucene/util/Constants.java | 4 +- .../apache/lucene/util/FilterIterator.java | 2 +- 
.../java/org/apache/lucene/util/IOUtils.java | 24 ++-- .../org/apache/lucene/util/IntBlockPool.java | 2 +- .../org/apache/lucene/util/PriorityQueue.java | 2 +- .../apache/lucene/util/SparseFixedBitSet.java | 6 +- .../org/apache/lucene/util/TimSorter.java | 2 +- .../apache/lucene/util/WeakIdentityMap.java | 2 +- .../apache/lucene/util/automaton/RegExp.java | 71 +++++------ .../lucene/util/automaton/StatePair.java | 2 +- .../lucene/util/automaton/package-info.java | 8 +- .../packed/AbstractBlockPackedWriter.java | 2 +- .../packed/BlockPackedReaderIterator.java | 2 +- .../lucene/util/packed/BlockPackedWriter.java | 8 +- .../packed/MonotonicBlockPackedWriter.java | 2 +- lucene/core/src/java/overview.html | 28 ++--- lucene/demo/src/java/overview.html | 17 ++- lucene/expressions/src/java/overview.html | 4 +- .../search/vectorhighlight/package-info.java | 5 +- .../search/highlight/HighlighterTest.java | 2 +- .../custom/HighlightCustomQueryTest.java | 2 +- lucene/misc/src/java/overview.html | 12 +- .../lucene/queries/CommonTermsQuery.java | 4 +- .../queryparser/classic/QueryParser.java | 2 +- .../lucene/queryparser/classic/QueryParser.jj | 2 +- .../queryparser/classic/package-info.java | 40 +++--- .../flexible/core/builders/package-info.java | 2 +- .../flexible/core/config/package-info.java | 2 +- .../flexible/core/messages/package-info.java | 2 +- .../flexible/core/nodes/package-info.java | 2 +- .../flexible/core/parser/package-info.java | 2 +- .../core/processors/package-info.java | 2 +- .../queryparser/simple/SimpleQueryParser.java | 18 +-- .../xml/builders/PointRangeQueryBuilder.java | 11 +- lucene/queryparser/src/java/overview.html | 19 ++- .../apache/lucene/search/CoveringQuery.java | 4 +- lucene/spatial-extras/src/java/overview.html | 1 - .../document/CompletionPostingsFormat.java | 8 +- .../org/apache/lucene/analysis/package.html | 1 - .../lucene/index/RandomIndexWriter.java | 2 +- .../java/org/apache/lucene/index/package.html | 1 - 
.../org/apache/lucene/search/package.html | 1 - .../lucene/search/similarities/package.html | 1 - lucene/tools/javadoc/table_padding.css | 37 ++++++ .../lucene/validation/LicenseCheckTask.java | 4 +- solr/build.xml | 6 - solr/contrib/ltr/src/java/overview.html | 2 +- .../solr/core/IndexDeletionPolicyWrapper.java | 2 +- .../java/org/apache/solr/core/SolrCore.java | 10 +- .../handler/FieldAnalysisRequestHandler.java | 13 +- .../solr/handler/RequestHandlerBase.java | 3 +- .../solr/handler/export/PriorityQueue.java | 2 +- .../solr/legacy/LegacyNumericRangeQuery.java | 4 +- .../solr/legacy/LegacyNumericUtils.java | 8 +- .../solr/response/SolrQueryResponse.java | 6 +- .../LegacyNumericRangeQueryBuilder.java | 11 +- .../client/solrj/io/stream/JDBCStream.java | 7 +- .../apache/solr/common/SolrInputDocument.java | 4 +- 135 files changed, 586 insertions(+), 558 deletions(-) create mode 100644 lucene/tools/javadoc/table_padding.css diff --git a/gradle/defaults-javadoc.gradle b/gradle/defaults-javadoc.gradle index 13de9e0b0d6..d3dee7a3d27 100644 --- a/gradle/defaults-javadoc.gradle +++ b/gradle/defaults-javadoc.gradle @@ -44,7 +44,7 @@ allprojects { ) opts.addStringOption("-release", "11") - opts.addBooleanOption('Xdoclint:all,-missing,-accessibility,-html', true) + opts.addBooleanOption('Xdoclint:all,-missing,-accessibility', true) def libName = project.path.startsWith(":lucene") ? "Lucene" : "Solr" opts.overview = file("src/main/java/overview.html").toString() diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java index 0f96331c357..273735835ac 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java @@ -39,10 +39,10 @@ public class BrazilianStemmer { } /** - * Stems the given term to an unique discriminator. 
+ * Stems the given term to an unique discriminator. * * @param term The term that should be stemmed. - * @return Discriminator for term + * @return Discriminator for term */ protected String stem( String term ) { boolean altered = false ; // altered the term diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java index ae67bde0268..ebb92cdcaa1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java @@ -30184,7 +30184,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { * * All internal variables are reset, the old input stream * cannot be reused (internal buffer is discarded and lost). - * Lexical state is set to ZZ_INITIAL. + * Lexical state is set to ZZ_INITIAL. * * Internal scan buffer is resized down to its initial length, if it has grown. * @@ -30232,7 +30232,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { /** - * Returns the character at position pos from the + * Returns the character at position pos from the * matched text. * * It is equivalent to yytext().charAt(pos), but faster diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/package-info.java index 126f887f364..849bd9a2559 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/package-info.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/package-info.java @@ -18,7 +18,8 @@ /** * A filter that decomposes compound words you find in many Germanic * languages into the word parts. This example shows what it does: - * + *
+ * * * * @@ -27,7 +28,8 @@ * *
example input stream
Input token stream
*
- * + *
+ * * * * @@ -118,7 +120,8 @@ * *

Which variant should I use?

* This decision matrix should help you: - *
example output stream
Output token stream
+ *
+ * * * * diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java index 971b36e1e03..f29c21281c4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java @@ -37,17 +37,17 @@ public class GermanStemmer private StringBuilder sb = new StringBuilder(); /** - * Amount of characters that are removed with substitute() while stemming. + * Amount of characters that are removed with substitute() while stemming. */ private int substCount = 0; private static final Locale locale = new Locale("de", "DE"); /** - * Stemms the given term to an unique discriminator. + * Stemms the given term to an unique discriminator. * * @param term The term that should be stemmed. - * @return Discriminator for term + * @return Discriminator for term */ protected String stem( String term ) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java index 26f8d978058..dce7a94676e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java @@ -27,23 +27,23 @@ import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; * Factory for {@link ConcatenateGraphFilter}. * *
comparison of dictionary and hyphenation based decompounding
Token filterOutput quality
+ *
+ * * * * * *
ngram tokens example
Termababcbcbcdcdcdede
Position increment1111111
Position length1111111
Offsets[0,2[[0,3[[1,3[[1,4[[2,4[[2,5[[3,5[
- * + * *

This tokenizer changed a lot in Lucene 4.4 in order to:

- +

What Is Solr?

@@ -228,10 +228,10 @@ document.write("Last Published: " + document.lastModified);

- +

News

- +

02 October 2007 - Solr at OSSummit Asia

OSSummit Asia logo @@ -250,7 +250,7 @@ document.write("Last Published: " + document.lastModified); Lucene Case Studies by Erik Hatcher. A rapid series of examples of many Lucene and Solr using applications. - +

03 September 2007 - Lucene at ApacheCon Atlanta

ApacheCon US logo @@ -270,7 +270,7 @@ document.write("Last Published: " + document.lastModified);

  • November 16, 4:00 pm: Advanced Indexing Techniques with Apache Lucene by Michael Busch. Information on payloads and advanced indexing techniques.
  • - +

    06 June 2007: Release 1.2 available

    This is the first release since Solr graduated from the Incubator, @@ -280,40 +280,40 @@ document.write("Last Published: " + document.lastModified); and more flexible plugins.

    See the release notes for more details.

    - +

    17 January 2007: Solr graduates from Incubator

    Solr has graduated from the Apache Incubator, and is now a sub-project of Lucene.

    - +

    22 December 2006: Release 1.1.0 available

    This is the first release since Solr joined the Incubator, and brings many new features and performance optimizations including highlighting, faceted search, and JSON/Python/Ruby response formats.

    - +

    15 August 2006: Solr at ApacheCon US

    Chris Hostetter will be presenting "Faceted Searching With Apache Solr" at ApacheCon US 2006, on October 13th at 4:30pm. See the ApacheCon website for more details.

    - +

    21 April 2006: Solr at ApacheCon

    Yonik Seeley will be presenting "Apache Solr, a Full-Text Search Server based on Lucene" at ApacheCon Europe 2006, on June 29th at 5:30pm. See the ApacheCon website for more details.

    - +

    21 February 2006: nightly builds

    Solr now has nightly builds. This automatically creates a downloadable version of Solr every night. All unit tests must pass, or a message is sent to the developers mailing list and no new version is created. This also updates the javadoc.

    - +

    17 January 2006: Solr Joins Apache Incubator

    Solr, a search server based on Lucene, has been accepted into the Apache Incubator. Solr was originally developed by CNET Networks, and is widely used within CNET diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java index b2e3446b5ca..a8b54a07a8c 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java @@ -59,7 +59,7 @@ import com.ibm.icu.text.RuleBasedBreakIterator; * *

    * To add per-script rules, add a "rulefiles" argument, which should contain a - * comma-separated list of code:rulefile pairs in the following format: + * comma-separated list of code:rulefile pairs in the following format: * four-letter ISO 15924 script code, followed by a colon, then a resource * path. E.g. to specify rules for Latin (script code "Latn") and Cyrillic diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html index fa61e7c1c6a..8ca0f1bdb2d 100644 --- a/lucene/analysis/icu/src/java/overview.html +++ b/lucene/analysis/icu/src/java/overview.html @@ -47,8 +47,8 @@ This module exposes the following functionality:

  • Text Transformation: Transforms Unicode text in a context-sensitive fashion: e.g. mapping Traditional to Simplified Chinese
  • -
    -

    Text Segmentation

    +
    +

    Text Segmentation

    Text Segmentation (Tokenization) divides document and query text into index terms (typically words). Unicode provides special properties and rules so that this can @@ -76,8 +76,8 @@ algorithm. */ Tokenizer tokenizer = new ICUTokenizer(reader); -


    -

    Collation

    +
    +

    Collation

    ICUCollationKeyAnalyzer converts each token into its binary CollationKey using the @@ -225,8 +225,8 @@ algorithm. you use CollationKeyAnalyzer to generate index terms, do not use ICUCollationKeyAnalyzer on the query side, or vice versa.

    -
    -

    Normalization

    +
    +

    Normalization

    ICUNormalizer2Filter normalizes term text to a Unicode Normalization Form, so @@ -253,8 +253,8 @@ algorithm. */ TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer, normalizer); -


    -

    Case Folding

    +
    +

    Case Folding

    Default caseless matching, or case-folding is more than just conversion to lowercase. For example, it handles cases such as the Greek sigma, so that @@ -288,8 +288,8 @@ this integration. To perform case-folding, you use normalization with the form */ TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer); -


    -

    Search Term Folding

    +
    +

    Search Term Folding

    Search term folding removes distinctions (such as accent marks) between similar characters. It is useful for a fuzzy or loose search. @@ -316,8 +316,8 @@ many character foldings recursively. */ TokenStream tokenstream = new ICUFoldingFilter(tokenizer); -


    -

    Text Transformation

    +
    +

    Text Transformation

    ICU provides text-transformation functionality via its Transliteration API. This allows you to transform text in a variety of ways, taking context into account. @@ -352,8 +352,8 @@ and */ TokenStream tokenstream = new ICUTransformFilter(tokenizer, Transliterator.getInstance("Serbian-Latin/BGN")); -


    -

    Backwards Compatibility

    +
    +

    Backwards Compatibility

    This module exists to provide up-to-date Unicode functionality that supports the most recent version of Unicode (currently 11.0). However, some users who wish diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java index 4c484b9435b..1f446809ec5 100644 --- a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java +++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java @@ -41,7 +41,7 @@ import org.apache.lucene.util.AttributeSource; *

    *

    * The dictionary file must be encoded as UTF-8, with one entry per line, - * in the form word[tab]lemma[tab]part-of-speech + * in the form word[tab]lemma[tab]part-of-speech *

    */ public class OpenNLPLemmatizerFilter extends TokenFilter { diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Diff.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Diff.java index da720fcfc0e..c4450056d78 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Diff.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Diff.java @@ -97,8 +97,8 @@ public class Diff { } /** - * Apply the given patch string diff to the given string - * dest. + * Apply the given patch string diff to the given string + * dest. * * @param dest Destination string * @param diff Patch string diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Gener.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Gener.java index 983c67f1950..31148eaf85d 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Gener.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Gener.java @@ -101,8 +101,8 @@ public class Gener extends Reduce { * * @param in the Row to test * @param remap Description of the Parameter - * @return true if the Row should remain, false - * otherwise + * @return true if the Row should remain, false + * otherwise */ public boolean eat(Row in, int remap[]) { int sum = 0; diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Lift.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Lift.java index 16da8c8149a..b7ac8ef5ada 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Lift.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Lift.java @@ -71,7 +71,7 @@ public class Lift extends Reduce { /** * Constructor for the Lift object. 
* - * @param changeSkip when set to true, comparison of two Cells takes + * @param changeSkip when set to true, comparison of two Cells takes * a skip command into account */ public Lift(boolean changeSkip) { diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie.java index e0d9376df6d..caf8a88b49d 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie.java @@ -92,7 +92,7 @@ public class MultiTrie extends Trie { /** * Constructor for the MultiTrie object * - * @param forward set to true if the elements should be read left to + * @param forward set to true if the elements should be read left to * right */ public MultiTrie(boolean forward) { @@ -157,7 +157,7 @@ public class MultiTrie extends Trie { * Add an element to this structure consisting of the given key and patch * command. *

    - * This method will return without executing if the cmd + * This method will return without executing if the cmd * parameter's length is 0. * * @param key the key diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie2.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie2.java index cfe3181ad23..781966aabb8 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie2.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie2.java @@ -81,7 +81,7 @@ public class MultiTrie2 extends MultiTrie { /** * Constructor for the MultiTrie2 object * - * @param forward set to true if the elements should be read left to + * @param forward set to true if the elements should be read left to * right */ public MultiTrie2(boolean forward) { @@ -187,7 +187,7 @@ public class MultiTrie2 extends MultiTrie { * Add an element to this structure consisting of the given key and patch * command. *

    - * This method will return without executing if the cmd + * This method will return without executing if the cmd * parameter's length is 0. * * @param key the key diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer.java index 25b72353380..e4823650b3b 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer.java @@ -117,7 +117,7 @@ public class Optimizer extends Reduce { * * @param master the master Row * @param existing the existing Row - * @return the resulting Row, or null if the operation cannot be + * @return the resulting Row, or null if the operation cannot be * realized */ public Row merge(Row master, Row existing) { @@ -151,7 +151,7 @@ public class Optimizer extends Reduce { * * @param m the master Cell * @param e the existing Cell - * @return the resulting Cell, or null if the operation cannot be + * @return the resulting Cell, or null if the operation cannot be * realized */ public Cell merge(Cell m, Cell e) { diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer2.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer2.java index ea29667b412..688029f3f2d 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer2.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Optimizer2.java @@ -58,7 +58,7 @@ package org.egothor.stemmer; * The Optimizer class is a Trie that will be reduced (have empty rows removed). *

    * This is the result of allowing a joining of rows when there is no collision - * between non-null values in the rows. Information loss, resulting in + * between non-null values in the rows. Information loss, resulting in * the stemmer not being able to recognize words (as in Optimizer), is * curtailed, allowing the stemmer to recognize words for which the original * trie was built. Use of this class allows the stemmer to be self-teaching. @@ -74,7 +74,7 @@ public class Optimizer2 extends Optimizer { * * @param m the master Cell * @param e the existing Cell - * @return the resulting Cell, or null if the operation cannot be + * @return the resulting Cell, or null if the operation cannot be * realized */ @Override diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java index 600b0081663..c17d6ce0d18 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java @@ -220,7 +220,7 @@ public class Row { * Character. * * @param way the Character associated with the desired Cell - * @return the reference, or -1 if the Cell is null + * @return the reference, or -1 if the Cell is null */ public int getRef(Character way) { Cell c = at(way); @@ -255,7 +255,7 @@ public class Row { * Return the number of identical Cells (containing patch commands) in this * Row. 
* - * @param eqSkip when set to false the removed patch commands are + * @param eqSkip when set to false the removed patch commands are * considered * @return the number of identical Cells, or -1 if there are (at least) two * different cells diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java index 403238893f3..0399b98ca38 100644 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java @@ -96,7 +96,7 @@ public class Trie { /** * Constructor for the Trie object. * - * @param forward set to true + * @param forward set to true */ public Trie(boolean forward) { rows.add(new Row()); @@ -107,7 +107,7 @@ public class Trie { /** * Constructor for the Trie object. * - * @param forward true if read left to right, false if read + * @param forward true if read left to right, false if read * right to left * @param root index of the row that is the root node * @param cmds the patch commands to store diff --git a/lucene/analysis/stempel/src/java/overview.html b/lucene/analysis/stempel/src/java/overview.html index 003c83ded59..a51db36f384 100644 --- a/lucene/analysis/stempel/src/java/overview.html +++ b/lucene/analysis/stempel/src/java/overview.html @@ -133,12 +133,11 @@ all possible cases, so there is always some loss of precision/recall (which means that even the words from the training corpus are sometimes incorrectly stemmed).
    -

    Algorithm and implementation

    +

    Algorithm and implementation

    The algorithm and its Java implementation is described in detail in the publications cited below. Here's just a short excerpt from [2]:

    -
    -
    "The aim is separation of the +
    "The aim is separation of the stemmer execution code from the data structures [...]. In other words, a static algorithm configurable by data must be developed. The word transformations that happen in the @@ -171,7 +170,6 @@ The P-commands are applied from the end of a word (right to left). This assumption can reduce the set of P-command's, because the last NOOP, moving the cursor to the end of a string without any changes, need not be stored."
    -

    Data structure used to keep the dictionary (words and their P-commands) is a trie. Several optimization steps are applied in turn to reduce and @@ -273,10 +271,9 @@ incorrect lemma. Note: quite often in such case the output was a correct stem.
  • table size: the size in bytes of the stemmer table.
  • -
    - +
    - + @@ -286,7 +283,7 @@ correct stem. - + @@ -296,7 +293,7 @@ correct stem. - + @@ -306,7 +303,7 @@ correct stem. - + @@ -316,7 +313,7 @@ correct stem. - + @@ -326,7 +323,7 @@ correct stem. - + @@ -336,7 +333,7 @@ correct stem. - + @@ -346,7 +343,7 @@ correct stem. - + @@ -356,7 +353,7 @@ correct stem. - + @@ -366,7 +363,7 @@ correct stem. - + @@ -376,7 +373,7 @@ correct stem. - + @@ -386,7 +383,7 @@ correct stem. - + @@ -396,7 +393,7 @@ correct stem. - + @@ -406,7 +403,7 @@ correct stem. - + @@ -418,7 +415,6 @@ correct stem.
    Training sets Testing forms Stem OK Lemma Bad Table size [B]
    100 1022985 842209 256642 28438
    200 1022985 862789 223209 48660
    500 1022985 885786 207204 108798
    700 1022985 909031 211292 139291
    1000 1022985 926079 207148 183677
    2000 1022985 942886 202915 313516
    5000 1022985 954721 201579 640969
    7000 1022985 956165 198588 839347
    10000 1022985 965427 196681 1144537
    12000 1022985 967664 192120 1313508
    15000 1022985 973188 190871 1567902
    17000 1022985 974203 188862 1733957
    20000 1022985 976234
    -

    I also measured the time to produce a stem (which involves traversing a trie, retrieving a patch command and applying the patch command to the input @@ -462,7 +458,7 @@ press. Intelligent Information Processing and Web Mining Conference, 2004, Zakopane, Poland.

  • Galambos, L.: Lemmatizer for Document Information Retrieval -Systems in JAVA. <http://www.informatik.uni-trier.de/%7Eley/db/conf/sofsem/sofsem2001.html#Galambos01> SOFSEM 2001, Piestany, Slovakia.
    diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java index 8a6ac794802..4dfd935eea0 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java @@ -95,14 +95,14 @@ import org.apache.lucene.util.packed.PackedInts; *

    * Files and detailed format: *

    * - * + * *
    *
    * Term Dictionary @@ -162,7 +162,7 @@ import org.apache.lucene.util.packed.PackedInts; *
    *
    * - * + * *
    *
    * Term Index @@ -172,7 +172,7 @@ import org.apache.lucene.util.packed.PackedInts; *
    * * - * + * *
    *
    * Frequencies and Skip Data @@ -260,7 +260,7 @@ import org.apache.lucene.util.packed.PackedInts; *
    *
    * - * + * *
    *
    * Positions @@ -313,7 +313,7 @@ import org.apache.lucene.util.packed.PackedInts; *
    *
    * - * + * *
    *
    * Payloads and Offsets diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java index 465557aba5b..c1576355e5f 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java @@ -25,7 +25,8 @@ * Contained packages: *

    * - * + *
    + * * * * @@ -63,7 +64,7 @@ * report. *
  • Results record counting clarified
  • * - * + * *

    Benchmarking By Tasks

    *

    * Benchmark Lucene using task primitives. @@ -79,7 +80,7 @@ * additional characteristics of the benchmark run. *

    * - * + * *

    How to use

    *

    * Easiest way to run a benchmarks is using the predefined ant task: @@ -166,7 +167,7 @@ * org.apache.lucene.benchmark.byTask.tasks specify that package thru the * alt.tasks.packages property. * - * + * *

    Benchmark "algorithm"

    * *

    @@ -312,7 +313,7 @@ * * * - * + * *

    Supported tasks/commands

    * *

    @@ -481,7 +482,7 @@ * * * - * + * *

    Benchmark properties

    * *

    @@ -604,7 +605,7 @@ * For sample use of these properties see the *.alg files under conf. *

    * - * + * *

    Example input algorithm and the result benchmark report

    *

    * The following example is in conf/sample.alg: @@ -690,7 +691,7 @@ * PopulateLong - - 1 20 1000 - - 1 - - 10003 - - - 77.0 - - 129.92 - 87,309,608 - 100,831,232 * * - * + * *

    Results record counting clarified

    *

    * Two columns in the results table indicate records counts: records-per-run and diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java index 051a8fc0bd1..696b5805c2f 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java @@ -51,8 +51,8 @@ import java.util.regex.Pattern; *

  • Analyzer args: *
      *
    • Required: name:analyzer-factory-name
    • - *
    • Optional: positionIncrementGap:int value (default: 0)
    • - *
    • Optional: offsetGap:int value (default: 1)
    • + *
    • Optional: positionIncrementGap:int value (default: 0)
    • + *
    • Optional: offsetGap:int value (default: 1)
    • *
    *
  • *
  • zero or more CharFilterFactory's, followed by
  • @@ -60,7 +60,7 @@ import java.util.regex.Pattern; *
  • zero or more TokenFilterFactory's
  • * * - * Each component analysis factory may specify luceneMatchVersion (defaults to + * Each component analysis factory may specify luceneMatchVersion (defaults to * {@link Version#LATEST}) and any of the args understood by the specified * *Factory class, in the above-describe param format. *

    diff --git a/lucene/build.xml b/lucene/build.xml index 0dfb064b06d..a8794b10c13 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -144,13 +144,6 @@ - - - - - diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java index fcc0d00a593..aa2a1c5b86e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java @@ -58,11 +58,11 @@ import org.apache.lucene.util.fst.Util; *

    * File: *

    *

    * - * + * *

    Term Dictionary

    *

    * The .tst contains a list of FSTs, one for each field. diff --git a/lucene/common-build.xml b/lucene/common-build.xml index a2e4e2af1ae..e04b25d4221 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -202,7 +202,7 @@ - + @@ -2089,30 +2089,6 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} - - - - - - - - - @@ -2166,6 +2142,10 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} + + + + diff --git a/lucene/core/src/data/jflex/skeleton.default b/lucene/core/src/data/jflex/skeleton.default index 9e08fbb0c46..2eaa2916a56 100644 --- a/lucene/core/src/data/jflex/skeleton.default +++ b/lucene/core/src/data/jflex/skeleton.default @@ -163,7 +163,7 @@ * * All internal variables are reset, the old input stream * cannot be reused (internal buffer is discarded and lost). - * Lexical state is set to ZZ_INITIAL. + * Lexical state is set to ZZ_INITIAL. * * Internal scan buffer is resized down to its initial length, if it has grown. * @@ -211,7 +211,7 @@ /** - * Returns the character at position pos from the + * Returns the character at position pos from the * matched text. * * It is equivalent to yytext().charAt(pos), but faster diff --git a/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt index a9dabcfd9b7..67032d659ff 100644 --- a/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt +++ b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt @@ -169,7 +169,7 @@ * * All internal variables are reset, the old input stream * cannot be reused (internal buffer is discarded and lost). - * Lexical state is set to ZZ_INITIAL. + * Lexical state is set to ZZ_INITIAL. * * Internal scan buffer is resized down to its initial length, if it has grown. * @@ -217,7 +217,7 @@ /** - * Returns the character at position pos from the + * Returns the character at position pos from the * matched text. 
* * It is equivalent to yytext().charAt(pos), but faster diff --git a/lucene/core/src/java/org/apache/lucene/analysis/package-info.java b/lucene/core/src/java/org/apache/lucene/analysis/package-info.java index a536f73fc16..b7e752c4578 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/package-info.java @@ -383,7 +383,8 @@ * synonyms, setting the position increment to 0 is enough to denote the fact that two * words are synonyms, for example: *

    - *
    table of benchmark packages
    PackageDescription
    + *
    + * * * *
    table showing position increments of 1 and 0 for red and magenta, respectively
    Termredmagenta
    Position increment10
    @@ -394,7 +395,8 @@ * a TokenStream where "IBM" is a synonym of "Internal Business Machines". Position increments * are not enough anymore: *

    - * + *
    + * * * *
    position increments where international is zero
    TermIBMInternationalBusinessMachines
    Position increment1011
    @@ -405,7 +407,8 @@ * than "International" is a synonym of "Business". The only way to solve this issue is to * make "IBM" span across 3 positions, this is where position lengths come to rescue. *

    - * + *
    + * * * * @@ -414,7 +417,7 @@ * This new attribute makes clear that "IBM" and "International Business Machines" start and end * at the same positions. *

    - * + * *

    How to not write corrupt token streams

    *

    * There are a few rules to observe when writing custom Tokenizers and TokenFilters: @@ -472,7 +475,9 @@ *

    * Lucene provides seven Attributes out of the box: *

    - *
    position lengths where IBM is three
    TermIBMInternationalBusinessMachines
    Position increment1011
    Position length3111
    + *
    + * + * * * * * + * *
    common bundled attributes
    {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute} @@ -513,6 +518,7 @@ * return true from this attribute's isKeyword() method. *
    *

    More Requirements for Analysis Component Classes

    * Due to the historical development of the API, there are some perhaps diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java index a2ad3946cde..062911803ac 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java @@ -726,7 +726,7 @@ public final class StandardTokenizerImpl { * * All internal variables are reset, the old input stream * cannot be reused (internal buffer is discarded and lost). - * Lexical state is set to ZZ_INITIAL. + * Lexical state is set to ZZ_INITIAL. * * Internal scan buffer is resized down to its initial length, if it has grown. * @@ -774,7 +774,7 @@ public final class StandardTokenizerImpl { /** - * Returns the character at position pos from the + * Returns the character at position pos from the * matched text. * * It is equivalent to yytext().charAt(pos), but faster diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java index cdb810d1cb7..4dbe3c591e9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java @@ -95,11 +95,11 @@ import org.apache.lucene.util.fst.Util; * * Files: * *

    - * + * *

    Term Dictionary

    * *

    The .tim file contains the list of terms in each @@ -158,7 +158,7 @@ import org.apache.lucene.util.fst.Util; *

  • For inner nodes of the tree, every entry will steal one bit to mark whether it points * to child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted
  • * - * + * *

    Term Index

    *

    The .tip file contains an index into the term dictionary, so that it can be * accessed randomly. The index is also used to determine diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java index 927865a0100..5773c161f6b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java @@ -35,8 +35,8 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter; * A {@link StoredFieldsFormat} that compresses documents in chunks in * order to improve the compression ratio. *

    - * For a chunk size of chunkSize bytes, this {@link StoredFieldsFormat} - * does not support documents larger than (2^31 - chunkSize) + * For a chunk size of chunkSize bytes, this {@link StoredFieldsFormat} + * does not support documents larger than (2^31 - chunkSize) * bytes. *

    * For optimal performance, you should use a {@link MergePolicy} that returns diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java index 8fc314e6040..f44441bf8f8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java @@ -34,9 +34,9 @@ import org.apache.lucene.store.IndexOutput; *

    * Files: *

      - *
    • .cfs: An optional "virtual" file consisting of all the other + *
    • .cfs: An optional "virtual" file consisting of all the other * index files for systems that frequently run out of file handles. - *
    • .cfe: The "virtual" compound file's entry table holding all + *
    • .cfe: The "virtual" compound file's entry table holding all * entries in the corresponding .cfs file. *
    *

    Description:

    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java index 0ad0cad3044..384dbc2eb65 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java @@ -40,7 +40,7 @@ import org.apache.lucene.store.IndexOutput; /** * Lucene 5.0 Field Infos format. - *

    Field names are stored in the field info file, with suffix .fnm. + *

    Field names are stored in the field info file, with suffix .fnm. *

    FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, * FieldBits,DocValuesBits,DocValuesGen,Attributes>^FieldsCount,Footer *

    Data types: diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java index ee91c9c6946..035fbd9b065 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java @@ -57,10 +57,10 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter; *

    File formats *

    Stored fields are represented by three files: *

      - *
    1. - *

      A fields data file (extension .fdt). This file stores a compact + *

    2. + *

      A fields data file (extension .fdt). This file stores a compact * representation of documents in compressed blocks of 16KB or more. When - * writing a segment, documents are appended to an in-memory byte[] + * writing a segment, documents are appended to an in-memory byte[] * buffer. When its size reaches 16KB or more, some metadata about the documents * is flushed to disk, immediately followed by a compressed representation of * the buffer using the @@ -83,21 +83,21 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter; * is less than 0.5%.

    3. * * - *
    4. - *

      A fields index file (extension .fdx). This file stores two + *

    5. + *

      A fields index file (extension .fdx). This file stores two * {@link DirectMonotonicWriter monotonic arrays}, one for the first doc IDs of * each block of compressed documents, and another one for the corresponding * offsets on disk. At search time, the array containing doc IDs is * binary-searched in order to find the block that contains the expected doc ID, * and the associated offset on disk is retrieved from the second array.

      - *
    6. - *

      A fields meta file (extension .fdm). This file stores metadata + *

    7. + *

      A fields meta file (extension .fdm). This file stores metadata * about the monotonic arrays stored in the index file.

      *
    8. *
    *

    Known limitations *

    This {@link StoredFieldsFormat} does not support individual documents - * larger than (2^31 - 2^14) bytes. + * larger than (2^31 - 2^14) bytes. * @lucene.experimental */ public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java index 40889bfd786..00412d5473c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java @@ -48,8 +48,8 @@ import org.apache.lucene.util.packed.PackedInts; * Looking up term vectors for any document requires at most 1 disk seek. *

    File formats *

      - *
    1. - *

      A vector data file (extension .tvd). This file stores terms, + *

    2. + *

      A vector data file (extension .tvd). This file stores terms, * frequencies, positions, offsets and payloads for every document. Upon writing * a new segment, it accumulates data into memory until the buffer used to store * terms and payloads grows beyond 4KB. Then it flushes all metadata, terms @@ -111,8 +111,8 @@ import org.apache.lucene.util.packed.PackedInts; *

    3. Footer --> {@link CodecUtil#writeFooter CodecFooter}
    4. * * - *
    5. - *

      An index file (extension .tvx). + *

    6. + *

      An index file (extension .tvx). *

        *
      • VectorIndex (.tvx) --> <Header>, <ChunkIndex>, Footer
      • *
      • Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
      • diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60FieldInfosFormat.java index e2ca9eea803..3d7d25ac4de 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60FieldInfosFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60FieldInfosFormat.java @@ -40,7 +40,7 @@ import org.apache.lucene.store.IndexOutput; /** * Lucene 6.0 Field Infos format. - *

        Field names are stored in the field info file, with suffix .fnm. + *

        Field names are stored in the field info file, with suffix .fnm. *

        FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, * FieldBits,DocValuesBits,DocValuesGen,Attributes,DimensionCount,DimensionNumBytes>^FieldsCount,Footer *

        Data types: diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70SegmentInfoFormat.java index 2a3b44cb24f..ed557701156 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70SegmentInfoFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70SegmentInfoFormat.java @@ -45,7 +45,7 @@ import org.apache.lucene.util.Version; *

        * Files: *

          - *
        • .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer + *
        • .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer *
        * Data types: *
          diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java index 029980f6ba9..286c4e4bc6f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java @@ -35,8 +35,8 @@ import org.apache.lucene.util.packed.DirectWriter; *

          * Documents that have a value for the field are encoded in a way that it is always possible to * know the ordinal of the current document in the set of documents that have a value. For instance, - * say the set of documents that have a value for the field is {1, 5, 6, 11}. When the - * iterator is on 6, it knows that this is the 3rd item of the set. This way, values can + * say the set of documents that have a value for the field is {1, 5, 6, 11}. When the + * iterator is on 6, it knows that this is the 3rd item of the set. This way, values can * be stored densely and accessed based on their index at search time. If all documents in a segment * have a value for the field, the index is the same as the doc ID, so this case is encoded implicitly * and is very fast at query time. On the other hand if some documents are missing a value for the @@ -124,8 +124,8 @@ import org.apache.lucene.util.packed.DirectWriter; *

          * Files: *

            - *
          1. .dvd: DocValues data
          2. - *
          3. .dvm: DocValues metadata
          4. + *
          5. .dvd: DocValues data
          6. + *
          7. .dvm: DocValues metadata
          8. *
          * @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80NormsFormat.java index 915116b529e..f202ed18de7 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80NormsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80NormsFormat.java @@ -34,11 +34,11 @@ import org.apache.lucene.store.DataOutput; *

          * Files: *

            - *
          1. .nvd: Norms data
          2. - *
          3. .nvm: Norms metadata
          4. + *
          5. .nvd: Norms data
          6. + *
          7. .nvm: Norms metadata
          8. *
          *
            - *
          1. + *
          2. *

            The Norms metadata or .nvm file.

            *

            For each norms field, this stores metadata, such as the offset into the * Norms data (.nvd)

            @@ -62,7 +62,7 @@ import org.apache.lucene.store.DataOutput; * in the norms data (.nvd), or -2 if no documents have a norm value, or -1 if all documents have a norm * value.

            *

            DocsWithFieldLength is the number of bytes used to encode the set of documents that have a norm.

            - *
          3. + *
          4. *

            The Norms data or .nvd file.

            *

            For each Norms field, this stores the actual per-document data (the heavy-lifting)

            *

            Norms data (.nvd) --> Header,< Data >^NumFields,Footer

            diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsFormat.java index 80a89ce755e..23dbfec2f10 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsFormat.java @@ -96,14 +96,14 @@ import org.apache.lucene.util.packed.PackedInts; *

            * Files and detailed format: *

            * - * + * *
            *
            * Term Dictionary @@ -163,7 +163,7 @@ import org.apache.lucene.util.packed.PackedInts; *
            *
            * - * + * *
            *
            * Term Index @@ -173,7 +173,7 @@ import org.apache.lucene.util.packed.PackedInts; *
            * * - * + * *
            *
            * Frequencies and Skip Data @@ -261,7 +261,7 @@ import org.apache.lucene.util.packed.PackedInts; *
            *
            * - * + * *
            *
            * Positions @@ -314,7 +314,7 @@ import org.apache.lucene.util.packed.PackedInts; *
            *
            * - * + * *
            *
            * Payloads and Offsets diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/package-info.java index 99abb37a6db..f5768a426e9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/package-info.java @@ -41,7 +41,7 @@ *
          5. *
        *
  • - * + * *

    Introduction

    *
    *

    This document defines the index file formats used in this version of Lucene. @@ -51,7 +51,7 @@ *

    This document attempts to provide a high-level definition of the Apache * Lucene file formats.

    *
    - * + * *

    Definitions

    *
    *

    The fundamental concepts in Lucene are index, document, field and term.

    @@ -64,14 +64,14 @@ *

    The same sequence of bytes in two different fields is considered a different * term. Thus terms are represented as a pair: the string naming the field, and the * bytes within the field.

    - * + * *

    Inverted Indexing

    *

    The index stores statistics about terms in order to make term-based search * more efficient. Lucene's index falls into the family of indexes known as an * inverted index. This is because it can list, for a term, the documents * that contain it. This is the inverse of the natural relationship, in which * documents list terms.

    - * + * *

    Types of Fields

    *

    In Lucene, fields may be stored, in which case their text is stored * in the index literally, in a non-inverted manner. Fields that are inverted are @@ -82,7 +82,7 @@ * indexed literally.

    *

    See the {@link org.apache.lucene.document.Field Field} * java docs for more information on Fields.

    - * + * *

    Segments

    *

    Lucene indexes may be composed of multiple sub-indexes, or segments. * Each segment is a fully independent index, which could be searched separately. @@ -93,7 +93,7 @@ * *

    Searches may involve multiple segments and/or multiple indexes, each index * potentially composed of a set of segments.

    - * + * *

    Document Numbers

    *

    Internally, Lucene refers to documents by an integer document number. * The first document added to an index is numbered zero, and each subsequent @@ -122,7 +122,7 @@ * * *

    - * + * *

    Index Structure Overview

    *
    *

    Each segment index maintains the following:

    @@ -194,7 +194,7 @@ * *

    Details on each of these are provided in their linked pages.

    *
    - * + * *

    File Naming

    *
    *

    All files belonging to a segment have the same name with varying extensions. @@ -210,12 +210,13 @@ * segments_1, then segments_2, etc. The generation is a sequential long integer * represented in alpha-numeric (base 36) form.

    *
    - * + * *

    Summary of File Extensions

    *
    *

    The following table summarizes the names and extensions of the files in * Lucene:

    - * + *
    + * * * * @@ -315,7 +316,7 @@ * *
    lucene filenames by extension
    NameExtension
    *
    - * + * *

    Lock File

    * The write lock, which is stored in the index directory by default, is named * "write.lock". If the lock directory is different from the index directory then @@ -323,7 +324,7 @@ * derived from the full path to the index directory. When this file is present, a * writer is currently modifying the index (adding or removing documents). This * lock file ensures that only one writer is modifying the index at a time. - * + * *

    History

    *

    Compatibility notes are provided in this document, describing how file * formats have changed from prior versions:

    @@ -399,7 +400,7 @@ *
  • In version 8.4, postings, positions, offsets and payload lengths have move to a more * performant encoding that is vectorized.
  • * - * + * *

    Limitations

    *
    *

    Lucene uses a Java int to refer to diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index f2e8940fac4..1d031769b8a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -54,8 +54,8 @@ import org.apache.lucene.util.IOUtils; * {@link ServiceLoader Service Provider Interface} to resolve format names. *

    * Files written by each docvalues format have an additional suffix containing the - * format name. For example, in a per-field configuration instead of _1.dat - * filenames would look like _1_Lucene40_0.dat. + * format name. For example, in a per-field configuration instead of _1.dat + * filenames would look like _1_Lucene40_0.dat. * @see ServiceLoader * @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java index 52a88511d0c..31125f70328 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java @@ -61,8 +61,8 @@ import org.apache.lucene.util.RamUsageEstimator; * {@link ServiceLoader Service Provider Interface} to resolve format names. *

    * Files written by each posting format have an additional suffix containing the - * format name. For example, in a per-field configuration instead of _1.prx - * filenames would look like _1_Lucene40_0.prx. + * format name. For example, in a per-field configuration instead of _1.prx + * filenames would look like _1_Lucene40_0.prx. * @see ServiceLoader * @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java index 5d32a1a06a4..a5b8630e954 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java @@ -37,7 +37,7 @@ import java.util.concurrent.atomic.AtomicInteger; * as documents are added to and deleted from an index. Clients should thus not * rely on a given document having the same number between sessions. * - *

    NOTE: {@link + *

    NOTE: {@link * IndexReader} instances are completely thread * safe, meaning multiple threads can call any of its methods, * concurrently. If your application requires external diff --git a/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java index 83bb92a8278..d965399f115 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java @@ -42,7 +42,7 @@ import org.apache.lucene.store.*; rely on a given document having the same number between sessions.

    -

    NOTE: {@link +

    NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires external diff --git a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java index dac2d8a17c5..1b2a81f0f87 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java @@ -42,7 +42,7 @@ import org.apache.lucene.store.Directory; rely on a given document having the same number between sessions.

    -

    NOTE: {@link +

    NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires external diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java index ff241446026..52a0373dd6a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java @@ -68,7 +68,7 @@ import org.apache.lucene.util.Bits; // javadocs rely on a given document having the same number between sessions.

    -

    NOTE: {@link +

    NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires external diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java b/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java index bca7a140c18..25d419d7474 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java @@ -28,9 +28,9 @@ public abstract class IndexReaderContext { public final CompositeReaderContext parent; /** true if this context struct represents the top level reader within the hierarchical context */ public final boolean isTopLevel; - /** the doc base for this reader in the parent, 0 if parent is null */ + /** the doc base for this reader in the parent, 0 if parent is null */ public final int docBaseInParent; - /** the ord for this reader in the parent, 0 if parent is null */ + /** the ord for this reader in the parent, 0 if parent is null */ public final int ordInParent; // An object that uniquely identifies this context without referencing diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 14f96d4ba2f..9652fca58ee 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -100,7 +100,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; and then adds the entire document). When finished adding, deleting and updating documents, {@link #close() close} should be called.

    - +

    Each method that changes the index returns a {@code long} sequence number, which expresses the effective order in which each change was applied. {@link #commit} also returns a sequence number, describing which @@ -108,7 +108,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; are transient (not saved into the index in any way) and only valid within a single {@code IndexWriter} instance.

    - +

    These changes are buffered in memory and periodically flushed to the {@link Directory} (during the above method calls). A flush is triggered when there are enough added documents @@ -134,7 +134,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; another IndexWriter on the same directory will lead to a {@link LockObtainFailedException}.

    - +

    Expert: IndexWriter allows an optional {@link IndexDeletionPolicy} implementation to be specified. You can use this to control when prior commits are deleted from @@ -155,7 +155,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; will likely result in poor performance compared to a local IO device.

    -

    Expert: +

    Expert: IndexWriter allows you to separately change the {@link MergePolicy} and the {@link MergeScheduler}. The {@link MergePolicy} is invoked whenever there are @@ -167,14 +167,14 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; it decides when and how to run the merges. The default is {@link ConcurrentMergeScheduler}.

    -

    NOTE: if you hit a +

    NOTE: if you hit a VirtualMachineError, or disaster strikes during a checkpoint then IndexWriter will close itself. This is a defensive measure in case any internal state (buffered documents, deletions, reference counts) were corrupted. Any subsequent calls will throw an AlreadyClosedException.

    -

    NOTE: {@link +

    NOTE: {@link IndexWriter} instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java index 1d09742fbda..e1c31354f00 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java @@ -34,7 +34,7 @@ import org.apache.lucene.util.Bits; rely on a given document having the same number between sessions.

    -

    NOTE: {@link +

    NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires external diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index be535a7ee34..d9d6b0b431c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -442,7 +442,7 @@ public abstract class MergePolicy { } /** - * Default ratio for compound file system usage. Set to 1.0, always use + * Default ratio for compound file system usage. Set to 1.0, always use * compound file system. */ protected static final double DEFAULT_NO_CFS_RATIO = 1.0; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiReader.java b/lucene/core/src/java/org/apache/lucene/index/MultiReader.java index 4d4238290f6..2b06210b8d9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiReader.java @@ -29,7 +29,7 @@ import java.io.IOException; * as documents are added to and deleted from an index. Clients should thus not * rely on a given document having the same number between sessions. * - *

    NOTE: {@link + *

    NOTE: {@link * IndexReader} instances are completely thread * safe, meaning multiple threads can call any of its methods, * concurrently. If your application requires external diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java index 87ac8026357..5406d60bc90 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java @@ -39,7 +39,8 @@ import org.apache.lucene.util.bkd.BKDWriter; * These structures are optimized for operations such as range, distance, nearest-neighbor, * and point-in-polygon queries. *

    Basic Point Types

    - * + *
    + * * * * diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index a18538a808d..116c2e11435 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -54,7 +54,7 @@ import org.apache.lucene.util.Version; * segments in relation to the file system. *

    * The active segments in the index are stored in the segment info file, - * segments_N. There may be one or more segments_N files in + * segments_N. There may be one or more segments_N files in * the index; however, the one with the largest generation is the active one * (when older segments_N files are present it's because they temporarily cannot * be deleted, or a custom {@link IndexDeletionPolicy} is in @@ -64,7 +64,7 @@ import org.apache.lucene.util.Version; *

    * Files: *

    diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java index a3718c22f1a..5f43c5ad156 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -50,7 +50,7 @@ final class Sorter { } /** - * A permutation of doc IDs. For every document ID between 0 and + * A permutation of doc IDs. For every document ID between 0 and * {@link IndexReader#maxDoc()}, oldToNew(newToOld(docID)) must * return docID. */ @@ -394,7 +394,7 @@ final class Sorter { * {@link #sort(int, DocComparator)} to compute the old-to-new permutation * given a list of documents and their corresponding values. *

    - * A return value of null is allowed and means that + * A return value of null is allowed and means that * reader is already sorted. *

    * NOTE: deleted documents are expected to appear in the mapping as diff --git a/lucene/core/src/java/org/apache/lucene/index/package-info.java b/lucene/core/src/java/org/apache/lucene/index/package-info.java index 1dbc400ac5b..db228e44f82 100644 --- a/lucene/core/src/java/org/apache/lucene/index/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/index/package-info.java @@ -51,10 +51,10 @@ * * * - * + * *

    Index APIs

    - * + * *

    IndexWriter

    *

    {@link org.apache.lucene.index.IndexWriter} is used to create an index, and to add, update and @@ -66,7 +66,7 @@ * org.apache.lucene.store.FSDirectory}), but it may also stand for some other storage, such as * RAM.

    - * + * *

    IndexReader

    *

    {@link org.apache.lucene.index.IndexReader} is used to read data from the index, and supports @@ -76,7 +76,7 @@ * org.apache.lucene.index.DirectoryReader#openIfChanged}) in order to incorporate writes that may * occur after it is opened.

    - * + * *

    Segments and docids

    *

    Lucene's index is composed of segments, each of which contains a subset of all the documents @@ -101,10 +101,10 @@ * not exposed as part of an application, nor stored or referenced outside of Lucene's internal * APIs.

    - * + * *

    Field Types

    * - * + * * *

    Lucene supports a variety of different document field data structures. Lucene's core, the * inverted index, is comprised of "postings." The postings, with their term dictionary, can be @@ -115,14 +115,14 @@ * able to skip over low-scoring documents at search time. Postings do not provide any way of * retrieving terms given a document, short of scanning the entire index.

    * - * + * *

    Stored fields are essentially the opposite of postings, providing efficient retrieval of field * values given a docid. All stored field values for a document are stored together in a * block. Different types of stored field provide high-level datatypes such as strings and numbers * on top of the underlying bytes. Stored field values are usually retrieved by the searcher using * an implementation of {@link org.apache.lucene.index.StoredFieldVisitor}.

    - * + * *

    {@link org.apache.lucene.index.DocValues} fields are what are sometimes referred to as * columnar, or column-stride fields, by analogy to relational database terminology, in which * documents are considered as rows, and fields, columns. DocValues fields store values per-field: a @@ -130,14 +130,14 @@ * lookup of a field-value given a docid. These fields are used for efficient value-based sorting, * and for faceting, but they are not useful for filtering.

    - * + * *

    {@link org.apache.lucene.index.PointValues} represent numeric values using a kd-tree data * structure. Efficient 1- and higher dimensional implementations make these the choice for numeric * range and interval queries, and geo-spatial queries.

    - * + * *

    Postings APIs

    - * + * *

    * Fields *

    @@ -159,7 +159,7 @@ * Terms terms = fields.terms(field); * } * - * + * *

    * Terms *

    @@ -195,7 +195,7 @@ * PostingsEnum docsAndPositions = termsEnum.postings(null, null, PostingsEnum.FLAG_POSITIONS); * } * - * + * *

    * Documents *

    @@ -210,7 +210,7 @@ * System.out.println(docsEnum.freq()); * } * - * + * *

    * Positions *

    @@ -233,9 +233,9 @@ * } * } * - * + * *

    Index Statistics

    - * + * *

    * Term statistics *

    @@ -249,7 +249,7 @@ * of occurrences of this term across all documents. Like docFreq(), it will * also count occurrences that appear in deleted documents. * - * + * *

    * Field statistics *

    @@ -276,7 +276,7 @@ * field, and like totalTermFreq() it will also count occurrences that appear in * deleted documents. * - * + * *

    * Segment statistics *

    @@ -290,7 +290,7 @@ *
  • {@link org.apache.lucene.index.Fields#size}: Returns the number of indexed * fields. * - * + * *

    * Document statistics *

    diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index 273ece47982..bc1c185a9b1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -38,7 +38,7 @@ import org.apache.lucene.util.ArrayUtil; * set is large this can easily be a very substantial amount * of RAM! * - *

    See the Lucene modules/grouping module for more + *

    See the Lucene modules/grouping module for more * details including a full code example.

    * * @lucene.experimental diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 66d3b6178df..f658de09e72 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -78,7 +78,7 @@ import org.apache.lucene.util.ThreadInterruptedException; * {@link TopScoreDocCollector#create} or {@link TopFieldCollector#create} and * call {@link #search(Query, Collector)}. * - *

    NOTE: {@link + *

    NOTE: {@link * IndexSearcher} instances are completely * thread safe, meaning multiple threads can call any of its * methods, concurrently. If your application requires diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index da881421bf0..265607124cf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -143,7 +143,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * than 3% of the total number of documents in the index. * This should guarantee that all leaves from the upper * {@link TieredMergePolicy tier} will be cached while ensuring that at most - * 33 leaves can make it to the cache (very likely less than 10 in + * 33 leaves can make it to the cache (very likely less than 10 in * practice), which is useful for this implementation since some operations * perform in linear time with the number of cached leaves. * Only clauses whose cost is at most 100x the cost of the top-level query will diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java b/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java index e6364fa2c31..1efb33e8ff7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java @@ -39,7 +39,7 @@ public interface QueryCachingPolicy { /** Whether the given {@link Query} is worth caching. * This method will be called by the {@link QueryCache} to know whether to * cache. It will first attempt to load a {@link DocIdSet} from the cache. - * If it is not cached yet and this method returns true then a + * If it is not cached yet and this method returns true then a * cache entry will be generated. Otherwise an uncached scorer will be * returned. 
*/ boolean shouldCache(Query query) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/package-info.java b/lucene/core/src/java/org/apache/lucene/search/package-info.java index 265be4ce3a5..a50a0c37173 100644 --- a/lucene/core/src/java/org/apache/lucene/search/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/search/package-info.java @@ -29,7 +29,7 @@ * * * - * + * *

    Search Basics

    *

    * Lucene offers a wide variety of {@link org.apache.lucene.search.Query} implementations, most of which are in @@ -50,7 +50,7 @@ * * * - * + * *

    Query Classes

    *

    * {@link org.apache.lucene.search.TermQuery TermQuery} @@ -74,8 +74,8 @@ * TermQuery tq = new TermQuery(new Term("fieldName", "term")); * In this example, the {@link org.apache.lucene.search.Query Query} identifies all * {@link org.apache.lucene.document.Document Document}s that have the - * {@link org.apache.lucene.document.Field Field} named "fieldName" - * containing the word "term". + * {@link org.apache.lucene.document.Field Field} named "fieldName" + * containing the word "term". *

    * {@link org.apache.lucene.search.BooleanQuery BooleanQuery} *

    @@ -175,11 +175,11 @@ * The {@link org.apache.lucene.search.PrefixQuery PrefixQuery} allows an application * to identify all documents with terms that begin with a certain string. The * {@link org.apache.lucene.search.WildcardQuery WildcardQuery} generalizes this by allowing - * for the use of * (matches 0 or more characters) and ? (matches exactly one character) wildcards. + * for the use of * (matches 0 or more characters) and ? (matches exactly one character) wildcards. * Note that the {@link org.apache.lucene.search.WildcardQuery WildcardQuery} can be quite slow. Also * note that * {@link org.apache.lucene.search.WildcardQuery WildcardQuery} should - * not start with * and ?, as these are extremely slow. + * not start with * and ?, as these are extremely slow. * Some QueryParsers may not allow this by default, but provide a setAllowLeadingWildcard method * to remove that protection. * The {@link org.apache.lucene.search.RegexpQuery RegexpQuery} is even more general than WildcardQuery, @@ -196,7 +196,7 @@ * This type of query can be useful when accounting for spelling variations in the collection. * * - * + * *

    Scoring — Introduction

    *

    Lucene scoring is the heart of why we all love Lucene. It is blazingly fast and it hides * almost all of the complexity from the user. In a nutshell, it works. At least, that is, @@ -229,7 +229,7 @@ * Finally, we will finish up with some reference material in the Appendix. * * - * + * *

    Scoring — Basics

    *

    Scoring is very much dependent on the way documents are indexed, so it is important to understand * indexing. (see Lucene overview @@ -257,7 +257,7 @@ *

    Lucene allows influencing the score contribution of various parts of the query by wrapping with * {@link org.apache.lucene.search.BoostQuery}.

    * - * + * *

    Changing Scoring — Similarity

    *

    Changing the scoring formula

    *

    @@ -323,7 +323,7 @@ * expr.getDoubleValuesSource(bindings)); * * - * + * *

    Custom Queries — Expert Level

    * *

    Custom queries are an expert level task, so tread carefully and be prepared to share your code if @@ -374,7 +374,7 @@ * {@link org.apache.lucene.search.BooleanQuery BooleanQuery}, and other queries that implement {@link org.apache.lucene.search.Query#createWeight(IndexSearcher,ScoreMode,float) createWeight(IndexSearcher searcher,ScoreMode scoreMode, float boost)}

  • * - * + * *

    The Weight Interface

    *

    The * {@link org.apache.lucene.search.Weight Weight} @@ -402,7 +402,7 @@ * and offsets of matches. This is typically useful to implement highlighting. * * - * + * *

    The Scorer Class

    *

    The * {@link org.apache.lucene.search.Scorer Scorer} @@ -431,7 +431,7 @@ * details on the scoring process. * * - * + * *

    The BulkScorer Class

    *

    The * {@link org.apache.lucene.search.BulkScorer BulkScorer} scores a range of documents. There is only one @@ -453,7 +453,7 @@ * * * - * + * *

    Appendix: Search Algorithm

    *

    This section is mostly notes on stepping through the Scoring process and serves as * fertilizer for the earlier sections. diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java index daf7c69d225..4d6b3b51e8b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java @@ -42,7 +42,7 @@ import org.apache.lucene.util.SmallFloat; * this class at both index-time and * query-time. *

    - * Indexing Time + * Indexing Time * At indexing time, the indexer calls {@link #computeNorm(FieldInvertState)}, allowing * the Similarity implementation to set a per-document value for the field that will * be later accessible via {@link org.apache.lucene.index.LeafReader#getNormValues(String)}. @@ -60,7 +60,7 @@ import org.apache.lucene.util.SmallFloat; * Additional scoring factors can be stored in named {@link NumericDocValuesField}s and * accessed at query-time with {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}. * However this should not be done in the {@link Similarity} but externally, for instance - * by using FunctionScoreQuery. + * by using FunctionScoreQuery. *

    * Finally, using index-time boosts (either via folding into the normalization byte or * via DocValues), is an inefficient way to boost the scores of different fields if the @@ -68,7 +68,7 @@ import org.apache.lucene.util.SmallFloat; * boost parameter C, and {@link PerFieldSimilarityWrapper} can return different * instances with different boosts depending upon field name. *

    - * Query time + * Query time * At query-time, Queries interact with the Similarity via these steps: *

      *
    1. The {@link #scorer(float, CollectionStatistics, TermStatistics...)} method is called a single time, @@ -80,7 +80,7 @@ import org.apache.lucene.util.SmallFloat; *
    2. Then {@link SimScorer#score(float, long)} is called for every matching document to compute its score. *
    *

    - * Explanations + * Explanations * When {@link IndexSearcher#explain(org.apache.lucene.search.Query, int)} is called, queries consult the Similarity's DocScorer for an * explanation of how it computed its score. The query passes in a the document id and an explanation of how the frequency * was computed. diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java index b96ef65d1e3..0992ac8cd75 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java @@ -79,20 +79,24 @@ import org.apache.lucene.util.SmallFloat; * of the weighted query vectors V(q) and V(d): * *
     
    - *

    Basic point types in Java and Lucene
    Java typeLucene class
    {@code int}{@link IntPoint}
    {@code long}{@link LongPoint}
    + *
    + * *
    formatting only
    - * + *
    + * *
    formatting only
    - * + *
    + * * - * - * * *
    cosine similarity formula
    + * * cosine-similarity(q,d)   =   * - * - * - * - * + * * @@ -101,7 +105,7 @@ import org.apache.lucene.util.SmallFloat; *
    V(q) · V(d)
    –––––––––
    |V(q)| |V(d)|
    + * + * + * + * + * *
    cosine similarity formula
    V(q) · V(d)
    –––––––––
    |V(q)| |V(d)|
    *
    *
    - *
    VSM Score
    + * VSM Score *
    *
     
    @@ -161,24 +165,28 @@ import org.apache.lucene.util.SmallFloat; * we get Lucene's Conceptual scoring formula: * *
     
    - * + *
    + * *
    formatting only
    - * + *
    + * *
    formatting only
    - * + *
    + * * - * - * * *
    formatting only
    + * * score(q,d)   =   * query-boost(q) ·   * - * - * - * - * + * - * @@ -188,7 +196,7 @@ import org.apache.lucene.util.SmallFloat; *
    V(q) · V(d)
    –––––––––
    |V(q)|
    + * + * + * + * + * *
    Lucene conceptual scoring formula
    V(q) · V(d)
    –––––––––
    |V(q)|
    *
    + * *   ·   doc-len-norm(d) *   ·   doc-boost(d) *
    *
    - *
    Lucene Conceptual Scoring Formula
    + * Lucene Conceptual Scoring Formula *
    *
     
    @@ -246,28 +254,31 @@ import org.apache.lucene.util.SmallFloat; * The color codes demonstrate how it relates * to those of the conceptual formula: * - * + *
    + * * * *
    formatting only
    - * + *
    + * *
    formatting only
    - * + *
    + * * - * - * * * * - * + * * * *
    Lucene conceptual scoring formula
    + * * score(q,d)   =   - * + * * - * ( + * + * ( * tf(t in d)  ·  * idf(t)2  ·  * t.getBoost() ·  * norm(t,d) - * ) + * ) *
    t in qt in q
    @@ -275,14 +286,14 @@ import org.apache.lucene.util.SmallFloat; *
    *
    - *
    Lucene Practical Scoring Function
    + * Lucene Practical Scoring Function *
    * *

    where *

      *
    1. - * + * * tf(t in d) * correlates to the term's frequency, * defined as the number of times term t appears in the currently scored document d. @@ -295,13 +306,14 @@ import org.apache.lucene.util.SmallFloat; * {@link org.apache.lucene.search.similarities.ClassicSimilarity#tf(float) ClassicSimilarity} is: * *
       
      - * + *
      + * * - * - * * *
      term frequency computation
      + * * {@link org.apache.lucene.search.similarities.ClassicSimilarity#tf(float) tf(t in d)}   =   * - * frequency½ + * + * frequency½ *
      @@ -309,7 +321,7 @@ import org.apache.lucene.util.SmallFloat; *
    2. * *
    3. - * + * * idf(t) stands for Inverse Document Frequency. This value * correlates to the inverse of docFreq * (the number of documents in which the term t appears). @@ -320,23 +332,25 @@ import org.apache.lucene.util.SmallFloat; * {@link org.apache.lucene.search.similarities.ClassicSimilarity#idf(long, long) ClassicSimilarity} is: * *
       
      - * + *
      + * * - * - * - *
      inverse document frequency computation
      + * * {@link org.apache.lucene.search.similarities.ClassicSimilarity#idf(long, long) idf(t)}  =   * - * 1 + log ( + * + * 1 + log ( * - * - * - * - * + * - * * *
      docCount+1
      –––––––––
      docFreq+1
      + * + * + * + * + * *
      inverse document frequency computation
      docCount+1
      –––––––––
      docFreq+1
      *
      - * ) + * + * ) *
      @@ -344,7 +358,7 @@ import org.apache.lucene.util.SmallFloat; * * *
    4. - * + * * t.getBoost() * is a search time boost of term t in the query q as * specified in the query text @@ -360,7 +374,7 @@ import org.apache.lucene.util.SmallFloat; *
    5. * *
    6. - * + * * norm(t,d) is an index-time boost factor that solely * depends on the number of tokens of this field in the document, so * that shorter fields contribute more to the score. diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java b/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java index 997d5d6e342..5dfa4eda6c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java @@ -29,7 +29,7 @@ * * * - * + * *

      Summary of the Ranking Methods

      * *

      {@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized @@ -46,7 +46,7 @@ * Lucene ships the following methods built on * {@link org.apache.lucene.search.similarities.SimilarityBase}: * - * + * *

        *
      • Amati and Rijsbergen's {@linkplain org.apache.lucene.search.similarities.DFRSimilarity DFR} framework;
      • *
      • Clinchant and Gaussier's {@linkplain org.apache.lucene.search.similarities.IBSimilarity Information-based models} @@ -66,7 +66,7 @@ * optimizations can always be implemented in subclasses; see * below. * - * + * *

        Changing Similarity

        * *

        Chances are the available Similarities are sufficient for all @@ -94,12 +94,12 @@ *

        {@link org.apache.lucene.search.similarities.BM25Similarity} has * two parameters that may be tuned: *

          - *
        • k1, which calibrates term frequency saturation and must be + *
        • k1, which calibrates term frequency saturation and must be * positive or null. A value of {@code 0} makes term frequency completely - * ignored, making documents scored only based on the value of the IDF - * of the matched terms. Higher values of k1 increase the impact of + * ignored, making documents scored only based on the value of the IDF + * of the matched terms. Higher values of k1 increase the impact of * term frequency on the final score. Default value is {@code 1.2}.
        • - *
        • b, which controls how much document length should normalize + *
        • b, which controls how much document length should normalize * term frequency values and must be in {@code [0, 1]}. A value of {@code 0} * disables length normalization completely. Default value is {@code 0.75}.
        • *
        diff --git a/lucene/core/src/java/org/apache/lucene/store/DataOutput.java b/lucene/core/src/java/org/apache/lucene/store/DataOutput.java index 9c112496127..3767af3c8a2 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataOutput.java @@ -90,12 +90,13 @@ public abstract class DataOutput { * resulting integer value. Thus values from zero to 127 may be stored in a single * byte, values from 128 to 16,383 may be stored in two bytes, and so on.

        *

        VByte Encoding Example

        - * + *
        + * * - * - * - * - * + * + * + * + * * * * diff --git a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java index d20df51d3ee..c76a3c35ad4 100644 --- a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java @@ -47,7 +47,7 @@ import org.apache.lucene.util.IOUtils; /** * Base class for Directory implementations that store index * files in the file system. - * + * * There are currently three core * subclasses: * diff --git a/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java b/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java index c26f750cacb..a62d265b7a8 100644 --- a/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java @@ -35,7 +35,7 @@ public class OutputStreamIndexOutput extends IndexOutput { /** * Creates a new {@link OutputStreamIndexOutput} with the given buffer size. * @param bufferSize the buffer size in bytes used to buffer writes internally. - * @throws IllegalArgumentException if the given buffer size is less or equal to 0 + * @throws IllegalArgumentException if the given buffer size is less or equal to 0 */ public OutputStreamIndexOutput(String resourceDescription, String name, OutputStream out, int bufferSize) { super(resourceDescription, name); diff --git a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java index a475fe29d3e..65f69b06c92 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java @@ -167,8 +167,8 @@ public final class BitUtil { /** * Zig-zag * encode the provided long. 
Assuming the input is a signed long whose - * absolute value can be stored on n bits, the returned value will - * be an unsigned long that can be stored on n+1 bits. + * absolute value can be stored on n bits, the returned value will + * be an unsigned long that can be stored on n+1 bits. */ public static long zigZagEncode(long l) { return (l >> 63) ^ (l << 1); diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 7649c2c4b6a..756e2883b90 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -138,7 +138,7 @@ public final class ByteBlockPool implements Accountable { /** * Resets the pool to its initial state reusing the first buffer and fills all - * buffers with 0 bytes before they reused or passed to + * buffers with 0 bytes before they reused or passed to * {@link Allocator#recycleByteBlocks(byte[][], int, int)}. Calling * {@link ByteBlockPool#nextBuffer()} is not needed after reset. */ @@ -149,7 +149,7 @@ public final class ByteBlockPool implements Accountable { /** * Expert: Resets the pool to its initial state reusing the first buffer. Calling * {@link ByteBlockPool#nextBuffer()} is not needed after reset. - * @param zeroFillBuffers if true the buffers are filled with 0. + * @param zeroFillBuffers if true the buffers are filled with 0. * This should be set to true if this pool is used with slices. * @param reuseFirst if true the first buffer will be reused and calling * {@link ByteBlockPool#nextBuffer()} is not needed after reset iff the @@ -349,7 +349,7 @@ public final class ByteBlockPool implements Accountable { /** * Reads bytes out of the pool starting at the given offset with the given - * length into the given byte array at offset off. + * length into the given byte array at offset off. *

        Note: this method allows to copy across block boundaries.

        */ public void readBytes(final long offset, final byte bytes[], int bytesOffset, int bytesLength) { diff --git a/lucene/core/src/java/org/apache/lucene/util/Constants.java b/lucene/core/src/java/org/apache/lucene/util/Constants.java index 653c1e30102..29e5c6cc4a4 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Constants.java +++ b/lucene/core/src/java/org/apache/lucene/util/Constants.java @@ -33,10 +33,10 @@ public final class Constants { public static final String JVM_NAME = System.getProperty("java.vm.name"); public static final String JVM_SPEC_VERSION = System.getProperty("java.specification.version"); - /** The value of System.getProperty("java.version"). **/ + /** The value of System.getProperty("java.version"). **/ public static final String JAVA_VERSION = System.getProperty("java.version"); - /** The value of System.getProperty("os.name"). **/ + /** The value of System.getProperty("os.name"). **/ public static final String OS_NAME = System.getProperty("os.name"); /** True iff running on Linux. */ public static final boolean LINUX = OS_NAME.startsWith("Linux"); diff --git a/lucene/core/src/java/org/apache/lucene/util/FilterIterator.java b/lucene/core/src/java/org/apache/lucene/util/FilterIterator.java index 0dbfef2edc9..89cde6cdafb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FilterIterator.java +++ b/lucene/core/src/java/org/apache/lucene/util/FilterIterator.java @@ -23,7 +23,7 @@ import java.util.NoSuchElementException; * An {@link Iterator} implementation that filters elements with a boolean predicate. 
* * @param generic parameter for this iterator instance: this iterator implements {@link Iterator Iterator<T>} - * @param generic parameter of the wrapped iterator, must be T or extend T + * @param generic parameter of the wrapped iterator, must be T or extend T * @see #predicateFunction * @lucene.internal */ diff --git a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java index 95f0759e4bd..80182bf79cd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java @@ -64,21 +64,21 @@ public final class IOUtils { private IOUtils() {} // no instance /** - * Closes all given Closeables. Some of the - * Closeables may be null; they are + * Closes all given Closeables. Some of the + * Closeables may be null; they are * ignored. After everything is closed, the method either * throws the first exception it hit while closing, or * completes normally if there were no exceptions. * * @param objects - * objects to call close() on + * objects to call close() on */ public static void close(Closeable... objects) throws IOException { close(Arrays.asList(objects)); } /** - * Closes all given Closeables. + * Closes all given Closeables. * @see #close(Closeable...) */ public static void close(Iterable objects) throws IOException { @@ -99,18 +99,18 @@ public final class IOUtils { } /** - * Closes all given Closeables, suppressing all thrown exceptions. - * Some of the Closeables may be null, they are ignored. + * Closes all given Closeables, suppressing all thrown exceptions. + * Some of the Closeables may be null, they are ignored. * * @param objects - * objects to call close() on + * objects to call close() on */ public static void closeWhileHandlingException(Closeable... objects) { closeWhileHandlingException(Arrays.asList(objects)); } /** - * Closes all given Closeables, suppressing all thrown non {@link VirtualMachineError} exceptions. 
+ * Closes all given Closeables, suppressing all thrown non {@link VirtualMachineError} exceptions. * Even if a {@link VirtualMachineError} is thrown all given closeable are closed. * @see #closeWhileHandlingException(Closeable...) */ @@ -261,8 +261,8 @@ public final class IOUtils { } /** - * Deletes all given Paths, if they exist. Some of the - * Files may be null; they are + * Deletes all given Paths, if they exist. Some of the + * Files may be null; they are * ignored. After everything is deleted, the method either * throws the first exception it hit while deleting, or * completes normally if there were no exceptions. @@ -274,8 +274,8 @@ public final class IOUtils { } /** - * Deletes all given Paths, if they exist. Some of the - * Files may be null; they are + * Deletes all given Paths, if they exist. Some of the + * Files may be null; they are * ignored. After everything is deleted, the method either * throws the first exception it hit while deleting, or * completes normally if there were no exceptions. diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java index 75859800cfd..46500fcdf07 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java @@ -99,7 +99,7 @@ public final class IntBlockPool { /** * Expert: Resets the pool to its initial state reusing the first buffer. - * @param zeroFillBuffers if true the buffers are filled with 0. + * @param zeroFillBuffers if true the buffers are filled with 0. * This should be set to true if this pool is used with * {@link SliceWriter}. 
* @param reuseFirst if true the first buffer will be reused and calling diff --git a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java index 3c96dc56315..f5721ae1247 100644 --- a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java +++ b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java @@ -114,7 +114,7 @@ public abstract class PriorityQueue implements Iterable { /** Determines the ordering of objects in this priority queue. Subclasses * must define this one method. - * @return true iff parameter a is less than parameter b. + * @return true iff parameter a is less than parameter b. */ protected abstract boolean lessThan(T a, T b); diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java index 4fcbbef3e98..54185db2cd1 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java @@ -26,7 +26,7 @@ import org.apache.lucene.search.DocIdSetIterator; * The way it works is that the space of bits is divided into blocks of * 4096 bits, which is 64 longs. Then for each block, we have:
          *
        • a long[] which stores the non-zero longs for that block
        • - *
        • a long so that bit i being set means that the i-th + *
        • a long so that bit i being set means that the i-th * long of the block is non-null, and its offset in the array of longs is * the number of one bits on the right of the i-th bit.
        * @@ -132,7 +132,7 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable { } /** - * Set the bit at index i. + * Set the bit at index i. */ public void set(int i) { assert consistent(i); @@ -189,7 +189,7 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable { } /** - * Clear the bit at index i. + * Clear the bit at index i. */ public void clear(int i) { assert consistent(i); diff --git a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java index d36b648c045..ea341f2d0e0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java @@ -26,7 +26,7 @@ import java.util.Arrays; *

        This implementation is especially good at sorting partially-sorted * arrays and sorts small arrays with binary sort. *

        NOTE: There are a few differences from the original implementation:

          - *
        • The extra amount of memory to perform merges is + *
        • The extra amount of memory to perform merges is * configurable. This allows small merges to be very fast while large merges * will be performed in-place (slightly slower). You can make sure that the * fast merge routine will always be used by having maxTempSlots diff --git a/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java b/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java index bcee767ad12..34dbd383e2d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java @@ -45,7 +45,7 @@ import java.util.concurrent.ConcurrentHashMap; * on the values and not-GCed keys. Lucene's implementation also supports {@code null} * keys, but those are never weak! * - *

          The map supports two modes of operation: + *

          The map supports two modes of operation: *

            *
          • {@code reapOnRead = true}: This behaves identical to a {@link java.util.WeakHashMap} * where it also cleans up the reference queue on every read operation ({@link #get(Object)}, diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java index 31cca6de967..e8b37e59a3f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java @@ -40,7 +40,8 @@ import java.util.Set; * Regular Expression extension to Automaton. *

            * Regular expressions are built from the following abstract syntax: - *

        variable length encoding examples
        ValueByte 1Byte 2Byte 3ValueByte 1Byte 2Byte 3
        0
        + *
        + * * * * @@ -59,7 +60,7 @@ import java.util.Set; * * * - * + * * * * @@ -74,7 +75,7 @@ import java.util.Set; * * * - * + * * * * @@ -104,43 +105,43 @@ import java.util.Set; * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * * * * - * - * + * + * * * * * * - * - * + * + * * * * * * - * - * + * + * * * * @@ -154,7 +155,7 @@ import java.util.Set; * * * - * + * * * * @@ -169,14 +170,14 @@ import java.util.Set; * * * - * + * * * * * * * - * + * * * * @@ -206,7 +207,7 @@ import java.util.Set; * * * - * + * * * * @@ -228,56 +229,56 @@ import java.util.Set; * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * * * * - * + * * * * @@ -292,7 +293,7 @@ import java.util.Set; * * * - * + * * * * @@ -301,13 +302,13 @@ import java.util.Set; * The productions marked [OPTIONAL] are only allowed if * specified by the syntax flags passed to the RegExp constructor. * The reserved characters used in the (enabled) syntax must be escaped with - * backslash (\) or double-quotes ("..."). (In + * backslash (\) or double-quotes ("..."). (In * contrast to other regexp syntaxes, this is required also in character - * classes.) Be aware that dash (-) has a special meaning in + * classes.) Be aware that dash (-) has a special meaning in * charclass expressions. An identifier is a string not containing right - * angle bracket (>) or dash (-). Numerical + * angle bracket (>) or dash (-). Numerical * intervals are specified by non-negative decimal integers and include both end - * points, and if n and m have the same number + * points, and if n and m have the same number * of digits, then the conforming strings must have that length (i.e. prefixed * by 0's). * @@ -320,33 +321,33 @@ public class RegExp { } /** - * Syntax flag, enables intersection (&). + * Syntax flag, enables intersection (&). */ public static final int INTERSECTION = 0x0001; /** - * Syntax flag, enables complement (~). 
+ * Syntax flag, enables complement (~). */ public static final int COMPLEMENT = 0x0002; /** - * Syntax flag, enables empty language (#). + * Syntax flag, enables empty language (#). */ public static final int EMPTY = 0x0004; /** - * Syntax flag, enables anystring (@). + * Syntax flag, enables anystring (@). */ public static final int ANYSTRING = 0x0008; /** - * Syntax flag, enables named automata (<identifier>). + * Syntax flag, enables named automata (<identifier>). */ public static final int AUTOMATON = 0x0010; /** * Syntax flag, enables numerical intervals ( - * <n-m>). + * <n-m>). */ public static final int INTERVAL = 0x0020; diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java index 7be9339914d..48374151236 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java @@ -61,7 +61,7 @@ public class StatePair { * Checks for equality. * * @param obj object to compare with - * @return true if obj represents the same pair of states as this + * @return true if obj represents the same pair of states as this * pair */ @Override diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/package-info.java b/lucene/core/src/java/org/apache/lucene/util/automaton/package-info.java index 7baea5c96b8..daf2294a91b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/package-info.java @@ -35,12 +35,12 @@ * regular expression operations. *

        * The most commonly used functionality is located in the classes - * {@link org.apache.lucene.util.automaton.Automaton} and - * {@link org.apache.lucene.util.automaton.RegExp}. + * {@link org.apache.lucene.util.automaton.Automaton} and + * {@link org.apache.lucene.util.automaton.RegExp}. *

        * For more information, go to the package home page at - * http://www.brics.dk/automaton/. + * http://www.brics.dk/automaton/. * @lucene.experimental */ package org.apache.lucene.util.automaton; diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java index 9a3a8a485e6..b8085f7a89a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java @@ -50,7 +50,7 @@ abstract class AbstractBlockPackedWriter { /** * Sole constructor. - * @param blockSize the number of values of a single block, must be a multiple of 64 + * @param blockSize the number of values of a single block, must be a multiple of 64 */ public AbstractBlockPackedWriter(DataOutput out, int blockSize) { checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE); diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java b/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java index 512fa36a261..11e6f318439 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java @@ -177,7 +177,7 @@ public final class BlockPackedReaderIterator { return value; } - /** Read between 1 and count values. */ + /** Read between 1 and count values. 
*/ public LongsRef next(int count) throws IOException { assert count > 0; if (ord == valueCount) { diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java b/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java index afe3ccfa450..2a6308749a9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java @@ -39,17 +39,17 @@ import org.apache.lucene.store.DataOutput; *

      • Block: <Header, (Ints)> *
      • Header: <Token, (MinValue)> *
      • Token: a {@link DataOutput#writeByte(byte) byte}, first 7 bits are the - * number of bits per value (bitsPerValue). If the 8th bit is 1, - * then MinValue (see next) is 0, otherwise MinValue and needs to + * number of bits per value (bitsPerValue). If the 8th bit is 1, + * then MinValue (see next) is 0, otherwise MinValue and needs to * be decoded *
      • MinValue: a * zigzag-encoded * {@link DataOutput#writeVLong(long) variable-length long} whose value * should be added to every int from the block to restore the original * values - *
      • Ints: If the number of bits per value is 0, then there is + *
      • Ints: If the number of bits per value is 0, then there is * nothing to decode and all ints are equal to MinValue. Otherwise: BlockSize - * {@link PackedInts packed ints} encoded on exactly bitsPerValue + * {@link PackedInts packed ints} encoded on exactly bitsPerValue * bits per value. They are the subtraction of the original values and * MinValue * diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java index da76db9a880..f35e9c0df3c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java @@ -45,7 +45,7 @@ import org.apache.lucene.util.BitUtil; * {@link Float#floatToIntBits(float)} on * {@link DataOutput#writeInt(int) 4 bytes} *
      • BitsPerValue: a {@link DataOutput#writeVInt(int) variable-length int} - *
      • Ints: if BitsPerValue is 0, then there is nothing to read and + *
      • Ints: if BitsPerValue is 0, then there is nothing to read and * all values perfectly match the result of the function. Otherwise, these * are the {@link PackedInts packed} deltas from the expected value * (computed from the function) using exactly BitsPerValue bits per value. diff --git a/lucene/core/src/java/overview.html b/lucene/core/src/java/overview.html index da8f8ea4300..b8f21e97a24 100644 --- a/lucene/core/src/java/overview.html +++ b/lucene/core/src/java/overview.html @@ -147,22 +147,22 @@ index for all the files contained in a directory.
      • queries and searches an index. To demonstrate these, try something like: -
        > java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles -index index -docs rec.food.recipes/soups -
        adding rec.food.recipes/soups/abalone-chowder -
          [ ... ] +
        > java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles -index index -docs rec.food.recipes/soups +
        adding rec.food.recipes/soups/abalone-chowder +
          [ ... ] -

        > java -cp lucene-core.jar:lucene-demo.jar:lucene-queryparser.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles -
        Query: chowder -
        Searching for: chowder -
        34 total matching documents -
        1. rec.food.recipes/soups/spam-chowder -
          [ ... thirty-four documents contain the word "chowder" ... ] +

        > java -cp lucene-core.jar:lucene-demo.jar:lucene-queryparser.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles +
        Query: chowder +
        Searching for: chowder +
        34 total matching documents +
        1. rec.food.recipes/soups/spam-chowder +
          [ ... thirty-four documents contain the word "chowder" ... ] -

        Query: "clam chowder" AND Manhattan -
        Searching for: +"clam chowder" +manhattan -
        2 total matching documents -
        1. rec.food.recipes/soups/clam-chowder -
          [ ... two documents contain the phrase "clam chowder" +

        Query: "clam chowder" AND Manhattan +
        Searching for: +"clam chowder" +manhattan +
        2 total matching documents +
        1. rec.food.recipes/soups/clam-chowder +
          [ ... two documents contain the phrase "clam chowder" and the word "manhattan" ... ]
            [ Note: "+" and "-" are canonical, but "AND", "OR" and "NOT" may be used. ]

        diff --git a/lucene/demo/src/java/overview.html b/lucene/demo/src/java/overview.html index c20b55b481a..8f1a08a609f 100644 --- a/lucene/demo/src/java/overview.html +++ b/lucene/demo/src/java/overview.html @@ -34,21 +34,21 @@
      • Searching Files
      • - +

        About this Document

        This document is intended as a "getting started" guide to using and running the Lucene demos. It walks you through some basic installation and configuration.

        - +

        About the Demo

        The Lucene command-line demo code consists of an application that demonstrates various functionalities of Lucene and how you can add Lucene to your applications.

        - +

        Setting your CLASSPATH

        - +

        Indexing Files

        Once you've gotten this far you're probably itching to go. Let's build an @@ -85,7 +85,7 @@ You'll see that there are no matching results in the lucene source code. Now try entering the word "string". That should return a whole bunch of documents. The results will page at every tenth result and ask you whether you want more results.

        - +

        About the code

        In this section we walk through the sources behind the command-line Lucene @@ -93,7 +93,7 @@ demo: where to find them, their parts and their function. This section is intended for Java developers wishing to understand how to use Lucene in their applications.

        - +

        Location of the source

        The files discussed here are linked into this documentation directly: @@ -101,9 +101,8 @@ applications.

      • IndexFiles.java: code to create a Lucene index.
      • SearchFiles.java: code to search a Lucene index. -

      • - +

        IndexFiles

        As we discussed in the previous walk-through, the

        - +

        Searching Files

        description of regular expression grammar
        regexp::=
        unionexp::=interexp | unionexpinterexp | unionexp(union)
        interexp::=concatexp & interexpconcatexp & interexp(intersection)[OPTIONAL]
        repeatexp::=repeatexp ?repeatexp ?(zero or one occurrence)
        |repeatexp *repeatexp *(zero or more occurrences)
        |repeatexp +repeatexp +(one or more occurrences)
        |repeatexp {n}(n occurrences)repeatexp {n}(n occurrences)
        |repeatexp {n,}(n or more occurrences)repeatexp {n,}(n or more occurrences)
        |repeatexp {n,m}(n to m occurrences, including both)repeatexp {n,m}(n to m occurrences, including both)
        complexp::=~ complexp~ complexp(complement)[OPTIONAL]
        charclassexp::=[ charclasses ][ charclasses ](character class)
        |[^ charclasses ][^ charclasses ](negated character class)
        charclass::=charexp - charexpcharexp - charexp(character range, including end-points)
        |..(any single character)
        |##(the empty language)[OPTIONAL]
        |@@(any string)[OPTIONAL]
        |" <Unicode string without double-quotes>  "" <Unicode string without double-quotes>  "(a string)
        |( )( )(the empty string)
        |( unionexp )( unionexp )(precedence override)
        |< <identifier> >< <identifier> >(named automaton)[OPTIONAL]
        |<n-m><n-m>(numerical interval)[OPTIONAL]
        |\ <Unicode character> \ <Unicode character> (a single character)
        + *
        + * * * * @@ -164,7 +165,7 @@ *
      • WeightedFragListBuilder using WeightedFieldFragList: sum-of-distinct-weights-approach. The totalBoost is calculated by summarizing the IDF-weights of distinct terms.
      • * *

        Comparison of the two approaches:

        - *
        sample document and query
        Sample TextLucene is a search engine library.
        + *
        * diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index c555297fa95..0c3a0f65d95 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -336,7 +336,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte } /** - * This method intended for use with testHighlightingWithDefaultField() + * This method intended for use with testHighlightingWithDefaultField() */ private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java index 2553a808758..b8ce3dd4840 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java @@ -101,7 +101,7 @@ public class HighlightCustomQueryTest extends LuceneTestCase { /** * This method intended for use with - * testHighlightingWithDefaultField() + * testHighlightingWithDefaultField() */ private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { diff --git a/lucene/misc/src/java/overview.html b/lucene/misc/src/java/overview.html index 8962cfcb8e2..974ad88388c 100644 --- a/lucene/misc/src/java/overview.html +++ b/lucene/misc/src/java/overview.html @@ -27,7 +27,7 @@ The misc package has various tools for splitting/merging indices, changing norms, finding high freq terms, and others. - +

        NativeUnixDirectory

        @@ -47,15 +47,15 @@ for details. Steps to build:

          -
        • cd lucene/misc/ +
        • cd lucene/misc/ -
        • To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run ant build-native-unix. +
        • To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run ant build-native-unix. -
        • libNativePosixUtil.so will be located in the lucene/build/native/ folder +
        • libNativePosixUtil.so will be located in the lucene/build/native/ folder -
        • Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH, where /path/to/dir contains libNativePosixUtil.so) +
        • Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH, where /path/to/dir contains libNativePosixUtil.so) -
        • ant jar to compile the java source and put that JAR on your CLASSPATH +
        • ant jar to compile the java source and put that JAR on your CLASSPATH

        diff --git a/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java index 5c361cf7e9e..b69d52d57df 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java @@ -256,7 +256,7 @@ public class CommonTermsQuery extends Query { * satisfied in order to produce a match on the low frequency terms query * part. This method accepts a float value in the range [0..1) as a fraction * of the actual query terms in the low frequent clause or a number - * >=1 as an absolut number of clauses that need to match. + * >=1 as an absolut number of clauses that need to match. * *

        * By default no optional clauses are necessary for a match (unless there are @@ -284,7 +284,7 @@ public class CommonTermsQuery extends Query { * satisfied in order to produce a match on the low frequency terms query * part. This method accepts a float value in the range [0..1) as a fraction * of the actual query terms in the low frequent clause or a number - * >=1 as an absolut number of clauses that need to match. + * >=1 as an absolut number of clauses that need to match. * *

        * By default no optional clauses are necessary for a match (unless there are diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java index 877f1415865..dcdfa6e8de0 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java @@ -51,7 +51,7 @@ import org.apache.lucene.search.TermRangeQuery; * *

        * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. - * date:[6/1/2005 TO 6/4/2005] produces a range query that searches + * date:[6/1/2005 TO 6/4/2005] produces a range query that searches * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format * of the accepted input depends on {@link #setLocale(Locale) the locale}. * A {@link org.apache.lucene.document.DateTools.Resolution} has to be set, diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj index 2d280d1ba8d..fdc0cd0ed5e 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj @@ -75,7 +75,7 @@ import org.apache.lucene.search.TermRangeQuery; * *

        * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. - * date:[6/1/2005 TO 6/4/2005] produces a range query that searches + * date:[6/1/2005 TO 6/4/2005] produces a range query that searches * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format * of the accepted input depends on {@link #setLocale(Locale) the locale}. * A {@link org.apache.lucene.document.DateTools.Resolution} has to be set, diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java index ab3451f65b0..9f77eb9b7e1 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java @@ -22,7 +22,7 @@ *

        Note that JavaCC defines lots of public classes, methods and fields * that do not need to be public.  These clutter the documentation.  * Sorry. - *

        Note that because JavaCC defines a class named Token, org.apache.lucene.analysis.Token + *

        Note that because JavaCC defines a class named Token, org.apache.lucene.analysis.Token * must always be fully qualified in source code in this package. * *

        NOTE: {@link org.apache.lucene.queryparser.flexible.standard} has an alternative queryparser that matches the syntax of this one, but is more modular, @@ -96,7 +96,7 @@ * * * - * + * *

        Overview

        *
        *

        Although Lucene provides the ability to create your own @@ -140,7 +140,7 @@ *

        * * - * + * *

        Terms

        *
        *

        A query is broken up into terms and operators. There are two types of terms: Single Terms and Phrases. @@ -152,7 +152,7 @@ *

        * * - * + * *

        Fields

        *
        *

        Lucene supports fielded data. When performing a search you can either specify a field, or use the default field. The field names and default field is implementation specific. @@ -169,11 +169,11 @@ *

        * * - * + * *

        Term Modifiers

        *
        *

        Lucene supports modifying query terms to provide a wide range of searching options. - * + * *

        Wildcard Searches

        *

        Lucene supports single and multiple character wildcard searches within single terms * (not within phrase queries). @@ -186,13 +186,13 @@ *

        You can also use the wildcard searches in the middle of a term. *

        te*t
        *

        Note: You cannot use a * or ? symbol as the first character of a search. - * + * *

        Regular Expression Searches

        *

        Lucene supports regular expression searches matching a pattern between forward slashes "/". The syntax may change across releases, but the current supported * syntax is documented in the {@link org.apache.lucene.util.automaton.RegExp RegExp} class. For example to find documents containing "moat" or "boat": * *

        /[mb]oat/
        - * + * *

        Fuzzy Searches

        *

        Lucene supports fuzzy searches based on Damerau-Levenshtein Distance. To do a fuzzy search use the tilde, "~", symbol at the end of a Single word Term. For example to search for a term similar in spelling to "roam" use the fuzzy search: *

        roam~
        @@ -201,11 +201,11 @@ *
        roam~1
        *

        The default that is used if the parameter is not given is 2 edit distances. *

        Previously, a floating point value was allowed here. This syntax is considered deprecated and will be removed in Lucene 5.0 - * + * *

        Proximity Searches

        *

        Lucene supports finding words are a within a specific distance away. To do a proximity search use the tilde, "~", symbol at the end of a Phrase. For example to search for a "apache" and "jakarta" within 10 words of each other in a document use the search: *

        "jakarta apache"~10
        - * + * *

        Range Searches

        *

        Range Queries allow one to match documents whose field(s) values * are between the lower and upper bound specified by the Range Query. @@ -218,7 +218,7 @@ *

        This will find all documents whose titles are between Aida and Carmen, but not including Aida and Carmen. *

        Inclusive range queries are denoted by square brackets. Exclusive range queries are denoted by * curly brackets. - * + * *

        Boosting a Term

        *

        Lucene provides the relevance level of matching documents based on the terms found. To boost a term use the caret, "^", symbol with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be. *

        Boosting allows you to control the relevance of a document by boosting its term. For example, if you are searching for @@ -233,12 +233,12 @@ * * * - * + * *

        Boolean Operators

        *
        *

        Boolean operators allow terms to be combined through logic operators. * Lucene supports AND, "+", OR, NOT and "-" as Boolean operators (Note: Boolean operators must be ALL CAPS). - * + * *

        OR

        *

        The OR operator is the default conjunction operator. This means that if there is no Boolean operator between two terms, the OR operator is used. * The OR operator links two terms and finds a matching document if either of the terms exist in a document. This is equivalent to a union using sets. @@ -247,18 +247,18 @@ *

        "jakarta apache" jakarta
        *

        or *

        "jakarta apache" OR jakarta
        - * + * *

        AND

        *

        The AND operator matches documents where both terms exist anywhere in the text of a single document. * This is equivalent to an intersection using sets. The symbol && can be used in place of the word AND. *

        To search for documents that contain "jakarta apache" and "Apache Lucene" use the query: *

        "jakarta apache" AND "Apache Lucene"
        - * + * *

        +

        *

        The "+" or required operator requires that the term after the "+" symbol exist somewhere in a field of a single document. *

        To search for documents that must contain "jakarta" and may contain "lucene" use the query: *

        +jakarta lucene
        - * + * *

        NOT

        *

        The NOT operator excludes documents that contain the term after NOT. * This is equivalent to a difference using sets. The symbol ! can be used in place of the word NOT. @@ -266,7 +266,7 @@ *

        "jakarta apache" NOT "Apache Lucene"
        *

        Note: The NOT operator cannot be used with just one term. For example, the following search will return no results: *

        NOT "jakarta apache"
        - * + * *

        -

        *

        The "-" or prohibit operator excludes documents that contain the term after the "-" symbol. *

        To search for documents that contain "jakarta apache" but not "Apache Lucene" use the query: @@ -274,7 +274,7 @@ *

        * * - * + * *

        Grouping

        *
        *

        Lucene supports using parentheses to group clauses to form sub queries. This can be very useful if you want to control the boolean logic for a query. @@ -284,7 +284,7 @@ *

        * * - * + * *

        Field Grouping

        *
        *

        Lucene supports using parentheses to group multiple clauses to a single field. @@ -293,7 +293,7 @@ *

        * * - * + * *

        Escaping Special Characters

        *
        *

        Lucene supports escaping special characters that are part of the query syntax. The current list special characters are diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/builders/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/builders/package-info.java index d24c915ca9e..9649b007c66 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/builders/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/builders/package-info.java @@ -20,7 +20,7 @@ * *

        Query Parser Builders

        *

        - * The package org.apache.lucene.queryParser.builders contains the interface that + * The package org.apache.lucene.queryParser.builders contains the interface that * builders must implement, it also contain a utility {@link org.apache.lucene.queryparser.flexible.core.builders.QueryTreeBuilder}, which walks the tree * and call the Builder for each node in the tree. * Builder normally convert QueryNode Object into a Lucene Query Object, diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/config/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/config/package-info.java index f3719e984d7..a07c95af0fd 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/config/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/config/package-info.java @@ -20,7 +20,7 @@ * *

        Query Configuration Interfaces

        *

        - * The package org.apache.lucene.queryparser.flexible.config contains query configuration handler + * The package org.apache.lucene.queryparser.flexible.config contains query configuration handler * abstract class that all config handlers should extend. *

        * See {@link org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler} for a reference diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/messages/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/messages/package-info.java index 75fbb21997e..22b8bebf17f 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/messages/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/messages/package-info.java @@ -20,7 +20,7 @@ * *

        Query Parser Messages

        * - * Messages for the Flexible Query Parser, they use org.apache.lucene.messages.NLS API. + * Messages for the Flexible Query Parser, they use org.apache.lucene.messages.NLS API. */ package org.apache.lucene.queryparser.flexible.core.messages; diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/nodes/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/nodes/package-info.java index 23c72a1ada9..ee3d67f924a 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/nodes/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/nodes/package-info.java @@ -20,7 +20,7 @@ * *

        Query Nodes

        *

        - * The package org.apache.lucene.queryParser.nodes contains all the basic query nodes. The interface + * The package org.apache.lucene.queryParser.nodes contains all the basic query nodes. The interface * that represents a query node is {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}. *

        * {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}s are used by the text parser to create a syntax tree. diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/parser/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/parser/package-info.java index 273138d8762..1533cd4169b 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/parser/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/parser/package-info.java @@ -20,7 +20,7 @@ * *

        Parser

        *

        - * The package org.apache.lucene.queryparser.flexible.parser contains interfaces + * The package org.apache.lucene.queryparser.flexible.parser contains interfaces * that should be implemented by the parsers. * * Parsers produce QueryNode Trees from a string object. diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/processors/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/processors/package-info.java index 86ce57b5d99..8edfdb11639 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/processors/package-info.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/processors/package-info.java @@ -20,7 +20,7 @@ * *

        Query Node Processors

        *

        - * The package org.apache.lucene.queryParser.processors contains interfaces + * The package org.apache.lucene.queryParser.processors contains interfaces * that should be implemented by every query node processor. *

        * The interface that every query node processor should implement is {@link org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor}. diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/simple/SimpleQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/simple/SimpleQueryParser.java index 2a7f7eafbff..18fdcf7068b 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/simple/SimpleQueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/simple/SimpleQueryParser.java @@ -48,19 +48,19 @@ import java.util.Map; *

        * Query Operators *

          - *
        • '{@code +}' specifies {@code AND} operation: token1+token2 - *
        • '{@code |}' specifies {@code OR} operation: token1|token2 - *
        • '{@code -}' negates a single token: -token0 - *
        • '{@code "}' creates phrases of terms: "term1 term2 ..." - *
        • '{@code *}' at the end of terms specifies prefix query: term* - *
        • '{@code ~}N' at the end of terms specifies fuzzy query: term~1 - *
        • '{@code ~}N' at the end of phrases specifies near query: "term1 term2"~5 - *
        • '{@code (}' and '{@code )}' specifies precedence: token1 + (token2 | token3) + *
        • '{@code +}' specifies {@code AND} operation: token1+token2 + *
        • '{@code |}' specifies {@code OR} operation: token1|token2 + *
        • '{@code -}' negates a single token: -token0 + *
        • '{@code "}' creates phrases of terms: "term1 term2 ..." + *
        • '{@code *}' at the end of terms specifies prefix query: term* + *
        • '{@code ~}N' at the end of terms specifies fuzzy query: term~1 + *
        • '{@code ~}N' at the end of phrases specifies near query: "term1 term2"~5 + *
        • '{@code (}' and '{@code )}' specifies precedence: token1 + (token2 | token3) *
        *

        * The {@link #setDefaultOperator default operator} is {@code OR} if no other operator is specified. * For example, the following will {@code OR} {@code token1} and {@code token2} together: - * token1 token2 + * token1 token2 *

        * Normal operator precedence will be simple order from right to left. * For example, the following will evaluate {@code token1 OR token2} first, diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/PointRangeQueryBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/PointRangeQueryBuilder.java index 1297ad48843..09cef86ee6f 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/PointRangeQueryBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/PointRangeQueryBuilder.java @@ -30,7 +30,8 @@ import org.w3c.dom.Element; /** * Creates a range query across 1D {@link PointValues}. The table below specifies the required * attributes and the defaults if optional attributes are omitted: - *

        * query = das alte testament (The Old Testament) *
        + *
        + * * * * @@ -45,13 +46,13 @@ import org.w3c.dom.Element; * * * - * + * * * * * * - * + * * * * @@ -64,8 +65,8 @@ import org.w3c.dom.Element; *
        supported attributes
        Attribute nameValues
        lowerTermSpecified by typeSpecified by typeNoInteger.MIN_VALUE Long.MIN_VALUE Float.NEGATIVE_INFINITY Double.NEGATIVE_INFINITY
        upperTermSpecified by typeSpecified by typeNoInteger.MAX_VALUE Long.MAX_VALUE Float.POSITIVE_INFINITY Double.POSITIVE_INFINITY
        *

        * A {@link ParserException} will be thrown if an error occurs parsing the - * supplied lowerTerm or upperTerm into the numeric type - * specified by type. + * supplied lowerTerm or upperTerm into the numeric type + * specified by type. */ public class PointRangeQueryBuilder implements QueryBuilder { diff --git a/lucene/queryparser/src/java/overview.html b/lucene/queryparser/src/java/overview.html index 6e02af90976..ff47848801c 100644 --- a/lucene/queryparser/src/java/overview.html +++ b/lucene/queryparser/src/java/overview.html @@ -33,16 +33,16 @@

      • Surround
      • XML
      -
      -

      Classic

      +
      +

      Classic

      A Simple Lucene QueryParser implemented with JavaCC. -

      Analyzing

      +

      Analyzing

      QueryParser that passes Fuzzy-, Prefix-, Range-, and WildcardQuerys through the given analyzer. -

      Complex Phrase

      +

      Complex Phrase

      QueryParser which permits complex phrase query syntax eg "(john jon jonathan~) peters*" -

      Extendable

      +

      Extendable

      Extendable QueryParser provides a simple and flexible extension mechanism by overloading query field names. -

      Flexible

      +

      Flexible

      This project contains the new Lucene query parser implementation, which matches the syntax of the core QueryParser but offers a more modular architecture to enable customization.

      @@ -53,7 +53,6 @@ It's currently divided in 2 main packages:
    7. {@link org.apache.lucene.queryparser.flexible.core}: it contains the query parser API classes, which should be extended by query parser implementations.
    8. {@link org.apache.lucene.queryparser.flexible.standard}: it contains the current Lucene query parser implementation using the new query parser API.
    9. -

      Features

      @@ -156,13 +155,11 @@ you don't need to worry about dealing with those. config.setAnalyzer(new WhitespaceAnalyzer()); Query query = qpHelper.parse("apache AND lucene", "defaultField"); -

      Surround

      +

      Surround

      A QueryParser that supports the Span family of queries as well as pre and infix notation.

      -

      XML

      +

      XML

      A QueryParser that produces Lucene Query objects from XML streams. -

      -

      diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java index 3fb147900d3..397735300c8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java @@ -46,9 +46,9 @@ public final class CoveringQuery extends Query implements Accountable { * @param queries Sub queries to match. * @param minimumNumberMatch Per-document long value that records how many queries * should match. Values that are less than 1 are treated - * like 1: only documents that have at least one + * like 1: only documents that have at least one * matching clause will be considered matches. Documents - * that do not have a value for minimumNumberMatch + * that do not have a value for minimumNumberMatch * do not match. */ public CoveringQuery(Collection queries, LongValuesSource minimumNumberMatch) { diff --git a/lucene/spatial-extras/src/java/overview.html b/lucene/spatial-extras/src/java/overview.html index fcce7cb5232..685d23ef273 100644 --- a/lucene/spatial-extras/src/java/overview.html +++ b/lucene/spatial-extras/src/java/overview.html @@ -52,7 +52,6 @@ (via JTS).
    10. Distance and other spatial related math calculations.
    11. -

      Historical note: The new spatial-extras module was once known as Lucene Spatial Playground (LSP) as an external project. In ~March 2012, LSP diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java index 486d00509c1..1016c1a31ad 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java @@ -38,11 +38,11 @@ import org.apache.lucene.util.fst.FST; *

      * Files: *

      *

      - * + * *

      Completion Dictionary

      *

      The .lkp file contains an FST for each suggest field *

      @@ -59,7 +59,7 @@ import org.apache.lucene.util.fst.FST; * for the Completion implementation. *
    12. FST maps all analyzed forms to surface forms of a SuggestField
    13. * - * + * *

      Completion Index

      *

      The .cmp file contains an index into the completion dictionary, so that it can be * accessed randomly.

      diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/package.html b/lucene/test-framework/src/java/org/apache/lucene/analysis/package.html index be34d6c7557..0f787b28181 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/package.html +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/package.html @@ -38,6 +38,5 @@ The main classes of interest are: component such as a queryparser or analyzer-wrapper that consumes analysis streams, it's a great idea to test it with this analyzer instead. -

      diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java index 00350339cca..5c1a73fa2e1 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java @@ -536,7 +536,7 @@ public class RandomIndexWriter implements Closeable { } /** - * Simple interface that is executed for each TP {@link InfoStream} component + * Simple interface that is executed for each TP {@link InfoStream} component * message. See also {@link RandomIndexWriter#mockIndexWriter(Random, Directory, IndexWriterConfig, TestPoint)} */ public interface TestPoint { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/package.html b/lucene/test-framework/src/java/org/apache/lucene/index/package.html index 78568b40b00..383c56c69c7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/package.html +++ b/lucene/test-framework/src/java/org/apache/lucene/index/package.html @@ -28,6 +28,5 @@ The primary classes are:
    14. {@link org.apache.lucene.index.RandomIndexWriter}: Randomizes the indexing experience.
    15. {@link org.apache.lucene.index.MockRandomMergePolicy}: MergePolicy that makes random decisions. -

      diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/package.html b/lucene/test-framework/src/java/org/apache/lucene/search/package.html index 1fe0c990100..c0360fdd3c7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/package.html +++ b/lucene/test-framework/src/java/org/apache/lucene/search/package.html @@ -28,6 +28,5 @@ The primary classes are:
    16. {@link org.apache.lucene.search.QueryUtils}: Useful methods for testing Query classes.
    17. {@link org.apache.lucene.search.ShardSearchingTestBase}: Base class for simulating distributed search. -

      diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/package.html b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/package.html index 910a7d9e06b..aaabf2c127b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/package.html +++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/package.html @@ -27,6 +27,5 @@ The primary classes are:
      • {@link org.apache.lucene.search.similarities.RandomSimilarity}: Randomizes similarity per-field in tests.
      -

      diff --git a/lucene/tools/javadoc/table_padding.css b/lucene/tools/javadoc/table_padding.css new file mode 100644 index 00000000000..000c3648502 --- /dev/null +++ b/lucene/tools/javadoc/table_padding.css @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * some css needed for dealing with legacy table attributes in docs + * let's remove this file and fix things to be a cleaner way! + */ + +table.padding1 td,th { + padding: 1px; +} + +table.padding2 td,th { + padding: 2px; +} + +table.padding3 td,th { + padding: 3px; +} + +table.padding4 td,th { + padding: 4px; +} diff --git a/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java b/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java index c835b666e8d..96ef01daa2e 100644 --- a/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java +++ b/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java @@ -46,8 +46,8 @@ import org.apache.tools.ant.types.resources.Resources; import org.apache.tools.ant.util.FileNameMapper; /** - * An ANT task that verifies if JAR file have associated LICENSE, - * NOTICE, and sha1 files. 
+ * An ANT task that verifies if JAR file have associated LICENSE, + * NOTICE, and sha1 files. */ public class LicenseCheckTask extends Task { diff --git a/solr/build.xml b/solr/build.xml index b57fe998ef2..f20727e6562 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -628,12 +628,6 @@ - - - - diff --git a/solr/contrib/ltr/src/java/overview.html b/solr/contrib/ltr/src/java/overview.html index b0802d25a18..831436088a4 100644 --- a/solr/contrib/ltr/src/java/overview.html +++ b/solr/contrib/ltr/src/java/overview.html @@ -80,8 +80,8 @@ provided by {@link org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransf Models are registered into a unique {@link org.apache.solr.ltr.store.ModelStore ModelStore}, and each model specifies a particular {@link org.apache.solr.ltr.store.FeatureStore FeatureStore} that will contain a particular subset of features. -

      +

      Features and models can be managed through a REST API, provided by the {@link org.apache.solr.rest.ManagedResource Managed Resources} {@link org.apache.solr.ltr.store.rest.ManagedFeatureStore ManagedFeatureStore} diff --git a/solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java b/solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java index 4af3b50af74..201c6364c40 100644 --- a/solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java +++ b/solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java @@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory; * commit points for certain amounts of time to support features such as index replication * or snapshooting directly out of a live index directory. *

      - * NOTE: The {@link #clone()} method returns this in order to make + * NOTE: The {@link #clone()} method returns this in order to make * this {@link IndexDeletionPolicy} instance trackable across {@link IndexWriter} * instantiations. This is correct because each core has its own * {@link IndexDeletionPolicy} and never has more than one open {@link IndexWriter}. diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 2c6b1750fcd..dd1fd1b3980 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -2188,11 +2188,11 @@ public final class SolrCore implements SolrInfoBean, Closeable { * see newSearcher(String name, boolean readOnly). * *

      - * If forceNew==true then + * If forceNew==true then * A new searcher will be opened and registered regardless of whether there is already * a registered searcher or other searchers in the process of being created. *

      - * If forceNew==false then:

        + * If forceNew==false then:
          *
        • If a searcher is already registered, that searcher will be returned
        • *
        • If no searcher is currently registered, but at least one is in the process of being created, then * this call will block until the first searcher is registered
        • @@ -2200,12 +2200,12 @@ public final class SolrCore implements SolrInfoBean, Closeable { * searcher will be created. *
        *

        - * If returnSearcher==true then a {@link RefCounted}<{@link SolrIndexSearcher}> will be returned with + * If returnSearcher==true then a {@link RefCounted}<{@link SolrIndexSearcher}> will be returned with * the reference count incremented. It must be decremented when no longer needed. *

        - * If waitSearcher!=null and a new {@link SolrIndexSearcher} was created, + * If waitSearcher!=null and a new {@link SolrIndexSearcher} was created, * then it is filled in with a Future that will return after the searcher is registered. The Future may be set to - * null in which case the SolrIndexSearcher created has already been registered at the time + * null in which case the SolrIndexSearcher created has already been registered at the time * this method returned. *

        * diff --git a/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java index a7e1ab95d48..5038b6a779b 100644 --- a/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java @@ -37,13 +37,14 @@ import java.util.Set; /** * Provides the ability to specify multiple field types and field names in the same request. Expected parameters: - * + *
        + * * - * - * - * - * - * + * + * + * + * + * * * * diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java index e514c5f41c9..01ea3005a04 100644 --- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java @@ -90,7 +90,8 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo /** * Initializes the {@link org.apache.solr.request.SolrRequestHandler} by creating three {@link org.apache.solr.common.params.SolrParams} named. - *
        table of parameters
        NameTyperequiredDescriptionMulti-valuedNameTyperequiredDescriptionMulti-valued
        analysis.fieldname
        + *
        + * * * * diff --git a/solr/core/src/java/org/apache/solr/handler/export/PriorityQueue.java b/solr/core/src/java/org/apache/solr/handler/export/PriorityQueue.java index 1552060eeda..00f9986fe37 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/PriorityQueue.java +++ b/solr/core/src/java/org/apache/solr/handler/export/PriorityQueue.java @@ -71,7 +71,7 @@ public abstract class PriorityQueue { /** Determines the ordering of objects in this priority queue. Subclasses * must define this one method. - * @return true iff parameter a is less than parameter b. + * @return true iff parameter a is less than parameter b. */ protected abstract boolean lessThan(T a, T b); diff --git a/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java index e39cfb7d219..c7f13ab043e 100644 --- a/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java @@ -111,12 +111,12 @@ import org.apache.lucene.util.NumericUtils; * In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records * and a uniform value distribution).

        * - *

        Precision Step

        + *

        Precision Step

        *

        You can choose any precisionStep when encoding values. * Lower step values mean more precisions and so more terms in index (and index gets larger). The number * of indexed terms per value is (those are generated by {@link org.apache.solr.legacy.LegacyNumericTokenStream}): *

        - *   indexedTermsPerValue = ceil(bitsPerValue / precisionStep) + *   indexedTermsPerValue = ceil(bitsPerValue / precisionStep) *

        * As the lower precision terms are shared by many values, the additional terms only * slightly grow the term dictionary (approx. 7% for precisionStep=4), but have a larger diff --git a/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java index 52fae9c8171..eb20ce24d5a 100644 --- a/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java @@ -379,12 +379,12 @@ public final class LegacyNumericUtils { /** * Filters the given {@link TermsEnum} by accepting only prefix coded 64 bit - * terms with a shift value of 0. + * terms with a shift value of 0. * * @param termsEnum * the terms enum to filter * @return a filtered {@link TermsEnum} that only returns prefix coded 64 bit - * terms with a shift value of 0. + * terms with a shift value of 0. */ public static TermsEnum filterPrefixCodedLongs(TermsEnum termsEnum) { return new SeekingNumericFilteredTermsEnum(termsEnum) { @@ -398,12 +398,12 @@ public final class LegacyNumericUtils { /** * Filters the given {@link TermsEnum} by accepting only prefix coded 32 bit - * terms with a shift value of 0. + * terms with a shift value of 0. * * @param termsEnum * the terms enum to filter * @return a filtered {@link TermsEnum} that only returns prefix coded 32 bit - * terms with a shift value of 0. + * terms with a shift value of 0. 
*/ public static TermsEnum filterPrefixCodedInts(TermsEnum termsEnum) { return new SeekingNumericFilteredTermsEnum(termsEnum) { diff --git a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java index 378dee82e62..3a4a94d1b07 100644 --- a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java +++ b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java @@ -34,7 +34,7 @@ import org.apache.solr.search.SolrReturnFields; * the response to a query request. * *

        - * Note On Returnable Data...
        + * Note On Returnable Data...
        * A SolrQueryResponse may contain the following types of * Objects generated by the SolrRequestHandler that processed * the request. @@ -305,7 +305,7 @@ public class SolrQueryResponse { * * @param name the name of the response header whose value to return * @return the value of the response header with the given name, - * or null if no header with the given name has been set + * or null if no header with the given name has been set * on this response */ public String getHttpHeader(String name) { @@ -334,7 +334,7 @@ public class SolrQueryResponse { * the specified name).

        * * @param name the name of the response header to remove - * @return the value of the removed entry or null if no + * @return the value of the removed entry or null if no * value is found for the given header name */ public String removeHttpHeader(String name) { diff --git a/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java b/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java index 931634fcc87..cb234d28993 100644 --- a/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java +++ b/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java @@ -30,7 +30,8 @@ import org.w3c.dom.Element; * attributes and the defaults if optional attributes are omitted. For more * detail on what each of the attributes actually do, consult the documentation * for {@link org.apache.solr.legacy.LegacyNumericRangeQuery}: - *
        table of parameters
        NameDescription
        defaultsContains all of the named arguments contained within the list element named "defaults".
        appendsContains all of the named arguments contained within the list element named "appends".
        + *
        + * * * * @@ -45,13 +46,13 @@ import org.w3c.dom.Element; * * * - * + * * * * * * - * + * * * * @@ -82,8 +83,8 @@ import org.w3c.dom.Element; *
        supported attributes
        Attribute nameValues
        lowerTermSpecified by typeSpecified by typeNoNull
        upperTermSpecified by typeSpecified by typeNoNull
        *

        * A {@link ParserException} will be thrown if an error occurs parsing the - * supplied <tt>lowerTerm</tt> or <tt>upperTerm</tt> into the numeric type - * specified by <tt>type</tt>. + * supplied <code>lowerTerm</code> or <code>upperTerm</code> into the numeric type + * specified by <code>type</code>. * @deprecated Index with points and use {@link PointRangeQueryBuilder} instead */ @Deprecated diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/JDBCStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/JDBCStream.java index 35d23ebb98b..ea4071501f4 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/JDBCStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/JDBCStream.java @@ -62,12 +62,16 @@ import static org.apache.solr.common.params.CommonParams.SORT; * The exception are {@link Types#DATE}, {@link Types#TIME} or {@link Types#TIMESTAMP} * which are determined by the JDBC type. * - * + *
        + * + * * * * * * + * + * * * * @@ -113,6 +117,7 @@ import static org.apache.solr.common.params.CommonParams.SORT; * * * + * *
        Supported Java Types
        Java or JDBC TypeTuple TypeNotes
        BooleanBooleanStringSee {@link DateTimeFormatter#ISO_INSTANT}
        * * @since 6.0.0 diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java index 860f3d5fea2..79305a59e16 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java @@ -159,8 +159,8 @@ public class SolrInputDocument extends SolrDocumentBasename, or - * null if there was no field for key. + * @return the previous field with name, or + * null if there was no field for key. */ public SolrInputField removeField(String name) { return _fields.remove( name );