diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
index de773746b37..64fc3e103b2 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
@@ -90,6 +90,10 @@ public class DocMaker implements Closeable {
   private Random r;
   private int updateDocIDLimit;
 
+  /**
+   * Document state; supports reuse of field instances
+   * across documents (see the reuseFields parameter).
+   */
   protected static class DocState {
 
     private final Map<String,Field> fields;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Entities.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Entities.java
index a1e99a315b9..a2c8c329664 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Entities.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Entities.java
@@ -20,6 +20,9 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
 import java.util.HashMap;
 import java.util.Map;
 
+/**
+ * Utility class for encoding and decoding HTML entities.
+ */
 public class Entities {
   static final Map<String,String> decoder = new HashMap<String,String>(300);
   static final String[] encoder = new String[0x100];
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
index 092a85eeff1..4659f96b87f 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
@@ -5,6 +5,9 @@
 import java.io.*;
 import java.util.Locale;
 import java.util.Properties;
 
+/**
+ * Basic HTML parser (for demo/testing purposes only!)
+ */
 public class HTMLParser implements HTMLParserConstants {
   public static int SUMMARY_LENGTH = 200;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
index 7aff98e90de..43b4eda54ad 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
@@ -32,6 +32,9 @@
 import java.io.*;
 import java.util.Locale;
 import java.util.Properties;
 
+/**
+ * Basic HTML parser (for demo/testing purposes only!)
+ */
 public class HTMLParser {
   public static int SUMMARY_LENGTH = 200;
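For orientation, a minimal sketch of how this demo parser is typically driven, assuming the classic demo API (a JavaCC-generated InputStream constructor plus getTitle/getSummary/getReader); the exact signatures may differ by version:

```java
import java.io.FileInputStream;
import java.io.Reader;

import org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser;

public class HtmlParseDemo {
  public static void main(String[] args) throws Exception {
    // JavaCC generates stream/Reader constructors for the parser.
    HTMLParser parser = new HTMLParser(new FileInputStream(args[0]));
    System.out.println("title:   " + parser.getTitle());
    System.out.println("summary: " + parser.getSummary());
    // Body text with the markup stripped, produced while parsing.
    Reader text = parser.getReader();
    for (int c = text.read(); c != -1; c = text.read()) {
      System.out.print((char) c);
    }
  }
}
```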
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Tags.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Tags.java
index 276d2c52b68..b8091280f9f 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Tags.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Tags.java
@@ -17,19 +17,22 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
  * limitations under the License.
  */
 
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
+/**
+ * Utility class storing a set of commonly-used HTML tags.
+ */
 public final class Tags {
 
   /**
    * contains all tags for which whitespaces have to be inserted for proper tokenization
    */
-  public static final Set<String> WS_ELEMS = Collections.synchronizedSet(new HashSet<String>());
+  public static final Set<String> WS_ELEMS;
 
   static{
+    WS_ELEMS = new HashSet<String>();
     WS_ELEMS.add("<hr>");
     WS_ELEMS.add("<hr/>");  // note that "<hr />" does not need to be listed explicitly
     WS_ELEMS.add("<br>");
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java
@@ ... @@
  */
 public class NewCollationAnalyzerTask extends PerfTask {
 
+  /**
+   * Different Collation implementations: currently
+   * limited to what is provided in the JDK and ICU.
+   *
+   * @see <a href="...">Comparison of implementations</a>
+   */
   public enum Implementation {
     JDK("org.apache.lucene.collation.CollationKeyAnalyzer",
         "java.text.Collator"),
diff --git a/lucene/build.xml b/lucene/build.xml
index ba3e349e01a..f37f4288855 100644
--- a/lucene/build.xml
+++ b/lucene/build.xml
@@ -201,16 +201,15 @@
-
-
+
-
+
@@ -218,7 +217,7 @@
-
+
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java b/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
index 4c3991baa0d..219160f2185 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
@@ -9,6 +9,7 @@ import java.util.Map;
 
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.PayloadProcessorProvider;
+import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor; // javadocs
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
@@ -121,6 +122,10 @@ public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
     return null;
   }
 
+  /**
+   * {@link ReaderPayloadProcessor} that processes
+   * facet ordinals according to the passed-in {@link FacetIndexingParams}.
+   */
   public static class FacetsDirPayloadProcessor extends ReaderPayloadProcessor {
 
     private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
index ecd05555432..af34d42dbc3 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
@@ -27,6 +27,11 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
  */
 public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
 
+  /**
+   * Determines the cache type.
+   * For guaranteed correctness (without relying on the hash function
+   * being collision-free), LRU_STRING should be used.
+   */
   public enum LRUType { LRU_HASHED, LRU_STRING }
 
   private NameIntCacheLRU cache;
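A usage sketch for the cache type documented above; the cache size and the DirectoryTaxonomyWriter wiring are hypothetical, matching the 4.x facet module's constructors:

```java
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache.LRUType;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TaxoCacheDemo {
  public static void main(String[] args) throws Exception {
    Directory taxoDir = new RAMDirectory();
    // LRU_STRING caches full category-path strings, so a hash collision
    // cannot silently map two different categories to the same ordinal;
    // LRU_HASHED is cheaper but trusts the hash function.
    LruTaxonomyWriterCache cache =
        new LruTaxonomyWriterCache(4096, LRUType.LRU_STRING);
    DirectoryTaxonomyWriter writer =
        new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, cache);
    writer.addCategory(new CategoryPath("Author/Jane Doe", '/'));
    writer.close();
  }
}
```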

+ * WARNING: for this to work correctly, you may have to wrap
+ * your reader as it cannot currently deduplicate across different
+ * index segments.
+ *
+ * @see SlowCompositeReaderWrapper
+ */
 public class DuplicateFilter extends Filter {
   // TODO: make duplicate filter aware of ReaderContext such that we can
   // filter duplicates across segments
@@ -45,7 +54,7 @@ public class DuplicateFilter extends Filter {
    * for documents that contain the given field and are identified as none-duplicates.

* "Fast" processing sets all bits to true then unsets all duplicate docs found for the - * given field. This approach avoids the need to read TermDocs for terms that are seen + * given field. This approach avoids the need to read DocsEnum for terms that are seen * to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially * faster approach , the downside is that bitsets produced will include bits set for * documents that do not actually contain the field given.