LUCENE-3902: add javadoc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1334769 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-05-06 21:06:45 +00:00
parent 41796ae9e2
commit 4c151d54e0
11 changed files with 51 additions and 7 deletions

View File

@ -90,6 +90,10 @@ public class DocMaker implements Closeable {
private Random r;
private int updateDocIDLimit;
/**
* Document state, supports reuse of field instances
* across documents (see <code>reuseFields</code> parameter).
*/
protected static class DocState {
private final Map<String,Field> fields;

View File

@ -20,6 +20,9 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
import java.util.HashMap;
import java.util.Map;
/**
* Utility class for encoding and decoding HTML entities.
*/
public class Entities {
static final Map<String,String> decoder = new HashMap<String,String>(300);
static final String[] encoder = new String[0x100];

View File

@ -5,6 +5,9 @@ import java.io.*;
import java.util.Locale;
import java.util.Properties;
/**
* Basic html parser (for demo/testing purposes only!)
*/
public class HTMLParser implements HTMLParserConstants {
public static int SUMMARY_LENGTH = 200;

View File

@ -32,6 +32,9 @@ import java.io.*;
import java.util.Locale;
import java.util.Properties;
/**
* Basic html parser (for demo/testing purposes only!)
*/
public class HTMLParser {
public static int SUMMARY_LENGTH = 200;

View File

@ -17,19 +17,22 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
* limitations under the License.
*/
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
/**
* Utility class storing set of commonly-used html tags.
*/
public final class Tags {
/**
* contains all tags for which whitespaces have to be inserted for proper tokenization
*/
public static final Set<String> WS_ELEMS = Collections.synchronizedSet(new HashSet<String>());
public static final Set<String> WS_ELEMS;
static{
WS_ELEMS = new HashSet<String>();
WS_ELEMS.add("<hr");
WS_ELEMS.add("<hr/"); // note that "<hr />" does not need to be listed explicitly
WS_ELEMS.add("<br");

View File

@ -21,6 +21,9 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
/**
* Abstract class for benchmarking highlighting performance
*/
public abstract class BenchmarkHighlighter {
public abstract int doHighlight( IndexReader reader, int doc, String field,
Document document, Analyzer analyzer, String text ) throws Exception ;

View File

@ -36,6 +36,13 @@ import org.apache.lucene.util.Version;
* </p>
*/
public class NewCollationAnalyzerTask extends PerfTask {
/**
* Different Collation implementations: currently
* limited to what is provided in the JDK and ICU.
*
* @see <a href="http://site.icu-project.org/charts/collation-icu4j-sun">
* Comparison of implementations</a>
*/
public enum Implementation {
JDK("org.apache.lucene.collation.CollationKeyAnalyzer",
"java.text.Collator"),

View File

@ -201,16 +201,15 @@
<check-missing-javadocs dir="build/docs/analyzers-common" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-icu" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-morfologik" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-smartcn" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
<!-- benchmark: problems -->
<check-missing-javadocs dir="build/docs/benchmark" level="class"/>
<!-- core: problems -->
<check-missing-javadocs dir="build/docs/demo" level="class"/>
<!-- facet: problems -->
<check-missing-javadocs dir="build/docs/facet" level="class"/>
<!-- grouping: problems -->
<!-- highlighter: problems -->
<check-missing-javadocs dir="build/docs/join" level="class"/>
@ -218,7 +217,7 @@
<check-missing-javadocs dir="build/docs/misc" level="class"/>
<!-- queries: problems -->
<!-- queryparser: problems -->
<!-- sandbox: problems -->
<check-missing-javadocs dir="build/docs/sandbox" level="class"/>
<!-- spatial: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
<!-- test-framework: problems -->

View File

@ -9,6 +9,7 @@ import java.util.Map;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.PayloadProcessorProvider;
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor; // javadocs
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -121,6 +122,10 @@ public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
return null;
}
/**
* {@link ReaderPayloadProcessor} that processes
* facet ordinals according to the passed in {@link FacetIndexingParams}.
*/
public static class FacetsDirPayloadProcessor extends ReaderPayloadProcessor {
private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);

View File

@ -27,6 +27,11 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
*/
public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
/**
* Determines cache type.
* For guaranteed correctness - not relying on no-collisions in the hash
* function, LRU_STRING should be used.
*/
public enum LRUType { LRU_HASHED, LRU_STRING }
private NameIntCacheLRU cache;

View File

@ -26,6 +26,15 @@ import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
/**
* Filter to remove duplicate values from search results.
* <p>
* WARNING: for this to work correctly, you may have to wrap
* your reader as it cannot current deduplicate across different
* index segments.
*
* @see SlowCompositeReaderWrapper
*/
public class DuplicateFilter extends Filter {
// TODO: make duplicate filter aware of ReaderContext such that we can
// filter duplicates across segments
@ -45,7 +54,7 @@ public class DuplicateFilter extends Filter {
* for documents that contain the given field and are identified as none-duplicates.
* <p/>
* "Fast" processing sets all bits to true then unsets all duplicate docs found for the
* given field. This approach avoids the need to read TermDocs for terms that are seen
* given field. This approach avoids the need to read DocsEnum for terms that are seen
* to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
* faster approach , the downside is that bitsets produced will include bits set for
* documents that do not actually contain the field given.