mirror of https://github.com/apache/lucene.git
LUCENE-3902: add javadoc
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1334769 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 41796ae9e2
commit 4c151d54e0
@@ -90,6 +90,10 @@ public class DocMaker implements Closeable {
   private Random r;
   private int updateDocIDLimit;
 
+  /**
+   * Document state, supports reuse of field instances
+   * across documents (see <code>reuseFields</code> parameter).
+   */
   protected static class DocState {
 
     private final Map<String,Field> fields;
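The field reuse that this new javadoc describes can be illustrated with a small standalone sketch; it is not the benchmark's actual DocState code, and it assumes the Lucene 4.x field API (StringField, TextField, Field.setStringValue). The field names "title" and "body" are made up for the example.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;

// Illustration only: reuse Field instances across documents instead of
// allocating new ones for every addDocument() call.
class ReusableDoc {
  private final Document doc = new Document();
  private final Field title = new StringField("title", "", Field.Store.YES);
  private final Field body = new TextField("body", "", Field.Store.NO);

  ReusableDoc() {
    doc.add(title);
    doc.add(body);
  }

  // Update the reused field instances, then index the same Document object.
  void index(IndexWriter writer, String titleValue, String bodyValue) throws Exception {
    title.setStringValue(titleValue);
    body.setStringValue(bodyValue);
    writer.addDocument(doc);
  }
}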
@@ -20,6 +20,9 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
 import java.util.HashMap;
 import java.util.Map;
 
+/**
+ * Utility class for encoding and decoding HTML entities.
+ */
 public class Entities {
   static final Map<String,String> decoder = new HashMap<String,String>(300);
   static final String[] encoder = new String[0x100];
@@ -5,6 +5,9 @@ import java.io.*;
 import java.util.Locale;
 import java.util.Properties;
 
+/**
+ * Basic html parser (for demo/testing purposes only!)
+ */
 public class HTMLParser implements HTMLParserConstants {
   public static int SUMMARY_LENGTH = 200;
 
@@ -32,6 +32,9 @@ import java.io.*;
 import java.util.Locale;
 import java.util.Properties;
 
+/**
+ * Basic html parser (for demo/testing purposes only!)
+ */
 public class HTMLParser {
   public static int SUMMARY_LENGTH = 200;
 
@@ -17,19 +17,22 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
  * limitations under the License.
  */
 
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
 
+/**
+ * Utility class storing set of commonly-used html tags.
+ */
 public final class Tags {
 
   /**
   * contains all tags for which whitespaces have to be inserted for proper tokenization
   */
-  public static final Set<String> WS_ELEMS = Collections.synchronizedSet(new HashSet<String>());
+  public static final Set<String> WS_ELEMS;
 
   static{
+    WS_ELEMS = new HashSet<String>();
    WS_ELEMS.add("<hr");
    WS_ELEMS.add("<hr/"); // note that "<hr />" does not need to be listed explicitly
    WS_ELEMS.add("<br");
@@ -21,6 +21,9 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 
+/**
+ * Abstract class for benchmarking highlighting performance
+ */
 public abstract class BenchmarkHighlighter {
   public abstract int doHighlight( IndexReader reader, int doc, String field,
       Document document, Analyzer analyzer, String text ) throws Exception ;
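Since the class only declares one abstract callback, a hypothetical subclass may help show the contract: doHighlight receives the already-loaded Document and the raw field text, and returns a count of highlights produced. This is a placeholder, not one of the benchmark's real highlighter implementations, and it assumes BenchmarkHighlighter lives in org.apache.lucene.benchmark.byTask.tasks.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.tasks.BenchmarkHighlighter; // assumed package
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

// Placeholder implementation: "highlights" are just counted occurrences of a
// fixed term in the field text. A real subclass would drive an actual
// highlighter here and return the number of fragments it produced.
public class CountingBenchmarkHighlighter extends BenchmarkHighlighter {
  private final String term;

  public CountingBenchmarkHighlighter(String term) {
    this.term = term;
  }

  @Override
  public int doHighlight(IndexReader reader, int doc, String field,
      Document document, Analyzer analyzer, String text) throws Exception {
    int count = 0;
    for (int i = text.indexOf(term); i >= 0; i = text.indexOf(term, i + term.length())) {
      count++;
    }
    return count;
  }
}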
@@ -36,6 +36,13 @@ import org.apache.lucene.util.Version;
  * </p>
  */
 public class NewCollationAnalyzerTask extends PerfTask {
+  /**
+   * Different Collation implementations: currently
+   * limited to what is provided in the JDK and ICU.
+   *
+   * @see <a href="http://site.icu-project.org/charts/collation-icu4j-sun">
+   *      Comparison of implementations</a>
+   */
   public enum Implementation {
     JDK("org.apache.lucene.collation.CollationKeyAnalyzer",
         "java.text.Collator"),
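The JDK side of the enum above boils down to java.text.Collator, which can be shown without any Lucene-specific classes. A small sketch (the locale and strings are arbitrary examples); a CollationKeyAnalyzer-style approach indexes the collation key bytes so that binary term order matches the collator's sort order.

import java.text.CollationKey;
import java.text.Collator;
import java.util.Locale;

public class CollationDemo {
  public static void main(String[] args) {
    // Locale-sensitive comparison: with a German collator, "Ä" sorts with "A",
    // unlike a plain code-point comparison.
    Collator collator = Collator.getInstance(Locale.GERMAN);
    System.out.println(collator.compare("Äpfel", "Zebra")); // negative: Äpfel < Zebra

    // Collation keys are byte arrays whose binary order follows the collator.
    CollationKey key = collator.getCollationKey("Äpfel");
    System.out.println(key.toByteArray().length);
  }
}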
@@ -201,16 +201,15 @@
     <check-missing-javadocs dir="build/docs/analyzers-common" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-icu" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
-    <check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-morfologik" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-phonetic" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-smartcn" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
     <check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
-    <!-- benchmark: problems -->
+    <check-missing-javadocs dir="build/docs/benchmark" level="class"/>
     <!-- core: problems -->
     <check-missing-javadocs dir="build/docs/demo" level="class"/>
-    <!-- facet: problems -->
+    <check-missing-javadocs dir="build/docs/facet" level="class"/>
     <!-- grouping: problems -->
     <!-- highlighter: problems -->
     <check-missing-javadocs dir="build/docs/join" level="class"/>
@@ -218,7 +217,7 @@
     <check-missing-javadocs dir="build/docs/misc" level="class"/>
     <!-- queries: problems -->
     <!-- queryparser: problems -->
-    <!-- sandbox: problems -->
+    <check-missing-javadocs dir="build/docs/sandbox" level="class"/>
     <!-- spatial: problems -->
     <check-missing-javadocs dir="build/docs/suggest" level="class"/>
     <!-- test-framework: problems -->
@@ -9,6 +9,7 @@ import java.util.Map;
 
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.PayloadProcessorProvider;
+import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor; // javadocs
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
@@ -121,6 +122,10 @@ public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
     return null;
   }
 
+  /**
+   * {@link ReaderPayloadProcessor} that processes
+   * facet ordinals according to the passed in {@link FacetIndexingParams}.
+   */
   public static class FacetsDirPayloadProcessor extends ReaderPayloadProcessor {
 
     private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
@@ -27,6 +27,11 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
  */
 public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
 
+  /**
+   * Determines cache type.
+   * For guaranteed correctness - not relying on no-collisions in the hash
+   * function, LRU_STRING should be used.
+   */
   public enum LRUType { LRU_HASHED, LRU_STRING }
 
   private NameIntCacheLRU cache;
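For context, a hedged sketch of how the enum above might be selected when constructing the cache. It assumes LruTaxonomyWriterCache sits in org.apache.lucene.facet.taxonomy.writercache.lru and exposes a constructor taking a capacity and an LRUType; check the actual class for the exact signatures.

import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache.LRUType;

// Assumed constructor: LruTaxonomyWriterCache(int cacheSize, LRUType lruType).
public class CacheChoice {
  public static LruTaxonomyWriterCache newSafeCache(int size) {
    // LRU_STRING keeps the full category strings: larger, but correctness does
    // not depend on the hash function being collision-free.
    return new LruTaxonomyWriterCache(size, LRUType.LRU_STRING);
  }

  public static LruTaxonomyWriterCache newCompactCache(int size) {
    // LRU_HASHED keeps hashes of category paths: smaller, but relies on
    // no collisions in practice.
    return new LruTaxonomyWriterCache(size, LRUType.LRU_HASHED);
  }
}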
@@ -26,6 +26,15 @@ import org.apache.lucene.util.FixedBitSet;
 
 import java.io.IOException;
 
+/**
+ * Filter to remove duplicate values from search results.
+ * <p>
+ * WARNING: for this to work correctly, you may have to wrap
+ * your reader as it cannot currently deduplicate across different
+ * index segments.
+ *
+ * @see SlowCompositeReaderWrapper
+ */
 public class DuplicateFilter extends Filter {
 // TODO: make duplicate filter aware of ReaderContext such that we can
 // filter duplicates across segments
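The warning above is the interesting part: DuplicateFilter works per segment, so a multi-segment index has to be presented as a single atomic reader first. A hedged usage sketch; it assumes SlowCompositeReaderWrapper.wrap(...), the single-argument DuplicateFilter(String) constructor, and DuplicateFilter's sandbox package, and the "url"/"content" field names and query are made up.

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.sandbox.queries.DuplicateFilter; // assumed package
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class DuplicateFilterDemo {
  public static TopDocs searchDistinctUrls(Directory dir) throws Exception {
    // Flatten the (possibly multi-segment) index into one atomic reader so
    // duplicates can be detected across segment boundaries.
    AtomicReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
    IndexSearcher searcher = new IndexSearcher(reader);

    // Keep only one hit per distinct value of the hypothetical "url" field.
    DuplicateFilter filter = new DuplicateFilter("url");
    return searcher.search(new TermQuery(new Term("content", "lucene")), filter, 10);
  }
}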
@@ -45,7 +54,7 @@ public class DuplicateFilter extends Filter {
    * for documents that contain the given field and are identified as non-duplicates.
    * <p/>
    * "Fast" processing sets all bits to true then unsets all duplicate docs found for the
-   * given field. This approach avoids the need to read TermDocs for terms that are seen
+   * given field. This approach avoids the need to read DocsEnum for terms that are seen
    * to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
    * faster approach, the downside is that bitsets produced will include bits set for
    * documents that do not actually contain the field given.
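The strategy this javadoc describes (start with every bit set, clear bits only for terms whose document frequency is greater than one) can be sketched outside Lucene's postings machinery. The sketch below uses plain java.util.BitSet and a toy postings map; it illustrates the trade-off, not the filter's actual implementation.

import java.util.BitSet;
import java.util.List;
import java.util.Map;

public class FastDuplicateBits {
  /**
   * Returns a bitset with one bit per document. Bits start out all set; for
   * every term that appears in more than one document (a duplicate value),
   * all but the last posting are cleared. Terms with docFreq == 1 are never
   * visited, which is the point of the "fast" variant, at the cost of leaving
   * bits set for documents that do not contain the field at all.
   */
  public static BitSet fastBits(Map<String, List<Integer>> postings, int maxDoc) {
    BitSet bits = new BitSet(maxDoc);
    bits.set(0, maxDoc); // start with every document marked as a keeper
    for (List<Integer> docs : postings.values()) {
      if (docs.size() > 1) {            // only duplicate terms are walked
        for (int i = 0; i < docs.size() - 1; i++) {
          bits.clear(docs.get(i));      // keep only the last occurrence
        }
      }
    }
    return bits;
  }
}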