mirror of https://github.com/apache/lucene.git
LUCENE-3902: add javadoc
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1334769 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
41796ae9e2
commit
4c151d54e0
|
@ -90,6 +90,10 @@ public class DocMaker implements Closeable {
|
|||
private Random r;
|
||||
private int updateDocIDLimit;
|
||||
|
||||
/**
|
||||
* Document state, supports reuse of field instances
|
||||
* across documents (see <code>reuseFields</code> parameter).
|
||||
*/
|
||||
protected static class DocState {
|
||||
|
||||
private final Map<String,Field> fields;
|
||||
|
|
|
@ -20,6 +20,9 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Utility class for encoding and decoding HTML entities.
|
||||
*/
|
||||
public class Entities {
|
||||
static final Map<String,String> decoder = new HashMap<String,String>(300);
|
||||
static final String[] encoder = new String[0x100];
|
||||
|
|
|
@ -5,6 +5,9 @@ import java.io.*;
|
|||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Basic html parser (for demo/testing purposes only!)
|
||||
*/
|
||||
public class HTMLParser implements HTMLParserConstants {
|
||||
public static int SUMMARY_LENGTH = 200;
|
||||
|
||||
|
|
|
@ -32,6 +32,9 @@ import java.io.*;
|
|||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Basic html parser (for demo/testing purposes only!)
|
||||
*/
|
||||
public class HTMLParser {
|
||||
public static int SUMMARY_LENGTH = 200;
|
||||
|
||||
|
|
|
@ -17,19 +17,22 @@ package org.apache.lucene.benchmark.byTask.feeds.demohtml;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
* Utility class storing a set of commonly used HTML tags.
|
||||
*/
|
||||
public final class Tags {
|
||||
|
||||
/**
|
||||
* Contains all tags for which whitespace has to be inserted for proper tokenization.
|
||||
*/
|
||||
public static final Set<String> WS_ELEMS = Collections.synchronizedSet(new HashSet<String>());
|
||||
public static final Set<String> WS_ELEMS;
|
||||
|
||||
static{
|
||||
WS_ELEMS = new HashSet<String>();
|
||||
WS_ELEMS.add("<hr");
|
||||
WS_ELEMS.add("<hr/"); // note that "<hr />" does not need to be listed explicitly
|
||||
WS_ELEMS.add("<br");
|
||||
|
|
|
@ -21,6 +21,9 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
/**
|
||||
* Abstract class for benchmarking highlighting performance
|
||||
*/
|
||||
public abstract class BenchmarkHighlighter {
|
||||
public abstract int doHighlight( IndexReader reader, int doc, String field,
|
||||
Document document, Analyzer analyzer, String text ) throws Exception ;
|
||||
|
|
|
@ -36,6 +36,13 @@ import org.apache.lucene.util.Version;
|
|||
* </p>
|
||||
*/
|
||||
public class NewCollationAnalyzerTask extends PerfTask {
|
||||
/**
|
||||
* Different Collation implementations: currently
|
||||
* limited to what is provided in the JDK and ICU.
|
||||
*
|
||||
* @see <a href="http://site.icu-project.org/charts/collation-icu4j-sun">
|
||||
* Comparison of implementations</a>
|
||||
*/
|
||||
public enum Implementation {
|
||||
JDK("org.apache.lucene.collation.CollationKeyAnalyzer",
|
||||
"java.text.Collator"),
|
||||
|
|
|
@ -201,16 +201,15 @@
|
|||
<check-missing-javadocs dir="build/docs/analyzers-common" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-icu" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-morfologik" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-smartcn" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
|
||||
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
|
||||
<!-- benchmark: problems -->
|
||||
<check-missing-javadocs dir="build/docs/benchmark" level="class"/>
|
||||
<!-- core: problems -->
|
||||
<check-missing-javadocs dir="build/docs/demo" level="class"/>
|
||||
<!-- facet: problems -->
|
||||
<check-missing-javadocs dir="build/docs/facet" level="class"/>
|
||||
<!-- grouping: problems -->
|
||||
<!-- highlighter: problems -->
|
||||
<check-missing-javadocs dir="build/docs/join" level="class"/>
|
||||
|
@ -218,7 +217,7 @@
|
|||
<check-missing-javadocs dir="build/docs/misc" level="class"/>
|
||||
<!-- queries: problems -->
|
||||
<!-- queryparser: problems -->
|
||||
<!-- sandbox: problems -->
|
||||
<check-missing-javadocs dir="build/docs/sandbox" level="class"/>
|
||||
<!-- spatial: problems -->
|
||||
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
|
||||
<!-- test-framework: problems -->
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.PayloadProcessorProvider;
|
||||
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor; // javadocs
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -121,6 +122,10 @@ public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link ReaderPayloadProcessor} that processes
|
||||
* facet ordinals according to the passed in {@link FacetIndexingParams}.
|
||||
*/
|
||||
public static class FacetsDirPayloadProcessor extends ReaderPayloadProcessor {
|
||||
|
||||
private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
|
||||
|
|
|
@ -27,6 +27,11 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
|||
*/
|
||||
public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
||||
|
||||
/**
|
||||
* Determines cache type.
|
||||
* For guaranteed correctness - not relying on the absence of collisions in the hash
|
||||
* function, LRU_STRING should be used.
|
||||
*/
|
||||
public enum LRUType {
  /** Hash-based variant; per the enclosing Javadoc, correctness relies on the hash function having no collisions. */
  LRU_HASHED,
  /** String-based variant; per the enclosing Javadoc, the one to use for guaranteed correctness. */
  LRU_STRING
}
|
||||
|
||||
private NameIntCacheLRU cache;
|
||||
|
|
|
@ -26,6 +26,15 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Filter to remove duplicate values from search results.
|
||||
* <p>
|
||||
* WARNING: for this to work correctly, you may have to wrap
|
||||
* your reader as it cannot currently deduplicate across different
|
||||
* index segments.
|
||||
*
|
||||
* @see SlowCompositeReaderWrapper
|
||||
*/
|
||||
public class DuplicateFilter extends Filter {
|
||||
// TODO: make duplicate filter aware of ReaderContext such that we can
|
||||
// filter duplicates across segments
|
||||
|
@ -45,7 +54,7 @@ public class DuplicateFilter extends Filter {
|
|||
* for documents that contain the given field and are identified as non-duplicates.
|
||||
* <p/>
|
||||
* "Fast" processing sets all bits to true then unsets all duplicate docs found for the
|
||||
* given field. This approach avoids the need to read TermDocs for terms that are seen
|
||||
* given field. This approach avoids the need to read DocsEnum for terms that are seen
|
||||
* to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
|
||||
* faster approach, the downside is that the bitsets produced will include bits set for
|
||||
* documents that do not actually contain the field given.
|
||||
|
|
Loading…
Reference in New Issue