git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388035 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-20 14:07:10 +00:00
parent 3a0abcb07a
commit e776376727
21 changed files with 93 additions and 11 deletions

View File

@ -2,7 +2,7 @@ package org.apache.lucene.analysis.icu;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -26,6 +26,9 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/** Factory for {@link ICUFoldingFilter} */
public class ICUFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUFoldingFilterFactory() {}
@Override
public TokenStream create(TokenStream input) {
return new ICUFoldingFilter(input);

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -48,6 +48,9 @@ import com.ibm.icu.text.UnicodeSet;
public class ICUNormalizer2FilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private Normalizer2 normalizer;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUNormalizer2FilterFactory() {}
// TODO: support custom normalization
@Override
public void init(Map<String,String> args) {

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.icu.ICUTransformFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -40,6 +40,9 @@ import com.ibm.icu.text.Transliterator;
public class ICUTransformFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private Transliterator transliterator;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUTransformFilterFactory() {}
// TODO: add support for custom rules
@Override
public void init(Map<String,String> args) {

View File

@ -75,6 +75,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
private static final BreakIterator myanmarBreakIterator =
readBreakIterator("Myanmar.brk");
/**
* Creates a new config. This object is lightweight, but the first
* time the class is referenced, breakiterators will be initialized.
*/
public DefaultICUTokenizerConfig() {}
@Override
public BreakIterator getBreakIterator(int script) {
switch(script) {

View File

@ -25,6 +25,12 @@ import com.ibm.icu.text.BreakIterator;
* @lucene.experimental
*/
public abstract class ICUTokenizerConfig {
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
public ICUTokenizerConfig() {}
/** Return a breakiterator capable of processing a given script. */
public abstract BreakIterator getBreakIterator(int script);
/** Return a token type value for a given script and BreakIterator

View File

@ -21,10 +21,15 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenizerFactory;
/** Factory for {@link ICUTokenizer} */
public class ICUTokenizerFactory extends TokenizerFactory {
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUTokenizerFactory() {}
// TODO: add support for custom configs
@Override
public Tokenizer create(Reader input) {

View File

@ -73,6 +73,10 @@ public class LaoBreakIterator extends BreakIterator {
laoSet.freeze();
}
/**
* Creates a new iterator, performing the backtracking verification
* across the provided <code>rules</code>.
*/
public LaoBreakIterator(RuleBasedBreakIterator rules) {
this.rules = (RuleBasedBreakIterator) rules.clone();
this.verify = (RuleBasedBreakIterator) rules.clone();

View File

@ -30,6 +30,9 @@ import com.ibm.icu.lang.UScript;
public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable {
private int code = UScript.COMMON;
/** Initializes this attribute with <code>UScript.COMMON</code> */
public ScriptAttributeImpl() {}
/**
 * Returns the value currently stored in this attribute's <code>code</code>
 * field, which starts out as <code>UScript.COMMON</code>.
 */
public int getCode() {
  return this.code;
}

View File

@ -25,6 +25,7 @@ import morfologik.stemming.PolishStemmer.DICTIONARY;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.morfologik.MorfologikFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
@ -51,6 +52,9 @@ public class MorfologikFilterFactory extends TokenFilterFactory {
/** Schema attribute. */
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public MorfologikFilterFactory() {}
/**
* {@inheritDoc}
*/

View File

@ -29,6 +29,9 @@ import org.apache.lucene.util.AttributeImpl;
public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
implements MorphosyntacticTagsAttribute, Cloneable {
/** Initializes this attribute with no tags */
public MorphosyntacticTagsAttributeImpl() {}
/**
* A list of potential tag variants for the current token.
*/

View File

@ -27,6 +27,7 @@ import org.apache.commons.codec.language.bm.PhoneticEngine;
import org.apache.commons.codec.language.bm.RuleType;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
@ -47,6 +48,9 @@ public class BeiderMorseFilterFactory extends TokenFilterFactory {
private PhoneticEngine engine;
private LanguageSet languageSet;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public BeiderMorseFilterFactory() {}
public void init(Map<String,String> args) {
super.init(args);

View File

@ -38,6 +38,10 @@ public final class DoubleMetaphoneFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
/** Creates a DoubleMetaphoneFilter with the specified maximum code length,
* and either adding encoded forms as synonyms (<code>inject=true</code>) or
* replacing them.
*/
public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
super(input);
this.encoder.setMaxCodeLen(maxCodeLength);

View File

@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
@ -36,14 +37,19 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
*/
public class DoubleMetaphoneFilterFactory extends TokenFilterFactory
{
/** parameter name: true if encoded tokens should be added as synonyms */
public static final String INJECT = "inject";
/** parameter name: restricts the length of the phonetic code */
public static final String MAX_CODE_LENGTH = "maxCodeLength";
/** default maxCodeLength if not specified */
public static final int DEFAULT_MAX_CODE_LENGTH = 4;
private boolean inject = true;
private int maxCodeLength = DEFAULT_MAX_CODE_LENGTH;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public DoubleMetaphoneFilterFactory() {}
@Override
public void init(Map<String, String> args) {
super.init(args);

View File

@ -32,13 +32,19 @@ import java.io.IOException;
*/
public final class PhoneticFilter extends TokenFilter
{
/** true if encoded tokens should be added as synonyms */
protected boolean inject = true;
/** phonetic encoder */
protected Encoder encoder = null;
/** captured state, non-null when <code>inject=true</code> and a token is buffered */
protected State save = null;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
/** Creates a PhoneticFilter with the specified encoder, and either
* adding encoded forms as synonyms (<code>inject=true</code>) or
* replacing them.
*/
public PhoneticFilter(TokenStream in, Encoder encoder, boolean inject) {
super(in);
this.encoder = encoder;

View File

@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -61,8 +62,11 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
public class PhoneticFilterFactory extends TokenFilterFactory
implements ResourceLoaderAware
{
/** parameter name: either a short name or a full class name */
public static final String ENCODER = "encoder";
/** parameter name: true if encoded tokens should be added as synonyms */
public static final String INJECT = "inject"; // boolean
/** parameter name: restricts the length of the phonetic code */
public static final String MAX_CODE_LENGTH = "maxCodeLength";
private static final String PACKAGE_CONTAINING_ENCODERS = "org.apache.commons.codec.language.";
@ -83,6 +87,9 @@ public class PhoneticFilterFactory extends TokenFilterFactory
private Class<? extends Encoder> clazz = null;
private Method setMaxCodeLenMethod = null;
private Integer maxCodeLength = null;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public PhoneticFilterFactory() {}
@Override
public void inform(ResourceLoader loader) throws IOException {

View File

@ -21,12 +21,17 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link StempelFilter} using a Polish stemming table.
*/
public class StempelPolishStemFilterFactory extends TokenFilterFactory {
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public StempelPolishStemFilterFactory() {}
/**
 * Wraps <code>input</code> in a {@link StempelFilter} whose
 * {@link StempelStemmer} is built from {@link PolishAnalyzer#getDefaultTable()}.
 *
 * @param input the token stream to be stemmed
 * @return a new {@link StempelFilter} reading from <code>input</code>
 */
@Override
public TokenStream create(TokenStream input) {
  // A fresh stemmer is created per call, exactly as before; only the
  // annotation and documentation are new.
  return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}

View File

@ -74,7 +74,10 @@ public class Compile {
static boolean backward;
static boolean multi;
static Trie trie;
/** Private constructor: this class is not meant to be instantiated. */
private Compile() {}
/**
* Entry point to the Compile application.
* <p>

View File

@ -67,6 +67,9 @@ import java.util.StringTokenizer;
*/
public class DiffIt {
/** Private constructor: this class is not meant to be instantiated. */
private DiffIt() {}
static int get(int i, String s) {
try {
return Integer.parseInt(s.substring(i, i + 1));

View File

@ -376,6 +376,7 @@ public class Trie {
return by.optimize(this);
}
/** Writes debugging info (row, command and cell counts) to the given print stream, starting with <code>prefix</code>. */
public void printInfo(PrintStream out, CharSequence prefix) {
out.println(prefix + "nds " + rows.size() + " cmds " + cmds.size()
+ " cells " + getCells() + " valcells " + getCellsVal() + " pntcells "

View File

@ -235,18 +235,18 @@
can prevent the modules that don't have problems
from getting any worse -->
<check-missing-javadocs dir="build/docs/analyzers-common" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-icu" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-icu" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-morfologik" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-morfologik" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-smartcn" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
<check-missing-javadocs dir="build/docs/benchmark" level="class"/>
<check-missing-javadocs dir="build/docs/classification" level="method"/>
<check-missing-javadocs dir="build/docs/codecs" level="class"/>
<check-missing-javadocs dir="build/docs/core" level="class"/>
<check-missing-javadocs dir="build/docs/demo" level="class"/>
<check-missing-javadocs dir="build/docs/demo" level="method"/>
<check-missing-javadocs dir="build/docs/facet" level="class"/>
<check-missing-javadocs dir="build/docs/grouping" level="class"/>
<check-missing-javadocs dir="build/docs/highlighter" level="class"/>

View File

@ -65,6 +65,9 @@ public class FormBasedXmlQueryDemo extends HttpServlet {
private IndexSearcher searcher;
private Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
/** Public no-argument constructor, for instantiation by the servlet container. */
public FormBasedXmlQueryDemo() {}
@Override
public void init(ServletConfig config) throws ServletException {
super.init(config);