git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1388035 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-20 14:07:10 +00:00
parent 3a0abcb07a
commit e776376727
21 changed files with 93 additions and 11 deletions

View File

@ -2,7 +2,7 @@ package org.apache.lucene.analysis.icu;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.icu.ICUFoldingFilter; import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.MultiTermAwareComponent; import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -26,6 +26,9 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/** Factory for {@link ICUFoldingFilter} */ /** Factory for {@link ICUFoldingFilter} */
public class ICUFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent { public class ICUFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUFoldingFilterFactory() {}
@Override @Override
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new ICUFoldingFilter(input); return new ICUFoldingFilter(input);

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.icu.ICUNormalizer2Filter; import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.MultiTermAwareComponent; import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -48,6 +48,9 @@ import com.ibm.icu.text.UnicodeSet;
public class ICUNormalizer2FilterFactory extends TokenFilterFactory implements MultiTermAwareComponent { public class ICUNormalizer2FilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private Normalizer2 normalizer; private Normalizer2 normalizer;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUNormalizer2FilterFactory() {}
// TODO: support custom normalization // TODO: support custom normalization
@Override @Override
public void init(Map<String,String> args) { public void init(Map<String,String> args) {

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.icu.ICUTransformFilter; import org.apache.lucene.analysis.icu.ICUTransformFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.MultiTermAwareComponent; import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -40,6 +40,9 @@ import com.ibm.icu.text.Transliterator;
public class ICUTransformFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent { public class ICUTransformFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private Transliterator transliterator; private Transliterator transliterator;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUTransformFilterFactory() {}
// TODO: add support for custom rules // TODO: add support for custom rules
@Override @Override
public void init(Map<String,String> args) { public void init(Map<String,String> args) {

View File

@ -75,6 +75,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
private static final BreakIterator myanmarBreakIterator = private static final BreakIterator myanmarBreakIterator =
readBreakIterator("Myanmar.brk"); readBreakIterator("Myanmar.brk");
/**
* Creates a new config. This object is lightweight, but the first
* time the class is referenced, breakiterators will be initialized.
*/
public DefaultICUTokenizerConfig() {}
@Override @Override
public BreakIterator getBreakIterator(int script) { public BreakIterator getBreakIterator(int script) {
switch(script) { switch(script) {

View File

@ -25,6 +25,12 @@ import com.ibm.icu.text.BreakIterator;
* @lucene.experimental * @lucene.experimental
*/ */
public abstract class ICUTokenizerConfig { public abstract class ICUTokenizerConfig {
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
public ICUTokenizerConfig() {}
/** Return a breakiterator capable of processing a given script. */ /** Return a breakiterator capable of processing a given script. */
public abstract BreakIterator getBreakIterator(int script); public abstract BreakIterator getBreakIterator(int script);
/** Return a token type value for a given script and BreakIterator /** Return a token type value for a given script and BreakIterator

View File

@ -21,10 +21,15 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer; import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.analysis.util.TokenizerFactory;
/** Factory for {@link ICUTokenizer} */ /** Factory for {@link ICUTokenizer} */
public class ICUTokenizerFactory extends TokenizerFactory { public class ICUTokenizerFactory extends TokenizerFactory {
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUTokenizerFactory() {}
// TODO: add support for custom configs // TODO: add support for custom configs
@Override @Override
public Tokenizer create(Reader input) { public Tokenizer create(Reader input) {

View File

@ -73,6 +73,10 @@ public class LaoBreakIterator extends BreakIterator {
laoSet.freeze(); laoSet.freeze();
} }
/**
* Creates a new iterator, performing the backtracking verification
* across the provided <code>rules</code>.
*/
public LaoBreakIterator(RuleBasedBreakIterator rules) { public LaoBreakIterator(RuleBasedBreakIterator rules) {
this.rules = (RuleBasedBreakIterator) rules.clone(); this.rules = (RuleBasedBreakIterator) rules.clone();
this.verify = (RuleBasedBreakIterator) rules.clone(); this.verify = (RuleBasedBreakIterator) rules.clone();

View File

@ -30,6 +30,9 @@ import com.ibm.icu.lang.UScript;
public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable { public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable {
private int code = UScript.COMMON; private int code = UScript.COMMON;
/** Initializes this attribute with <code>UScript.COMMON</code> */
public ScriptAttributeImpl() {}
public int getCode() { public int getCode() {
return code; return code;
} }

View File

@ -25,6 +25,7 @@ import morfologik.stemming.PolishStemmer.DICTIONARY;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.morfologik.MorfologikFilter; import org.apache.lucene.analysis.morfologik.MorfologikFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
/** /**
@ -51,6 +52,9 @@ public class MorfologikFilterFactory extends TokenFilterFactory {
/** Schema attribute. */ /** Schema attribute. */
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary"; public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public MorfologikFilterFactory() {}
/** /**
* {@inheritDoc} * {@inheritDoc}
*/ */

View File

@ -29,6 +29,9 @@ import org.apache.lucene.util.AttributeImpl;
public class MorphosyntacticTagsAttributeImpl extends AttributeImpl public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
implements MorphosyntacticTagsAttribute, Cloneable { implements MorphosyntacticTagsAttribute, Cloneable {
/** Initializes this attribute with no tags */
public MorphosyntacticTagsAttributeImpl() {}
/** /**
* A list of potential tag variants for the current token. * A list of potential tag variants for the current token.
*/ */

View File

@ -27,6 +27,7 @@ import org.apache.commons.codec.language.bm.PhoneticEngine;
import org.apache.commons.codec.language.bm.RuleType; import org.apache.commons.codec.language.bm.RuleType;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.BeiderMorseFilter; import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
/** /**
@ -47,6 +48,9 @@ public class BeiderMorseFilterFactory extends TokenFilterFactory {
private PhoneticEngine engine; private PhoneticEngine engine;
private LanguageSet languageSet; private LanguageSet languageSet;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public BeiderMorseFilterFactory() {}
public void init(Map<String,String> args) { public void init(Map<String,String> args) {
super.init(args); super.init(args);

View File

@ -38,6 +38,10 @@ public final class DoubleMetaphoneFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
/** Creates a DoubleMetaphoneFilter with the specified maximum code length
* that either adds the encoded forms as synonyms (<code>inject=true</code>)
* or emits them in place of the original tokens.
*/
public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) { public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
super(input); super(input);
this.encoder.setMaxCodeLen(maxCodeLength); this.encoder.setMaxCodeLen(maxCodeLength);

View File

@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
/** /**
@ -36,14 +37,19 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
*/ */
public class DoubleMetaphoneFilterFactory extends TokenFilterFactory public class DoubleMetaphoneFilterFactory extends TokenFilterFactory
{ {
/** parameter name: true if encoded tokens should be added as synonyms */
public static final String INJECT = "inject"; public static final String INJECT = "inject";
/** parameter name: restricts the length of the phonetic code */
public static final String MAX_CODE_LENGTH = "maxCodeLength"; public static final String MAX_CODE_LENGTH = "maxCodeLength";
/** default maxCodeLength if not specified */
public static final int DEFAULT_MAX_CODE_LENGTH = 4; public static final int DEFAULT_MAX_CODE_LENGTH = 4;
private boolean inject = true; private boolean inject = true;
private int maxCodeLength = DEFAULT_MAX_CODE_LENGTH; private int maxCodeLength = DEFAULT_MAX_CODE_LENGTH;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public DoubleMetaphoneFilterFactory() {}
@Override @Override
public void init(Map<String, String> args) { public void init(Map<String, String> args) {
super.init(args); super.init(args);

View File

@ -32,13 +32,19 @@ import java.io.IOException;
*/ */
public final class PhoneticFilter extends TokenFilter public final class PhoneticFilter extends TokenFilter
{ {
/** true if encoded tokens should be added as synonyms */
protected boolean inject = true; protected boolean inject = true;
/** phonetic encoder */
protected Encoder encoder = null; protected Encoder encoder = null;
/** captured state, non-null when <code>inject=true</code> and a token is buffered */
protected State save = null; protected State save = null;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
/** Creates a PhoneticFilter with the specified encoder that either
* adds the encoded forms as synonyms (<code>inject=true</code>) or
* emits them in place of the original tokens.
*/
public PhoneticFilter(TokenStream in, Encoder encoder, boolean inject) { public PhoneticFilter(TokenStream in, Encoder encoder, boolean inject) {
super(in); super(in);
this.encoder = encoder; this.encoder = encoder;

View File

@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.commons.codec.Encoder; import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.*; import org.apache.commons.codec.language.*;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -61,8 +62,11 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
public class PhoneticFilterFactory extends TokenFilterFactory public class PhoneticFilterFactory extends TokenFilterFactory
implements ResourceLoaderAware implements ResourceLoaderAware
{ {
/** parameter name: either a short name or a full class name */
public static final String ENCODER = "encoder"; public static final String ENCODER = "encoder";
/** parameter name: true if encoded tokens should be added as synonyms */
public static final String INJECT = "inject"; // boolean public static final String INJECT = "inject"; // boolean
/** parameter name: restricts the length of the phonetic code */
public static final String MAX_CODE_LENGTH = "maxCodeLength"; public static final String MAX_CODE_LENGTH = "maxCodeLength";
private static final String PACKAGE_CONTAINING_ENCODERS = "org.apache.commons.codec.language."; private static final String PACKAGE_CONTAINING_ENCODERS = "org.apache.commons.codec.language.";
@ -83,6 +87,9 @@ public class PhoneticFilterFactory extends TokenFilterFactory
private Class<? extends Encoder> clazz = null; private Class<? extends Encoder> clazz = null;
private Method setMaxCodeLenMethod = null; private Method setMaxCodeLenMethod = null;
private Integer maxCodeLength = null; private Integer maxCodeLength = null;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public PhoneticFilterFactory() {}
@Override @Override
public void inform(ResourceLoader loader) throws IOException { public void inform(ResourceLoader loader) throws IOException {

View File

@ -21,12 +21,17 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer; import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter; import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer; import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
/** /**
* Factory for {@link StempelFilter} using a Polish stemming table. * Factory for {@link StempelFilter} using a Polish stemming table.
*/ */
public class StempelPolishStemFilterFactory extends TokenFilterFactory { public class StempelPolishStemFilterFactory extends TokenFilterFactory {
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public StempelPolishStemFilterFactory() {}
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable())); return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
} }

View File

@ -74,7 +74,10 @@ public class Compile {
static boolean backward; static boolean backward;
static boolean multi; static boolean multi;
static Trie trie; static Trie trie;
/** no instantiation */
private Compile() {}
/** /**
* Entry point to the Compile application. * Entry point to the Compile application.
* <p> * <p>

View File

@ -67,6 +67,9 @@ import java.util.StringTokenizer;
*/ */
public class DiffIt { public class DiffIt {
/** no instantiation */
private DiffIt() {}
static int get(int i, String s) { static int get(int i, String s) {
try { try {
return Integer.parseInt(s.substring(i, i + 1)); return Integer.parseInt(s.substring(i, i + 1));

View File

@ -376,6 +376,7 @@ public class Trie {
return by.optimize(this); return by.optimize(this);
} }
/** writes debugging info to the printstream */
public void printInfo(PrintStream out, CharSequence prefix) { public void printInfo(PrintStream out, CharSequence prefix) {
out.println(prefix + "nds " + rows.size() + " cmds " + cmds.size() out.println(prefix + "nds " + rows.size() + " cmds " + cmds.size()
+ " cells " + getCells() + " valcells " + getCellsVal() + " pntcells " + " cells " + getCells() + " valcells " + getCellsVal() + " pntcells "

View File

@ -235,18 +235,18 @@
can prevent the modules that don't have problems can prevent the modules that don't have problems
from getting any worse --> from getting any worse -->
<check-missing-javadocs dir="build/docs/analyzers-common" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-common" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-icu" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-icu" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-kuromoji" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-morfologik" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-morfologik" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-phonetic" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-smartcn" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-smartcn" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-stempel" level="method"/>
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/> <check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
<check-missing-javadocs dir="build/docs/benchmark" level="class"/> <check-missing-javadocs dir="build/docs/benchmark" level="class"/>
<check-missing-javadocs dir="build/docs/classification" level="method"/> <check-missing-javadocs dir="build/docs/classification" level="method"/>
<check-missing-javadocs dir="build/docs/codecs" level="class"/> <check-missing-javadocs dir="build/docs/codecs" level="class"/>
<check-missing-javadocs dir="build/docs/core" level="class"/> <check-missing-javadocs dir="build/docs/core" level="class"/>
<check-missing-javadocs dir="build/docs/demo" level="class"/> <check-missing-javadocs dir="build/docs/demo" level="method"/>
<check-missing-javadocs dir="build/docs/facet" level="class"/> <check-missing-javadocs dir="build/docs/facet" level="class"/>
<check-missing-javadocs dir="build/docs/grouping" level="class"/> <check-missing-javadocs dir="build/docs/grouping" level="class"/>
<check-missing-javadocs dir="build/docs/highlighter" level="class"/> <check-missing-javadocs dir="build/docs/highlighter" level="class"/>

View File

@ -65,6 +65,9 @@ public class FormBasedXmlQueryDemo extends HttpServlet {
private IndexSearcher searcher; private IndexSearcher searcher;
private Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT); private Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
/** for instantiation by the servlet container */
public FormBasedXmlQueryDemo() {}
@Override @Override
public void init(ServletConfig config) throws ServletException { public void init(ServletConfig config) throws ServletException {
super.init(config); super.init(config);