LUCENE-8981: update Kuromoji javadocs, adding experimental tags to DictionaryBuilder and JapaneseTokenizer ctor

Michael Sokolov 2019-09-16 13:27:37 -04:00 committed by Michael Sokolov
parent b617769614
commit 93d3e5d666
2 changed files with 15 additions and 4 deletions

org/apache/lucene/analysis/ja/JapaneseTokenizer.java

@@ -219,9 +219,9 @@ public final class JapaneseTokenizer extends Tokenizer {
   }
 
   /**
-   * Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
-   * <p>
-   * Uses the default AttributeFactory.
+   * <p>Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
+   * This constructor provides an entry point for users that want to construct custom language models
+   * that can be used as input to {@link org.apache.lucene.analysis.ja.util.DictionaryBuilder}.</p>
    *
    * @param factory the AttributeFactory to use
    * @param systemDictionary a custom known token dictionary
@@ -230,6 +230,7 @@ public final class JapaneseTokenizer extends Tokenizer {
    * @param userDictionary Optional: if non-null, user dictionary.
    * @param discardPunctuation true if punctuation tokens should be dropped from the output.
    * @param mode tokenization mode.
+   * @lucene.experimental
    */
   public JapaneseTokenizer(AttributeFactory factory,
                            TokenInfoDictionary systemDictionary,
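
For orientation only (not part of this commit): a minimal sketch of how the experimental constructor documented above might be called once custom dictionaries are in hand. The @param lines between the two hunks are elided, so the unknown-word dictionary and connection-cost arguments, their ordering, and the helper name newCustomTokenizer are assumptions rather than content taken from the diff.

    import org.apache.lucene.analysis.ja.JapaneseTokenizer;
    import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
    import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
    import org.apache.lucene.analysis.ja.dict.UnknownDictionary;
    import org.apache.lucene.util.AttributeFactory;

    // Hypothetical helper, not from this commit. The caller is assumed to have built/loaded
    // the custom dictionaries already (e.g. from data produced by DictionaryBuilder); the
    // argument order follows the javadoc above, and the elided parameters are guessed.
    public class CustomTokenizerExample {
      public static JapaneseTokenizer newCustomTokenizer(TokenInfoDictionary systemDict,
                                                         UnknownDictionary unkDict,
                                                         ConnectionCosts connectionCosts) {
        return new JapaneseTokenizer(
            AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, // factory
            systemDict,                                 // systemDictionary: custom known-token dictionary
            unkDict,                                    // unknown-word dictionary (assumed parameter)
            connectionCosts,                            // connection costs (assumed parameter)
            null,                                       // userDictionary: optional, may be null
            true,                                       // discardPunctuation: drop punctuation tokens
            JapaneseTokenizer.Mode.SEARCH);             // mode: tokenization mode
      }
    }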

org/apache/lucene/analysis/ja/util/DictionaryBuilder.java

@@ -23,7 +23,17 @@ import java.nio.file.Paths;
 import java.util.Locale;
 
 /**
- * Tool to build dictionaries.
+ * Tool to build dictionaries. Usage:
+ * <pre>
+ *   java -cp [lucene classpath] org.apache.lucene.analysis.ja.util.DictionaryBuilder \
+ *     ${inputDir} ${outputDir} ${encoding}
+ * </pre>
+ *
+ * <p> The input directory is expected to include unk.def, matrix.def, plus any number of .csv
+ * files, roughly following the conventions of IPADIC. JapaneseTokenizer uses dictionaries built
+ * with this tool. Note that the input files required by this build generally must be generated from
+ * a corpus of real text using tools that are not part of Lucene. </p>
+ * @lucene.experimental
  */
 public class DictionaryBuilder {
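
And a hedged programmatic sketch of the command line described in the new DictionaryBuilder javadoc: it simply forwards the three documented arguments to the tool's main method. The paths and encoding below are placeholders, and the real entry point may expect additional arguments (for example a dictionary-format flag) beyond what this javadoc lists.

    import org.apache.lucene.analysis.ja.util.DictionaryBuilder;

    // Hypothetical driver, not from this commit.
    public class BuildKuromojiDictionaries {
      public static void main(String[] args) throws Exception {
        // The input directory is assumed to contain unk.def, matrix.def and IPADIC-style .csv
        // files, as described in the javadoc; the output directory receives the built data.
        String inputDir = "/path/to/ipadic-source"; // placeholder
        String outputDir = "/path/to/built-dict";   // placeholder
        String encoding = "euc-jp";                 // placeholder; must match the .csv files' encoding

        // Equivalent to the documented invocation:
        //   java -cp [lucene classpath] org.apache.lucene.analysis.ja.util.DictionaryBuilder \
        //     ${inputDir} ${outputDir} ${encoding}
        DictionaryBuilder.main(new String[] { inputDir, outputDir, encoding });
      }
    }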