LUCENE-8981: update Kuromoji javadocs, adding experimental tags to DictionaryBuilder and JapaneseTokenizer ctor

Michael Sokolov 2019-09-16 13:27:37 -04:00
parent fce0a5d45b
commit 48307b5e82
2 changed files with 15 additions and 4 deletions

org/apache/lucene/analysis/ja/JapaneseTokenizer.java

@@ -219,9 +219,9 @@ public final class JapaneseTokenizer extends Tokenizer {
   }
 
   /**
-   * Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
-   * <p>
-   * Uses the default AttributeFactory.
+   * <p>Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
+   * This constructor provides an entry point for users that want to construct custom language models
+   * that can be used as input to {@link org.apache.lucene.analysis.ja.util.DictionaryBuilder}.</p>
    *
    * @param factory the AttributeFactory to use
    * @param systemDictionary a custom known token dictionary
@@ -230,6 +230,7 @@ public final class JapaneseTokenizer extends Tokenizer {
    * @param userDictionary Optional: if non-null, user dictionary.
    * @param discardPunctuation true if punctuation tokens should be dropped from the output.
    * @param mode tokenization mode.
+   * @lucene.experimental
    */
   public JapaneseTokenizer(AttributeFactory factory,
                            TokenInfoDictionary systemDictionary,

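As an illustration of the entry point described in the new javadoc, here is a minimal sketch of constructing a tokenizer over a custom language model. The helper class and method are hypothetical, the unkDictionary and connectionCosts parameters are assumed from the full constructor signature (only part of it is visible in the truncated hunk above), and loading the dictionary instances from a DictionaryBuilder output is left out.

import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.ja.dict.UnknownDictionary;
import org.apache.lucene.util.AttributeFactory;

// Hypothetical helper, not part of this patch.
public class CustomDictionaryExample {

  // Builds a tokenizer from dictionaries produced by a custom DictionaryBuilder run.
  // The three dictionary objects are assumed to be loaded elsewhere.
  static JapaneseTokenizer newCustomTokenizer(TokenInfoDictionary systemDictionary,
                                              UnknownDictionary unkDictionary,
                                              ConnectionCosts connectionCosts) {
    return new JapaneseTokenizer(
        AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, // default AttributeFactory
        systemDictionary,                           // custom known-token dictionary
        unkDictionary,                              // custom unknown-token dictionary
        connectionCosts,                            // custom connection costs
        null,                                       // no user dictionary
        true,                                       // discard punctuation tokens
        JapaneseTokenizer.Mode.SEARCH);             // tokenization mode
  }
}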
org/apache/lucene/analysis/ja/util/DictionaryBuilder.java

@@ -23,7 +23,17 @@ import java.nio.file.Paths;
 import java.util.Locale;
 
 /**
- * Tool to build dictionaries.
+ * Tool to build dictionaries. Usage:
+ * <pre>
+ *  java -cp [lucene classpath] org.apache.lucene.analysis.ja.util.DictionaryBuilder \
+ *    ${inputDir} ${outputDir} ${encoding}
+ * </pre>
+ *
+ * <p> The input directory is expected to include unk.def, matrix.def, plus any number of .csv
+ * files, roughly following the conventions of IPADIC. JapaneseTokenizer uses dictionaries built
+ * with this tool. Note that the input files required by this build generally must be generated from
+ * a corpus of real text using tools that are not part of Lucene. </p>
+ * @lucene.experimental
  */
 public class DictionaryBuilder {
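To make the usage text above concrete, a hypothetical driver that runs the builder from Java rather than from the command line. The input directory, output directory, and encoding are placeholder values, and invoking the class through a main(String[]) entry point with exactly these three arguments is an assumption taken from the java -cp usage shown in the new javadoc.

// Hypothetical driver, not part of this patch: runs DictionaryBuilder programmatically
// with the same three arguments as the command line shown in the javadoc above.
public class BuildKuromojiDictionary {
  public static void main(String[] args) throws Exception {
    org.apache.lucene.analysis.ja.util.DictionaryBuilder.main(new String[] {
        "/tmp/custom-ipadic",   // input dir containing unk.def, matrix.def and *.csv files
        "/tmp/kuromoji-out",    // output dir for the compiled binary dictionary files
        "euc-jp"                // character encoding of the input files
    });
  }
}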