mirror of https://github.com/apache/lucene.git
LUCENE-8981: update Kuromoji javadocs, adding experimental tags to DictionaryBuilder and JapaneseTokenizer ctor
This commit is contained in:
parent
fce0a5d45b
commit
48307b5e82
|
@ -219,9 +219,9 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
|
||||
* <p>
|
||||
* Uses the default AttributeFactory.
|
||||
* <p>Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
|
||||
* This constructor provides an entry point for users who want to construct custom language models
|
||||
* that can be used as input to {@link org.apache.lucene.analysis.ja.util.DictionaryBuilder}.</p>
|
||||
*
|
||||
* @param factory the AttributeFactory to use
|
||||
* @param systemDictionary a custom known token dictionary
|
||||
|
@ -230,6 +230,7 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
* @param userDictionary Optional: if non-null, user dictionary.
|
||||
* @param discardPunctuation true if punctuation tokens should be dropped from the output.
|
||||
* @param mode tokenization mode.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public JapaneseTokenizer(AttributeFactory factory,
|
||||
TokenInfoDictionary systemDictionary,
|
||||
|
|
|
@ -23,7 +23,17 @@ import java.nio.file.Paths;
|
|||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Tool to build dictionaries.
|
||||
* Tool to build dictionaries. Usage:
|
||||
* <pre>
|
||||
* java -cp [lucene classpath] org.apache.lucene.analysis.ja.util.DictionaryBuilder \
|
||||
* ${inputDir} ${outputDir} ${encoding}
|
||||
* </pre>
|
||||
*
|
||||
* <p> The input directory is expected to include unk.def, matrix.def, plus any number of .csv
|
||||
* files, roughly following the conventions of IPADIC. JapaneseTokenizer uses dictionaries built
|
||||
* with this tool. Note that the input files required by this build generally must be generated from
|
||||
* a corpus of real text using tools that are not part of Lucene. </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DictionaryBuilder {
|
||||
|
||||
|
|
Loading…
Reference in New Issue