LUCENE-8981: update Kuromoji javadocs, adding experimental tags to DictionaryBuilder and JapaneseTokenizer ctor

2025-02-08 19:15:06 +00:00 · 2019-09-16 13:27:37 -04:00 · 2019-09-16 13:27:37 -04:00 · 93d3e5d666
commit 93d3e5d666
parent b617769614
2 changed files with 15 additions and 4 deletions
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
@@ -219,9 +219,9 @@ public final class JapaneseTokenizer extends Tokenizer {
  }
  /**
-   * Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
+   * <p>Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
-   * <p>
+   * This constructor provides an entry point for users that want to construct custom language models
-   * Uses the default AttributeFactory.
+   * that can be used as input to {@link org.apache.lucene.analysis.ja.util.DictionaryBuilder}.</p>
   *
   * @param factory the AttributeFactory to use
   * @param systemDictionary a custom known token dictionary
@@ -230,6 +230,7 @@ public final class JapaneseTokenizer extends Tokenizer {
   * @param userDictionary Optional: if non-null, user dictionary.
   * @param discardPunctuation true if punctuation tokens should be dropped from the output.
   * @param mode tokenization mode.
   * @lucene.experimental
   */
  public JapaneseTokenizer(AttributeFactory factory,
                           TokenInfoDictionary systemDictionary,
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java
@@ -23,7 +23,17 @@ import java.nio.file.Paths;
 import java.util.Locale;
 /**
- * Tool to build dictionaries.
+ * Tool to build dictionaries. Usage:
 * <pre>
 *    java -cp [lucene classpath] org.apache.lucene.analysis.ja.util.DictionaryBuilder \
 *          ${inputDir} ${outputDir} ${encoding}
 * </pre>
 *
 * <p> The input directory is expected to include unk.def, matrix.def, plus any number of .csv
 * files, roughly following the conventions of IPADIC. JapaneseTokenizer uses dictionaries built
 * with this tool. Note that the input files required by this build generally must be generated from
 * a corpus of real text using tools that are not part of Lucene.</p>
 * @lucene.experimental
 */
 public class DictionaryBuilder {