fix typo analysis-kuromoji (#12047)

2023-01-02 00:58:50 +09:00 · 2023-01-02 00:58:50 +09:00 · 4676a735c1
parent 4eab1d74e8
commit 4676a735c1
3 changed files with 9 additions and 9 deletions
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
@@ -39,9 +39,9 @@ import org.apache.lucene.util.IgnoreRandomChains;
 * <p>Notice that this analyzer uses a token composition scheme and relies on punctuation tokens
 * being found in the token stream. Please make sure your {@link JapaneseTokenizer} has {@code
 * discardPunctuation} set to false. In case punctuation characters, such as ． (U+FF0E FULLWIDTH
- * FULL STOP), is removed from the token stream, this filter would find input tokens tokens ３ and ２千
- * and give outputs 3 and 2000 instead of 3200, which is likely not the intended result. If you want
- * to remove punctuation characters from your index that are not part of normalized numbers, add a
+ * FULL STOP), are removed from the token stream, this filter would find input tokens ３ and ２千 and
+ * give outputs 3 and 2000 instead of 3200, which is likely not the intended result. If you want to
+ * remove punctuation characters from your index that are not part of normalized numbers, add a
 * {@link org.apache.lucene.analysis.StopFilter} with the punctuation you wish to remove after
 * {@link JapaneseNumberFilter} in your analyzer chain.
 *
@@ -59,8 +59,8 @@ import org.apache.lucene.util.IgnoreRandomChains;
 *   <li>15,7 becomes 157 (be aware of this weakness)
 * </ul>
 *
- * <p>Tokens preceded by a token with {@link PositionIncrementAttribute} of zero are left left
- * untouched and emitted as-is.
+ * <p>Tokens preceded by a token with {@link PositionIncrementAttribute} of zero are left untouched
+ * and emitted as-is.
 *
 * <p>This filter does not use any part-of-speech information for its normalization and the
 * motivation for this is to also support n-grammed token streams in the future.
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
@@ -72,7 +72,7 @@ public class CharSequenceUtils {
    return ch >= 0xff41 && ch <= 0xff5a;
  }

-  /** Convert all hiragana in a string into kanataka */
+  /** Convert all hiragana in a string into katakana */
  public static String toKatakana(CharSequence s) {
    char[] chars = new char[s.length()];
    for (int i = 0; i < s.length(); i++) {
--- a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
+++ b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
@@ -1,8 +1,8 @@
-# mapping rules of katakana (an unit of keystroke) to list of acceptable romanizations.
+# mapping rules of katakana (a unit of keystroke) to a list of acceptable romanizations.
 # longest-match is used to find entries in this list.
 # covers romanization systems: modified Hepburn-shiki, Kunrei-shiki (Nihon-shiki), and Wāpuro shiki.
 # note: this does not strictly comply with the romanization systems listed above,
-# but tries to cover possible keystoroke supported by various Input Methods.
+# but tries to cover possible keystrokes supported by various Input Methods.

 ア,a
 イ,i
@@ -341,4 +341,4 @@
 # Chōonpu (Katakana-Hiragana Prolonged Sound Mark)
 ー,ー
 # Interpunct (Middle Dot)
-・,・
+・,・