diff --git a/lucene/analysis/kuromoji/build.xml b/lucene/analysis/kuromoji/build.xml
index 094e2bd1bef..2d531f884dd 100644
--- a/lucene/analysis/kuromoji/build.xml
+++ b/lucene/analysis/kuromoji/build.xml
@@ -69,13 +69,8 @@
originalfile="${dict.src.dir}/Noun.proper.csv"/>
-
-
-
-
-
@@ -108,14 +103,7 @@
-
-
-
-
-
-
-
-
+
diff --git a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
index 465a4327a25..dc2eac3d353 100644
--- a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
@@ -26,6 +26,7 @@ import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
+import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -38,8 +39,6 @@ import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
-import com.ibm.icu.text.Normalizer2;
-
/**
*/
public class TokenInfoDictionaryBuilder {
@@ -49,16 +48,14 @@ public class TokenInfoDictionaryBuilder {
private String encoding = "euc-jp";
- private boolean normalizeEntries = false;
- private Normalizer2 normalizer;
+ private Normalizer.Form normalForm;
private DictionaryFormat format = DictionaryFormat.IPADIC;
public TokenInfoDictionaryBuilder(DictionaryFormat format, String encoding, boolean normalizeEntries) {
this.format = format;
this.encoding = encoding;
- this.normalizeEntries = normalizeEntries;
- this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null;
+ this.normalForm = normalizeEntries ? Normalizer.Form.NFKC : null;
}
public TokenInfoDictionaryWriter build(String dirname) throws IOException {
@@ -103,13 +100,13 @@ public class TokenInfoDictionaryBuilder {
lines.add(formatted);
// NFKC normalize dictionary entry
- if (normalizeEntries) {
- if (normalizer.isNormalized(entry[0])){
+ if (normalForm != null) {
+ if (Normalizer.isNormalized(entry[0], normalForm)){
continue;
}
String[] normalizedEntry = new String[entry.length];
for (int i = 0; i < entry.length; i++) {
- normalizedEntry[i] = normalizer.normalize(entry[i]);
+ normalizedEntry[i] = Normalizer.normalize(entry[i], normalForm);
}
formatted = formatEntry(normalizedEntry);