From dd7b3d8d953f4dc4a5b7cac2e0a90c69230a6f12 Mon Sep 17 00:00:00 2001 From: Namgyu Kim Date: Tue, 23 Jul 2019 00:08:01 +0900 Subject: [PATCH] remove nori/tools dependency on ICU --- lucene/analysis/nori/build.xml | 14 +------------- .../ko/util/TokenInfoDictionaryBuilder.java | 12 +++++------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/lucene/analysis/nori/build.xml b/lucene/analysis/nori/build.xml index 0938de2a5f3..6b82816b807 100644 --- a/lucene/analysis/nori/build.xml +++ b/lucene/analysis/nori/build.xml @@ -57,13 +57,8 @@ - - - - - @@ -95,14 +90,7 @@ - - - - - - - - + diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java index 6609f50ed54..27c72dadfe6 100644 --- a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java +++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java @@ -25,6 +25,7 @@ import java.io.InputStreamReader; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.text.Normalizer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -35,7 +36,6 @@ import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.FST; -import com.ibm.icu.text.Normalizer2; import org.apache.lucene.util.fst.PositiveIntOutputs; public class TokenInfoDictionaryBuilder { @@ -45,13 +45,11 @@ public class TokenInfoDictionaryBuilder { private String encoding = "utf-8"; - private boolean normalizeEntries = false; - private Normalizer2 normalizer; + private Normalizer.Form normalForm; public TokenInfoDictionaryBuilder(String encoding, boolean normalizeEntries) { this.encoding = encoding; - this.normalizeEntries = normalizeEntries; - this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null; + this.normalForm = normalizeEntries ? Normalizer.Form.NFKC : null; } public TokenInfoDictionaryWriter build(String dirname) throws IOException { @@ -88,10 +86,10 @@ public class TokenInfoDictionaryBuilder { } // NFKC normalize dictionary entry - if (normalizeEntries) { + if (normalForm != null) { String[] normalizedEntry = new String[entry.length]; for (int i = 0; i < entry.length; i++) { - normalizedEntry[i] = normalizer.normalize(entry[i]); + normalizedEntry[i] = Normalizer.normalize(entry[i], normalForm); } lines.add(normalizedEntry); } else {