remove nori/tools dependency on ICU

This commit is contained in:
Namgyu Kim 2019-07-23 00:08:01 +09:00 committed by GitHub
parent 369a5dc796
commit dd7b3d8d95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 20 deletions

View File

@ -57,13 +57,8 @@
<untar src="${build.dir}/${dict.version}.tar" dest="${build.dir}"/>
</target>
<path id="tools.dependencies">
<fileset dir="../icu/lib"/>
</path>
<!-- Classpath for the tools: combines the shared "classpath" reference, the "tools.dependencies" path, and the compiled java and tools class directories under ${build.dir}. -->
<path id="tools.classpath">
<path refid="classpath"/>
<path refid="tools.dependencies"/>
<pathelement location="${build.dir}/classes/java"/>
<pathelement location="${build.dir}/classes/tools"/>
</path>
@ -95,14 +90,7 @@
</sequential>
</target>
<!-- We don't need to compile the ICU module itself; we only resolve it so that its lib directory is available. -->
<target name="resolve-icu">
<ant dir="../icu/" target="resolve" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<target name="compile-tools" depends="resolve-icu, compile-core, common.compile-tools">
<target name="compile-tools" depends="compile-core, common.compile-tools">
<compile
srcdir="src/tools/java"
destdir="${build.dir}/classes/tools">

View File

@ -25,6 +25,7 @@ import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -35,7 +36,6 @@ import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import com.ibm.icu.text.Normalizer2;
import org.apache.lucene.util.fst.PositiveIntOutputs;
public class TokenInfoDictionaryBuilder {
@ -45,13 +45,11 @@ public class TokenInfoDictionaryBuilder {
private String encoding = "utf-8";
private boolean normalizeEntries = false;
private Normalizer2 normalizer;
private Normalizer.Form normalForm;
/**
 * Stores the encoding and normalization settings for this builder.
 *
 * @param encoding charset name kept in the {@code encoding} field (the field initializer defaults it to "utf-8")
 * @param normalizeEntries when {@code true}, NFKC normalization is selected for dictionary entries;
 *                         when {@code false}, the normalizer settings are left {@code null}
 */
public TokenInfoDictionaryBuilder(String encoding, boolean normalizeEntries) {
this.encoding = encoding;
this.normalizeEntries = normalizeEntries;
// NOTE(review): two normalizer assignments appear here, an ICU Normalizer2 instance and a
// java.text.Normalizer.Form constant. Presumably they are the before/after sides of the ICU
// removal and only one exists in the real file; confirm against the actual source.
this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null;
this.normalForm = normalizeEntries ? Normalizer.Form.NFKC : null;
}
public TokenInfoDictionaryWriter build(String dirname) throws IOException {
@ -88,10 +86,10 @@ public class TokenInfoDictionaryBuilder {
}
// NFKC normalize dictionary entry
if (normalizeEntries) {
if (normalForm != null) {
String[] normalizedEntry = new String[entry.length];
for (int i = 0; i < entry.length; i++) {
normalizedEntry[i] = normalizer.normalize(entry[i]);
normalizedEntry[i] = Normalizer.normalize(entry[i], normalForm);
}
lines.add(normalizedEntry);
} else {