mirror of https://github.com/apache/lucene.git
Revert "remove nori/tools dependency on ICU"
This reverts commit dd7b3d8d95.
This commit is contained in:
parent
d92159b896
commit
c5741c201f
|
@ -57,8 +57,13 @@
|
|||
<untar src="${build.dir}/${dict.version}.tar" dest="${build.dir}"/>
|
||||
</target>
|
||||
|
||||
<path id="tools.dependencies">
|
||||
<fileset dir="../icu/lib"/>
|
||||
</path>
|
||||
|
||||
<path id="tools.classpath">
|
||||
<path refid="classpath"/>
|
||||
<path refid="tools.dependencies"/>
|
||||
<pathelement location="${build.dir}/classes/java"/>
|
||||
<pathelement location="${build.dir}/classes/tools"/>
|
||||
</path>
|
||||
|
@ -90,7 +95,14 @@
|
|||
</sequential>
|
||||
</target>
|
||||
|
||||
<target name="compile-tools" depends="compile-core, common.compile-tools">
|
||||
<!-- we don't actually need to compile this thing, we just want its lib -->
|
||||
<target name="resolve-icu">
|
||||
<ant dir="../icu/" target="resolve" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="compile-tools" depends="resolve-icu, compile-core, common.compile-tools">
|
||||
<compile
|
||||
srcdir="src/tools/java"
|
||||
destdir="${build.dir}/classes/tools">
|
||||
|
|
|
@ -25,7 +25,6 @@ import java.io.InputStreamReader;
|
|||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.text.Normalizer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
@ -36,6 +35,7 @@ import org.apache.lucene.util.IntsRefBuilder;
|
|||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
|
||||
public class TokenInfoDictionaryBuilder {
|
||||
|
@ -45,11 +45,13 @@ public class TokenInfoDictionaryBuilder {
|
|||
|
||||
private String encoding = "utf-8";
|
||||
|
||||
private Normalizer.Form normalForm;
|
||||
private boolean normalizeEntries = false;
|
||||
private Normalizer2 normalizer;
|
||||
|
||||
public TokenInfoDictionaryBuilder(String encoding, boolean normalizeEntries) {
|
||||
this.encoding = encoding;
|
||||
this.normalForm = normalizeEntries ? Normalizer.Form.NFKC : null;
|
||||
this.normalizeEntries = normalizeEntries;
|
||||
this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null;
|
||||
}
|
||||
|
||||
public TokenInfoDictionaryWriter build(String dirname) throws IOException {
|
||||
|
@ -86,10 +88,10 @@ public class TokenInfoDictionaryBuilder {
|
|||
}
|
||||
|
||||
// NFKC normalize dictionary entry
|
||||
if (normalForm != null) {
|
||||
if (normalizeEntries) {
|
||||
String[] normalizedEntry = new String[entry.length];
|
||||
for (int i = 0; i < entry.length; i++) {
|
||||
normalizedEntry[i] = Normalizer.normalize(entry[i], normalForm);
|
||||
normalizedEntry[i] = normalizer.normalize(entry[i]);
|
||||
}
|
||||
lines.add(normalizedEntry);
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue