LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.

This commit is contained in:
Dawid Weiss 2020-02-21 10:24:05 +01:00
parent 9f3f7244ac
commit f8a2c39906
1 changed files with 32 additions and 24 deletions

View File

@ -17,6 +17,18 @@
// This downloads and compiles Kuromoji dictionaries. // This downloads and compiles Kuromoji dictionaries.
// Runs the Kuromoji DictionaryBuilder in a separate JVM and logs a lifecycle
// message once the run has completed.
//
//   project        - the Gradle project used to launch javaexec; its main source
//                    set's runtime classpath is used as the tool's classpath.
//   dictionaryName - used only in the log message emitted after the run.
//   closure        - caller-supplied configuration applied inside the javaexec
//                    block; the call sites visible in this file use it to append
//                    the DictionaryBuilder command-line arguments (args += [...]).
def recompileDictionary(project, dictionaryName, Closure closure) {
project.javaexec {
// Entry point of the dictionary compiler.
main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
classpath = project.sourceSets.main.runtimeClasspath
// Cap the forked JVM's heap at 1 GB.
jvmArgs '-Xmx1G'
// NOTE(review): presumably evaluates the caller's closure against this javaexec
// spec so that its `args` additions land on the same invocation - confirm
// against Gradle's closure delegate resolution.
with closure
}
project.logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
}
configure(project(":lucene:analysis:kuromoji")) { configure(project(":lucene:analysis:kuromoji")) {
apply plugin: 'java-library' apply plugin: 'java-library'
apply plugin: "de.undercouch.download" apply plugin: "de.undercouch.download"
@ -25,10 +37,21 @@ configure(project(":lucene:analysis:kuromoji")) {
targetDir = file("src/resources") targetDir = file("src/resources")
} }
task compileMecabIpadic(type: Download) { task deleteDictionaryData() {
description "Recompile mecab dictionaries." // There should really be just one but since we don't know which
// one it'll be, let's process all of them.
doFirst {
sourceSets.main.resources.srcDirs.each { location ->
delete fileTree(dir: location, include: "org/apache/lucene/analysis/ja/dict/*.dat")
}
}
}
task compileMecab(type: Download) {
description "Recompile dictionaries from Mecab data."
group "generation" group "generation"
dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath dependsOn sourceSets.main.runtimeClasspath
def dictionaryName = "mecab-ipadic-2.7.0-20070801" def dictionaryName = "mecab-ipadic-2.7.0-20070801"
@ -58,12 +81,7 @@ configure(project(":lucene:analysis:kuromoji")) {
} }
// Compile the dictionary // Compile the dictionary
project.javaexec { recompileDictionary(project, dictionaryName, {
main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
classpath = sourceSets.main.runtimeClasspath
jvmArgs '-Xmx1G'
args += [ args += [
"ipadic", "ipadic",
unpackedDir, unpackedDir,
@ -71,19 +89,15 @@ configure(project(":lucene:analysis:kuromoji")) {
"euc-jp", "euc-jp",
false false
] ]
})
logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
} }
} }
}
/*
TODO: this currently doesn't work because DictionaryBuilder no longer supports this type?
task compileNaist(type: Download) { task compileNaist(type: Download) {
description "Recompile naist dictionaries." description "Recompile dictionaries from Naist data."
group "generation" group "generation"
dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath dependsOn sourceSets.main.runtimeClasspath
def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013" def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013"
@ -103,21 +117,15 @@ configure(project(":lucene:analysis:kuromoji")) {
} }
// Compile the dictionary // Compile the dictionary
project.javaexec { recompileDictionary(project, dictionaryName, {
main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
classpath = sourceSets.main.runtimeClasspath
jvmArgs '-Xmx1G'
args += [ args += [
"naist", "ipadic",
unpackedDir, unpackedDir,
targetDir, targetDir,
"euc-jp", "euc-jp",
false false
] ]
})
} }
} }
}
*/
} }