mirror of https://github.com/apache/lucene.git
LUCENE-9383: benchmark module: Gradle conversion (#1550)
This commit is contained in:
parent
ff8caeb7f4
commit
89784ad7be
|
@ -15,13 +15,13 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
apply plugin: 'java'
|
||||||
apply plugin: 'java-library'
|
// NOT a 'java-library'. Maybe 'application' but seems too limiting.
|
||||||
|
|
||||||
description = 'System for benchmarking Lucene'
|
description = 'System for benchmarking Lucene'
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
api project(':lucene:core')
|
implementation project(':lucene:core')
|
||||||
|
|
||||||
implementation project(':lucene:analysis:common')
|
implementation project(':lucene:analysis:common')
|
||||||
implementation project(':lucene:facet')
|
implementation project(':lucene:facet')
|
||||||
|
@ -37,5 +37,120 @@ dependencies {
|
||||||
exclude module: "xml-apis"
|
exclude module: "xml-apis"
|
||||||
})
|
})
|
||||||
|
|
||||||
|
runtimeOnly project(':lucene:analysis:icu')
|
||||||
|
|
||||||
testImplementation project(':lucene:test-framework')
|
testImplementation project(':lucene:test-framework')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Project-relative directory that receives the downloaded archives (and the bunzip2 output)
// of the data tasks in this script.
def tempDir = file("temp")
|
||||||
|
// Project-relative directory that receives the extracted / prepared data sets; the sample
// commands in this script also write their benchmark output here.
def workDir = file("work")
|
||||||
|
|
||||||
|
// Runs the byTask Benchmark driver against an algorithm file.
//
// Optional project properties (pass with -P on the command line):
//   taskAlg        - algorithm file to run (default: conf/micro-standard.alg)
//   maxHeapSize    - heap size of the forked JVM (default: 1G)
//   standardOutput - file that receives the benchmark's standard output; a relative
//                    path is resolved against this project's directory
task run(type: JavaExec) {
  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file -PmaxHeapSize=1G)"
  main 'org.apache.lucene.benchmark.byTask.Benchmark'
  classpath sourceSets.main.runtimeClasspath
  // allow these to be specified on the CLI via -PtaskAlg= for example
  args = [propertyOrDefault('taskAlg', 'conf/micro-standard.alg')]

  maxHeapSize = propertyOrDefault('maxHeapSize', '1G')

  String stdOutStr = propertyOrDefault('standardOutput', null)
  if (stdOutStr != null) {
    // Use project.file() rather than new File(): a relative path such as
    // work/collation.benchmark.output.txt must land next to the project's "work"
    // directory, not wherever the Gradle process happens to have its working directory.
    File stdOutFile = file(stdOutStr)
    OutputStream redirectedOut = null

    // Open the file only when the task really executes. Opening it during configuration
    // would create/truncate it on every build that passes -PstandardOutput, even when
    // 'run' itself is not part of the task graph.
    doFirst {
      stdOutFile.parentFile?.mkdirs()
      redirectedOut = stdOutFile.newOutputStream()
      standardOutput = redirectedOut
    }
    // Flush and release the file once the benchmark has finished.
    doLast {
      redirectedOut?.close()
    }
  }

  // Remote debugging of the forked JVM is off by default; flip 'enabled' to attach a debugger.
  debugOptions {
    enabled = false
    port = 5005
    suspend = true
  }
}
|
||||||
|
|
||||||
|
/* Old "collation" Ant target:
|
||||||
|
gradle getTop100kWikiWordFiles run -PtaskAlg=conf/collation.alg -PstandardOutput=work/collation.benchmark.output.txt
|
||||||
|
perl -CSD scripts/collation.bm2jira.pl work/collation.benchmark.output.txt
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Old "shingle" Ant target:
|
||||||
|
gradle getReuters run -PtaskAlg=conf/shingle.alg -PstandardOutput=work/shingle.benchmark.output.txt
|
||||||
|
perl -CSD scripts/shingle.bm2jira.pl work/shingle.benchmark.output.txt
|
||||||
|
*/
|
||||||
|
|
||||||
|
// The remaining tasks just get / extract / prepare data
|
||||||
|
|
||||||
|
// Fetches the bzip2-compressed English Wikipedia dump into tempDir and unpacks it there.
task getEnWiki(type: Download) {
  def xmlName = "enwiki-20070527-pages-articles.xml"
  def archiveName = xmlName + ".bz2"

  src "https://home.apache.org/~dsmiley/data/" + archiveName
  dest file("$tempDir/" + archiveName)
  // keep an already-downloaded archive instead of fetching it again
  overwrite false
  compress false

  // the unpacked XML file is what this task produces
  outputs.file file("$tempDir/$xmlName")

  doLast {
    // 'dest' is the downloaded archive configured above
    ant.bunzip2(src: dest, dest: tempDir)
  }
}
|
||||||
|
|
||||||
|
// Fetches the bzip2-compressed, randomly ordered GeoNames dump into tempDir and unpacks it there.
//
// How the hosted file can be regenerated from newer data:
//   1. latest data is at: https://download.geonames.org/export/dump/allCountries.zip
//   2. randomize with:    gsort -R -S 1500M file.txt > file_random.txt
//   3. compress with:     bzip2 -9 -k file_random.txt
task getGeoNames(type: Download) {
  def txtName = "geonames_20130921_randomOrder_allCountries.txt"
  def archiveName = txtName + ".bz2"

  src "https://home.apache.org/~dsmiley/data/" + archiveName
  dest file("$tempDir/" + archiveName)
  // keep an already-downloaded archive instead of fetching it again
  overwrite false
  compress false

  // the unpacked text file is what this task produces
  outputs.file file("$tempDir/$txtName")

  doLast {
    // 'dest' is the downloaded archive configured above; bunzip2 will chop off .bz2
    ant.bunzip2(src: dest, dest: tempDir)
  }
}
|
||||||
|
|
||||||
|
// Fetches the "top 100k Wikipedia words" tarball into tempDir and syncs its contents
// into workDir/top100k-out.
task getTop100kWikiWordFiles(type: Download) {
  def extractedDir = file("$workDir/top100k-out")

  src "https://home.apache.org/~rmuir/wikipedia/top.100k.words.de.en.fr.uk.wikipedia.2009-11.tar.bz2"
  // name the local archive after the last path segment of the source URL
  def archiveName = src.file.split('/')[-1]
  dest file("$tempDir/${archiveName}")
  // keep an already-downloaded archive instead of fetching it again
  overwrite false
  compress false

  outputs.dir extractedDir

  doLast {
    project.sync {
      // 'dest' is the downloaded archive configured above; tarTree decompresses on the fly
      from tarTree(dest)
      into extractedDir
    }
  }
}
|
||||||
|
|
||||||
|
// Fetches the Reuters-21578 tarball into tempDir, unpacks it into workDir/reuters and then
// runs ExtractReuters over the unpacked files to produce workDir/reuters-out.
task getReuters(type: Download) {
  def rawDir = file("$workDir/reuters")
  def finalPath = file("$workDir/reuters-out")

  // ExtractReuters is launched from this module's runtime classpath, so that must be built first
  dependsOn sourceSets.main.runtimeClasspath

  // note: there is no HTTPS url and we don't care because this is merely test/perf data
  src "http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz"
  // name the local archive after the last path segment of the source URL
  def archiveName = src.file.split('/')[-1]
  dest file("$tempDir/${archiveName}")
  // keep an already-downloaded archive instead of fetching it again
  overwrite false
  compress false

  outputs.dir finalPath

  doLast {
    // Step 1: unpack everything except top-level *.txt entries.
    project.sync {
      // 'dest' is the downloaded archive configured above; tarTree decompresses on the fly
      from(tarTree(dest)) {
        exclude '*.txt'
      }
      into rawDir
    }

    // Step 2: convert the unpacked files with the Java extractor.
    println "Extracting reuters to $finalPath"
    finalPath.deleteDir() // necessary
    // TODO consider porting ExtractReuters to groovy?
    project.javaexec {
      classpath = sourceSets.main.runtimeClasspath
      main = 'org.apache.lucene.benchmark.utils.ExtractReuters'
      args = [rawDir, finalPath]
      maxHeapSize = '1G'
    }
  }
}
|
|
@ -40,17 +40,17 @@ while (<>) {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Print out platform info
|
# Print out platform info
|
||||||
print "JAVA:\n", `java -version 2>&1`, "\nOS:\n";
|
#print "JAVA:\n", `java -version 2>&1`, "\nOS:\n";
|
||||||
if ($^O =~ /win/i) {
|
#if ($^O =~ /win/i) {
|
||||||
print "$^O\n";
|
# print "$^O\n";
|
||||||
eval {
|
# eval {
|
||||||
require Win32;
|
# require Win32;
|
||||||
print Win32::GetOSName(), "\n", Win32::GetOSVersion(), "\n";
|
# print Win32::GetOSName(), "\n", Win32::GetOSVersion(), "\n";
|
||||||
};
|
# };
|
||||||
die "Error loading Win32: $@" if ($@);
|
# die "Error loading Win32: $@" if ($@);
|
||||||
} else {
|
#} else {
|
||||||
print `uname -a 2>&1`;
|
# print `uname -a 2>&1`;
|
||||||
}
|
#}
|
||||||
|
|
||||||
print "\n||Language||java.text||ICU4J||KeywordAnalyzer||ICU4J Improvement||\n";
|
print "\n||Language||java.text||ICU4J||KeywordAnalyzer||ICU4J Improvement||\n";
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue