LUCENE-9383: benchmark module: Gradle conversion (#1550)

This commit is contained in:
David Smiley 2020-06-01 10:10:36 -04:00
parent ff8caeb7f4
commit 89784ad7be
No known key found for this signature in database
GPG Key ID: 6FDFF3BF6796FD4A
2 changed files with 129 additions and 14 deletions

View File

@ -15,13 +15,13 @@
* limitations under the License. * limitations under the License.
*/ */
apply plugin: 'java'
apply plugin: 'java-library' // NOT a 'java-library'. Maybe 'application' but seems too limiting.
description = 'System for benchmarking Lucene' description = 'System for benchmarking Lucene'
dependencies { dependencies {
api project(':lucene:core') implementation project(':lucene:core')
implementation project(':lucene:analysis:common') implementation project(':lucene:analysis:common')
implementation project(':lucene:facet') implementation project(':lucene:facet')
@ -37,5 +37,120 @@ dependencies {
exclude module: "xml-apis" exclude module: "xml-apis"
}) })
runtimeOnly project(':lucene:analysis:icu')
testImplementation project(':lucene:test-framework') testImplementation project(':lucene:test-framework')
} }
// Scratch directory (relative to this project) that receives the downloaded
// archives and the files bunzip2'ed from them by the data tasks below.
def tempDir = file("temp")
// Directory (relative to this project) into which the data tasks below
// extract / prepare the benchmark input data.
def workDir = file("work")
// Runs a benchmark algorithm file through org.apache.lucene.benchmark.byTask.Benchmark
// in a forked JVM. Optional project properties (pass as -Pname=value):
//   taskAlg        - algorithm file to execute (default: conf/micro-standard.alg)
//   maxHeapSize    - maximum heap of the forked JVM (default: 1G)
//   standardOutput - file that receives the forked JVM's stdout; when absent the
//                    JavaExec default is left untouched
// NOTE(review): propertyOrDefault is defined outside this script; presumably it
// returns the project property when set and the given default otherwise - confirm.
task run(type: JavaExec) {
  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file -PmaxHeapSize=1G)"
  main 'org.apache.lucene.benchmark.byTask.Benchmark'
  classpath sourceSets.main.runtimeClasspath
  // allow these to be specified on the CLI via -PtaskAlg= for example
  args = [propertyOrDefault('taskAlg', 'conf/micro-standard.alg')]

  maxHeapSize = propertyOrDefault('maxHeapSize', '1G')

  String stdOutStr = propertyOrDefault('standardOutput', null)
  if (stdOutStr != null) {
    // Open the redirect target only when this task really executes. Opening it
    // here in the configuration block would create/truncate the file on every
    // Gradle invocation that passes -PstandardOutput, even if 'run' is not run.
    OutputStream redirectedOut = null
    doFirst {
      redirectedOut = new File(stdOutStr).newOutputStream()
      standardOutput = redirectedOut
    }
    // Flush and release the file once the benchmark has finished.
    // (doLast is not reached if the forked JVM fails; the stream is then
    // released when the build process ends.)
    doLast {
      redirectedOut?.close()
    }
  }

  // Remote debugging is off by default; flip 'enabled' to attach a debugger on
  // port 5005 (the forked JVM waits for the debugger because suspend = true).
  debugOptions {
    enabled = false
    port = 5005
    suspend = true
  }
}
/* Old "collation" Ant target:
gradle getTop100kWikiWordFiles run -PtaskAlg=conf/collation.alg -PstandardOutput=work/collation.benchmark.output.txt
perl -CSD scripts/collation.bm2jira.pl work/collation.benchmark.output.txt
*/
/* Old "shingle" Ant target:
gradle getReuters run -PtaskAlg=conf/shingle.alg -PstandardOutput=work/shingle.benchmark.output.txt
perl -CSD scripts/shingle.bm2jira.pl work/shingle.benchmark.output.txt
*/
// The remaining tasks just get / extract / prepare data
// Downloads the bzip2-compressed English Wikipedia dump into tempDir and
// decompresses it there.
task getEnWiki(type: Download) {
  def xmlName = "enwiki-20070527-pages-articles.xml"
  def archive = file("${tempDir}/${xmlName}.bz2")

  src "https://home.apache.org/~dsmiley/data/${xmlName}.bz2"
  dest archive
  overwrite false
  compress false

  // The decompressed XML file is this task's declared output.
  outputs.file file("${tempDir}/${xmlName}")

  doLast {
    // Decompress the downloaded archive into tempDir.
    ant.bunzip2(src: archive, dest: tempDir)
  }
}
// Downloads the bzip2-compressed, randomly ordered GeoNames dump into tempDir
// and decompresses it there.
//
// note: latest data is at: https://download.geonames.org/export/dump/allCountries.zip
//       and then randomize with: gsort -R -S 1500M file.txt > file_random.txt
//       and then compress with: bzip2 -9 -k file_random.txt
task getGeoNames(type: Download) {
  def txtName = "geonames_20130921_randomOrder_allCountries.txt"
  def archive = file("${tempDir}/${txtName}.bz2")

  src "https://home.apache.org/~dsmiley/data/${txtName}.bz2"
  dest archive
  overwrite false
  compress false

  // The decompressed text file is this task's declared output.
  outputs.file file("${tempDir}/${txtName}")

  doLast {
    // will chop off .bz2
    ant.bunzip2(src: archive, dest: tempDir)
  }
}
// Downloads the "top 100k Wikipedia words" tarball into tempDir and syncs its
// contents into workDir/top100k-out.
task getTop100kWikiWordFiles(type: Download) {
  def archiveUrl = "https://home.apache.org/~rmuir/wikipedia/top.100k.words.de.en.fr.uk.wikipedia.2009-11.tar.bz2"
  // The local archive keeps the last path segment of the URL as its file name.
  def archiveName = archiveUrl.substring(archiveUrl.lastIndexOf('/') + 1)
  def archive = file("${tempDir}/${archiveName}")
  def extractDir = file("${workDir}/top100k-out")

  src archiveUrl
  dest archive
  overwrite false
  compress false

  // The extracted directory is this task's declared output.
  outputs.dir extractDir

  doLast {
    project.sync {
      // tarTree will decompress the .tar.bz2 on the fly
      from tarTree(archive)
      into extractDir
    }
  }
}
// Downloads the Reuters-21578 collection into tempDir, untars it into
// workDir/reuters and then runs ExtractReuters to produce workDir/reuters-out.
task getReuters(type: Download) {
  // note: there is no HTTPS url and we don't care because this is merely test/perf data
  def archiveUrl = "http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz"
  // The local archive keeps the last path segment of the URL as its file name.
  def archiveName = archiveUrl.substring(archiveUrl.lastIndexOf('/') + 1)
  def archive = file("${tempDir}/${archiveName}")
  def rawDir = file("${workDir}/reuters")
  def extractedDir = file("${workDir}/reuters-out")

  src archiveUrl
  dest archive
  overwrite false
  compress false

  // ExtractReuters is launched from this project's runtime classpath in doLast.
  dependsOn sourceSets.main.runtimeClasspath

  // The directory written by ExtractReuters is this task's declared output.
  outputs.dir extractedDir

  doLast {
    project.sync {
      // tarTree will decompress the .tar.gz on the fly; top-level *.txt files are left out
      from(tarTree(archive)) {
        exclude '*.txt'
      }
      into rawDir
    }

    println "Extracting reuters to $extractedDir"
    extractedDir.deleteDir() // necessary

    // TODO consider porting ExtractReuters to groovy?
    project.javaexec {
      main = 'org.apache.lucene.benchmark.utils.ExtractReuters'
      classpath = sourceSets.main.runtimeClasspath
      maxHeapSize = '1G'
      args = [rawDir, extractedDir]
    }
  }
}

View File

@ -40,17 +40,17 @@ while (<>) {
} }
# Print out platform info # Print out platform info
print "JAVA:\n", `java -version 2>&1`, "\nOS:\n"; #print "JAVA:\n", `java -version 2>&1`, "\nOS:\n";
if ($^O =~ /win/i) { #if ($^O =~ /win/i) {
print "$^O\n"; # print "$^O\n";
eval { # eval {
require Win32; # require Win32;
print Win32::GetOSName(), "\n", Win32::GetOSVersion(), "\n"; # print Win32::GetOSName(), "\n", Win32::GetOSVersion(), "\n";
}; # };
die "Error loading Win32: $@" if ($@); # die "Error loading Win32: $@" if ($@);
} else { #} else {
print `uname -a 2>&1`; # print `uname -a 2>&1`;
} #}
print "\n||Language||java.text||ICU4J||KeywordAnalyzer||ICU4J Improvement||\n"; print "\n||Language||java.text||ICU4J||KeywordAnalyzer||ICU4J Improvement||\n";