<?xml version="1.0"?>
|
|
|
|
<project name="benchmark" default="default">
|
|
|
|
|
|
|
|
<description>
  Lucene Benchmarking Contributions
</description>
|
|
|
|
|
|
|
|
<!-- Shared contrib build logic. Names used below but not defined in this file
     (e.g. the compile, common.init and common.test targets, ${common.dir},
     ${build.dir}) are expected to come from this import; confirm there. -->
<import file="../contrib-build.xml"/>
|
2008-06-10 07:58:00 -04:00
|
|
|
<!-- Directory the downloaded collections are unpacked into and read from.
     "location" makes Ant expand the value to an absolute path. -->
<property name="working.dir" location="work"/>
|
2007-04-17 03:11:04 -04:00
|
|
|
|
|
|
|
|
|
|
|
<!--
  Probes the file system and records which benchmark collections have already
  been downloaded (archives under temp/) or unpacked (mostly under
  ${working.dir}). Each <available> sets its property only when the path
  exists; the get-*, expand-* and extract-* targets in this file use those
  properties in their "unless" guards to skip work that is already done.
-->
<target name="check-files">
  <!-- news20 collection -->
  <available file="temp/news20.tar.gz" property="news20.exists"/>
  <available file="${working.dir}/20_newsgroup" property="news20.expanded"/>

  <!-- reuters21578 collection: archive, unpacked tree, extracted documents -->
  <available file="temp/reuters21578.tar.gz" property="reuters.exists"/>
  <available file="${working.dir}/reuters" property="reuters.expanded"/>
  <available file="${working.dir}/reuters-out" property="reuters.extracted"/>

  <!-- 20news-18828 collection -->
  <available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/>
  <available file="${working.dir}/20news-18828" property="20news-18828.expanded"/>

  <!-- mini_newsgroups collection. get-mini-news is guarded by mini.exists,
       so the archive must be probed here; without this probe the guard
       never fires and the archive is fetched again on every invocation. -->
  <available file="temp/mini_newsgroups.tar.gz" property="mini.exists"/>
  <available file="${working.dir}/mini_newsgroups" property="mini.expanded"/>

  <!-- English Wikipedia dump: compressed, decompressed, extracted text -->
  <available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/>
  <available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/>
  <available file="${working.dir}/enwiki.txt" property="enwiki.extracted"/>
</target>
|
|
|
|
|
|
|
|
<!-- Ensures the Wikipedia dump is present under temp/: runs check-files
     first, then delegates to the guarded download and decompress targets. -->
<target name="enwiki-files"
        depends="check-files">
  <mkdir dir="temp" />
  <!-- each callee skips itself when its "unless" property is set -->
  <antcall target="get-enwiki" />
  <antcall target="expand-enwiki" />
</target>
|
|
|
|
|
|
|
|
<!-- Downloads the compressed Wikipedia dump into temp/ unless
     enwiki.exists is set. -->
<target name="get-enwiki"
        unless="enwiki.exists">
  <get dest="temp/enwiki-20070527-pages-articles.xml.bz2"
       src="http://people.apache.org/~gsingers/wikipedia/enwiki-20070527-pages-articles.xml.bz2" />
</target>
|
|
|
|
|
|
|
|
<!-- Decompresses the Wikipedia dump into temp/ unless enwiki.expanded
     is set. -->
<target name="expand-enwiki"
        unless="enwiki.expanded">
  <bunzip2 dest="temp"
           src="temp/enwiki-20070527-pages-articles.xml.bz2" />
</target>
|
|
|
|
|
2007-04-17 03:11:04 -04:00
|
|
|
<!--
  Downloads the news20 archive into temp/.
  Guarded by news20.exists, the property check-files sets when
  temp/news20.tar.gz is already present. The guard previously tested
  20news-18828.exists, which describes a different archive, so this download
  was skipped or repeated based on the wrong file.
-->
<target name="get-news-20" unless="news20.exists">
  <get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz"
       dest="temp/news20.tar.gz"/>
</target>
|
|
|
|
<!-- Downloads the reuters21578 archive into temp/ unless reuters.exists
     is set. -->
<target name="get-reuters"
        unless="reuters.exists">
  <get dest="temp/reuters21578.tar.gz"
       src="http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz" />
</target>
|
|
|
|
|
|
|
|
<!-- Unpacks the news20 archive into ${working.dir} unless news20.expanded
     is set: gunzip to temp/news20.tar first, then untar. -->
<target name="expand-news-20"
        unless="news20.expanded">
  <gunzip dest="temp" src="temp/news20.tar.gz" />
  <untar dest="${working.dir}" src="temp/news20.tar" />
</target>
|
|
|
|
<!-- Unpacks the reuters21578 archive into ${working.dir}/reuters unless
     reuters.expanded is set, then removes the top-level *.txt files from
     the unpacked directory. -->
<target name="expand-reuters"
        unless="reuters.expanded">
  <gunzip dest="temp" src="temp/reuters21578.tar.gz" />
  <mkdir dir="${working.dir}/reuters" />
  <untar dest="${working.dir}/reuters" src="temp/reuters21578.tar" />
  <!-- "*.txt" matches only files directly inside the directory -->
  <delete>
    <fileset dir="${working.dir}/reuters" includes="*.txt" />
  </delete>
</target>
|
|
|
|
<!-- Runs ExtractReuters in a forked JVM (1024M max heap) with
     ${working.dir}/reuters as the first argument and
     ${working.dir}/reuters-out as the second. Skipped when
     reuters.extracted is set. -->
<target name="extract-reuters"
        depends="check-files"
        unless="reuters.extracted">
  <mkdir dir="${working.dir}/reuters-out" />
  <java fork="true"
        maxmemory="1024M"
        classname="org.apache.lucene.benchmark.utils.ExtractReuters">
    <classpath refid="run.classpath" />
    <!-- argument order matters: first path, then second path -->
    <arg file="${working.dir}/reuters" />
    <arg file="${working.dir}/reuters-out" />
  </java>
</target>
|
|
|
|
<!-- Downloads the 20news-18828 archive into temp/ unless
     20news-18828.exists is set. -->
<target name="get-20news-18828"
        unless="20news-18828.exists">
  <get dest="temp/20news-18828.tar.gz"
       src="http://people.csail.mit.edu/u/j/jrennie/public_html/20Newsgroups/20news-18828.tar.gz" />
</target>
|
|
|
|
<!-- Unpacks the 20news-18828 archive into ${working.dir} unless
     20news-18828.expanded is set: gunzip to temp/ first, then untar. -->
<target name="expand-20news-18828"
        unless="20news-18828.expanded">
  <gunzip dest="temp" src="temp/20news-18828.tar.gz" />
  <untar dest="${working.dir}" src="temp/20news-18828.tar" />
</target>
|
|
|
|
<!-- Downloads the mini_newsgroups archive into temp/ unless mini.exists
     is set. -->
<target name="get-mini-news"
        unless="mini.exists">
  <get dest="temp/mini_newsgroups.tar.gz"
       src="http://kdd.ics.uci.edu/databases/20newsgroups/mini_newsgroups.tar.gz" />
</target>
|
|
|
|
<!-- Unpacks the mini_newsgroups archive into ${working.dir} unless
     mini.expanded is set: gunzip to temp/ first, then untar. -->
<target name="expand-mini-news"
        unless="mini.expanded">
  <gunzip dest="temp" src="temp/mini_newsgroups.tar.gz" />
  <untar dest="${working.dir}" src="temp/mini_newsgroups.tar" />
</target>
|
|
|
|
|
|
|
|
<!-- Prepares the Reuters collection: after check-files has recorded what is
     already on disk, runs the download, unpack and extract targets in that
     order. Each of them skips itself when its guard property is set. -->
<target name="get-files"
        depends="check-files">
  <mkdir dir="temp" />
  <antcall target="get-reuters" />
  <antcall target="expand-reuters" />
  <antcall target="extract-reuters" />
</target>
|
|
|
|
|
|
|
|
<!-- Classpath "classpath": class output directories under
     ${common.dir}/build (core java, demo, and the highlighter, memory and
     fast-vector-highlighter contribs) plus every jar found under lib/. -->
<path id="classpath">
  <!-- core and demo classes -->
  <pathelement path="${common.dir}/build/classes/java" />
  <pathelement path="${common.dir}/build/classes/demo" />
  <!-- contrib modules -->
  <pathelement path="${common.dir}/build/contrib/highlighter/classes/java" />
  <pathelement path="${common.dir}/build/contrib/memory/classes/java" />
  <pathelement path="${common.dir}/build/contrib/fast-vector-highlighter/classes/java" />
  <!-- bundled jars, searched recursively -->
  <fileset dir="lib" includes="**/*.jar" />
</path>
|
|
|
|
<!-- Runtime classpath for the forked benchmark JVMs: everything in
     "classpath", then ${build.dir}/classes/java, then the location named by
     ${benchmark.ext.classpath}. -->
<path id="run.classpath">
  <path refid="classpath" />
  <pathelement location="${build.dir}/classes/java" />
  <!-- NOTE(review): benchmark.ext.classpath is not defined in this file;
       presumably supplied by the caller or an imported build file. -->
  <pathelement location="${benchmark.ext.classpath}" />
</path>
|
|
|
|
|
2008-06-10 07:58:00 -04:00
|
|
|
<!-- Algorithm file handed to the Benchmark main class by run-task;
     override with -Dtask.alg=your-algorithm-file. -->
<property name="task.alg" location="conf/micro-standard.alg"/>
|
2007-04-17 03:11:04 -04:00
|
|
|
<!-- Maximum heap (the java task's maxmemory) for the JVM forked by run-task;
     override with -Dtask.mem=java-max-mem. -->
<property name="task.mem" value="140M"/>
|
|
|
|
|
2008-06-10 07:58:00 -04:00
|
|
|
<!-- Runs the byTask Benchmark main class in a forked JVM, passing
     ${task.alg} as its only argument and using ${task.mem} as the maximum
     heap. Runs compile, check-files and get-files first. -->
<target name="run-task"
        description="Run compound penalty perf test (optional: -Dtask.alg=your-algorithm-file -Dtask.mem=java-max-mem)"
        depends="compile,check-files,get-files">
  <echo>Working Directory: ${working.dir}</echo>
  <java fork="true"
        maxmemory="${task.mem}"
        classname="org.apache.lucene.benchmark.byTask.Benchmark">
    <classpath refid="run.classpath" />
    <arg file="${task.alg}" />
  </java>
</target>
|
|
|
|
|
2007-06-30 22:19:10 -04:00
|
|
|
<!-- Runs the byTask Benchmark main class on conf/extractWikipedia.alg in a
     forked JVM with 1024M max heap and assertions enabled. Runs compile,
     check-files and enwiki-files first. -->
<target name="enwiki"
        depends="compile,check-files,enwiki-files">
  <echo>Working Directory: ${working.dir}</echo>
  <java fork="true"
        maxmemory="1024M"
        classname="org.apache.lucene.benchmark.byTask.Benchmark">
    <assertions>
      <enable />
    </assertions>
    <classpath refid="run.classpath" />
    <arg file="conf/extractWikipedia.alg" />
  </java>
</target>
|
|
|
|
|
2007-04-17 03:11:04 -04:00
|
|
|
<!-- Invokes the compile-demo target of ${common.dir}/build.xml. -->
<target name="compile-demo">
  <subant target="compile-demo">
    <fileset dir="${common.dir}">
      <include name="build.xml" />
    </fileset>
  </subant>
</target>
|
|
|
|
<!-- Invokes the compile target of the highlighter contrib's build.xml. -->
<target name="compile-highlighter">
  <subant target="compile">
    <fileset dir="${common.dir}/contrib/highlighter">
      <include name="build.xml" />
    </fileset>
  </subant>
</target>
|
2009-08-05 08:09:48 -04:00
|
|
|
<!-- Invokes the compile target of the memory contrib's build.xml. -->
<target name="compile-memory">
  <subant target="compile">
    <fileset dir="${common.dir}/contrib/memory">
      <include name="build.xml" />
    </fileset>
  </subant>
</target>
|
2009-08-04 16:24:02 -04:00
|
|
|
<!-- Invokes the compile target of the fast-vector-highlighter contrib's
     build.xml. -->
<target name="compile-vector-highlighter">
  <subant target="compile">
    <fileset dir="${common.dir}/contrib/fast-vector-highlighter">
      <include name="build.xml" />
    </fileset>
  </subant>
</target>
|
2007-04-17 03:11:04 -04:00
|
|
|
|
2009-08-05 08:09:48 -04:00
|
|
|
<!-- Overrides init: runs common.init, compiles the demo, memory, highlighter
     and fast-vector-highlighter modules that "classpath" refers to, then
     runs check-files. The target has no body of its own. -->
<target name="init"
        depends="common.init,compile-demo,compile-memory,compile-highlighter,compile-vector-highlighter,check-files" />
|
2007-04-17 03:11:04 -04:00
|
|
|
|
2007-06-27 02:49:38 -04:00
|
|
|
<!-- The tests need the online collections (Reuters), so get-files runs
     before the actual test run is delegated to common.test. Path references
     defined in this file are passed along via inheritRefs. -->
<target name="test"
        depends="init,get-files">
  <antcall inheritRefs="true" target="common.test" />
</target>
|
|
|
|
|
2007-04-17 03:11:04 -04:00
|
|
|
</project>
|