2002-06-01 14:55:16 -04:00
|
|
|
<?xml version="1.0"?>
|
|
|
|
|
2002-09-14 15:16:28 -04:00
|
|
|
<!-- $Id$ -->
|
2002-09-14 20:58:07 -04:00
|
|
|
<project name="webcrawler-LARM" default="compile" basedir=".">
|
2002-06-01 14:55:16 -04:00
|
|
|
|
2002-09-14 15:16:28 -04:00
|
|
|
<!-- Load per-user overrides before the shipped defaults: Ant properties are
     immutable once set, so a value from build.properties takes precedence
     over the same key in default.build.properties. -->
<property file="${basedir}/build.properties"/>
<property file="${basedir}/default.build.properties"/>
|
|
|
|
|
|
|
|
<!-- <property name="logs.dir" value="./logs"/> -->
|
|
|
|
<!-- <property name="cache.dir" value="./cachingqueue"/> -->
|
|
|
|
<!-- <property name="threads" value="15"/> -->
|
|
|
|
|
|
|
|
<!-- Build classpath -->
|
|
|
|
<!-- Classpath handed to javac: the compiled classes, the Lucene and ORO jars,
     and every jar located directly inside ${lib.dir}. -->
<path id="classpath">
  <pathelement location="${build.classes}"/>
  <pathelement location="${lucene.jar}"/>
  <pathelement location="${oro.jar}"/>
  <fileset dir="${lib.dir}" includes="*.jar"/>
</path>
|
|
|
|
|
|
|
|
<!-- Runtime classpath: same libraries as the compile classpath, but with the
     packaged ${final.name}.jar in place of the raw class directory. -->
<path id="run.classpath">
  <pathelement location="${build.dir}/${final.name}.jar"/>
  <pathelement location="${lucene.jar}"/>
  <pathelement location="${oro.jar}"/>
  <fileset dir="${lib.dir}" includes="*.jar"/>
</path>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- I N I T -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Probes for Lucene and creates the build output directories. -->
<target name="init">
  <!-- lucene.present is set only if the Document class can be loaded from
       ${lucene.jar}; lucene_check keys off this property. -->
  <available classname="org.apache.lucene.document.Document"
             classpath="${lucene.jar}"
             property="lucene.present"/>

  <mkdir dir="${build.dir}"/>
  <mkdir dir="${build.classes}"/>
  <mkdir dir="${build.src}"/>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
<!-- L U C E N E   C H E C K                                            -->
<!-- ================================================================== -->
|
|
|
|
<!-- Prints a warning banner when init did not set lucene.present.
     This target only reports; it contains nothing that stops the build. -->
<target name="lucene_check" unless="lucene.present" depends="init">
  <!-- The echo body is printed as written, so the message is kept flush left. -->
  <echo>
##################################################################
Lucene not found.
Lucene Jar: ${lucene.jar}
##################################################################
</echo>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
2002-09-14 20:58:07 -04:00
|
|
|
<!-- C O M P I L E -->
|
2002-09-14 15:16:28 -04:00
|
|
|
<!-- ================================================================== -->
|
2002-09-14 20:58:07 -04:00
|
|
|
<!-- Default target: unpacks the HTTPClient archive into the build source tree
     and compiles it together with the project sources. -->
<target name="compile"
        depends="init,lucene_check"
        description="-> compiles sources">

  <!-- Unzip HTTPClient into ${build.src}; overwrite is switched off. -->
  <unzip src="${src.httpclient}" dest="${build.src}" overwrite="false"/>

  <!-- Both source roots are compiled in one pass into ${build.classes}. -->
  <javac srcdir="${src.dir}:${build.src}"
         destdir="${build.classes}"
         excludes="**/CVS/*"
         encoding="${build.encoding}"
         debug="${debug}"
         deprecation="${deprecation}">
    <classpath refid="classpath"/>
  </javac>
</target>
|
2002-09-14 15:16:28 -04:00
|
|
|
|
2002-09-14 20:58:07 -04:00
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- D I S T -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Packages everything under ${build.classes} into
     ${build.dir}/${final.name}.jar, the jar that run.classpath points at. -->
<target name="dist"
        depends="compile"
        description="-> builds jar file">
  <!-- destfile is the current name of the output attribute; the former
       jarfile spelling is deprecated in the Ant jar task. -->
  <jar destfile="${build.dir}/${final.name}.jar"
       basedir="${build.classes}"/>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Check Syntax for Run Task -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- <target name="checksyntax" unless="start"> -->
|
|
|
|
<!-- <echo> -->
|
|
|
|
<!-- use run with the following syntax -->
|
|
|
|
|
|
|
|
<!-- ant run -Dstart=<URL> -Drestrictto=<Pattern> [-Dthreads=<Thread Count>] -->
|
|
|
|
|
|
|
|
<!-- default value for threads is 15 -->
|
|
|
|
<!-- </echo> -->
|
|
|
|
<!-- </target> -->
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- R U N -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- <target name="run" depends="dist, checksyntax" if="start" -->
|
|
|
|
<!-- description="-> runs command-line version of the crawler"> -->
|
|
|
|
<!-- <delete dir="${logs.dir}"/> -->
|
|
|
|
<!-- <mkdir dir="${logs.dir}"/> -->
|
|
|
|
<!-- <java -->
|
|
|
|
<!-- classname="de.lanlab.larm.fetcher.FetcherMain" -->
|
|
|
|
<!-- fork="yes"> -->
|
|
|
|
<!-- <jvmarg value="-server"/> -->
|
|
|
|
<!-- <jvmarg value="-Xmx400m"/> -->
|
|
|
|
<!-- <arg value="-start"/> -->
|
|
|
|
<!-- <arg value="${start}"/> -->
|
|
|
|
<!-- <arg value="-restrictto"/> -->
|
|
|
|
<!-- <arg value="${restrictto}"/> -->
|
|
|
|
<!-- <arg value="-threads"/> -->
|
|
|
|
<!-- <arg value="${threads}"/> -->
|
|
|
|
<!-- <classpath refid="run.classpath"/> -->
|
|
|
|
<!-- </java> -->
|
|
|
|
<!-- </target> -->
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- C L E A N L A S T R U N -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Removes the per-run output directories of the crawler.
     NOTE(review): logs.dir and cache.dir only appear in commented-out
     property lines in this file; presumably they are supplied by one of the
     properties files - confirm before relying on this target. -->
<target name="cleanlastrun"
        description="-> cleans files created by each run of the crawler">
  <delete dir="${logs.dir}"/>
  <delete dir="${cache.dir}"/>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- C L E A N B U I L D -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Deletes the whole ${build.dir} tree, the directory init creates and
     dist writes the jar into. -->
<target name="clean"
        description="-> cleans all build files">
  <delete dir="${build.dir}"/>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- C L E A N A L L -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Aggregate target with no tasks of its own: runs clean, then cleanlastrun. -->
<target name="cleanall"
        depends="clean, cleanlastrun"
        description="-> cleans all build and run files"/>
|
2002-06-01 14:55:16 -04:00
|
|
|
</project>
|