2002-06-01 14:55:16 -04:00
|
|
|
<?xml version="1.0"?>
|
|
|
|
|
|
|
|
<project name="webcrawler-LARM" default="build" basedir=".">
|
|
|
|
|
2002-06-17 20:47:39 -04:00
|
|
|
<!-- User overrides are loaded first. Ant properties cannot be changed once
     set, so a value defined in build.properties takes precedence over the
     defaults declared below.
     NOTE(review): lucene.jar and oro.jar are used by the classpaths in this
     file but are not defined here; presumably they are supplied through
     build.properties or -D on the command line. Confirm. -->
<property file="${basedir}/build.properties"/>

<!-- Artifact identity: final.name is the base name of the generated jar. -->
<property name="name"            value="webcrawler_LARM"/>
<property name="version"         value="0.5"/>
<property name="final.name"      value="${name}-${version}"/>

<!-- Passed to the debug attribute of javac. -->
<property name="debug"           value="on"/>

<!-- Project directory layout. -->
<property name="src.dir"         value="./src"/>
<property name="lib.dir"         value="./libs"/>
<property name="logs.dir"        value="./logs"/>
<property name="cache.dir"       value="./cachingqueue"/>
<property name="build.dir"       value="./build"/>

<!-- Archive that the build target unzips into build.src before compiling. -->
<property name="src.httpclient"  value="${lib.dir}/HTTPClient.zip"/>

<!-- Compiler output goes to its own directory. It used to point at
     ${build.dir}/src, the same location as build.src, which mixed class files
     into the unpacked sources and put those sources into the jar. -->
<property name="build.classes"   value="${build.dir}/classes"/>
<property name="build.src"       value="${build.dir}/src"/>
<property name="build.encoding"  value="ISO-8859-1"/>

<!-- Default for the -threads argument of the run target. -->
<property name="threads"         value="15"/>
|
|
|
|
|
2002-06-30 10:58:27 -04:00
|
|
|
<!-- Build classpath: compiler output directory, the jars named by lucene.jar
     and oro.jar, and every jar located directly in ${lib.dir}. -->
<path id="classpath">
  <pathelement location="${build.classes}"/>
  <pathelement location="${lucene.jar}"/>
  <pathelement location="${oro.jar}"/>
  <fileset dir="${lib.dir}" includes="*.jar"/>
</path>
|
2002-06-01 14:55:16 -04:00
|
|
|
|
|
|
|
<!-- Runtime classpath: the jar produced by the build target, the jars named by
     lucene.jar and oro.jar, and every jar located directly in ${lib.dir}. -->
<path id="run.classpath">
  <pathelement location="${build.dir}/${final.name}.jar"/>
  <pathelement location="${lucene.jar}"/>
  <pathelement location="${oro.jar}"/>
  <fileset dir="${lib.dir}" includes="*.jar"/>
</path>
|
|
|
|
|
2002-06-17 20:47:39 -04:00
|
|
|
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- I N I T -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
|
|
|
|
|
|
|
|
<!-- Sets lucene.present when org.apache.lucene.document.Document can be found
     on the classpath given by ${lucene.jar}; otherwise the property stays
     unset. -->
<target name="init">
  <available classname="org.apache.lucene.document.Document"
             classpath="${lucene.jar}"
             property="lucene.present"/>
</target>
|
|
|
|
|
|
|
|
<!-- Prints a notice when init did not set lucene.present. This target only
     echoes; it does not stop the build. The message text is kept flush left
     so that no indentation ends up in the output. -->
<target name="lucene_check" unless="lucene.present" depends="init">
  <echo>
##################################################################
Lucene not found.
Lucene Home: ${lucene.jar}
##################################################################
</echo>
</target>
|
|
|
|
|
|
|
|
|
2002-06-01 14:55:16 -04:00
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- B U I L D -->
|
|
|
|
<!-- ================================================================== -->
|
2002-06-17 20:47:39 -04:00
|
|
|
<!-- Creates the output directories, unzips ${src.httpclient} into
     ${build.src}, compiles ${src.dir} together with ${build.src} into
     ${build.classes}, and packs ${build.classes} into
     ${build.dir}/${final.name}.jar. -->
<target name="build"
        description="-> builds jar file"
        depends="init,lucene_check">
  <mkdir dir="${build.dir}"/>
  <mkdir dir="${build.classes}"/>
  <mkdir dir="${build.src}"/>

  <unzip src="${src.httpclient}" dest="${build.src}" overwrite="false"/>

  <!-- Two source roots: the project sources and the unpacked archive. -->
  <javac srcdir="${src.dir}:${build.src}"
         destdir="${build.classes}"
         excludes="**/CVS/*"
         encoding="${build.encoding}"
         debug="${debug}">
    <classpath refid="classpath"/>
  </javac>

  <jar jarfile="${build.dir}/${final.name}.jar" basedir="${build.classes}"/>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Check Syntax for Run Task -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Prints usage help for the run target. It is skipped when the start
     property is set, i.e. when the caller supplied -Dstart.
     The angle brackets in the message are written as &lt; and &gt; because a
     literal "less than" sign inside element content would be read as the start
     of a tag and make this build file ill-formed. The printed text is the same. -->
<target name="checksyntax" unless="start">
  <echo>
use run with the following syntax

ant run -Dstart=&lt;URL&gt; -Drestrictto=&lt;Pattern&gt; [-Dthreads=&lt;Thread Count&gt;]

default value for threads is 15
</echo>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- R U N -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Runs the command-line crawler (de.lanlab.larm.fetcher.FetcherMain) in a
     forked JVM. The target body executes only when the start property is set;
     its dependencies (build, checksyntax) are processed regardless.
     The logs directory is removed and recreated before every run.
     NOTE(review): restrictto is passed through unchecked; if it is not set,
     the unexpanded property reference is handed to the crawler. Confirm
     whether that is acceptable. -->
<target name="run" depends="build, checksyntax" if="start"
        description="-> runs command-line version of the crawler">
  <delete dir="${logs.dir}"/>
  <mkdir dir="${logs.dir}"/>

  <java classname="de.lanlab.larm.fetcher.FetcherMain" fork="yes">
    <jvmarg value="-server"/>
    <!-- Heap sizes take a single-letter unit (k, m, g). The former value
         "-Xmx400mb" is rejected by the JVM as an invalid maximum heap size. -->
    <jvmarg value="-Xmx400m"/>
    <arg value="-start"/>
    <arg value="${start}"/>
    <arg value="-restrictto"/>
    <arg value="${restrictto}"/>
    <arg value="-threads"/>
    <arg value="${threads}"/>
    <classpath refid="run.classpath"/>
  </java>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- C L E A N L A S T R U N -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Deletes the directories named by logs.dir and cache.dir. -->
<target description="-> cleans files created by each run of the crawler"
        name="cleanlastrun">
  <delete dir="${logs.dir}"/>
  <delete dir="${cache.dir}"/>
</target>
|
|
|
|
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- C L E A N A L L -->
|
|
|
|
<!-- ================================================================== -->
|
|
|
|
<!-- Runs cleanlastrun first, then deletes the build directory as well. -->
<target name="cleanall"
        description="-> cleans all build and run files"
        depends="cleanlastrun">
  <delete dir="${build.dir}"/>
</target>
|
|
|
|
</project>
|