fixed build issues

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150874 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
cmarschner 2003-04-11 13:43:41 +00:00
parent c9f9dc68d3
commit 61aa56d778
6 changed files with 14 additions and 75 deletions

View File

@ -1,5 +1,8 @@
$Id$
2003-04-11 (cmarschner)
* fixed build issues
2002-06-18 (cmarschner)
* added an experimental version of Lucene storage. see FetcherMain.java for details how to use it
LuceneStorage simply saves all fields as specified in WebDocument. add a converter to the

View File

@ -1,3 +0,0 @@
lucene.jar=/usr/local/jakarta-lucene/lucene.jar
oro.jar=/usr/local/jakarta-oro/oro.jar
debug=on

View File

@ -1,23 +0,0 @@
#!/bin/sh
#clean
echo cleaning
rm -r build
rm -r classes
rm -r cachingqueue
rm -r logs
#build
echo making build directory
mkdir build
cd build
echo extracting http client
jar xvf ../libs/HTTPClient.zip >/dev/nul
cd ..
cp -r src/* build
mkdir classes
echo compiling
javac -g -d classes -sourcepath build build/HTTPClient/*.java
javac -g -classpath ./libs/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java

View File

@ -1,3 +0,0 @@
rmdir /s /q -r logs
mkdir logs
java -server -Xmx400mb -classpath classes;libs/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto http://.*\.uni-muenchen\.de.* -threads 15

View File

@ -1,41 +0,0 @@
#!/bin/sh
#
# $Id$
#
BASE_DIR=./runtime
LOG_DIR=$BASE_DIR/logs
CACHE_DIR=$BASE_DIR/cachingqueue
CLASSPATH=build/classes:libs/jakarta-oro-2.0.5.jar:libs/HTTPClient.zip:/usr/local/jakarta-lucene/lucene.jar
SLEEP_TIME=2
if [ $# -lt 4 ]
then
echo "Usage: `basename $0` <start url> <score regex> <# threads> <max mem>" >&2
exit 1
fi
START_URL=$1
SCOPE_REGEX=$2
THREAD_COUNT=$3
MAX_MEM=$4
echo Removing $LOG_DIR...
sleep $SLEEP_TIME
rm -r $LOG_DIR
echo Removing $CACHE_DIR...
sleep $SLEEP_TIME
rm -r $CACHE_DIR
echo Creating $LOG_DIR
sleep $SLEEP_TIME
mkdir -p $LOG_DIR
echo Creating $CACHE_DIR
sleep $SLEEP_TIME
mkdir -p $CACHE_DIR
CMD="java -server -Xmx$MAX_MEM -classpath $CLASSPATH de.lanlab.larm.fetcher.FetcherMain -start $START_URL -restrictto $SCOPE_REGEX -threads $THREAD_COUNT"
echo Starting LARM with: $CMD
$CMD

View File

@ -65,7 +65,6 @@ import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import javax.swing.UIManager;
/**
@ -206,7 +205,7 @@ public class FetcherMain
// file number, the offset within that file, and the document's length
// FIXME: default constructor for all storages + bean access methods
storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ false,
storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ true,
/* page file prefix */ "logs/pagefile"));
storage.addLinkStorage(new LinkLogStorage(linksLog));
storage.addLinkStorage(messageHandler);
@ -234,7 +233,10 @@ public class FetcherMain
// dnsResolver = new DNSResolver();
hostManager = new HostManager(1000);
hostResolver = new HostResolver();
hostResolver.initFromFile(hostResolverFile);
if(hostResolverFile != null && !"".equals(hostResolverFile))
{
hostResolver.initFromFile(hostResolverFile);
}
hostManager.setHostResolver(hostResolver);
// hostManager.addSynonym("www.fachsprachen.uni-muenchen.de", "www.fremdsprachen.uni-muenchen.de");
@ -248,6 +250,10 @@ public class FetcherMain
fetcher = new Fetcher(nrThreads, storage, storage, hostManager);
urlLengthFilter = new URLLengthFilter(500, lengthLog);
//knownPathsFilter = new KnownPathsFilter()
// prevent message box popups
HTTPConnection.setDefaultAllowUserInteraction(false);
@ -278,7 +284,7 @@ public class FetcherMain
messageHandler.addListener(urlScopeFilter);
messageHandler.addListener(reFilter);
messageHandler.addListener(urlVisitedFilter);
messageHandler.addListener(knownPathsFilter);
//messageHandler.addListener(knownPathsFilter);
messageHandler.addListener(fetcher);
@ -484,7 +490,7 @@ public class FetcherMain
// replaced by HTTPClient
FetcherMain f = new FetcherMain(nrThreads, hostResolverFile);
if (showInfo || "".equals(hostResolverFile) || (startURLs.isEmpty() && gui == false))
if (showInfo || (startURLs.isEmpty() && gui == false))
{
System.out.println("The LARM crawler\n" +
"\n" +