mirror of https://github.com/apache/lucene.git
fixed build issues
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150874 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c9f9dc68d3
commit
61aa56d778
|
@ -1,5 +1,8 @@
|
|||
$Id$
|
||||
|
||||
2003-04-11 (cmarschner)
|
||||
* fixed build issues
|
||||
|
||||
2002-06-18 (cmarschner)
|
||||
* added an experimental version of Lucene storage. see FetcherMain.java for details how to use it
|
||||
LuceneStorage simply saves all fields as specified in WebDocument. add a converter to the
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
lucene.jar=/usr/local/jakarta-lucene/lucene.jar
|
||||
oro.jar=/usr/local/jakarta-oro/oro.jar
|
||||
debug=on
|
|
@ -1,23 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
#clean
|
||||
echo cleaning
|
||||
rm -r build
|
||||
rm -r classes
|
||||
rm -r cachingqueue
|
||||
rm -r logs
|
||||
|
||||
#build
|
||||
echo making build directory
|
||||
mkdir build
|
||||
cd build
|
||||
echo extracting http client
|
||||
jar xvf ../libs/HTTPClient.zip >/dev/nul
|
||||
cd ..
|
||||
cp -r src/* build
|
||||
mkdir classes
|
||||
echo compiling
|
||||
javac -g -d classes -sourcepath build build/HTTPClient/*.java
|
||||
javac -g -classpath ./libs/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java
|
||||
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
rmdir /s /q -r logs
|
||||
mkdir logs
|
||||
java -server -Xmx400mb -classpath classes;libs/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto http://.*\.uni-muenchen\.de.* -threads 15
|
|
@ -1,41 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
#
|
||||
# $Id$
|
||||
#
|
||||
|
||||
BASE_DIR=./runtime
|
||||
LOG_DIR=$BASE_DIR/logs
|
||||
CACHE_DIR=$BASE_DIR/cachingqueue
|
||||
CLASSPATH=build/classes:libs/jakarta-oro-2.0.5.jar:libs/HTTPClient.zip:/usr/local/jakarta-lucene/lucene.jar
|
||||
SLEEP_TIME=2
|
||||
|
||||
if [ $# -lt 4 ]
|
||||
then
|
||||
echo "Usage: `basename $0` <start url> <score regex> <# threads> <max mem>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
START_URL=$1
|
||||
SCOPE_REGEX=$2
|
||||
THREAD_COUNT=$3
|
||||
MAX_MEM=$4
|
||||
|
||||
|
||||
echo Removing $LOG_DIR...
|
||||
sleep $SLEEP_TIME
|
||||
rm -r $LOG_DIR
|
||||
echo Removing $CACHE_DIR...
|
||||
sleep $SLEEP_TIME
|
||||
rm -r $CACHE_DIR
|
||||
echo Creating $LOG_DIR
|
||||
sleep $SLEEP_TIME
|
||||
mkdir -p $LOG_DIR
|
||||
echo Creating $CACHE_DIR
|
||||
sleep $SLEEP_TIME
|
||||
mkdir -p $CACHE_DIR
|
||||
|
||||
CMD="java -server -Xmx$MAX_MEM -classpath $CLASSPATH de.lanlab.larm.fetcher.FetcherMain -start $START_URL -restrictto $SCOPE_REGEX -threads $THREAD_COUNT"
|
||||
echo Starting LARM with: $CMD
|
||||
|
||||
$CMD
|
|
@ -65,7 +65,6 @@ import java.io.*;
|
|||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import javax.swing.UIManager;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -206,7 +205,7 @@ public class FetcherMain
|
|||
// file number, the offset within that file, and the document's length
|
||||
|
||||
// FIXME: default constructor for all storages + bean access methods
|
||||
storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ false,
|
||||
storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ true,
|
||||
/* page file prefix */ "logs/pagefile"));
|
||||
storage.addLinkStorage(new LinkLogStorage(linksLog));
|
||||
storage.addLinkStorage(messageHandler);
|
||||
|
@ -234,7 +233,10 @@ public class FetcherMain
|
|||
// dnsResolver = new DNSResolver();
|
||||
hostManager = new HostManager(1000);
|
||||
hostResolver = new HostResolver();
|
||||
hostResolver.initFromFile(hostResolverFile);
|
||||
if(hostResolverFile != null && !"".equals(hostResolverFile))
|
||||
{
|
||||
hostResolver.initFromFile(hostResolverFile);
|
||||
}
|
||||
hostManager.setHostResolver(hostResolver);
|
||||
|
||||
// hostManager.addSynonym("www.fachsprachen.uni-muenchen.de", "www.fremdsprachen.uni-muenchen.de");
|
||||
|
@ -248,6 +250,10 @@ public class FetcherMain
|
|||
|
||||
fetcher = new Fetcher(nrThreads, storage, storage, hostManager);
|
||||
|
||||
urlLengthFilter = new URLLengthFilter(500, lengthLog);
|
||||
|
||||
//knownPathsFilter = new KnownPathsFilter()
|
||||
|
||||
// prevent message box popups
|
||||
HTTPConnection.setDefaultAllowUserInteraction(false);
|
||||
|
||||
|
@ -278,7 +284,7 @@ public class FetcherMain
|
|||
messageHandler.addListener(urlScopeFilter);
|
||||
messageHandler.addListener(reFilter);
|
||||
messageHandler.addListener(urlVisitedFilter);
|
||||
messageHandler.addListener(knownPathsFilter);
|
||||
//messageHandler.addListener(knownPathsFilter);
|
||||
|
||||
messageHandler.addListener(fetcher);
|
||||
|
||||
|
@ -484,7 +490,7 @@ public class FetcherMain
|
|||
// replaced by HTTPClient
|
||||
|
||||
FetcherMain f = new FetcherMain(nrThreads, hostResolverFile);
|
||||
if (showInfo || "".equals(hostResolverFile) || (startURLs.isEmpty() && gui == false))
|
||||
if (showInfo || (startURLs.isEmpty() && gui == false))
|
||||
{
|
||||
System.out.println("The LARM crawler\n" +
|
||||
"\n" +
|
||||
|
|
Loading…
Reference in New Issue