- Made constructor private, added a few FIXMEs, etc.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150826 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2002-09-15 00:38:14 +00:00
parent c3a69fd77d
commit 69397847cd
1 changed files with 30 additions and 36 deletions

View File

@ -56,16 +56,16 @@ package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.ThreadPoolObserver;
import de.lanlab.larm.threads.ThreadPool;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import de.lanlab.larm.gui.*;
import de.lanlab.larm.util.*;
import de.lanlab.larm.storage.*;
import de.lanlab.larm.net.*;
import javax.swing.UIManager;
import HTTPClient.*;
import org.apache.oro.text.regex.MalformedPatternException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import javax.swing.UIManager;
/**
@ -109,11 +109,6 @@ public class FetcherMain
*/
protected RobotExclusionFilter reFilter;
/**
* the host manager keeps track of all hosts and is used by the filters.
*/
protected HostManager hostManager;
/**
* this rather flaky filter just filters out some URLs, i.e. different views
* of Apache the apache DirIndex module. Has to be made
@ -121,19 +116,28 @@ public class FetcherMain
*/
protected KnownPathsFilter knownPathsFilter;
/**
* the URL length filter filters URLs that are too long, i.e. because of errors
* in the implementation of dynamic web sites
*/
protected URLLengthFilter urlLengthFilter;
/**
* the host manager keeps track of all hosts and is used by the filters.
*/
protected HostManager hostManager;
/**
* this is the main document fetcher. It contains a thread pool that fetches the
* documents and stores them
*/
protected Fetcher fetcher;
/**
* the thread monitor once was only a monitoring tool, but now has become a
* vital part of the system that computes statistics and
* flushes the log file buffers
*/
protected ThreadMonitor monitor;
/**
@ -142,25 +146,19 @@ public class FetcherMain
*/
protected DocumentStorage storage;
/**
* the URL length filter filters URLs that are too long, i.e. because of errors
* in the implementation of dynamic web sites
*/
protected URLLengthFilter urlLengthFilter;
/**
* initializes all classes and registers anonymous adapter classes as
* listeners for fetcher events.
*
* @param nrThreads number of fetcher threads to be created
*/
public FetcherMain(int nrThreads)
private FetcherMain(int nrThreads)
{
// to make things clear, this method is commented a bit better than
// the rest of the program...
// this is the main message queue. handlers are registered with
// the queue, and whenever a message is put in it, they are passed to the
// the queue, and whenever a message is put in it, the message is passed to the
// filters in a "chain of responibility" manner. Every listener can decide
// to throw the message away
messageHandler = new MessageHandler();
@ -169,7 +167,6 @@ public class FetcherMain
// matter how it does it, whether it's in a file, in a database or
// whatever
// example for the (very slow) SQL Server storage:
// this.storage = new SQLServerStorage("sun.jdbc.odbc.JdbcOdbcDriver","jdbc:odbc:search","sa","...",nrThreads);
@ -190,6 +187,7 @@ public class FetcherMain
LuceneStorage luceneStorage = new LuceneStorage();
luceneStorage.setAnalyzer(new org.apache.lucene.analysis.de.GermanAnalyzer());
luceneStorage.setCreate(true);
// FIXME: index name and path need to be configurable
luceneStorage.setIndexName("luceneIndex");
luceneStorage.setFieldInfo("url", LuceneStorage.INDEX | LuceneStorage.STORE);
luceneStorage.setFieldInfo("content", LuceneStorage.INDEX | LuceneStorage.STORE | LuceneStorage.TOKEN);
@ -202,30 +200,24 @@ public class FetcherMain
// heat, which evaporates above the processor
// NullStorage();
// create the filters and add them to the message queue
urlScopeFilter = new URLScopeFilter();
urlVisitedFilter = new URLVisitedFilter(100000);
// dnsResolver = new DNSResolver();
hostManager = new HostManager(1000);
// create the filters and add them to the message queue
reFilter = new RobotExclusionFilter(hostManager);
fetcher = new Fetcher(nrThreads, storage, storage, hostManager);
urlScopeFilter = new URLScopeFilter();
urlVisitedFilter = new URLVisitedFilter(100000);
knownPathsFilter = new KnownPathsFilter();
urlLengthFilter = new URLLengthFilter(255);
// dnsResolver = new DNSResolver();
fetcher = new Fetcher(nrThreads, storage, storage, hostManager);
// prevent message box popups
HTTPConnection.setDefaultAllowUserInteraction(false);
// prevent GZipped files from being decoded
HTTPConnection.removeDefaultModule(HTTPClient.ContentEncodingModule.class);
// initialize the threads
fetcher.init();
@ -266,9 +258,9 @@ public class FetcherMain
/**
* Sets the RexString attribute of the FetcherMain object
* Sets the RexString attribute of <code>UrlScopeFilter</code>.
*
* @param restrictTo The new RexString value
* @param restrictTo the new RexString value
*/
public void setRexString(String restrictTo) throws MalformedPatternException
{
@ -292,6 +284,7 @@ public class FetcherMain
}
catch (Exception e)
{
// FIXME: replace with logging
System.out.println("Exception: " + e.getMessage());
e.printStackTrace();
}
@ -344,7 +337,7 @@ public class FetcherMain
/**
* The main program. parsed
* The main program.
*
* @param args The command line arguments
*/
@ -357,6 +350,8 @@ public class FetcherMain
boolean gui = false;
boolean showInfo = false;
System.out.println("LARM - LANLab Retrieval Machine - Fetcher - V 1.00 - (C) LANLab 2000-02");
// FIXME: consider using Jakarta Commons' CLI package for command line parameters
for (int i = 0; i < args.length; i++)
{
if (args[i].equals("-start"))
@ -419,7 +414,6 @@ public class FetcherMain
catch (MalformedURLException e)
{
System.out.println("Malformed URL");
}
}
}