Initial revision

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2002-05-04 13:58:45 +00:00
parent 95eb8721c6
commit cf2fa142c8
118 changed files with 21884 additions and 0 deletions
sandbox/contributions/webcrawler-LARM
build.sh clean.sh cleanlastrun.sh og-build.sh run.sh
src
HTTPClient
de/lanlab/larm
hplb

View File

@ -0,0 +1,23 @@
#!/bin/sh
# Clean rebuild of the crawler together with the bundled HTTPClient sources.
# Run from the project root: the script uses the relative paths ./src and ./lib.

# clean
# -f keeps a fresh checkout (nothing built yet) from reporting errors
echo cleaning
rm -rf build
rm -rf classes
rm -rf cachingqueue
rm -rf logs

# build
echo making build directory
mkdir build
# stop if the directory cannot be entered; otherwise the "cd .." below would
# leave the project root and the copy/compile steps would run in the wrong place
cd build || exit 1
echo extracting http client
jar xvf ../lib/HTTPClient.zip >/dev/null
cd ..
cp -r src/* build
mkdir classes
echo compiling
# do not go on to compile the crawler when HTTPClient failed to compile
javac -g -d classes -sourcepath build build/HTTPClient/*.java || exit 1
javac -g -classpath ./lib/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java

View File

@ -0,0 +1,5 @@
#!/bin/sh
# Removes the output of the last crawler run and of the last build.
# Run from the project root: cleanlastrun.sh is invoked through a relative path.
./cleanlastrun.sh
# -f: do not complain when the directories are already gone
rm -rf build
rm -rf classes

View File

@ -0,0 +1,4 @@
#!/bin/sh
# Removes the directories written during a crawler run (logs, cachingqueue).
# -f: do not complain when a directory does not exist (e.g. nothing was run yet)
rm -rf logs
rm -rf cachingqueue

View File

@ -0,0 +1,23 @@
#!/bin/sh
# Variant of build.sh that neither extracts nor compiles the HTTPClient sources
# from lib/HTTPClient.zip; only what is found under ./src is built.
# Run from the project root.

# clean
# -f keeps a fresh checkout (nothing built yet) from reporting errors
echo cleaning
rm -rf build
rm -rf classes
rm -rf cachingqueue
rm -rf logs

# build
echo making build directory
mkdir build
# stop if the directory cannot be entered; otherwise the "cd .." below would
# leave the project root
cd build || exit 1
#echo extracting http client
#jar xvf ../lib/HTTPClient.zip >/dev/null
cd ..
cp -r src/* build
mkdir classes
echo compiling
#javac -g -d classes -sourcepath build build/HTTPClient/*.java
# NOTE(review): unlike build.sh, no -classpath for jakarta-oro is given here;
# presumably the jar is expected on the CLASSPATH environment variable - confirm
javac -g -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java

View File

@ -0,0 +1,4 @@
#!/bin/sh
# Starts a sample crawl with 15 threads, restricted to hosts below uni-muenchen.de.
# Run from the project root after build.sh has produced ./classes.

# start with an empty log directory; -f: no error if there is none yet
rm -rf logs
mkdir logs
# -Xmx takes a size with a single-letter unit (k, m, g); "400mb" is not a valid value.
# The regular expression is quoted so that the shell neither expands it as a
# file name pattern nor strips the backslashes that escape the dots.
java -server -Xmx400m -classpath classes:lib/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto 'http://[^/]*\.uni-muenchen\.de.*' -threads 15

View File

@ -0,0 +1,278 @@
/*
* @(#)ContentEncodingModule.java 0.3-3 06/05/2001
*
* This file is part of the HTTPClient package
* Copyright (C) 1996-2001 Ronald Tschalär
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307, USA
*
* For questions, suggestions, bug-reports, enhancement-requests etc.
* I may be contacted at:
*
* ronald@innovation.ch
*
* The HTTPClient's home page is located at:
*
* http://www.innovation.ch/java/HTTPClient/
*
*/
package HTTPClient;
import java.io.IOException;
import java.util.Vector;
import java.util.zip.InflaterInputStream;
import java.util.zip.GZIPInputStream;
/**
 * This module handles the Content-Encoding response header. It currently
 * handles the "gzip", "deflate", "compress" and "identity" tokens (plus the
 * "x-gzip" and "x-compress" aliases).
 *
 * @author Ronald Tschalär
 * @created 29 December 2001
 * @version 0.3-3 06/05/2001
 */
public class ContentEncodingModule implements HTTPClientModule
{
    /**
     * Content codings that are appended to the Accept-Encoding request header
     * if they are not listed there yet, in the order in which they are added.
     */
    private static final String[] ADVERTISED_CODINGS =
        {"deflate", "gzip", "x-gzip", "compress", "x-compress"};


    // Methods

    /**
     * Invoked by the HTTPClient. Makes sure the request carries an
     * Accept-Encoding header that lists all codings this module can decode.
     * The header is left untouched if it already accepts everything, i.e. if
     * it contains a "*" element without a q value or with a q value greater
     * than 0.
     *
     * @param req   the request whose headers are inspected and extended
     * @param resp  not used by this module
     * @return      always REQ_CONTINUE
     * @exception ModuleException  if the existing Accept-Encoding header cannot
     *                             be parsed or carries an invalid q value for "*"
     */
    public int requestHandler(Request req, Response[] resp)
        throws ModuleException
    {
        // locate an existing Accept-Encoding header
        NVPair[] hdrs = req.getHeaders();
        int hdrIdx;
        for (hdrIdx = 0; hdrIdx < hdrs.length; hdrIdx++)
        {
            if (hdrs[hdrIdx].getName().equalsIgnoreCase("Accept-Encoding"))
            {
                break;
            }
        }

        Vector pae;
        if (hdrIdx == hdrs.length)
        {
            // no such header yet: reserve a slot for it at the end
            hdrs = Util.resizeArray(hdrs, hdrIdx + 1);
            req.setHeaders(hdrs);
            pae = new Vector();
        }
        else
        {
            try
            {
                pae = Util.parseHeader(hdrs[hdrIdx].getValue());
            }
            catch (ParseException pe)
            {
                throw new ModuleException(pe.toString());
            }
        }

        // done if "*;q=1.0" present
        HttpHeaderElement all = Util.getElement(pae, "*");
        if (all != null)
        {
            // A separate index is used here on purpose: hdrIdx must keep
            // pointing at the Accept-Encoding slot, which is written below.
            NVPair[] params = all.getParams();
            int qIdx;
            for (qIdx = 0; qIdx < params.length; qIdx++)
            {
                if (params[qIdx].getName().equalsIgnoreCase("q"))
                {
                    break;
                }
            }

            if (qIdx == params.length)
            {
                // no qvalue, i.e. q=1.0
                return REQ_CONTINUE;
            }

            String qValue = params[qIdx].getValue();
            if (qValue == null || qValue.length() == 0)
            {
                throw new ModuleException("Invalid q value for \"*\" in " +
                    "Accept-Encoding header: ");
            }

            try
            {
                if (Float.valueOf(qValue).floatValue() > 0.)
                {
                    return REQ_CONTINUE;
                }
            }
            catch (NumberFormatException nfe)
            {
                throw new ModuleException("Invalid q value for \"*\" in " +
                    "Accept-Encoding header: " + nfe.getMessage());
            }
        }

        // Add gzip, deflate and compress tokens to the Accept-Encoding header
        for (int i = 0; i < ADVERTISED_CODINGS.length; i++)
        {
            HttpHeaderElement coding = new HttpHeaderElement(ADVERTISED_CODINGS[i]);
            if (!pae.contains(coding))
            {
                pae.addElement(coding);
            }
        }

        hdrs[hdrIdx] = new NVPair("Accept-Encoding", Util.assembleHeader(pae));
        return REQ_CONTINUE;
    }


    /**
     * Invoked by the HTTPClient. This module does nothing in this phase.
     *
     * @param resp  the response (unused)
     * @param req   the request (unused)
     */
    public void responsePhase1Handler(Response resp, RoRequest req)
    {
    }


    /**
     * Invoked by the HTTPClient. This module does nothing in this phase.
     *
     * @param resp  the response (unused)
     * @param req   the request (unused)
     * @return      always RSP_CONTINUE
     */
    public int responsePhase2Handler(Response resp, Request req)
    {
        return RSP_CONTINUE;
    }


    /**
     * Invoked by the HTTPClient. Undoes one content coding of the response
     * body by wrapping the response's input stream in the matching decoding
     * stream, and removes that coding from the Content-Encoding header.
     * Nothing is done for HEAD requests, for responses with status 206 and
     * for responses without a Content-Encoding header.
     *
     * @param resp  the response whose stream and headers are adjusted
     * @param req   the request that led to this response
     * @exception IOException      if reading the response or creating the
     *                             decoding stream fails
     * @exception ModuleException  if the Content-Encoding header cannot be parsed
     */
    public void responsePhase3Handler(Response resp, RoRequest req)
        throws IOException, ModuleException
    {
        String ce = resp.getHeader("Content-Encoding");
        if (ce == null || req.getMethod().equals("HEAD") ||
            resp.getStatusCode() == 206)
        {
            return;
        }

        Vector pce;
        try
        {
            pce = Util.parseHeader(ce);
        }
        catch (ParseException pe)
        {
            throw new ModuleException(pe.toString());
        }

        if (pce.size() == 0)
        {
            return;
        }

        // Codings are listed in the order in which they were applied, so the
        // one to undo first is the LAST element -- the same element that is
        // removed from the list below.
        String encoding = ((HttpHeaderElement) pce.lastElement()).getName();
        if (encoding.equalsIgnoreCase("gzip") ||
            encoding.equalsIgnoreCase("x-gzip"))
        {
            Log.write(Log.MODS, "CEM: pushing gzip-input-stream");
            resp.inp_stream = new GZIPInputStream(resp.inp_stream);
            pce.removeElementAt(pce.size() - 1);
            // the length of the decoded body differs from the transmitted one
            resp.deleteHeader("Content-length");
        }
        else if (encoding.equalsIgnoreCase("deflate"))
        {
            Log.write(Log.MODS, "CEM: pushing inflater-input-stream");
            resp.inp_stream = new InflaterInputStream(resp.inp_stream);
            pce.removeElementAt(pce.size() - 1);
            resp.deleteHeader("Content-length");
        }
        else if (encoding.equalsIgnoreCase("compress") ||
            encoding.equalsIgnoreCase("x-compress"))
        {
            Log.write(Log.MODS, "CEM: pushing uncompress-input-stream");
            resp.inp_stream = new UncompressInputStream(resp.inp_stream);
            pce.removeElementAt(pce.size() - 1);
            resp.deleteHeader("Content-length");
        }
        else if (encoding.equalsIgnoreCase("identity"))
        {
            Log.write(Log.MODS, "CEM: ignoring 'identity' token");
            pce.removeElementAt(pce.size() - 1);
        }
        else
        {
            Log.write(Log.MODS, "CEM: Unknown content encoding '" +
                encoding + "'");
        }

        // publish what is left of the header, or drop it when nothing is left
        if (pce.size() > 0)
        {
            resp.setHeader("Content-Encoding", Util.assembleHeader(pce));
        }
        else
        {
            resp.deleteHeader("Content-Encoding");
        }
    }


    /**
     * Invoked by the HTTPClient. This module does nothing with trailers.
     *
     * @param resp  the response (unused)
     * @param req   the request (unused)
     */
    public void trailerHandler(Response resp, RoRequest req)
    {
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,38 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c) <p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.fetcher;
/**
 * Holder for the global constants used in this package.
 */
public class Constants
{
    /**
     * browser-like user agent string a fetcher task can give to the
     * corresponding server
     */
    public static final String USER_AGENT = "Mozilla/4.06 [en] (WinNT; I)";

    /**
     * identification string of the crawler itself
     */
    public static final String CRAWLER_AGENT = "Fetcher/0.95";

    /**
     * size in bytes (4 KB) of the temporary buffer web documents are read into
     */
    public static final int FETCHERTASK_READSIZE = 4 * 1024;

    /**
     * upper limit in bytes for the amount read from a single document
     */
    public static final int FETCHERTASK_MAXFILESIZE = 2 * 1000 * 1000;
}

View File

@ -0,0 +1,73 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.fetcher;
import java.util.*;
import java.net.*;
/**
 * Filter class that looks up the IP address for the host name of each URL
 * message. Messages whose host cannot be resolved are dropped; everything
 * else is passed on unchanged.
 * <p>
 * Since URLs cache their IP addresses themselves, and HTTP 1.1 needs the
 * host names to be sent to the server, this class is not used anymore.
 */
public class DNSResolver implements MessageListener
{
    /**
     * intended as a host name to address cache; nothing in this class reads
     * or writes it at the moment
     */
    HashMap ipCache = new HashMap();

    /**
     * the message handler this listener has been added to
     */
    MessageHandler messageHandler;


    public DNSResolver()
    {
    }


    /**
     * Remembers the message handler this listener was registered with.
     *
     * @param m  the message handler
     */
    public void notifyAddedToMessageHandler(MessageHandler m)
    {
        messageHandler = m;
    }


    /**
     * Resolves the host of a URL message. The resolved address itself is
     * discarded; only the outcome of the lookup matters.
     *
     * @param message  the message to inspect
     * @return         <code>null</code> if the message is a URLMessage whose
     *                 host is unknown, otherwise the message itself
     */
    public Message handleRequest(Message message)
    {
        if (!(message instanceof URLMessage))
        {
            // nothing to resolve, hand it on untouched
            return message;
        }

        String hostName = ((URLMessage) message).getUrl().getHost();
        try
        {
            InetAddress.getByName(hostName);
        }
        catch (UnknownHostException unknownHost)
        {
            // swallow the message: there is no point in fetching from this host
            return null;
        }
        return message;
    }
}

View File

@ -0,0 +1,224 @@
/*
* LARM - LANLab Retrieval Machine
*
* $history: $
*
*/
package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.ThreadPool;
import de.lanlab.larm.threads.ThreadPoolObserver;
import de.lanlab.larm.threads.InterruptableTask;
import de.lanlab.larm.storage.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import de.lanlab.larm.fetcher.FetcherTask;
/**
 * Filter class; the Fetcher is the main class which owns the ThreadPool that
 * gets the documents. It should be placed at the very end of the MessageQueue,
 * so that all filtering has been done before a message arrives here.
 *
 * @author Clemens Marschner
 *
 */
public class Fetcher implements MessageListener
{
    /**
     * the pool holding the fetcher threads
     */
    ThreadPool fetcherPool;

    /**
     * counts the messages handed to the thread pool so far
     */
    int docsRead = 0;

    /**
     * the storage the documents are saved to
     */
    DocumentStorage storage;

    /**
     * the host manager keeps track of host information
     */
    HostManager hostManager;

    /**
     * the message handler this listener has been added to
     */
    MessageHandler messageHandler;


    /**
     * Creates the fetcher with a thread pool of the given size and a document
     * storage. The storage is also registered with FetcherTask.
     *
     * @param maxThreads   the number of threads in the ThreadPool
     * @param storage      the storage where all documents are stored
     * @param hostManager  the host manager
     */
    public Fetcher(int maxThreads, DocumentStorage storage, HostManager hostManager)
    {
        this.storage = storage;
        FetcherTask.setStorage(storage);

        this.fetcherPool = new ThreadPool(maxThreads, new FetcherThreadFactory(hostManager));
        this.fetcherPool.setQueue(new FetcherTaskQueue());

        this.docsRead = 0;
        this.hostManager = hostManager;
    }


    /**
     * Initializes the thread pool; this simply delegates to the pool's own
     * init method.
     */
    public void init()
    {
        this.fetcherPool.init();
    }


    /**
     * Initializes the thread pool and resets the document counter.
     *
     * @param maxThreads  currently not evaluated; the pool keeps the size it
     *                    was constructed with
     */
    public void init(int maxThreads)
    {
        this.fetcherPool.init();
        this.docsRead = 0;
    }


    /**
     * This function will be called by the message handler each time a URL
     * passes all filters and gets to the fetcher. The URL is wrapped into a
     * FetcherTask and handed to the thread pool, which carries out the task,
     * that is, fetches the document from the web.
     *
     * @param message  the message, which has to be a URLMessage
     * @return         always <code>null</code>, i.e. the message is consumed
     */
    public Message handleRequest(Message message)
    {
        FetcherTask task = new FetcherTask((URLMessage) message);
        this.fetcherPool.doTask(task, "");
        this.docsRead++;

        // eat the message
        return null;
    }


    /**
     * Called by the message handler when this object is added to it. The
     * handler is remembered and also registered with FetcherTask.
     *
     * @param handler  the message handler
     */
    public void notifyAddedToMessageHandler(MessageHandler handler)
    {
        this.messageHandler = handler;
        FetcherTask.setMessageHandler(handler);
    }


    /**
     * The thread pool observer will be called each time a thread changes its
     * state, i.e. from IDLE to RUNNING, and each time the number of thread
     * queue entries changes.
     * This just wraps the thread pool method.
     *
     * @param t  the class that implements the ThreadPoolObserver interface
     */
    public void addThreadPoolObserver(ThreadPoolObserver t)
    {
        this.fetcherPool.addThreadPoolObserver(t);
    }


    /**
     * Returns the number of tasks queued. Should return 0 if there are any
     * idle threads. This method just wraps the ThreadPool method.
     *
     * @return  The queueSize value
     */
    public int getQueueSize()
    {
        return this.fetcherPool.getQueueSize();
    }


    /**
     * Gets the total number of threads, computed as the sum of the idle and
     * the busy threads reported by the ThreadPool.
     *
     * @return  The workingThreadsCount value
     */
    public int getWorkingThreadsCount()
    {
        int idle = this.fetcherPool.getIdleThreadsCount();
        int busy = this.fetcherPool.getBusyThreadsCount();
        return idle + busy;
    }


    /**
     * Gets the number of threads that are currently idle.
     * This method just wraps the ThreadPool method.
     *
     * @return  The idleThreadsCount value
     */
    public int getIdleThreadsCount()
    {
        return this.fetcherPool.getIdleThreadsCount();
    }


    /**
     * Gets the number of threads that are currently busy.
     * This method just wraps the ThreadPool method.
     *
     * @return  The busyThreadsCount value
     */
    public int getBusyThreadsCount()
    {
        return this.fetcherPool.getBusyThreadsCount();
    }


    /**
     * Gets the thread pool of the Fetcher object.
     * Beware: the original object is returned, not a copy.
     *
     * @TODO    remove this / make it private if possible
     * @return  The threadPool value
     */
    public ThreadPool getThreadPool()
    {
        return this.fetcherPool;
    }


    /**
     * Gets the total number of docs read
     *
     * @return  number of docs read
     */
    public int getDocsRead()
    {
        return this.docsRead;
    }


    /**
     * Returns the (original) task queue of the thread pool.
     *
     * @TODO    remove this if possible
     * @return  The taskQueue value
     */
    public FetcherTaskQueue getTaskQueue()
    {
        return (FetcherTaskQueue) fetcherPool.getTaskQueue();
    }
}

View File

@ -0,0 +1,150 @@
package de.lanlab.larm.fetcher;
import java.awt.event.ActionListener;
import java.awt.event.ActionEvent;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.awt.event.*;
import de.lanlab.larm.gui.*;
import de.lanlab.larm.threads.*;
/**
 * This was used to connect the GUI to the fetcher: it registers observers
 * with the parts of a FetcherMain and copies their figures into a
 * FetcherSummaryFrame, and it reacts to the frame's start button.
 *
 * @TODO put this into the GUI package, probably?
 */
public class FetcherGUIController implements ActionListener
{
    /**
     * the crawler whose state is displayed
     */
    FetcherMain fetcherMain;

    /**
     * the window the state is displayed in
     */
    FetcherSummaryFrame fetcherFrame;


    /**
     * Wires the frame to the fetcher. All observers are registered here.
     *
     * @param fetcherMainPrg   the crawler to observe and to control
     * @param fetcherFrameWin  the frame that shows the figures
     * @param defaultStartURL  the start URL initially shown in the frame
     */
    public FetcherGUIController(FetcherMain fetcherMainPrg, FetcherSummaryFrame fetcherFrameWin, String defaultStartURL)
    {
        this.fetcherMain = fetcherMainPrg;
        this.fetcherFrame = fetcherFrameWin;

        fetcherFrame.setRestrictTo(fetcherMain.urlScopeFilter.getRexString());
        fetcherFrame.setStartURL(defaultStartURL);

        // thread pool: thread counts and length of the request queue
        fetcherMain.fetcher.addThreadPoolObserver(
            new ThreadPoolObserver()
            {
                public void threadUpdate(int threadNr, String action, String info)
                {
                    // only the counters are shown; no status text is built
                    // here because nothing would display it
                    fetcherFrame.setIdleThreadsCount(fetcherMain.fetcher.getIdleThreadsCount());
                    fetcherFrame.setBusyThreadsCount(fetcherMain.fetcher.getBusyThreadsCount());
                    fetcherFrame.setWorkingThreadsCount(fetcherMain.fetcher.getWorkingThreadsCount());
                }

                public void queueUpdate(String info, String action)
                {
                    fetcherFrame.setRequestQueueCount(fetcherMain.fetcher.getQueueSize());
                }
            }
        );

        fetcherMain.monitor.addObserver(
            new Observer()
            {
                public void update(Observable o, Object arg)
                {
                    // the ThreadMonitor has been updated; the display of its
                    // statistics (stalled threads, bytes and docs per second)
                    // is disabled.
                    // we take the opportunity to show the current memory figures
                    fetcherFrame.setFreeMem(Runtime.getRuntime().freeMemory());
                    fetcherFrame.setTotalMem(Runtime.getRuntime().totalMemory());
                }
            }
        );

        fetcherMain.messageHandler.addMessageQueueObserver(
            new Observer()
            {
                public void update(Observable o, Object arg)
                {
                    // a message has been added or deleted
                    fetcherFrame.setURLsQueued(fetcherMain.messageHandler.getQueued());
                }
            }
        );

        // this observer will be called if a filter has decided to throw a
        // message away.
        fetcherMain.messageHandler.addMessageProcessorObserver(
            new Observer()
            {
                public void update(Observable o, Object arg)
                {
                    if (arg == fetcherMain.urlScopeFilter)
                    {
                        fetcherFrame.setScopeFiltered(fetcherMain.urlScopeFilter.getFiltered());
                    }
                    else if (arg == fetcherMain.urlVisitedFilter)
                    {
                        fetcherFrame.setVisitedFiltered(fetcherMain.urlVisitedFilter.getFiltered());
                    }
                    else if (arg == fetcherMain.reFilter)
                    {
                        fetcherFrame.setURLsCaughtCount(fetcherMain.reFilter.getFiltered());
                    }
                    else // it's the fetcher
                    {
                        fetcherFrame.setDocsRead(fetcherMain.fetcher.getDocsRead());
                    }
                }
            }
        );

        // closing the window ends the program
        fetcherFrame.addWindowListener(
            new WindowAdapter()
            {
                public void windowClosed(WindowEvent e)
                {
                    System.out.println("window Closed");
                    System.exit(0);
                }
            }
        );

        fetcherFrame.addStartButtonListener(this);
    }


    /**
     * Will be called when the start button is pressed: applies the scope
     * expression entered in the frame, starts the monitor and puts the start
     * URL into the message queue.
     *
     * @param e  the button event (not evaluated)
     */
    public void actionPerformed(ActionEvent e)
    {
        System.out.println("Füge Start-URL ein");
        try
        {
            fetcherMain.setRexString(fetcherFrame.getRestrictTo());
            fetcherMain.startMonitor();
            fetcherMain.putURL(new URL(fetcherFrame.getStartURL()), false);
        }
        catch (Exception ex)
        {
            System.out.println("actionPerformed: Exception: " + ex.getMessage());
            // the message alone may be null or meaningless; keep the full
            // trace, as FetcherMain.putURL does
            ex.printStackTrace();
        }
    }
}

View File

@ -0,0 +1,362 @@
/*
* LARM - LANLab Retrieval Machine
*
* $history: $
*
*/
package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.ThreadPoolObserver;
import de.lanlab.larm.threads.ThreadPool;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import de.lanlab.larm.gui.*;
import de.lanlab.larm.util.*;
import de.lanlab.larm.storage.*;
import javax.swing.UIManager;
import HTTPClient.*;
import org.apache.oro.text.regex.MalformedPatternException;
/**
 * ENTRY POINT: this class contains the main()-method of the application, does
 * all the initializing and optionally connects the fetcher with the GUI.
 *
 * @author    Clemens Marschner
 * @created   December 16, 2000
 */
public class FetcherMain
{
    /**
     * the main message pipeline
     */
    protected MessageHandler messageHandler;

    /**
     * this filter records all incoming URLs and filters everything it already
     * knows
     */
    protected URLVisitedFilter urlVisitedFilter;

    /**
     * the scope filter filters URLs that fall out of the scope given by the
     * regular expression
     */
    protected URLScopeFilter urlScopeFilter;

    /*
     * The DNS resolver was supposed to hold the host addresses for all hosts
     * this is done by URL itself today
     *
     * protected DNSResolver dnsResolver;
     */

    /**
     * the robot exclusion filter looks if a robots.txt is present on a host
     * before it is first accessed
     */
    protected RobotExclusionFilter reFilter;

    /**
     * the host manager keeps track of all hosts and is used by the filters.
     */
    protected HostManager hostManager;

    /**
     * this rather flaky filter just filters out some URLs, i.e. different views
     * of the Apache DirIndex module. Has to be made
     * configurable in near future
     */
    protected KnownPathsFilter knownPathsFilter;

    /**
     * this is the main document fetcher. It contains a thread pool that fetches the
     * documents and stores them
     */
    protected Fetcher fetcher;

    /**
     * the thread monitor once was only a monitoring tool, but now has become a
     * vital part of the system that computes statistics and
     * flushes the log file buffers
     */
    protected ThreadMonitor monitor;

    /**
     * the storage is a central class that puts all fetched documents somewhere.
     * Several different implementations exist.
     */
    protected DocumentStorage storage;

    /**
     * the URL length filter filters URLs that are too long, i.e. because of errors
     * in the implementation of dynamic web sites
     */
    protected URLLengthFilter urlLengthFilter;


    /**
     * initializes all classes and registers the filters and the fetcher as
     * listeners with the message handler.
     *
     * @param nrThreads  number of fetcher threads to be created
     */
    public FetcherMain(int nrThreads)
    {
        // to make things clear, this method is commented a bit better than
        // the rest of the program...

        // this is the main message queue. handlers are registered with
        // the queue, and whenever a message is put in it, they are passed to the
        // filters in a "chain of responsibility" manner. Every listener can decide
        // to throw the message away
        messageHandler = new MessageHandler();

        // the storage is the class which saves a WebDocument somewhere, no
        // matter how it does it, whether it's in a file, in a database or
        // whatever

        // example for the (very slow) SQL Server storage:
        // this.storage = new SQLServerStorage("sun.jdbc.odbc.JdbcOdbcDriver","jdbc:odbc:search","sa","...",nrThreads);

        // the LogStorage used here does extensive logging. It logs all links and
        // document information.
        // it also saves all documents to page files. Probably this single storage
        // could also be replaced by a pipeline; or even incorporated into the
        // existing message pipeline
        SimpleLogger log = new SimpleLogger("store", false);
        this.storage = new LogStorage(log, true, "logs/pagefile");

        // a third example would be the NullStorage, which converts the documents into
        // heat, which evaporates above the processor
        // NullStorage();

        // create the filters and add them to the message queue
        urlScopeFilter = new URLScopeFilter();
        urlVisitedFilter = new URLVisitedFilter(100000, log);
        // dnsResolver = new DNSResolver();
        hostManager = new HostManager(1000);
        reFilter = new RobotExclusionFilter(hostManager);
        fetcher = new Fetcher(nrThreads, storage, hostManager);
        knownPathsFilter = new KnownPathsFilter();
        urlLengthFilter = new URLLengthFilter(255);

        // prevent message box popups
        HTTPConnection.setDefaultAllowUserInteraction(false);

        // prevent GZipped files from being decoded
        HTTPConnection.removeDefaultModule(HTTPClient.ContentEncodingModule.class);

        // initialize the threads
        fetcher.init();

        // the thread monitor watches the thread pool.
        monitor = new ThreadMonitor(urlLengthFilter,
            urlVisitedFilter,
            urlScopeFilter,
            /*dnsResolver,*/
            reFilter,
            messageHandler,
            fetcher.getThreadPool(),
            hostManager,
            5000        // wake up every 5 seconds
            );

        // add all filters to the handler.
        messageHandler.addListener(urlLengthFilter);
        messageHandler.addListener(urlScopeFilter);
        messageHandler.addListener(reFilter);
        messageHandler.addListener(urlVisitedFilter);
        messageHandler.addListener(knownPathsFilter);
        messageHandler.addListener(fetcher);

        /* uncomment this to enable HTTPClient logging
        try
        {
            HTTPClient.Log.setLogWriter(new java.io.FileWriter("logs/HttpClient.log"),false);
            HTTPClient.Log.setLogging(HTTPClient.Log.ALL, true);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        */
    }


    /**
     * Sets the regular expression of the scope filter.
     *
     * @param restrictTo  the expression URLs are restricted to
     * @exception MalformedPatternException  passed on from the scope filter
     */
    public void setRexString(String restrictTo) throws MalformedPatternException
    {
        urlScopeFilter.setRexString(restrictTo);
    }


    /**
     * Puts a URL into the message pipeline. Exceptions raised while doing so
     * are reported on standard output and not passed on.
     *
     * @param url      the URL to be queued
     * @param isFrame  passed on to the URLMessage that is created
     * @exception java.net.MalformedURLException  declared for callers; not
     *                                            thrown by the current implementation
     */
    public void putURL(URL url, boolean isFrame)
        throws java.net.MalformedURLException
    {
        try
        {
            messageHandler.putMessage(new URLMessage(url, null, isFrame));
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e.getMessage());
            e.printStackTrace();
        }
    }


    /**
     * Starts the thread monitor.
     */
    public void startMonitor()
    {
        monitor.start();
    }


    /*
     * the GUI is not working at this time. It was used in the very beginning, but
     * synchronous updates turned out to slow down the program a lot, even if the
     * GUI would be turned off. Thus, a lot
     * of Observer messages were removed later. Nonetheless, it's quite cool to see
     * it working...
     *
     * @param f         Description of Parameter
     * @param startURL  Description of Parameter
     */
    /*
    public void initGui(FetcherMain f, String startURL)
    {
        // if we're on a windows platform, make it look a bit more convenient
        try
        {
            UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
        }
        catch (Exception e)
        {
            // never mind, then...
        }
        System.out.println("Init FetcherFrame");
        FetcherSummaryFrame fetcherFrame;
        fetcherFrame = new FetcherSummaryFrame();
        fetcherFrame.setSize(640, 450);
        fetcherFrame.setVisible(true);
        FetcherGUIController guiController = new FetcherGUIController(f, fetcherFrame, startURL);
    }
    */


    /**
     * Returns the value that has to follow an option on the command line.
     * Prints a message and terminates the program if the command line ends
     * right after the option.
     *
     * @param args        the command line arguments
     * @param valueIndex  the index at which the value is expected
     * @param option      the option the value belongs to (for the message)
     * @return            the value found at valueIndex
     */
    private static String requireValue(String[] args, int valueIndex, String option)
    {
        if (valueIndex >= args.length)
        {
            System.out.println("Missing value for option: " + option + "; use -? to get syntax");
            System.exit(1);
        }
        return args[valueIndex];
    }


    /**
     * The main program. Parses the command line, and, unless only the usage
     * is to be shown, creates the crawler, starts the monitor and queues the
     * start URL.
     * <p>
     * Options: -start &lt;URL&gt;, -restrictto &lt;RegEx&gt;,
     * -threads &lt;nr&gt; (default 10), -gui (currently without effect), -?
     *
     * @param args  The command line arguments
     */
    public static void main(String[] args)
    {
        int nrThreads = 10;
        String startURL = "";
        String restrictTo = "http://141.84.120.82/ll/cmarschn/.*";
        boolean gui = false;
        boolean showInfo = false;

        System.out.println("LARM - LANLab Retrieval Machine - Fetcher - V 1.00 - (C) LANLab 2000-02");

        for (int i = 0; i < args.length; i++)
        {
            String option = args[i];
            if (option.equals("-start"))
            {
                startURL = requireValue(args, ++i, option);
                System.out.println("Start-URL set to: " + startURL);
            }
            else if (option.equals("-restrictto"))
            {
                restrictTo = requireValue(args, ++i, option);
                System.out.println("Restricting URLs to " + restrictTo);
            }
            else if (option.equals("-threads"))
            {
                String value = requireValue(args, ++i, option);
                try
                {
                    nrThreads = Integer.parseInt(value);
                }
                catch (NumberFormatException e)
                {
                    System.out.println("Invalid number of threads: " + value + "; use -? to get syntax");
                    System.exit(1);
                }
                if (nrThreads < 1)
                {
                    // without at least one thread nothing would ever be fetched
                    System.out.println("Invalid number of threads: " + value + "; use -? to get syntax");
                    System.exit(1);
                }
                System.out.println("Threads set to " + nrThreads);
            }
            else if (option.equals("-gui"))
            {
                gui = true;
            }
            else if (option.equals("-?"))
            {
                showInfo = true;
            }
            else
            {
                System.out.println("Unknown option: " + option + "; use -? to get syntax");
                System.exit(0);
            }
        }

        // decide about the usage message BEFORE the crawler is created, so that
        // asking for help does not set up storage, filters and threads first
        if (showInfo || (startURL.equals("") && !gui))
        {
            System.out.println("Usage: FetcherMain -start <URL> -restrictto <RegEx> [-threads <nr=10>]"); // [-gui]
            System.exit(0);
        }

        //URL.setURLStreamHandlerFactory(new HttpTimeoutFactory(500));
        // replaced by HTTPClient

        FetcherMain f = new FetcherMain(nrThreads);
        try
        {
            f.setRexString(restrictTo);

            if (gui)
            {
                // f.initGui(f, startURL);
            }
            else
            {
                f.startMonitor();
                f.putURL(new URL(startURL), false);
            }
        }
        catch (MalformedURLException e)
        {
            System.out.println("Malformed URL");
        }
        catch (MalformedPatternException e)
        {
            System.out.println("Wrong RegEx syntax. Must be a valid PERL RE");
        }
    }
}

View File

@ -0,0 +1,617 @@
/*
* LARM - LANLab Retrieval Machine
*
* $history: $
*
*/
package de.lanlab.larm.fetcher;
import java.net.URL;
import de.lanlab.larm.threads.*;
import de.lanlab.larm.util.InputStreamObserver;
import de.lanlab.larm.util.ObservableInputStream;
import de.lanlab.larm.util.WebDocument;
import de.lanlab.larm.util.SimpleCharArrayReader;
import de.lanlab.larm.storage.DocumentStorage;
import de.lanlab.larm.util.State;
import de.lanlab.larm.util.SimpleLogger;
import de.lanlab.larm.net.HttpTimeoutFactory;
import HTTPClient.*;
import java.net.*;
import java.io.*;
import java.util.*;
import java.text.*;
import de.lanlab.larm.parser.Tokenizer;
import de.lanlab.larm.parser.LinkHandler;
/**
* this class gets the documents from the web. It connects to the server given
* by the IP address in the URLMessage, gets the document, and forwards it to
* the storage. If it's an HTML document, it will be parsed and all links will
* be put into the message handler again.
*
* @author Clemens Marschner
*
*/
public class FetcherTask
implements InterruptableTask, LinkHandler, Serializable
{
protected volatile boolean isInterrupted = false;
/**
* each task has its own number. the class variable counts up if an instance
* of a fetcher task is created
*/
static volatile int taskIdentity = 0;
/**
* the number of this object
*/
int taskNr;
/**
* the BASE Href (defaults to contextUrl, may be changed with a <base> tag);
* only valid within a doTask call
*/
private volatile URL base;
/**
* the URL of the document;
* only valid within a doTask call
*/
private volatile URL contextUrl;
/**
* the message handler the URL message comes from; same for all tasks
*/
protected static volatile MessageHandler messageHandler;
/**
* actual number of bytes read
* only valid within a doTask call
*/
private volatile long bytesRead = 0;
/**
* the storage this task will put the document to
*/
private static volatile DocumentStorage storage;
/**
* task state IDs. comparisons will be done by their references, so always
* use the IDs
*/
public final static String FT_IDLE = "idle";
public final static String FT_STARTED = "started";
public final static String FT_OPENCONNECTION = "opening connection";
public final static String FT_CONNECTING = "connecting";
public final static String FT_GETTING = "getting";
public final static String FT_READING = "reading";
public final static String FT_SCANNING = "scanning";
public final static String FT_STORING = "storing";
public final static String FT_READY = "ready";
public final static String FT_CLOSING = "closing";
public final static String FT_EXCEPTION = "exception";
public final static String FT_INTERRUPTED = "interrupted";
private volatile State taskState = new State(FT_IDLE);
/**
* the URLs found will be stored and only added to the message handler in the very
* end, to avoid too many synchronizations
*/
private volatile LinkedList foundUrls;
/**
* the URL message describing the document to be fetched
*/
protected volatile URLMessage actURLMessage;
/**
* the document title, if present
*/
private volatile String title;
/**
* headers for HTTPClient
*/
private static volatile NVPair headers[] = new NVPair[1];
static
{
headers[0] = new HTTPClient.NVPair("User-Agent", Constants.CRAWLER_AGENT);
}
/**
 * Returns a copy of the current task state, as produced by the state's
 * cloneState method.
 *
 * @return  the copied state
 */
public State getTaskState()
{
    State snapshot = this.taskState.cloneState();
    return snapshot;
}
/**
 * Creates a task for the given URL message.
 *
 * @param urlMessage  the message describing the URL this task works on
 */
public FetcherTask(URLMessage urlMessage)
{
    this.actURLMessage = urlMessage;
}
/**
 * Returns the URL message this task was created for.
 *
 * @return  the URL message
 */
public URLMessage getActURLMessage()
{
    return actURLMessage;
}
/**
* Sets the document storage. The storage is kept in a static field, so it
* is the same for all tasks.
*
* @param storage The new storage
*/
public static void setStorage(DocumentStorage storage)
{
FetcherTask.storage = storage;
}
/**
* Sets the messageHandler. The handler is kept in a static field, so it
* is the same for all tasks.
*
* @param messageHandler The new messageHandler
*/
public static void setMessageHandler(MessageHandler messageHandler)
{
FetcherTask.messageHandler = messageHandler;
}
/**
 * Describes this task by the URL string of its URL message.
 *
 * @return  the URL as a string
 */
public String getInfo()
{
    return this.actURLMessage.getURLString();
}
/**
 * Returns the URL carried by this task's URL message.
 *
 * @return  the URL
 */
public URL getURL()
{
    return this.actURLMessage.getUrl();
}
SimpleLogger log;
SimpleLogger errorLog;
//private long startTime;
/**
* this will be called by the fetcher thread and will do all the work
*
* @TODO probably split this up into different processing steps
* @param thread Description of the Parameter
*/
public void run(ServerThread thread)
{
    // Steps: (1) take loggers and the host manager from the executing thread,
    // (2) skip hosts already marked unhealthy, (3) GET the document,
    // (4) scan HTML for links, (5) hand found links to the message handler and
    // the document to the storage, (6) close the connection.
    taskState.setState(FT_STARTED); // state information is always set to make the thread monitor happy

    log = thread.getLog();
    HostManager hm = ((FetcherThread)thread).getHostManager();
    errorLog = thread.getErrorLog();
    int threadNr = ((FetcherThread) thread).getThreadNumber();

    log.log("start");

    base = contextUrl = actURLMessage.getUrl();
    String urlString = actURLMessage.getURLString();
    String host = contextUrl.getHost();

    HostInfo hi = hm.getHostInfo(host); // get and create
    if(!hi.isHealthy())
    {
        // we make this check as late as possible to get the most current information
        log.log("Bad Host: " + contextUrl + "; returning");
        System.out.println("[" + threadNr + "] bad host: " + this.actURLMessage.getUrl());
        taskState.setState(FT_READY, null);
        return;
    }

    foundUrls = new java.util.LinkedList();
    HTTPConnection conn = null;
    title = "*untitled*";
    int size = 1; // only reported in the OutOfMemory messages below
    bytesRead = 0;

    try
    {
        URL ipURL = contextUrl;

        taskState.setState(FT_OPENCONNECTION, urlString);
        log.log("connecting to " + ipURL.getHost());
        taskState.setState(FT_CONNECTING, ipURL);

        // honour an explicit port in the URL; getPort() returns -1 when none is given
        int port = ipURL.getPort();
        conn = (port == -1) ? new HTTPConnection(host) : new HTTPConnection(host, port);
        conn.setDefaultTimeout(75000); // 75 s
        conn.setDefaultAllowUserInteraction(false);

        taskState.setState(FT_GETTING, ipURL);
        log.log("getting");

        HTTPResponse response = conn.Get(ipURL.getFile(), "", headers);
        response.setReadIncrement(2720);
        int statusCode = response.getStatusCode();
        byte[] fullBuffer = null;
        String contentType = "";
        int contentLength = 0;

        if (statusCode != 404 && statusCode != 403)
        {
            // read up to Constants.FETCHERTASK_MAXFILESIZE bytes into a byte array
            taskState.setState(FT_READING, ipURL);
            contentType = response.getHeader("Content-Type");
            if (contentType == null)
            {
                // no Content-Type header: treat like an unknown type instead of
                // failing with a NullPointerException further down
                contentType = "";
            }
            String length = response.getHeader("Content-Length");
            if (length != null)
            {
                try
                {
                    contentLength = Integer.parseInt(length.trim());
                }
                catch (NumberFormatException e)
                {
                    // a malformed header must not abort the fetch; the real length
                    // is taken from the data that was actually read
                    contentLength = 0;
                }
            }
            log.log("reading");

            fullBuffer = response.getData(Constants.FETCHERTASK_MAXFILESIZE); // max. 2 MB
            if (fullBuffer != null)
            {
                contentLength = fullBuffer.length;
                this.bytesRead += contentLength;
            }
        }

        if (isInterrupted)
        {
            System.out.println("FetcherTask: interrupted while reading. File truncated");
            log.log("interrupted while reading. File truncated");
        }
        else
        {
            if (fullBuffer != null)
            {
                taskState.setState(FT_SCANNING, ipURL);

                log.log("read file (" + fullBuffer.length + " bytes). Now scanning.");

                if (contentType.startsWith("text/html"))
                {
                    // ouch. I haven't found a better solution yet. just slower ones.
                    // NOTE(review): bytes are decoded with the platform default charset
                    char[] fullCharBuffer = new char[contentLength];
                    new InputStreamReader(new ByteArrayInputStream(fullBuffer)).read(fullCharBuffer);
                    Tokenizer tok = new Tokenizer();
                    tok.setLinkHandler(this);
                    tok.parse(new SimpleCharArrayReader(fullCharBuffer));
                }
                else
                {
                    errorLog.log("[" + threadNr + "] Discovered unknown content type at " + urlString + ": " + contentType + ". just storing");
                }
                log.log("scanned");
            }
            taskState.setState(FT_STORING, ipURL);
            // links are handed over in one batch to avoid too many synchronizations
            messageHandler.putMessages(foundUrls);
            storage.store(new WebDocument(contextUrl, contentType, fullBuffer, statusCode, actURLMessage.getReferer(), contentLength, title));
            log.log("stored");
        }
    }
    catch (InterruptedIOException e)
    {
        // timeout while reading this file; counts as a strike against the host
        System.out.println("[" + threadNr + "] FetcherTask: Timeout while opening: " + this.actURLMessage.getUrl());
        errorLog.log("error: Timeout: " + this.actURLMessage.getUrl());
        hi.badRequest();
    }
    catch (FileNotFoundException e)
    {
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] FetcherTask: File not Found: " + this.actURLMessage.getUrl());
        errorLog.log("error: File not Found: " + this.actURLMessage.getUrl());
    }
    catch(NoRouteToHostException e)
    {
        // router is down or firewall prevents to connect
        hi.setReachable(false);
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
        errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
    }
    catch(ConnectException e)
    {
        // no server is listening at this port
        hi.setReachable(false);
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
        errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
    }
    catch (SocketException e)
    {
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "]: SocketException:" + e.getMessage());
        errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
    }
    catch(UnknownHostException e)
    {
        // IP Address not to be determined
        hi.setReachable(false);
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
        errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
    }
    catch (IOException e)
    {
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
        errorLog.log("error: IOException: " + e.getClass().getName() + ": " + e.getMessage());
    }
    catch (OutOfMemoryError ome)
    {
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] Task " + this.taskNr + " OutOfMemory after " + size + " bytes");
        errorLog.log("error: OutOfMemory after " + size + " bytes");
    }
    catch (Throwable e)
    {
        // last resort so that a single broken document never kills the fetcher thread
        taskState.setState(FT_EXCEPTION);
        System.out.println("[" + threadNr + "] " + e.getMessage() + " type: " + e.getClass().getName());
        e.printStackTrace();
        System.out.println("[" + threadNr + "]: stopping");
        errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage() + "; stopping");
    }
    finally
    {
        if (isInterrupted)
        {
            System.out.println("Task was interrupted");
            log.log("interrupted");
            taskState.setState(FT_INTERRUPTED);
        }
    }

    if (isInterrupted)
    {
        System.out.println("Task: closed everything");
    }

    taskState.setState(FT_CLOSING);
    if (conn != null)
    {
        // conn is still null when the failure happened before the connection was created
        conn.stop();
    }
    taskState.setState(FT_READY);
    foundUrls = null;
}
/**
* the interrupt method. not in use since the change to HTTPClient
* @TODO decide if we need this anymore
*/
public void interrupt()
{
    System.out.println("FetcherTask: interrupted!");
    // Only raises the flag; run() checks it after reading and in its finally block.
    // Earlier revisions also disconnected the URL connection and closed the input
    // stream here (close() could hang); that code was disabled with the switch to HTTPClient.
    isInterrupted = true;
}
/**
* this is called whenever a link is found in the current document.
* Don't create too many objects here, this will be called
* millions of times
*
* @param link Description of the Parameter
*/
public void handleLink(String link, boolean isFrame)
{
    try
    {
        // drop the fragment ("#...") part; a link that is nothing but a fragment is ignored
        int hashPos = link.indexOf("#");
        if (hashPos == 0)
        {
            return;
        }
        if (hashPos > 0)
        {
            link = link.substring(0, hashPos);
        }

        // links starting with "http:" are parsed on their own,
        // everything else is resolved against the current base URL
        URL target = link.startsWith("http:") ? new URL(link) : new URL(base, link);

        URLMessage found = new URLMessage(target, contextUrl, isFrame);
        found.getURLString(); // result unused; call kept exactly as in the original code
        // collected here and passed to the message handler in one batch at the end of run()
        foundUrls.add(found);
    }
    catch (Exception e)
    {
        // covers MalformedURLException as well; both cases were logged identically
        log.log("warning: " + e.getClass().getName() + ": " + e.getMessage());
    }
}
/**
* called when a BASE tag was found
*
* @param base the HREF attribute
*/
public void handleBase(String base)
{
    URL parsed;
    try
    {
        parsed = new URL(base);
    }
    catch (MalformedURLException e)
    {
        // keep the previous base URL and just note the problem
        log.log("warning: " + e.getClass().getName() + ": " + e.getMessage() + " while converting '" + base + "' to URL in document " + contextUrl);
        return;
    }
    // from now on handleLink() resolves relative links against this URL
    this.base = parsed;
}
/**
* called when a TITLE tag was found
*
* @param title the string between &lt;title&gt; and &lt;/title&gt;
*/
public void handleTitle(String title)
{
    // replaces the "*untitled*" default that run() sets before parsing;
    // the parameter shadows the field, hence the explicit this
    this.title = title;
}
/*
* public void notifyOpened(ObservableInputStream in, long timeElapsed)
* {
* }
* public void notifyClosed(ObservableInputStream in, long timeElapsed)
* {
* }
* public void notifyRead(ObservableInputStream in, long timeElapsed, int nrRead, int totalRead)
* {
* if(totalRead / ((double)timeElapsed) < 0.3) // weniger als 300 bytes/s
* {
* System.out.println("Task " + this.taskNr + " stalled at pos " + totalRead + " with " + totalRead / (timeElapsed / 1000.0) + " bytes/s");
* }
* }
* public void notifyFinished(ObservableInputStream in, long timeElapsed, int totalRead)
* {
* /System.out.println("Task " + this.taskNr + " finished (" + totalRead + " bytes in " + timeElapsed + " ms with " + totalRead / (timeElapsed / 1000.0) + " bytes/s)");
* }
*/
public long getBytesRead()
{
    // number of bytes read so far; reset to 0 at the start of run()
    long count = bytesRead;
    return count;
}
/**
* do nothing if a warning occurs within the html parser
*
* @param message Description of the Parameter
* @param systemID Description of the Parameter
* @param line Description of the Parameter
* @param column Description of the Parameter
* @exception java.lang.Exception Description of the Exception
*/
public void warning(String message, String systemID, int line, int column)
    throws java.lang.Exception
{
    // intentionally empty: parser warnings are ignored
}
/**
* reports a fatal parser error on stdout and in the task log; the error is not rethrown
*
* @param message Description of the Parameter
* @param systemID Description of the Parameter
* @param line Description of the Parameter
* @param column Description of the Parameter
* @exception Exception Description of the Exception
*/
public void fatal(String message, String systemID, int line, int column)
    throws Exception
{
    // systemID, line and column are not used; only the message is reported,
    // first on stdout, then in the task log
    String text = "fatal error: " + message;
    System.out.println(text);
    log.log(text);
}
}

View File

@ -0,0 +1,198 @@
package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.*;
import de.lanlab.larm.util.*;
import java.util.*;
import java.net.URL;
/**
* this special kind of task queue reorders the incoming tasks so that every subsequent
* task is for a different host.
* This is done by a "HashedCircularLinkedList" which allows random adding while
* a different thread iterates through the collection circularly.
*
* @author Clemens Marschner
* @created 23. November 2001
*/
public class FetcherTaskQueue extends TaskQueue
{
    /**
     * maps a host name to the queue (a CachingQueue) holding all tasks for that host.
     * remove() walks through the hosts circularly, one task per host at a time.
     * @TODO probably link this to the host info structure
     */
    HashedCircularLinkedList servers = new HashedCircularLinkedList(100, 0.75f);

    /**
     * total number of tasks over all host queues
     */
    int size = 0;


    /**
     * Constructor for the FetcherTaskQueue object. Does nothing
     */
    public FetcherTaskQueue() { }


    /**
     * true if no task is queued
     *
     * @return true if the task counter is 0
     */
    public boolean isEmpty()
    {
        return (size == 0);
    }


    /**
     * clear the queue. not synchronized.
     */
    public void clear()
    {
        servers.clear();
        // the counter has to follow, otherwise isEmpty() and size() keep
        // reporting tasks that are no longer there
        size = 0;
    }


    /**
     * puts task into Queue.
     * Warning: not synchronized
     *
     * @param t the task to be added. must be a FetcherTask
     */
    public void insert(Object t)
    {
        // assert (t != null && t.getURL() != null)
        URLMessage um = ((FetcherTask)t).getActURLMessage();
        URL act = um.getUrl();
        String host = act.getHost();

        Queue q = ((Queue) servers.get(host));
        if (q == null)
        {
            // first task for this host: create its queue
            q = new CachingQueue(host, 100);
            servers.put(host, q);
        }
        // assert(q != null);
        q.insert(t);
        size++;
    }


    /**
     * the size of the queue. make sure that insert() and size() calls are synchronized
     * if the exact number matters.
     *
     * @return the number of queued tasks over all hosts
     */
    public int size()
    {
        return size;
    }


    /**
     * the number of different hosts queued at the moment
     *
     * @return the number of host queues
     */
    public int getNumHosts()
    {
        return servers.size();
    }


    /**
     * get the next task. warning: not synchronized
     *
     * @return the next task, taken from the next host in circular order,
     *         or null if nothing is queued
     */
    public Object remove()
    {
        FetcherTask t = null;
        if (servers.size() > 0)
        {
            Queue q = (Queue) servers.next();
            // assert(q != null && q.size() > 0)
            t = (FetcherTask)q.remove();
            if (q.size() == 0)
            {
                // last task of this host was taken: drop the host from the circle
                servers.removeCurrent();
            }
            size--;
        }
        return t;
    }


    /**
     * test helper: creates a task for the given URL
     */
    private static FetcherTask newTask(String url) throws java.net.MalformedURLException
    {
        return new FetcherTask(new URLMessage(new URL(url), null, false));
    }


    /**
     * test helper: removes the next task and returns its info string
     */
    private static String nextInfo(FetcherTaskQueue q)
    {
        return ((FetcherTask) q.remove()).getInfo();
    }


    /**
     * tests
     *
     * @param args not used
     */
    public static void main(String args[])
    {
        FetcherTaskQueue q = new FetcherTaskQueue();

        System.out.println("Test 1. put in 4 yahoos and 3 lmus. pull out LMU/Yahoo/LMU/Yahoo/LMU/Yahoo/Yahoo");
        try
        {
            q.insert(newTask("http://www.lmu.de/1"));
            q.insert(newTask("http://www.lmu.de/2"));
            q.insert(newTask("http://www.yahoo.de/1"));
            q.insert(newTask("http://www.yahoo.de/2"));
            q.insert(newTask("http://www.yahoo.de/3"));
            q.insert(newTask("http://www.yahoo.de/4"));
            q.insert(newTask("http://www.lmu.de/3"));
        }
        catch (Throwable t)
        {
            t.printStackTrace();
        }

        for (int i = 0; i < 7; i++)
        {
            System.out.println(nextInfo(q));
        }

        System.out.println("Test 2. new Queue");
        q = new FetcherTaskQueue();
        System.out.println("size [0]:");
        System.out.println(q.size());
        try
        {
            System.out.println("put 3 lmus.");
            q.insert(newTask("http://www.lmu.de/1"));
            q.insert(newTask("http://www.lmu.de/2"));
            q.insert(newTask("http://www.lmu.de/3"));
            System.out.print("pull out 1st element [lmu/1]: ");
            System.out.println(nextInfo(q));
            System.out.println("size now [2]: " + q.size());
            System.out.print("pull out 2nd element [lmu/2]: ");
            System.out.println(nextInfo(q));
            System.out.println("size now [1]: " + q.size());
            System.out.println("put in 3 yahoos");
            q.insert(newTask("http://www.yahoo.de/1"));
            q.insert(newTask("http://www.yahoo.de/2"));
            q.insert(newTask("http://www.yahoo.de/3"));
            System.out.println("remove [?]: " + nextInfo(q));
            System.out.println("Size now [3]: " + q.size());
            System.out.println("remove [?]: " + nextInfo(q));
            System.out.println("Size now [2]: " + q.size());
            System.out.println("remove [?]: " + nextInfo(q));
            System.out.println("Size now [1]: " + q.size());
            System.out.println("put in another Yahoo");
            q.insert(newTask("http://www.yahoo.de/4"));
            System.out.println("remove [?]: " + nextInfo(q));
            System.out.println("Size now [1]: " + q.size());
            System.out.println("remove [?]: " + nextInfo(q));
            System.out.println("Size now [0]: " + q.size());
        }
        catch (Throwable t)
        {
            t.printStackTrace();
        }
    }
}

View File

@ -0,0 +1,91 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.ServerThread;
import de.lanlab.larm.util.State;
/**
* a server thread for the thread pool that records the number
* of bytes read and the number of tasks run
* mainly for statistical purposes and to keep most of the information a task needs
* static
*/
public class FetcherThread extends ServerThread
{
    /** bytes read by all tasks this thread has finished */
    long totalBytesRead = 0;
    /** number of tasks this thread has finished */
    long totalTasksRun = 0;

    HostManager hostManager;

    /** per-thread buffer of Constants.FETCHERTASK_READSIZE bytes, handed out by getDocumentBuffer() */
    byte[] documentBuffer = new byte[Constants.FETCHERTASK_READSIZE];

    public HostManager getHostManager()
    {
        return hostManager;
    }

    /**
     * @param threadNumber number of this thread; also used in the thread name
     * @param threadGroup  the group passed on to ServerThread
     * @param hostManager  the host manager returned by getHostManager()
     */
    public FetcherThread(int threadNumber, ThreadGroup threadGroup, HostManager hostManager)
    {
        super(threadNumber,"FetcherThread " + threadNumber, threadGroup);
        this.hostManager = hostManager;
    }

    public static String STATE_IDLE = "Idle";

    State idleState = new State(STATE_IDLE); // only set if task is finished

    /**
     * adds the finished task's byte count to the statistics before delegating to ServerThread
     */
    protected void taskReady()
    {
        totalBytesRead += ((FetcherTask)task).getBytesRead();
        totalTasksRun++;
        super.taskReady();
        idleState.setState(STATE_IDLE);
    }

    /**
     * @return bytes read by all finished tasks plus those read so far by the current task, if any
     */
    public long getTotalBytesRead()
    {
        // read the field once: it may turn null between a null check and a second read
        FetcherTask current = (FetcherTask)task;
        if(current != null)
        {
            return totalBytesRead + current.getBytesRead();
        }
        return totalBytesRead;
    }

    public long getTotalTasksRun()
    {
        return totalTasksRun;
    }

    public byte[] getDocumentBuffer()
    {
        return documentBuffer;
    }

    /**
     * @return the state of the current task, or a copy of the idle state if there is none
     */
    public State getTaskState()
    {
        // read the field once: it may turn null between a null check and a second read
        FetcherTask current = (FetcherTask)task;
        if(current != null)
        {
            return current.getTaskState();
        }
        return idleState.cloneState();
    }
}

View File

@ -0,0 +1,38 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.*;
/**
* this factory simply creates fetcher threads. It's passed
* to the ThreadPool because the pool is creating the threads on its own
*/
public class FetcherThreadFactory extends ThreadFactory
{
    /** all threads created by this factory belong to this group */
    ThreadGroup threadGroup = new ThreadGroup("FetcherThreads");

    /** passed to every thread that is created */
    HostManager hostManager;

    /**
     * @param hostManager the host manager given to each new FetcherThread
     */
    public FetcherThreadFactory(HostManager hostManager)
    {
        this.hostManager = hostManager;
    }

    /**
     * creates a FetcherThread with priority 4
     *
     * @param count the number given to the new thread
     * @return the new thread
     */
    public ServerThread createServerThread(int count)
    {
        FetcherThread worker = new FetcherThread(count, threadGroup, hostManager);
        worker.setPriority(4);
        return worker;
    }
}

View File

@ -0,0 +1,29 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.fetcher;
/**
* base class of all filter classes
*/
public abstract class Filter
{
    /**
     * how many items this filter has rejected so far; starts at 0 and is
     * incremented directly by the subclasses
     */
    protected int filtered;

    /**
     * @return the current value of the filtered counter
     */
    public int getFiltered()
    {
        return this.filtered;
    }
}

View File

@ -0,0 +1,56 @@
package de.lanlab.larm.fetcher;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @version 1.0
*/
import java.io.*;
import java.util.zip.*;
import java.net.*;
/**
* Description of the Class
*
* @author Administrator
* @created 28. Januar 2002
*/
public class GZipTest
{
    /**
     * Constructor for the GZipTest object. Does nothing
     */
    public GZipTest() { }


    /**
     * Compresses one sample URL with GZIP and prints the URL, its length in
     * characters and the size of the compressed result in bytes.
     *
     * @param args not used
     */
    public static void main(String[] args)
    {
        try
        {
            String address = "http://speechdat.phonetik.uni-muenchen.de/speechdt//speechDB/FIXED1SL/BLOCK00/SES0006/A10006O5.aif";

            // chain: writer (ISO-8859-1) -> gzip -> byte array
            ByteArrayOutputStream sink = new ByteArrayOutputStream(address.length());
            GZIPOutputStream zipper = new GZIPOutputStream(sink);
            OutputStreamWriter writer = new OutputStreamWriter(zipper, "ISO-8859-1");

            writer.write(address);
            writer.close();
            zipper.finish();

            byte[] compressed = sink.toByteArray();
            int plainLength = address.length();
            System.out.println("URL: " + address + " \n Length: " + plainLength + "\n zipped: " + compressed.length);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}

View File

@ -0,0 +1,121 @@
package de.lanlab.larm.fetcher;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author Clemens Marschner
* @version 1.0
*/
import java.util.HashMap;
import java.net.*;
import de.lanlab.larm.util.CachingQueue;
import de.lanlab.larm.util.Queue;
/**
* contains information about a host. If a host doesn't respond too often, it's
* excluded from the crawl.
* This class is used by the HostManager
*
* @author Clemens Marschner
* @created 16. Februar 2002
*/
public class HostInfo
{
    /** shared value for "no disallows" so that disallows is never null */
    static final String[] emptyKeepOutDirectories = new String[0];

    int id;
    int healthyCount = 5; // five strikes, and you're out
    boolean isReachable = true;
    boolean robotTxtChecked = false;
    String[] disallows; // robot exclusion
    boolean isLoadingRobotsTxt = false;
    Queue queuedRequests = null; // robot exclusion
    String hostName;

    /**
     * @param hostName name of the host
     * @param id       number of the host; also used in the name of the request queue
     */
    public HostInfo(String hostName, int id)
    {
        this.hostName = hostName;
        this.id = id;
        this.disallows = emptyKeepOutDirectories;
    }

    /**
     * is this host reachable and responding?
     *
     * @return true while the host is reachable and has strikes left
     */
    public boolean isHealthy()
    {
        if (!isReachable)
        {
            return false;
        }
        return healthyCount > 0;
    }

    /**
     * signals that the host returned with a bad request of whatever type;
     * uses up one strike
     */
    public void badRequest()
    {
        healthyCount -= 1;
    }

    public void setReachable(boolean reachable)
    {
        this.isReachable = reachable;
    }

    public boolean isReachable()
    {
        return this.isReachable;
    }

    public boolean isRobotTxtChecked()
    {
        return this.robotTxtChecked;
    }

    /**
     * must be synchronized externally
     */
    public boolean isLoadingRobotsTxt()
    {
        return isLoadingRobotsTxt;
    }

    /**
     * sets the loading flag; switching it on also creates a fresh request queue
     */
    public void setLoadingRobotsTxt(boolean isLoading)
    {
        this.isLoadingRobotsTxt = isLoading;
        if (!isLoading)
        {
            return;
        }
        this.queuedRequests = new CachingQueue("HostInfo_" + id + "_QueuedRequests", 100);
    }

    /**
     * @param isChecked new value of the "robots.txt checked" flag
     * @param disallows path prefixes that must not be fetched; null means none
     */
    public void setRobotsChecked(boolean isChecked, String[] disallows)
    {
        this.robotTxtChecked = isChecked;
        this.disallows = (disallows == null) ? emptyKeepOutDirectories : disallows;
    }

    /**
     * @param path the path to be checked
     * @return false if the path starts with one of the disallowed prefixes
     */
    public synchronized boolean isAllowed(String path)
    {
        // the list is assumed to be pretty short, so a linear scan is fine
        // assert disallows != null
        for (int i = 0, count = disallows.length; i < count; i++)
        {
            if (path.startsWith(disallows[i]))
            {
                return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,86 @@
package de.lanlab.larm.fetcher;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @version 1.0
*/
import java.util.HashMap;
/**
* Description of the Class
*
* @author Administrator
* @created 16. Februar 2002
*/
public class HostManager
{
    /**
     * host name -> HostInfo. Only accessed from synchronized methods because the
     * manager is shared between threads and HashMap itself is not thread safe.
     */
    HashMap hosts;

    /**
     * source of host ids; shared by all HostManager instances
     */
    static int hostCount = 0;


    /**
     * Constructor for the HostManager object
     *
     * @param initialCapacity initial capacity of the host table
     */
    public HostManager(int initialCapacity)
    {
        hosts = new HashMap(initialCapacity);
    }


    /**
     * hands out the next host id. The counter is static, so it is guarded by
     * the class lock, not by the lock of a single instance.
     */
    private static synchronized int nextHostId()
    {
        return hostCount++;
    }


    /**
     * registers a host if it is not known yet
     *
     * @param hostName the name of the host
     * @return the existing HostInfo for this name, or a newly created one
     */
    public synchronized HostInfo put(String hostName)
    {
        // lookup and insert happen under one lock, so two threads can never
        // create two different HostInfo objects for the same host
        HostInfo hi = (HostInfo)hosts.get(hostName);
        if (hi == null)
        {
            hi = new HostInfo(hostName, nextHostId());
            hosts.put(hostName, hi);
        }
        return hi;
    }


    /**
     * gets the HostInfo for a host name; creates it if it doesn't exist yet
     *
     * @param hostName the name of the host
     * @return the HostInfo, never null
     */
    public synchronized HostInfo getHostInfo(String hostName)
    {
        return put(hostName);
    }


    /**
     * @return the number of hosts known to this manager
     */
    public synchronized int getSize()
    {
        return hosts.size();
    }
}

View File

@ -0,0 +1,111 @@
package de.lanlab.larm.fetcher;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @created 17. Februar 2002
* @version 1.0
*/
import java.net.*;
/**
* this can be considered a hack
* @TODO implement this as a fast way to filter out different URL endings or beginnings
*/
public class KnownPathsFilter extends Filter implements MessageListener
{
    MessageHandler messageHandler;

    /** URLs whose file part starts with one of these are dropped */
    String[] pathsToFilter =
    {
        "/robots.txt"
    };

    /** URLs on exactly these hosts are dropped */
    String[] hostFilter =
    {
        "www.nm.informatik.uni-muenchen.de",
        "cgi.cip.informatik.uni-muenchen.de"
    };

    /** URLs whose file part ends with one of these are dropped */
    String[] filesToFilter =
    {
        // exclude Apache directory files
        "/?D=D",
        "/?S=D",
        "/?M=D",
        "/?N=D",
        "/?D=A",
        "/?S=A",
        "/?M=A",
        "/?N=A",
    };

    int pathLength;
    int fileLength;
    int hostLength;


    /**
     * Constructor for the KnownPathsFilter object. Caches the lengths of the filter lists
     */
    public KnownPathsFilter()
    {
        pathLength = pathsToFilter.length;
        fileLength = filesToFilter.length;
        hostLength = hostFilter.length;
    }


    /**
     * drops messages whose URL matches one of the filter lists
     *
     * @param message the message to check; must be a URLMessage
     * @return the message itself, or null if it was filtered out
     */
    public Message handleRequest(Message message)
    {
        URL url = ((URLMessage)message).getUrl();
        String file = url.getFile();
        String host = url.getHost();
        int i;
        for (i = 0; i < pathLength; i++)
        {
            if (file.startsWith(pathsToFilter[i]))
            {
                filtered++;
                return null;
            }
        }
        for (i = 0; i < fileLength; i++)
        {
            if (file.endsWith(filesToFilter[i]))
            {
                filtered++;
                return null;
            }
        }
        for (i = 0; i < hostLength; i++)
        {
            if (hostFilter[i].equals(host))
            {
                filtered++;
                return null;
            }
        }
        return message;
    }


    /**
     * will be called as soon as the Listener is added to the Message Queue
     *
     * @param handler the Message Handler
     */
    public void notifyAddedToMessageHandler(MessageHandler handler)
    {
        // store the parameter; the former "messageHandler = messageHandler"
        // assigned the field to itself and left it null
        this.messageHandler = handler;
    }
}

View File

@ -0,0 +1,11 @@
package de.lanlab.larm.fetcher;
import java.io.*;
/**
* Marker interface.
* represents a simple message.
*/
public interface Message
{
    // intentionally empty: the interface declares no methods or constants
}

View File

@ -0,0 +1,248 @@
package de.lanlab.larm.fetcher;
import java.util.*;
import de.lanlab.larm.util.SimpleObservable;
import de.lanlab.larm.util.CachingQueue;
import de.lanlab.larm.util.UnderflowException;
/**
* this is a message handler that runs in its own thread.
* Messages can be put via <code>putMessage</code> or <code>putMessages</code>
* (use the latter whenever possible).<br>
* The messages are passed to the filters in the order in which the filters where
* added to the handler.<br>
* They can consume the message by returning null. Otherwise, they return a Message
* object, usually the one they got.<br>
* The filters will run synchronously within the message handler thread<br>
* This implements a chain of responsibility-style message handling
*/
public class MessageHandler implements Runnable
{

    /**
     * the queue where messages are put in.
     * Holds max. 2 x 5000 = 10.000 messages in RAM.
     * Inserted into and removed from only while holding queueMonitor.
     */
    private CachingQueue messageQueue = new CachingQueue("fetcherURLMessageQueue", 5000);

    /**
     * list of listeners (filters), in the order in which they were added
     */
    private LinkedList listeners = new LinkedList();

    /**
     * true as long as the thread is running
     */
    private boolean running = true;

    /**
     * the message handler thread
     */
    private Thread t;

    /**
     * flag for thread communication: true while the queue is believed to be
     * non-empty. Written only while holding queueMonitor.
     */
    boolean messagesWaiting = false;

    /**
     * true when a message is processed by the filters
     */
    boolean workingOnMessage = false;

    /**
     * guards messageQueue and messagesWaiting; the handler thread waits on it
     */
    Object queueMonitor = new Object();

    SimpleObservable messageQueueObservable = new SimpleObservable();
    SimpleObservable messageProcessorObservable = new SimpleObservable();

    public boolean isWorkingOnMessage()
    {
        return workingOnMessage;
    }

    /**
     * creates and starts the message handler thread
     */
    MessageHandler()
    {
        t = new Thread(this,"MessageHandler Thread");
        t.setPriority(5);   // higher priority to prevent starving when a lot of fetcher threads are used
        t.start();
    }

    /**
     * joins the message handler thread
     */
    public void finalize()
    {
        if(t != null)
        {
            try
            {
                t.join();
                t = null;
            }
            catch(InterruptedException e)
            {
                // deliberately ignored: nothing sensible can be done during finalization
            }
        }
    }

    /**
     * registers a filter to the message handler
     * @param m the listener
     */
    public void addListener(MessageListener m)
    {
        m.notifyAddedToMessageHandler(this);
        listeners.addLast(m);
    }

    /**
     * registers a MessageQueueObserver
     * It will be notified whenever a message is put into the queue (parameter is Integer(1)) or
     * removed (parameter is Integer(-1))
     * @param o the Observer
     */
    public void addMessageQueueObserver(Observer o)
    {
        messageQueueObservable.addObserver(o);
    }

    /**
     * adds a message processor observer
     * It will be notified when a message is consumed. In this case the parameter
     * is the filter that consumed the message
     * @param o the Observer
     */
    public void addMessageProcessorObserver(Observer o)
    {
        messageProcessorObservable.addObserver(o);
    }


    /**
     * puts one message into the queue and wakes up the handler thread
     *
     * @param msg the message to be queued
     */
    public void putMessage(Message msg)
    {
        // insert, flag and signal form one critical section, so the handler
        // thread can never see the flag without the message or vice versa
        synchronized(queueMonitor)
        {
            messageQueue.insert(msg);
            messagesWaiting = true;
            queueMonitor.notify();
        }
        // observers are called outside the lock so they cannot block the queue
        messageQueueObservable.setChanged();
        messageQueueObservable.notifyObservers(new Integer(1));
    }

    /**
     * add a collection of messages to the message queue
     *
     * @param msgs the messages to be queued; may be empty
     */
    public void putMessages(Collection msgs)
    {
        synchronized(queueMonitor)
        {
            for(Iterator i = msgs.iterator(); i.hasNext();)
            {
                Message msg = (Message)i.next();
                messageQueue.insert(msg);
            }
            // an empty collection must not claim that messages are waiting;
            // that used to make the handler thread run into an UnderflowException
            if(messageQueue.size() > 0)
            {
                messagesWaiting = true;
                queueMonitor.notify();
            }
        }
        // as before, the observers get one notification per call, not per message
        messageQueueObservable.setChanged();
        messageQueueObservable.notifyObservers(new Integer(1));
    }


    /**
     * the main loop of the message handler thread: waits until messages are
     * queued, then passes each of them through the listener chain
     */
    public void run()
    {
        while(running)
        {
            synchronized(queueMonitor)
            {
                workingOnMessage = false;
                // only wait if there is really nothing to do. Waiting unconditionally
                // loses the wake-up when notify() was called before this point.
                while(!messagesWaiting)
                {
                    try
                    {
                        queueMonitor.wait();
                    }
                    catch(InterruptedException e)
                    {
                        System.out.println("MessageHandler: Caught InterruptedException");
                        break;
                    }
                }
                workingOnMessage = true;
            }

            try
            {
                while(true)
                {
                    Message m;
                    synchronized(queueMonitor)
                    {
                        if(!messagesWaiting)
                        {
                            break;
                        }
                        m = (Message)messageQueue.remove();
                        if(messageQueue.size() == 0)
                        {
                            messagesWaiting = false;
                        }
                    }

                    messageQueueObservable.setChanged();
                    messageQueueObservable.notifyObservers(new Integer(-1));     // message taken from the queue

                    // distribute: the listeners get the message in the order in which
                    // they were registered and may also change it
                    Iterator i = listeners.iterator();
                    while(i.hasNext())
                    {
                        try
                        {
                            MessageListener listener = (MessageListener)i.next();
                            m = (Message)listener.handleRequest(m);
                            if (m == null)
                            {
                                messageProcessorObservable.setChanged();
                                messageProcessorObservable.notifyObservers(listener);
                                break;          // the listener has consumed the message
                            }
                        }
                        catch(ClassCastException e)
                        {
                            System.out.println("MessageHandler:run: ClassCastException(2): " + e.getMessage());
                        }
                    }
                }
            }
            catch (ClassCastException e)
            {
                System.out.println("MessageHandler:run: ClassCastException: " + e.getMessage());
            }
            catch (UnderflowException e)
            {
                // safety net: bring the flag back in line with the real queue content
                synchronized(queueMonitor)
                {
                    messagesWaiting = messageQueue.size() > 0;
                }
            }
            catch (Exception e)
            {
                System.out.println("MessageHandler: " + e.getClass() + " " + e.getMessage());
                e.printStackTrace();
            }
        }
    }

    /**
     * @return the number of messages currently in the queue
     */
    public int getQueued()
    {
        synchronized(queueMonitor)
        {
            return messageQueue.size();
        }
    }
}

View File

@ -0,0 +1,36 @@
/*
* LARM - LANLab Retrieval Machine
*
* $history: $
*
*
*/
package de.lanlab.larm.fetcher;
/**
* A Message Listener works on messages in a message queue Usually it returns
* the message back into the queue. But it can also change the message or create
* a new object. If it returns null, the message handler stops
*
* @author Administrator
* @created 24. November 2001
*/
public interface MessageListener
{
    /**
     * processes one message
     *
     * @param message the message to be handled
     * @return the message to be passed on, usually the one received;
     *         null if the message was consumed
     */
    Message handleRequest(Message message);


    /**
     * called by the message handler when this listener is added to it
     *
     * @param handler the message handler the listener was added to
     */
    void notifyAddedToMessageHandler(MessageHandler handler);
}

View File

@ -0,0 +1,429 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
*
* Description: <p>
*
* Copyright: Copyright (c)<p>
*
* Company: <p>
*
*
*
* @author Clemens Marschner
* @version 1.0
*/
package de.lanlab.larm.fetcher;
import de.lanlab.larm.util.SimpleObservable;
import de.lanlab.larm.util.State;
import java.util.*;
import java.net.*;
import java.io.*;
import org.apache.oro.text.perl.Perl5Util;
import de.lanlab.larm.util.*;
import de.lanlab.larm.threads.*;
import HTTPClient.*;
/**
* this factory creates the server threads used by the RobotExclusionFilter. It's
* passed to the ThreadPool because the pool is creating the threads on its own
*
* @author Administrator
* @created 17. Februar 2002
*/
class REFThreadFactory extends ThreadFactory
{
    /** all threads created by this factory belong to this group */
    ThreadGroup threadGroup = new ThreadGroup("RobotExclusionFilter");

    /**
     * creates a plain ServerThread named "REF-" plus its number, with priority 4
     *
     * @param count the number given to the new thread
     * @return the new thread
     */
    public ServerThread createServerThread(int count)
    {
        String threadName = "REF-" + count;
        ServerThread worker = new ServerThread(count, threadName, threadGroup);
        worker.setPriority(4);
        return worker;
    }
}
/**
 * Filter that obeys the robot exclusion standard. The first time a host is
 * about to be accessed, the filter loads "/robots.txt" from that host and
 * records the disallows stated in that file.
 * <p>
 * The filter owns a thread pool so that the message handler is not clogged up
 * if a server does not respond. Messages for a host that arrive while its
 * robots.txt is being loaded are queued in the host's HostInfo record and are
 * put back into the message handler once loading has finished (successfully or
 * not).
 * <p>
 * The information is stored in the HostInfo records of the host manager.
 *
 * @author Clemens Marschner
 * @created 17. Februar 2002
 */
public class RobotExclusionFilter extends Filter implements MessageListener
{
    protected HostManager hostManager;

    protected SimpleLogger log;

    /** handler that queued messages are given back to; set by notifyAddedToMessageHandler() */
    MessageHandler messageHandler = null;

    /** pool whose threads run the RobotExclusionTasks */
    ThreadPool rePool;

    /** request headers sent with every robots.txt request (the crawler's User-Agent) */
    private static volatile NVPair headers[] = new NVPair[1];

    static
    {
        headers[0] = new HTTPClient.NVPair("User-Agent", Constants.CRAWLER_AGENT);
    }


    /**
     * Creates the filter together with its logger and its own thread pool.
     *
     * @param hm the host manager whose HostInfo records hold the robots.txt
     *           state of each host
     */
    public RobotExclusionFilter(HostManager hm)
    {
        log = new SimpleLogger("RobotExclusionFilter");
        hostManager = hm;
        rePool = new ThreadPool(2, new REFThreadFactory());
        rePool.init();
        log.setFlushAtOnce(true);
        log.log("refilter: initialized");
    }


    /**
     * called by the message handler
     *
     * @param handler the handler this filter was added to
     */
    public void notifyAddedToMessageHandler(MessageHandler handler)
    {
        this.messageHandler = handler;
    }


    /**
     * Handles one URL request.
     * <p>
     * The first request for a host schedules a task that loads the host's
     * robots.txt. While that task is running, every request for the host is
     * queued in the HostInfo record and <code>null</code> is returned; the
     * task puts the queued messages back into the message handler when it is
     * done. Afterwards requests are checked against the recorded disallows.
     * <p>
     * Any exception raised while handling the message is printed and causes
     * the message to be dropped (<code>null</code> is returned).
     *
     * @param message the (URL)Message
     * @return the original message, or <code>null</code> if the message was
     *         queued, the host had a disallow on that URL, or an exception
     *         occurred
     * @see <a href="http://info.webcrawler.com/mak/projects/robots/norobots.html">A Standard for Robot Exclusion</a>
     */
    public Message handleRequest(Message message)
    {
        try
        {
            URLMessage urlMsg = (URLMessage) message;
            URL url = urlMsg.getUrl();
            HostInfo h = hostManager.getHostInfo(url.getHost());

            boolean startLoading = false;
            boolean queued = false;
            synchronized (h)
            {
                // The check "nobody has loaded or is loading robots.txt" and
                // the switch to "loading" must be one atomic step, and the
                // flag must be set BEFORE the task is scheduled. Otherwise a
                // fast task could clear the flag first and a late
                // setLoadingRobotsTxt(true) would leave the host stuck in the
                // loading state.
                if (!h.isRobotTxtChecked() && !h.isLoadingRobotsTxt())
                {
                    h.setLoadingRobotsTxt(true);
                    startLoading = true;
                }
                // isLoading...() and queuedRequests.insert() must be atomic
                if (h.isLoadingRobotsTxt())
                {
                    h.queuedRequests.insert(message);
                    queued = true;
                }
            }

            if (startLoading)
            {
                log.logThreadSafe("handleRequest: starting to get robots.txt");
                try
                {
                    // scheduled outside the lock; the task itself locks the host info
                    rePool.doTask(new RobotExclusionTask(h), new Integer(h.id));
                }
                catch (RuntimeException e)
                {
                    // the task never started: leave the loading state again so
                    // that the next request for this host retries
                    synchronized (h)
                    {
                        h.setLoadingRobotsTxt(false);
                    }
                    throw e;
                }
            }
            if (queued)
            {
                log.logThreadSafe("handleRequest: queued file " + url);
                return null;
            }

            // robots.txt has been evaluated for this host: check the path
            String path = url.getPath();
            if (path == null || path.equals(""))
            {
                path = "/";
            }
            if (h.isAllowed(path))
            {
                return message;
            }
            log.logThreadSafe("handleRequest: file " + urlMsg.getURLString() + " filtered");
            this.filtered++;
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        return null;
    }


    /**
     * the task that actually loads and parses the robots.txt files
     *
     * @author Clemens Marschner
     * @created 17. Februar 2002
     */
    class RobotExclusionTask implements InterruptableTask
    {
        /** the host whose robots.txt is loaded by this task */
        HostInfo hostInfo;


        /**
         * Constructor for the RobotExclusionTask object
         *
         * @param hostInfo the record of the host whose robots.txt is to be loaded
         */
        public RobotExclusionTask(HostInfo hostInfo)
        {
            this.hostInfo = hostInfo;
        }


        /**
         * dummy
         *
         * @return always the empty string
         */
        public String getInfo()
        {
            return "";
        }


        /**
         * not used
         */
        public void interrupt() { }


        /**
         * Gets the robots.txt file of the host and adds the information to
         * the hostInfo structure. Whatever happens, the host is marked as
         * checked afterwards and all queued requests are put back into the
         * message handler. If the file could not be loaded (status code other
         * than 200 or any exception) no disallows are recorded.
         *
         * @param thread the server thread (passed by the thread pool)
         */
        public void run(ServerThread thread)
        {
            String threadName = Thread.currentThread().getName();
            log.logThreadSafe("task " + threadName + ": starting to load " + hostInfo.hostName);

            String[] disallows = null;
            boolean errorOccured = false;
            try
            {
                log.logThreadSafe("task " + threadName + ": getting connection");
                HTTPConnection conn = new HTTPConnection(hostInfo.hostName);
                // wait at most 30 secs
                conn.setTimeout(30000);
                HTTPResponse res = conn.Get("/robots.txt", (String) null, headers);
                log.logThreadSafe("task " + threadName + ": got connection.");
                if (res.getStatusCode() != 200)
                {
                    errorOccured = true;
                }
                else
                {
                    log.logThreadSafe("task " + threadName + ": reading");
                    // max. 40 kb
                    byte[] file = res.getData(40000);
                    log.logThreadSafe("task " + threadName + ": reading done. parsing");
                    disallows = parse(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(file))));
                    log.logThreadSafe("task " + threadName + ": parsing done. found " + disallows.length + " disallows");
                    log.logThreadSafe("task " + threadName + ": setting disallows");
                }
            }
            catch (java.net.UnknownHostException e)
            {
                markUnreachable(threadName, "unknown host");
                errorOccured = true;
            }
            catch (java.net.NoRouteToHostException e)
            {
                markUnreachable(threadName, "no route to");
                errorOccured = true;
            }
            catch (java.net.ConnectException e)
            {
                markUnreachable(threadName, "connect exception");
                errorOccured = true;
            }
            catch (java.io.InterruptedIOException e)
            {
                // time out. fatal in this case
                markUnreachable(threadName, "time out");
                errorOccured = true;
            }
            catch (Throwable e)
            {
                errorOccured = true;
                log.log("task " + threadName + ": unknown exception: " + e.getClass().getName() + ": " + e.getMessage() + ". continuing");
                log.log(e);
            }
            finally
            {
                // state change and hand-back of the queued requests are done
                // under the same lock handleRequest() uses for queueing
                synchronized (hostInfo)
                {
                    if (errorOccured)
                    {
                        // no disallows: crawl everything
                        hostInfo.setRobotsChecked(true, null);
                        hostInfo.setLoadingRobotsTxt(false);
                        log.logThreadSafe("task " + threadName + ": error occured");
                    }
                    else
                    {
                        hostInfo.setRobotsChecked(true, disallows);
                        log.logThreadSafe("task " + threadName + ": done");
                    }
                    log.logThreadSafe("task " + threadName + ": now put " + hostInfo.queuedRequests.size() + " queueud requests back");
                    hostInfo.isLoadingRobotsTxt = false;
                    putBackURLs();
                }
            }
        }


        /**
         * Flags the host as unreachable and logs why.
         *
         * @param threadName name of the thread running the task (for the log)
         * @param reason     short description of the failure (for the log)
         */
        private void markUnreachable(String threadName, String reason)
        {
            hostInfo.setReachable(false);
            log.logThreadSafe("task " + threadName + ": " + reason + ". setting to unreachable");
        }


        /**
         * Puts all queued URLs back into the message handler and drops the
         * host's request queue afterwards.
         */
        private void putBackURLs()
        {
            while (hostInfo.queuedRequests.size() > 0)
            {
                messageHandler.putMessage((Message) hostInfo.queuedRequests.remove());
            }
            log.logThreadSafe("task " + Thread.currentThread().getName() + ": finished");
            hostInfo.queuedRequests = null;
        }


        /**
         * Parses a robots.txt file. The logic was taken from the Perl
         * implementation. Since this is only rarely called, it's not
         * optimized for speed.
         * <p>
         * Records are separated by blank lines. Disallow lines are collected
         * from records addressed to "*" and from the record addressed to this
         * crawler (a User-Agent value that is a prefix of
         * Constants.CRAWLER_AGENT); parsing stops after the crawler's own
         * record. A Disallow line with an empty value means "everything is
         * allowed" and therefore adds nothing.
         *
         * @param r the robots.txt file
         * @return the disallowed path prefixes, possibly empty
         * @exception IOException any IOException raised by the reader
         */
        public String[] parse(BufferedReader r)
            throws IOException
        {
            Perl5Util p = new Perl5Util();
            String line;
            boolean isMe = false;
            boolean isAnon = false;
            ArrayList disallowed = new ArrayList();
            String ua = null;

            while ((line = r.readLine()) != null)
            {
                if (p.match("/^#.*/", line))
                {
                    // a comment
                    continue;
                }
                // strip a trailing comment together with the whitespace in front of it
                line = p.substitute("s/\\s*\\#.*//", line);

                if (p.match("/^\\s*$/", line))
                {
                    // blank line: end of the current record
                    if (isMe)
                    {
                        break;
                    }
                    // rules of a following record for another robot must not
                    // be attributed to "*"
                    isAnon = false;
                }
                else if (p.match("/^User-Agent:\\s*(.*)/i", line))
                {
                    ua = p.group(1);
                    ua = p.substitute("s/\\s+$//", ua);
                    if (isMe)
                    {
                        break;
                    }
                    else if (ua.equals("*"))
                    {
                        isAnon = true;
                    }
                    else if (Constants.CRAWLER_AGENT.startsWith(ua))
                    {
                        isMe = true;
                    }
                }
                else if (p.match("/^Disallow:\\s*(.*)/i", line))
                {
                    if (ua == null)
                    {
                        // Disallow without preceding User-Agent: assume "*" was intended
                        isAnon = true;
                    }
                    String disallow = p.group(1);
                    if (disallow != null)
                    {
                        disallow = p.substitute("s/\\s+$//", disallow);
                    }
                    if (disallow == null || disallow.length() == 0)
                    {
                        // an empty Disallow allows everything: nothing to record
                        continue;
                    }
                    // assume we have a relative path
                    if (isMe || isAnon)
                    {
                        disallowed.add(disallow);
                    }
                }
                else
                {
                    // unexpected line: ignored
                }
            }
            String[] disalloweds = new String[disallowed.size()];
            disallowed.toArray(disalloweds);
            return disalloweds;
        }
    }
}

View File

@ -0,0 +1,545 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.fetcher;
import de.lanlab.larm.threads.*;
import java.util.*;
import java.text.*;
import java.io.*;
import de.lanlab.larm.util.State;
import de.lanlab.larm.util.SimpleLoggerManager;
/**
* this monitor takes a sample of every thread every x milliseconds,
* and logs a lot of information. In the near past it has evolved into the multi
* purpose monitoring and maintenance facility.
* At the moment it prints status information
* to log files and to the console
* @TODO this can be done better. Probably with an agent where different services
* can be registered to be called every X seconds
*/
public class ThreadMonitor extends Observable implements Runnable
{
/**
* a reference to the thread pool that's gonna be observed
*/
private ThreadPool threadPool;
/**
 * One measurement taken by the monitor: the counters handed to the
 * constructor together with the time they were taken at.
 */
class Sample
{
    /** bytes-read counter value of this sample */
    long bytesRead;

    /** documents-read counter value of this sample */
    long docsRead;

    /** time stamp of this sample */
    long time;

    public Sample(long bytesRead, long docsRead, long time)
    {
        this.time = time;
        this.docsRead = docsRead;
        this.bytesRead = bytesRead;
    }
}
ArrayList bytesReadPerPeriod;
/**
* Zeit zwischen den Messungen
*/
int sampleDelta;
/**
* the thread where this monitor runs in. Will run with high priority
*/
Thread thread;
URLVisitedFilter urlVisitedFilter;
URLScopeFilter urlScopeFilter;
// DNSResolver dnsResolver;
RobotExclusionFilter reFilter;
MessageHandler messageHandler;
URLLengthFilter urlLengthFilter;
HostManager hostManager;
public final static double KBYTE = 1024;
public final static double MBYTE = 1024 * KBYTE;
public final static double ONEGBYTE = 1024 * MBYTE;
/**
 * Formats a byte count with the unit that fits its magnitude ("Bytes", "KB",
 * "MB" or "GB", based on powers of 1024), using the monitor's fraction
 * format.
 *
 * @param lbytes the number of bytes
 * @return the formatted number followed by a blank and the unit
 */
String formatBytes(long lbytes)
{
    final double amount = (double) lbytes;
    if (amount < KBYTE)
    {
        return fractionFormat.format(amount) + " Bytes";
    }
    if (amount < MBYTE)
    {
        return fractionFormat.format(amount / KBYTE) + " KB";
    }
    if (amount < ONEGBYTE)
    {
        return fractionFormat.format(amount / MBYTE) + " MB";
    }
    return fractionFormat.format((amount / ONEGBYTE)) + " GB";
}
/**
* a logfile where status information is posted
* FIXME: put that in a seperate class (double code in FetcherTask)
*/
PrintWriter logWriter;
private SimpleDateFormat formatter
= new SimpleDateFormat ("hh:mm:ss:SSSS");
private DecimalFormat fractionFormat = new DecimalFormat("0.00");
long startTime = System.currentTimeMillis();
/**
 * Writes one line to the monitor's log file and flushes it. The line consists
 * of the formatted current time, the milliseconds elapsed since
 * <code>startTime</code> and the given text, separated by semicolons. If
 * anything goes wrong a note is printed to the console instead.
 *
 * @param text the text to log
 */
private void log(String text)
{
    try
    {
        final String timeStamp = formatter.format(new Date());
        final long elapsed = System.currentTimeMillis() - startTime;
        logWriter.println(timeStamp + ";" + elapsed + ";" + text);
        logWriter.flush();
    }
    catch (Exception e)
    {
        System.out.println("Couldn't write to logfile");
    }
}
/**
* construct the monitor gets a reference to all monitored filters
* @param threadPool the pool to be observed
* @param sampleDelta time in ms between samples
*/
public ThreadMonitor(URLLengthFilter urlLengthFilter,
URLVisitedFilter urlVisitedFilter,
URLScopeFilter urlScopeFilter,
/*DNSResolver dnsResolver,*/
RobotExclusionFilter reFilter,
MessageHandler messageHandler,
ThreadPool threadPool,
HostManager hostManager,
int sampleDelta)
{
this.urlLengthFilter = urlLengthFilter;
this.urlVisitedFilter = urlVisitedFilter;
this.urlScopeFilter = urlScopeFilter;
/* this.dnsResolver = dnsResolver;*/
this.hostManager = hostManager;
this.reFilter = reFilter;
this.messageHandler = messageHandler;
this.threadPool = threadPool;
bytesReadPerPeriod = new ArrayList();
this.sampleDelta = sampleDelta;
this.thread = new Thread(this, "ThreadMonitor");
this.thread.setPriority(7);
try
{
File logDir = new File("logs");
logDir.mkdir();
logWriter = new PrintWriter(new BufferedWriter(new FileWriter("logs/ThreadMonitor.log")));
}
catch(IOException e)
{
System.out.println("Couldn't create logfile (ThreadMonitor)");
}
}
/**
 * The monitor's main loop (java.lang.Runnable's run method; to be invoked via
 * start()). Every sampleDelta ms it
 * <ul>
 * <li>collects bytes read, tasks run and task state of every thread in the
 * pool and prints them to the console,</li>
 * <li>asks the pool to restart a thread that has been in one of the
 * connecting/getting/reading/closing states for more than 160 seconds,</li>
 * <li>records a sample for the moving averages,</li>
 * <li>prints and logs counters of the filters, the queues and the VM memory,</li>
 * <li>exits the VM once more than three consecutive samples showed no message
 * being processed, empty queues and only idle threads,</li>
 * <li>notifies its observers, and</li>
 * <li>periodically requests finalization, garbage collection and a flush of
 * all loggers.</li>
 * </ul>
 * The loop ends when the monitor thread is interrupted while sleeping.
 * Exceptions raised during one pass are printed and the loop continues.
 */
public void run()
{
// number of consecutive passes in which the crawler appeared to be idle
int nothingReadCount = 0;
long lastPeriodBytesRead = -1;
long monitorRunCount = 0;
// note: this local shadows the field of the same name within run()
long startTime = System.currentTimeMillis();
// column header of the log file
log("time;overallBytesRead;overallTasksRun;urlsQueued;urlsWaiting;isWorkingOnMessage;urlsScopeFiltered;urlsVisitedFiltered;urlsREFiltered;memUsed;memFree;totalMem;nrHosts;visitedSize;visitedStringSize;urlLengthFiltered");
while(true)
{
try
{
try
{
// Thread.sleep is static: this puts the current (monitor) thread to sleep
thread.sleep(sampleDelta);
}
catch(InterruptedException e)
{
// interrupt() was called: end the monitor
return;
}
Iterator threadIterator = threadPool.getThreadIterator();
int i=0;
StringBuffer bytesReadString = new StringBuffer(200);
StringBuffer rawBytesReadString = new StringBuffer(200);
StringBuffer tasksRunString = new StringBuffer(200);
long overallBytesRead = 0;
long overallTasksRun = 0;
long now = System.currentTimeMillis();
boolean finished = false;
//System.out.print("\f");
/*while(!finished)
{
boolean restart = false;*/
boolean allThreadsIdle = true;
// sb is always emptied before use, so it contributes nothing to the printed text
StringBuffer sb = new StringBuffer(500);
while(threadIterator.hasNext())
{
FetcherThread thread = (FetcherThread)threadIterator.next();
long totalBytesRead = thread.getTotalBytesRead();
overallBytesRead += totalBytesRead;
bytesReadString.append(formatBytes(totalBytesRead)).append( "; ");
rawBytesReadString.append(totalBytesRead).append("; ");
long tasksRun = thread.getTotalTasksRun();
overallTasksRun += tasksRun;
tasksRunString.append(tasksRun).append("; ");
// check task status
State state = thread.getTaskState();
//StringBuffer sb = new StringBuffer(200);
sb.setLength(0);
System.out.println(sb + "[" + thread.getThreadNumber() + "] " + state.getState() + " for " +
(now - state.getStateSince() ) + " ms " +
(state.getInfo() != null ? "(" + state.getInfo() +")" : "")
);
if(!(state.getState().equals(FetcherThread.STATE_IDLE)))
{
//if(allThreadsIdle) System.out.println("(not all threads are idle, '"+state.getState()+"' != '"+FetcherThread.STATE_IDLE+"')");
allThreadsIdle = false;
}
// restart a thread that has been stuck in a network state for more than 160 s
// NOTE(review): this compares the State object itself with the FT_* constants,
// whereas the idle check above compares state.getState() - confirm that
// State.equals() supports this comparison
if (((state.equals(FetcherTask.FT_CONNECTING)) || (state.equals(FetcherTask.FT_GETTING)) || (state.equals(FetcherTask.FT_READING)) || (state.equals(FetcherTask.FT_CLOSING)))
&& ((now - state.getStateSince()) > 160000))
{
System.out.println("****Restarting Thread " + thread.getThreadNumber());
threadPool.restartThread(thread.getThreadNumber());
break; // Iterator is invalid
}
}
/*if(restart)
{
continue;
}
finished = true;
}*/
/*
if(overallBytesRead == lastPeriodBytesRead)
{
*
disabled kickout feature - cm
nothingReadCount ++;
System.out.println("Anomaly: nothing read during the last period(s). " + (20-nothingReadCount+1) + " periods to exit");
if(nothingReadCount > 20) // nothing happens anymore
{
log("Ending");
System.out.println("End at " + new Date().toString());
// print some information
System.exit(0);
}
}
else
{
nothingReadCount = 0;
}*/
lastPeriodBytesRead = overallBytesRead;
//State reState = new State("hhh"); //reFilter.getState();
sb.setLength(0);
//System.out.println(sb + "Robot-Excl.Filter State: " + reState.getState() + " since " + (now-reState.getStateSince()) + " ms " + (reState.getInfo() != null ? " at " + reState.getInfo() : ""));
// remember this pass for the moving averages
addSample(new Sample(overallBytesRead, overallTasksRun, System.currentTimeMillis()));
int nrHosts = ((FetcherTaskQueue)threadPool.getTaskQueue()).getNumHosts();
int visitedSize = urlVisitedFilter.size();
int visitedStringSize = urlVisitedFilter.getStringSize();
double bytesPerSecond = getAverageBytesRead();
double docsPerSecond = getAverageDocsRead();
sb.setLength(0);
System.out.println(sb + "\nBytes total: " + formatBytes(overallBytesRead) + " (" + formatBytes((long)(((double)overallBytesRead)*1000/(System.currentTimeMillis()-startTime))) + " per second since start)" +
"\nBytes per Second: " + formatBytes((int)bytesPerSecond) + " (50 secs)" +
"\nDocs per Second: " + docsPerSecond +
"\nBytes per Thread: " + bytesReadString);
double docsPerSecondTotal = ((double)overallTasksRun)*1000/(System.currentTimeMillis()-startTime);
sb.setLength(0);
System.out.println(sb + "Docs read total: " + overallTasksRun + " Docs/s: " + fractionFormat.format(docsPerSecondTotal) +
"\nDocs p.thread: " + tasksRunString);
long memUsed = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
long memFree = Runtime.getRuntime().freeMemory();
long totalMem = Runtime.getRuntime().totalMemory();
sb.setLength(0);
System.out.println(sb + "Mem used: " + formatBytes(memUsed) + ", free: " + formatBytes(memFree) + " total VM: " + totalMem);
int urlsQueued = messageHandler.getQueued();
int urlsWaiting = threadPool.getQueueSize();
boolean isWorkingOnMessage = messageHandler.isWorkingOnMessage();
int urlsScopeFiltered = urlScopeFilter.getFiltered();
int urlsVisitedFiltered = urlVisitedFilter.getFiltered();
int urlsREFiltered = reFilter.getFiltered();
int urlLengthFiltered = urlLengthFilter.getFiltered();
sb.setLength(0);
System.out.println(sb + "URLs queued: " + urlsQueued + " waiting: " + urlsWaiting);
sb.setLength(0);
System.out.println(sb + "Message is being processed: " + isWorkingOnMessage);
sb.setLength(0);
System.out.println(sb + "URLs Filtered: length: " + urlLengthFiltered + " scope: " + urlsScopeFiltered + " visited: " + urlsVisitedFiltered + " robot.txt: " + urlsREFiltered);
sb.setLength(0);
System.out.println(sb + "Visited size: " + visitedSize + "; String Size in VisitedFilter: " + visitedStringSize + "; Number of Hosts: " + nrHosts + "; hosts in Host Manager: " + hostManager.getSize() + "\n");
sb.setLength(0);
// note: the per-thread raw byte counts are written between visitedStringSize
// and urlLengthFiltered, so the data line has more columns than the header
log(sb + "" + now + ";" + overallBytesRead + ";" + overallTasksRun + ";" + urlsQueued + ";" + urlsWaiting + ";" + isWorkingOnMessage + ";" + urlsScopeFiltered + ";" + urlsVisitedFiltered + ";" + urlsREFiltered + ";" + memUsed + ";" + memFree + ";" + totalMem + ";" + nrHosts + ";" + visitedSize + ";" + visitedStringSize + ";" + rawBytesReadString + ";" + urlLengthFiltered);
// nothing is queued, nothing is being processed and all threads are idle:
// after more than three such passes in a row the crawl is considered finished
if(!isWorkingOnMessage && (urlsQueued == 0) && (urlsWaiting == 0) && allThreadsIdle)
{
nothingReadCount++;
if(nothingReadCount > 3)
{
SimpleLoggerManager.getInstance().flush();
System.exit(0);
}
}
else
{
nothingReadCount = 0;
}
this.setChanged();
this.notifyObservers();
// Request Garbage Collection
monitorRunCount++;
// every 6th pass: run pending finalizers
if(monitorRunCount % 6 == 0)
{
System.runFinalization();
}
// every 2nd pass: request a garbage collection and flush all loggers
if(monitorRunCount % 2 == 0)
{
System.gc();
SimpleLoggerManager.getInstance().flush();
}
}
catch(Exception e)
{
// keep the monitor alive: report the problem and continue with the next pass
System.out.println("Monitor: Exception: " + e.getClass().getName());
e.printStackTrace();
}
}
}
/**
 * Launches the monitor: calls clear() and then starts the monitor thread.
 */
public void start()
{
    clear();
    this.thread.start();
}
/**
 * Interrupts the monitor thread.
 */
public void interrupt()
{
    this.thread.interrupt();
}
/**
 * Currently does nothing: the code that used to reset the collected samples
 * is commented out. Called by start() before the monitor thread is started.
 */
public synchronized void clear()
{
//sampleTimeStamps.clear();
/*for(int i=0; i < timeSamples.length; i++)
{
timeSamples[i].clear();
}
*/
}
/* public synchronized double getAverageReadCount(int maxPeriods)
{
int lastPeriod = bytesReadPerPeriod.size()-1;
int periods = Math.min(lastPeriod, maxPeriods);
if(periods < 2)
{
return 0.0;
}
long bytesLastPeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod)).bytesRead;
long bytesBeforePeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod - periods)).bytesRead;
long bytesRead = bytesLastPeriod - bytesBeforePeriod;
long endTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue();
long startTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1 - periods)).longValue();
long duration = endTime - startTime;
System.out.println("bytes read: " + bytesRead + " duration in s: " + duration/1000.0 + " = " + ((double)bytesRead) / (duration/1000.0) + " per second");
return ((double)bytesRead) / (duration/1000.0);
}
*/
/*public synchronized double getDocsPerSecond(int maxPeriods)
{
int lastPeriod = bytesReadPerPeriod.size()-1;
int periods = Math.min(lastPeriod, maxPeriods);
if(periods < 2)
{
return 0.0;
}
long docsLastPeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod)).docsRead;
long docsBeforePeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod - periods)).docsRead;
long docsRead = docsLastPeriod - docsBeforePeriod;
long endTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue();
long startTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size() - periods)).longValue();
long duration = endTime - startTime;
System.out.println("docs read: " + docsRead + " duration in s: " + duration/1000.0 + " = " + ((double)docsRead) / (duration/1000.0) + " per second");
return ((double)docsRead) / (duration/1000.0);
}*/
/**
* retrieves the number of threads whose byteCount is below the threshold
* @param maxPeriods the number of periods to look back
* @param threshold the number of bytes per second that acts as the threshold for a stalled thread
*/
/*public synchronized int getStalledThreadCount(int maxPeriods, double threshold)
{
int periods = Math.min(sampleTimeStamps.size(), maxPeriods);
int stalledThreads = 0;
int j=0, i=0;
if(periods > 1)
{
for(j=0; j<timeSamples.length; j++)
{
long threadByteCount = 0;
ArrayList actArrayList = timeSamples[j];
double bytesPerSecond = 0;
try
{
for(i=0; i<periods; i++)
{
Sample actSample = (Sample)(actArrayList.get(i));
threadByteCount += actSample.bytesRead;
}
}
catch(Exception e)
{
System.out.println("getAverageReadCount: " + e.getClass().getName() + ": " + e.getMessage() + "(" + i + ";" + j + ")");
e.printStackTrace();
}
bytesPerSecond = ((double)threadByteCount) /
((double)((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue()
- ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-periods)).longValue()) * 1000.0;
if(bytesPerSecond < threshold)
{
stalledThreads++;
}
}
}
return stalledThreads;
}
*/
/** maximum number of samples kept for the moving averages */
private static final int MAX_SAMPLES = 10;

/** number of samples currently stored (at most MAX_SAMPLES) */
int samples = 0;

/** slot that is overwritten next once the buffer is full; always the oldest sample */
private int nextSampleSlot = 0;

/**
 * Stores a sample in a ring buffer that holds the last MAX_SAMPLES samples.
 * While the buffer is not full the sample is appended; afterwards it
 * replaces the oldest stored sample.
 * <p>
 * The previous implementation stopped counting at 10 and therefore always
 * overwrote slot 0, so the buffer kept nine stale samples forever.
 *
 * @param s the sample to add
 */
public void addSample(Sample s)
{
    if (samples < MAX_SAMPLES)
    {
        bytesReadPerPeriod.add(s);
        samples++;
    }
    else
    {
        bytesReadPerPeriod.set(nextSampleSlot, s);
        nextSampleSlot = (nextSampleSlot + 1) % MAX_SAMPLES;
    }
}
/**
 * Finds the samples with the smallest and the largest time stamp among the
 * stored samples.
 *
 * @return a two element array {oldest, newest}, or <code>null</code> if
 *         there is no sample or no time has passed between the oldest and
 *         the newest one (so that no rate can be computed)
 */
private Sample[] findOldestAndNewestSample()
{
    Sample oldest = null;
    Sample newest = null;
    for (Iterator i = bytesReadPerPeriod.iterator(); i.hasNext(); )
    {
        Sample s = (Sample) i.next();
        if (oldest == null)
        {
            oldest = newest = s;
        }
        else if (s.time < oldest.time)
        {
            oldest = s;
        }
        else if (s.time > newest.time)
        {
            newest = s;
        }
    }
    if (oldest == null || newest.time == oldest.time)
    {
        return null;
    }
    return new Sample[]{oldest, newest};
}

/**
 * Computes the average number of bytes read per second over the period
 * covered by the stored samples.
 *
 * @return bytes per second between the oldest and the newest stored sample;
 *         0.0 if fewer than two samples with different time stamps are
 *         available (previously this case produced a NullPointerException
 *         or NaN)
 */
public double getAverageBytesRead()
{
    Sample[] span = findOldestAndNewestSample();
    if (span == null)
    {
        return 0.0;
    }
    Sample oldest = span[0];
    Sample newest = span[1];
    return ((newest.bytesRead - oldest.bytesRead) / ((newest.time - oldest.time) / 1000.0));
}

/**
 * Computes the average number of documents read per second over the period
 * covered by the stored samples.
 *
 * @return documents per second between the oldest and the newest stored
 *         sample; 0.0 if fewer than two samples with different time stamps
 *         are available (previously this case produced a
 *         NullPointerException or NaN)
 */
public double getAverageDocsRead()
{
    Sample[] span = findOldestAndNewestSample();
    if (span == null)
    {
        return 0.0;
    }
    Sample oldest = span[0];
    Sample newest = span[1];
    return ((newest.docsRead - oldest.docsRead) / ((newest.time - oldest.time) / 1000.0));
}
}

View File

@ -0,0 +1,69 @@
package de.lanlab.larm.fetcher;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @created 28. Januar 2002
* @version 1.0
*/
/**
 * Kills URLs that are longer than a configured number of characters. Used to
 * prevent endless loops where a page contains the current URL plus some
 * extension.
 *
 * @author Clemens Marschner
 * @created 28. Januar 2002
 */
public class URLLengthFilter extends Filter implements MessageListener
{
    /** the handler this filter was added to */
    MessageHandler messageHandler;

    /** longest accepted length, see handleRequest() */
    int maxLength;


    /**
     * Constructor for the URLLengthFilter object
     *
     * @param maxLength maximum accepted length. Note that handleRequest()
     *                  compares it with the length of the URL's file part
     *                  (path + query) only, not with the length of the total
     *                  URL.
     */
    public URLLengthFilter(int maxLength)
    {
        this.maxLength = maxLength;
    }


    /**
     * called by the message handler
     *
     * @param handler the handler
     */
    public void notifyAddedToMessageHandler(MessageHandler handler)
    {
        messageHandler = handler;
    }


    /**
     * Lets a message pass unless the file part (path + query) of its URL is
     * longer than maxLength. A filtered message increases the filter counter.
     *
     * @param message the message to check; must be a URLMessage
     * @return the original message or <code>null</code> if the URL was too long
     */
    public Message handleRequest(Message message)
    {
        final String pathAndQuery = ((URLMessage) message).getUrl().getFile();
        final boolean tooLong = pathAndQuery != null && pathAndQuery.length() > maxLength;
        if (!tooLong)
        {
            return message;
        }
        filtered++;
        return null;
    }
}

View File

@ -0,0 +1,87 @@
package de.lanlab.larm.fetcher;
import java.net.*;
import java.io.*;
import de.lanlab.larm.util.URLUtils;
/**
 * Represents a URL which is passed around in the messageHandler, together
 * with the URL of the referring document and a flag telling whether the link
 * was a frame. String forms of both URLs (without the reference part) are
 * cached.
 */
public class URLMessage implements Message, Serializable
{
    /**
     * the URL
     */
    protected URL url;

    /** cached external form of the URL without reference part; null if url is null */
    protected String urlString;

    /** the URL of the referring document; may be null */
    protected URL referer;

    /** cached external form of the referer without reference part; null if referer is null */
    protected String refererString;

    boolean isFrame;


    /**
     * Creates the message and caches the string forms of both URLs.
     *
     * @param url     the URL; may be null
     * @param referer the URL of the referring document; may be null
     * @param isFrame the frame flag reported by getInfo()
     */
    public URLMessage(URL url, URL referer, boolean isFrame)
    {
        this.url = url;
        this.urlString = toStringNoRef(url);
        this.referer = referer;
        this.refererString = toStringNoRef(referer);
        this.isFrame = isFrame;
    }


    /**
     * Converts a URL to the string form cached by this class.
     *
     * @param u the URL; may be null
     * @return the external form without reference part, or null if u is null
     */
    private static String toStringNoRef(URL u)
    {
        return u != null ? URLUtils.toExternalFormNoRef(u) : null;
    }


    public URL getUrl()
    {
        return this.url;
    }


    public URL getReferer()
    {
        return this.referer;
    }


    /**
     * @return the cached string form of the URL (null if the URL is null)
     */
    public String toString()
    {
        return urlString;
    }


    public String getURLString()
    {
        return urlString;
    }


    public String getRefererString()
    {
        return refererString;
    }


    /**
     * @return the hash code of the URL, or 0 if the URL is null
     */
    public int hashCode()
    {
        // NOTE(review): equals() is not overridden, and java.net.URL.hashCode()
        // may resolve the host name; hashing urlString might be preferable
        return url != null ? url.hashCode() : 0;
    }


    /**
     * Writes the two URLs and the frame flag; the cached strings are not
     * written but rebuilt by readObject().
     */
    private void writeObject(java.io.ObjectOutputStream out) throws IOException
    {
        out.writeObject(url);
        out.writeObject(referer);
        out.writeBoolean(isFrame);
    }


    /**
     * Reads the fields in the order writeObject() wrote them and rebuilds the
     * cached strings the same way the constructor does. A null URL or
     * referer (e.g. the start URL has no referer, see getInfo()) is accepted;
     * previously it caused a NullPointerException here.
     */
    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException
    {
        url = (URL) in.readObject();
        referer = (URL) in.readObject();
        urlString = toStringNoRef(url);
        refererString = toStringNoRef(referer);
        isFrame = in.readBoolean();
    }


    /**
     * @return referer string (or "&lt;start&gt;" if there is no referer), URL
     *         string and frame flag ("1" or "0"), separated by tabs
     */
    public String getInfo()
    {
        return (referer != null ? refererString : "<start>") + "\t" + urlString + "\t" + (isFrame ? "1" : "0");
    }
}

View File

@ -0,0 +1,75 @@
package de.lanlab.larm.fetcher;
import org.apache.oro.text.regex.Perl5Matcher;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Pattern;
/**
 * Filter class; checks an incoming message against a regular expression. If
 * the URL does not match that expression, the message is discarded.
 *
 * @author Clemens Marschner
 */
class URLScopeFilter extends Filter implements MessageListener
{
    /** the handler this filter was added to */
    MessageHandler messageHandler;

    /**
     * the regular expression which describes a valid URL
     */
    private Pattern pattern;

    private Perl5Matcher matcher;

    private Perl5Compiler compiler;


    public URLScopeFilter()
    {
        compiler = new Perl5Compiler();
        matcher = new Perl5Matcher();
    }


    public void notifyAddedToMessageHandler(MessageHandler handler)
    {
        messageHandler = handler;
    }


    /**
     * @return the string form of the current pattern as delivered by its
     *         toString() method; a pattern must have been set before
     */
    public String getRexString()
    {
        return pattern.toString();
    }


    /**
     * Sets the regular expression. It is compiled case insensitively and in
     * single line mode.
     *
     * @param rexString the expression
     * @exception org.apache.oro.text.regex.MalformedPatternException if the
     *            expression cannot be compiled
     */
    public void setRexString(String rexString) throws org.apache.oro.text.regex.MalformedPatternException
    {
        final int options = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK;
        this.pattern = compiler.compile(rexString, options);
    }


    /**
     * This method will be called by the message handler. Tests the URL of a
     * URLMessage against the pattern and throws the message out (counting it
     * as filtered) if it's not in the scope. Messages of any other type pass
     * unchecked.
     *
     * @param message the message to check
     * @return the original message, or <code>null</code> if its URL is not in
     *         the scope
     */
    public Message handleRequest(Message message)
    {
        if (!(message instanceof URLMessage))
        {
            return message;
        }
        final char[] urlChars = ((URLMessage) message).toString().toCharArray();
        if (matcher.matches(urlChars, pattern))
        {
            return message;
        }
        filtered++;
        return null;
    }
}

View File

@ -0,0 +1,114 @@
package de.lanlab.larm.fetcher;
import java.net.URL;
import java.util.*;
import de.lanlab.larm.util.SimpleLogger;
/**
 * Contains a HashSet with the string forms of all URLs that already passed.
 * Adds each new URL to that set, or consumes the message if its URL is
 * already present.
 *
 * @todo find ways to reduce memory consumption here. the approach is somewhat naive
 *
 * @author Clemens Marschner
 * @created 3. Januar 2002
 */
class URLVisitedFilter extends Filter implements MessageListener
{
    /** the handler this filter was added to */
    MessageHandler messageHandler;

    /** logger that receives the info line of every filtered message; may be null */
    SimpleLogger log;

    /** string forms of all URLs seen so far */
    HashSet urlHash;

    /** not used within this class */
    static Boolean dummy = new Boolean(true);

    /** sum of the lengths of all strings currently stored in urlHash */
    private int stringSize = 0;


    /**
     * Constructor for the URLVisitedFilter object
     *
     * @param initialHashCapacity initial capacity of the set of visited URLs
     * @param log                 logger for filtered messages, or null to
     *                            disable logging
     */
    public URLVisitedFilter(int initialHashCapacity, SimpleLogger log)
    {
        urlHash = new HashSet(initialHashCapacity);
        this.log = log;
    }


    /**
     * called by the message handler
     *
     * @param handler the handler
     */
    public void notifyAddedToMessageHandler(MessageHandler handler)
    {
        this.messageHandler = handler;
    }


    /**
     * Forgets all visited URLs. The character count reported by
     * getStringSize() is reset as well, since it describes the content of the
     * set (it used to keep its old value).
     */
    public void clearHashtable()
    {
        urlHash.clear();
        stringSize = 0;
    }


    /**
     * Lets a URLMessage pass only the first time its URL string is seen. A
     * repeated URL is counted as filtered, logged (if a logger was given) and
     * consumed. Messages of any other type pass unchecked.
     *
     * @param message the message to check
     * @return the original message, or <code>null</code> if its URL was
     *         already present
     */
    public Message handleRequest(Message message)
    {
        if (message instanceof URLMessage)
        {
            URLMessage urlMessage = (URLMessage) message;
            String urlString = urlMessage.getURLString();
            // add() reports whether the string was new: one lookup instead of
            // contains() followed by add()
            if (!urlHash.add(urlString))
            {
                filtered++;
                if (log != null)
                {
                    log.logThreadSafe(urlMessage.getInfo());
                }
                return null;
            }
            stringSize += urlString.length();
        }
        return message;
    }


    /**
     * Just a method to get a rough number of characters contained in the set.
     * With that you see that the total memory is mostly used by this class.
     *
     * @return the sum of the lengths of all stored URL strings
     */
    public int getStringSize()
    {
        return stringSize;
    }


    /**
     * @return the number of URLs stored
     */
    public int size()
    {
        return urlHash.size();
    }
}

View File

@ -0,0 +1,875 @@
package de.lanlab.larm.graph;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @version 1.0
*/
import java.io.*;
import java.util.*;
/**
 * A node of the link graph. It knows the transitions that lead to it and
 * away from it, and carries a distance value and two rank values.
 * <p>
 * The natural ordering depends on the static <code>sortType</code>: for 0 and
 * 1 nodes are ordered by descending <code>nodeRank[sortType]</code>, for any
 * other value by descending number of incoming transitions. Note that this
 * ordering is not consistent with equals().
 *
 * @author Administrator
 * @created 30. Januar 2002
 */
class Node implements Comparable
{
    /** transitions that end at this node; 16 + 4 bytes per entry */
    LinkedList incoming;

    /** transitions that start at this node; 16 + 4 bytes per entry */
    LinkedList outgoing;

    int id;

    float distance;

    String name;

    String title;

    /** the two rank values, selected by index 0 or 1 */
    float nodeRank[] = new float[2];

    /**
     * selects the sort criterion used by compareTo() for ALL nodes: 0 or 1 =
     * the rank with that index, anything else = number of incoming transitions
     */
    public static int sortType = 0;


    /**
     * Compares this node with another one according to sortType, in
     * descending order (the node with the larger value comes first).
     *
     * @param n the other Node
     * @return a negative number, zero or a positive number if this node is to
     *         be sorted before, equal to or after the other node
     */
    public int compareTo(Object n)
    {
        Node other = (Node) n;
        if (sortType < 2)
        {
            double diff = other.nodeRank[sortType] - nodeRank[sortType];
            return diff < 0 ? -1 : diff > 0 ? 1 : 0;
        }
        else
        {
            return (other.incoming.size() - incoming.size());
        }
    }


    /**
     * Constructor for the Node object. The distance is initialized with
     * Float.MAX_VALUE, both ranks with 1.
     *
     * @param id    the id of the node
     * @param name  the name of the node
     * @param title the title of the node
     */
    public Node(int id, String name, String title)
    {
        this.id = id;
        this.name = name;
        this.title = title;
        this.incoming = new LinkedList();
        this.outgoing = new LinkedList();
        this.distance = Float.MAX_VALUE;
        this.nodeRank[0] = this.nodeRank[1] = 1;
    }


    /**
     * Adds an incoming transition unless a transition from the same node is
     * already present.
     * <p>
     * The previous check, <code>incoming.contains(new Integer(id))</code>,
     * could never succeed because the list holds Transition objects, so
     * duplicates were always added.
     *
     * @param incomingT the transition that ends at this node
     * @return true if the transition was added, false if a transition from
     *         the same node already existed
     */
    public boolean addIncoming(Transition incomingT)
    {
        int fromId = incomingT.getFrom().id;
        // attn: linear search doesn't scale well, but also saves memory
        for (Iterator it = incoming.iterator(); it.hasNext(); )
        {
            if (((Transition) it.next()).getFrom().id == fromId)
            {
                return false;
            }
        }
        incoming.addLast(incomingT);
        return true;
    }


    /**
     * Adds an outgoing transition unless a transition to the same node is
     * already present.
     * <p>
     * The previous check, <code>outgoing.contains(new Integer(id))</code>,
     * could never succeed because the list holds Transition objects, so
     * duplicates were always added.
     *
     * @param outgoingT the transition that starts at this node
     * @return true if the transition was added, false if a transition to the
     *         same node already existed
     */
    public boolean addOutgoing(Transition outgoingT)
    {
        int toId = outgoingT.getTo().id;
        for (Iterator it = outgoing.iterator(); it.hasNext(); )
        {
            if (((Transition) it.next()).getTo().id == toId)
            {
                return false;
            }
        }
        outgoing.addLast(outgoingT);
        return true;
    }


    /**
     * Gets the incoming attribute of the Node object
     *
     * @return the internal list of incoming transitions (not a copy)
     */
    public LinkedList getIncoming()
    {
        return incoming;
    }


    /**
     * Gets the outgoing attribute of the Node object
     *
     * @return the internal list of outgoing transitions (not a copy)
     */
    public LinkedList getOutgoing()
    {
        return outgoing;
    }


    /**
     * Sets the distance attribute of the Node object
     *
     * @param distance The new distance value
     */
    public void setDistance(float distance)
    {
        this.distance = distance;
    }


    /**
     * Gets the distance attribute of the Node object
     *
     * @return The distance value
     */
    public float getDistance()
    {
        return distance;
    }


    /**
     * Gets the name attribute of the Node object
     *
     * @return The name value
     */
    public String getName()
    {
        return name;
    }


    /**
     * Sets the title attribute of the Node object
     *
     * @param title The new title value
     */
    public void setTitle(String title)
    {
        this.title = title;
    }


    /**
     * Gets the title attribute of the Node object
     *
     * @return The title value
     */
    public String getTitle()
    {
        return title;
    }


    /**
     * Gets one of the two rank values of the Node object
     *
     * @param idx index of the rank (0 or 1)
     * @return The nodeRank value
     */
    public float getNodeRank(int idx)
    {
        return nodeRank[idx];
    }


    /**
     * Sets one of the two rank values of the Node object
     *
     * @param nodeRank The new nodeRank value
     * @param idx      index of the rank to set (0 or 1)
     */
    public void setNodeRank(float nodeRank, int idx)
    {
        this.nodeRank[idx] = nodeRank;
    }
}
/**
* Description of the Class
*
* @author Administrator
* @created 30. Januar 2002
*/
class Transition
{
Node from;
Node to;
float distance;
float linkRank[] = new float[2];
boolean isFrame;
/**
* Constructor for the Transition object
*
* @param from Description of the Parameter
* @param to Description of the Parameter
* @param isFrame Description of the Parameter
*/
public Transition(Node from, Node to, boolean isFrame)
{
LinkedList l = from.getOutgoing();
Iterator i = l.iterator();
while(i.hasNext())
{
Transition t = (Transition)i.next();
if(t.getTo() == to)
{
return; // schon enthalten
}
}
this.from = from;
this.to = to;
from.addOutgoing(this);
to.addIncoming(this);
this.distance = Integer.MAX_VALUE;
this.isFrame = isFrame;
this.linkRank[0] = this.linkRank[1] = 1;
}
/**
* Gets the to attribute of the Transition object
*
* @return The to value
*/
public Node getTo()
{
return to;
}
/**
* Gets the from attribute of the Transition object
*
* @return The from value
*/
public Node getFrom()
{
return from;
}
/**
* Gets the distance attribute of the Transition object
*
* @return The distance value
*/
public float getDistance()
{
return distance;
}
/**
* Sets the distance attribute of the Transition object
*
* @param distance The new distance value
*/
public void setDistance(float distance)
{
this.distance = distance;
}
/**
* Gets the frame attribute of the Transition object
*
* @return The frame value
*/
public boolean isFrame()
{
return isFrame;
}
/**
* Gets the linkRank attribute of the Transition object
*
* @param idx Description of the Parameter
* @return The linkRank value
*/
public float getLinkRank(int idx)
{
return linkRank[idx];
}
/**
* Sets the linkRank attribute of the Transition object
*
* @param linkRank The new linkRank value
* @param idx The new linkRank value
*/
public void setLinkRank(float linkRank, int idx)
{
this.linkRank[idx] = linkRank;
}
}
/**
 * Reads a crawler <code>store.log</code>, builds a graph of the pages and
 * links found in it and answers shortest-route queries between two pages
 * from an interactive console. Experimental code.
 *
 * @author    Administrator
 * @created   30. Januar 2002
 */
public class DistanceCount
{
    /** Maps a normalized page name to its Node. */
    HashMap nodes = new HashMap(100000);
    /** Work queue of {@link #calculateShortestDistance(Node)}. */
    LinkedList nodesToDo = new LinkedList();
    /** Next node id to hand out; shared by all instances. */
    static int id = 0;

    /**
     * Brings a page name into the form used as key in {@link #nodes}: one
     * trailing slash is removed and the name is lower-cased. A fixed locale is
     * used so that the keys do not depend on the platform default locale.
     *
     * @param url  the page name as read from the log or typed by the user
     * @return     the normalized name
     */
    private static String normalizeName(String url)
    {
        if (url.endsWith("/"))
        {
            url = url.substring(0, url.length() - 1);
        }
        return url.toLowerCase(java.util.Locale.ENGLISH);
    }

    /**
     * Returns the node registered under <code>name</code>, creating it if
     * necessary. A non-null title replaces the title of an existing node.
     *
     * @param name   normalized page name
     * @param title  page title, or null to leave an existing title untouched
     * @return       the existing or newly created node
     */
    Node getOrCreateNode(String name, String title)
    {
        Node node = (Node) nodes.get(name);
        if (node != null)
        {
            if (title != null)
            {
                node.setTitle(title);
            }
            return node;
        }
        else
        {
            node = new Node(id++, name, title);
            nodes.put(name, node);
            return node;
        }
    }

    /**
     * Reads the log file line by line and builds the graph. Malformed lines
     * are reported on standard output and skipped.
     *
     * @param filename         path of the tab-separated log file
     * @exception IOException  if the file cannot be opened or read
     */
    public DistanceCount(String filename)
        throws IOException
    {
        System.out.println("reading file...");
        long t1 = System.currentTimeMillis();
        int lines = 0;
        BufferedReader b = new BufferedReader(new FileReader(filename));
        try
        {
            String line;
            while ((line = b.readLine()) != null)
            {
                lines++;
                try
                {
                    parseLine(line);
                    if (lines % 10000 == 0)
                    {
                        System.out.println("" + lines + " Lines; " + nodes.size() + " nodes");
                    }
                }
                catch (NoSuchElementException e)
                {
                    System.out.println("Malformed line " + lines + ": field number doesn't match");
                }
                catch (NumberFormatException e)
                {
                    System.out.println("Malformed line " + lines + ": NumberFormat wrong");
                }
            }
        }
        finally
        {
            // the reader used to be left open
            b.close();
        }
        System.out.println("finished; b" + lines + " Lines; " + nodes.size() + " nodes");
        long t2 = System.currentTimeMillis();
        System.out.println("" + (t2 - t1) + " ms");
    }

    /**
     * Parses one tab-separated log line and adds its link to the graph.
     * The first three fields are the source page, the target page and a frame
     * flag (1 = frame link). If more than three further fields follow, the
     * fourth of them is taken as the quoted title of the target page.
     *
     * @param line  one line of the log file
     * @exception NoSuchElementException  if the line has too few fields
     * @exception NumberFormatException   if the frame flag is not a number
     */
    private void parseLine(String line)
    {
        StringTokenizer st = new StringTokenizer(line, "\t");
        String from = normalizeName(st.nextToken());
        String to = normalizeName(st.nextToken());
        boolean isFrame = (Integer.parseInt(st.nextToken()) == 1);
        String title = null;
        if (st.countTokens() > 3)
        {
            st.nextToken();     // result
            st.nextToken();     // MIME type
            st.nextToken();     // size
            title = st.nextToken();
            if (title.length() > 2)
            {
                // drop the first and last character and cut at an inner quote
                title = title.substring(1, title.length() - 1);
                int indexOfPara = title.indexOf("\"");
                if (indexOfPara > -1)
                {
                    title = title.substring(0, indexOfPara);
                }
            }
        }
        Node fromNode = getOrCreateNode(from, null);
        Node toNode = getOrCreateNode(to, title);
        // the constructor registers the link with both nodes
        new Transition(fromNode, toNode, isFrame);
    }

    /**
     * Assigns every node reachable from <code>firstNode</code> its shortest
     * distance from it. A frame link costs 0.25, every other link costs 1.
     * Each transition is stamped with the distance reached through it, which
     * {@link #printShortestRoute(Node, int, int)} uses to walk back.
     *
     * @param firstNode  the node the distances are measured from
     */
    public void calculateShortestDistance(Node firstNode)
    {
        clearDistances();
        firstNode.setDistance(0);
        nodesToDo.addLast(firstNode);
        int calculations = 0;
        while (!nodesToDo.isEmpty())
        {
            if (calculations % 100000 == 0)
            {
                System.out.println("Calculations: " + calculations + "; nodes to go: " + nodesToDo.size() + " total Mem: " + Runtime.getRuntime().totalMemory() + "; free mem: " + Runtime.getRuntime().freeMemory());
            }
            calculations++;
            Node act = (Node) nodesToDo.removeFirst();
            float distance = act.getDistance();
            Iterator i = act.getOutgoing().iterator();
            while (i.hasNext())
            {
                Transition t = (Transition) i.next();
                float newDistance = distance + (t.isFrame() ? 0.25f : 1f);
                if (t.getDistance() > newDistance)
                {
                    t.setDistance(newDistance);
                    Node to = t.getTo();
                    // Compare with the candidate distance. The old test against
                    // the distance of the current node could replace a short
                    // distance of the target by a longer one.
                    if (to.distance > newDistance)
                    {
                        to.setDistance(newDistance);
                        // distance improved: its outgoing links must be redone
                        nodesToDo.addLast(to);
                    }
                }
            }
        }
        System.out.println("Calculations: " + calculations );
    }

    /**
     * Resets the distance of every node and of every transition to
     * Float.MAX_VALUE. Transitions have to be reset as well, otherwise the
     * values of a previous run block the relaxation in the next run.
     */
    public void clearDistances()
    {
        System.out.println("Clearing distance data...");
        Iterator it = nodes.values().iterator();
        int nr = 0;
        while (it.hasNext())
        {
            Node n = (Node) it.next();
            nr++;
            n.setDistance(Float.MAX_VALUE);
            // every transition is listed as outgoing at exactly one node
            Iterator out = n.getOutgoing().iterator();
            while (out.hasNext())
            {
                ((Transition) out.next()).setDistance(Float.MAX_VALUE);
            }
        }
        System.out.println("cleared " + nr + " nodes. done");
    }

    /**
     * Calculates the distances from <code>nodeFrom</code> and prints the
     * route(s) leading to <code>nodeTo</code>. Both names are normalized the
     * same way as the names read from the log before they are looked up.
     *
     * @param nodeFrom  name of the start page
     * @param nodeTo    name of the target page
     */
    public void printDistance(String nodeFrom, String nodeTo)
    {
        Node firstNode = (Node) nodes.get(normalizeName(nodeFrom));
        if (firstNode == null)
        {
            System.out.println("FROM node not found");
            return;
        }
        Node toNode = (Node) nodes.get(normalizeName(nodeTo));
        if (toNode == null)
        {
            System.out.println("TO node not found");
            return;
        }
        System.out.println("calculating...");
        calculateShortestDistance(firstNode);
        System.out.println("\nSorting...");
        // NOTE: the disabled page-rank and statistics experiments that used to
        // be commented out here were removed; see the version history.
        printShortestRoute(toNode, 0,0);
    }

    /**
     * Picks two nodes at random, calculates the distances from the first and
     * prints the route(s) to the second.
     */
    public void printRandomRoute()
    {
        Object[] nodeArray = nodes.values().toArray();
        if (nodeArray.length == 0)
        {
            System.out.println("no nodes available");
            return;
        }
        Random r = new java.util.Random(System.currentTimeMillis());
        Node from = (Node) nodeArray[r.nextInt(nodeArray.length)];
        Node to = (Node) nodeArray[r.nextInt(nodeArray.length)];
        System.out.println("Calculating distance...");
        calculateShortestDistance(from);
        System.out.println("printing...");
        printShortestRoute(to, 0,0);
    }

    /**
     * Prints <code>n</code> and then, recursively, the source of every
     * incoming transition whose distance does not exceed the distance of
     * <code>n</code>. A node without incoming links is printed as
     * <code>&lt;start&gt;</code>; a node with a distance above 10000000 is
     * reported as not linked.
     *
     * @param n          the node to print
     * @param indent     number of spaces to indent the output with
     * @param linkCount  sum of the incoming-link counts of the nodes printed
     *                   on the way to <code>n</code>
     */
    public void printShortestRoute(Node n, int indent, int linkCount)
    {
        // build the indentation instead of cutting it out of a fixed literal,
        // which failed as soon as indent exceeded the literal's length
        StringBuffer pad = new StringBuffer();
        for (int i = 0; i < indent; i++)
        {
            pad.append(' ');
        }
        String spaces = pad.toString();
        if (n.getIncoming().isEmpty())
        {
            System.out.println(spaces + "<start>");
        }
        else
        {
            System.out.print(spaces + "+- " + n.name + " (" + (n.getTitle() != null ? n.getTitle().substring(0,Math.min(n.getTitle().length(),25)) : "") + "\") D:" + n.distance + "; L:" + n.getIncoming().size() + "; C:" + linkCount);
            float dist = n.distance;
            if (dist > 10000000)
            {
                System.out.println(spaces + "\n--no link--");
                return;
            }
            Iterator it = n.getIncoming().iterator();
            while (it.hasNext())
            {
                Transition t = (Transition) it.next();
                if (t.distance <= dist)
                {
                    if (t.isFrame())
                    {
                        System.out.println(" **F** ->");
                    }
                    else
                    {
                        System.out.println(" -> ");
                    }
                    printShortestRoute(t.getFrom(), indent + 1, linkCount + n.getIncoming().size());
                }
            }
        }
    }

    /**
     * this class reads in store.log, constructs a graph of the crawled web and is able
     * to perform a breadth-first search for the shortest distance between two nodes<br>
     * Note: this is experimental stuff. get into the source code to see how it works
     * @param args args[0] must point to the store.log file
     */
    public static void main(String[] args)
    {
        // Syntax: DistanceCount <store.log>
        try
        {
            DistanceCount dc = new DistanceCount(args[0]);
            boolean running = true;
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in),400);
            while (running)
            {
                System.out.print("\n\nCommand (? for help) > ");
                String input = in.readLine();
                if (input == null)
                {
                    // end of input: quit instead of failing with a
                    // NullPointerException in the tokenizer
                    break;
                }
                StringTokenizer st = new StringTokenizer(input," ");
                String command;
                boolean printHelp = false;
                if (!st.hasMoreTokens())
                {
                    printHelp = true;
                    command = "?";
                }
                else
                {
                    command = st.nextToken();
                }
                try
                {
                    if ("?".equals(command))
                    {
                        printHelp = true;
                    }
                    else if ("d".equals(command))
                    {
                        String from = st.nextToken();
                        String to = st.nextToken();
                        dc.printDistance(from ,to);
                    }
                    else if ("q".equals(command))
                    {
                        running = false;
                    }
                    else if ("r".equals(command))
                    {
                        dc.printRandomRoute();
                    }
                    else
                    {
                        System.out.println("unknown command '" + command + "'");
                    }
                }
                catch (java.util.NoSuchElementException e)
                {
                    // a command argument is missing
                    System.out.println("Syntax error");
                    e.printStackTrace();
                    printHelp = true;
                }
                catch(Exception e)
                {
                    // keep the console alive if a command fails
                    e.printStackTrace();
                }
                if (printHelp)
                {
                    System.out.println("\nSyntax\n" +
                    "? print this help message\n" +
                    "d <page1> <page2> print shortest route from page1 to page2\n" +
                    "r print random walk\n" +
                    "q quit");
                }
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        catch (ArrayIndexOutOfBoundsException e)
        {
            // args[0] is missing
            System.out.println("Syntax: java ... store.log");
        }
    }
}

View File

@ -0,0 +1,154 @@
package de.lanlab.larm.gui;
/*
A basic extension of the java.awt.Dialog class
*/
import java.awt.*;
/**
 * "About" dialog with two text labels and an OK button. The dialog is
 * disposed when OK is pressed or the window is closed.
 */
public class AboutDialog extends Dialog
{
    /**
     * Builds the dialog; it is created hidden.
     *
     * @param parent  the owning frame
     * @param modal   true to block input to other windows while shown
     */
    public AboutDialog(Frame parent, boolean modal)
    {
        super(parent, modal);
        // This code is automatically generated by Visual Cafe when you add
        // components to the visual environment. It instantiates and initializes
        // the components. To modify the code, only use code syntax that matches
        // what Visual Cafe can generate, or Visual Cafe may be unable to back
        // parse your Java file into its visual environment.
        //{{INIT_CONTROLS
        setLayout(null);
        setSize(249,150);
        setVisible(false);
        label1.setText("LARM - LANLab Retrieval Machine");
        add(label1);
        label1.setBounds(12,12,228,24);
        okButton.setLabel("OK");
        add(okButton);
        okButton.setBounds(95,85,66,27);
        label2.setText("(C) 2000 Clemens Marschner");
        add(label2);
        label2.setBounds(12,36,228,24);
        setTitle("AWT-Anwendung - Info");
        //}}
        //{{REGISTER_LISTENERS
        SymWindow aSymWindow = new SymWindow();
        this.addWindowListener(aSymWindow);
        SymAction lSymAction = new SymAction();
        okButton.addActionListener(lSymAction);
        //}}
    }

    /**
     * Builds the dialog and replaces the default title.
     *
     * @param parent  the owning frame
     * @param title   the window title
     * @param modal   true to block input to other windows while shown
     */
    public AboutDialog(Frame parent, String title, boolean modal)
    {
        this(parent, modal);
        setTitle(title);
    }

    /**
     * Creates the native peer and, the first time only, enlarges the window
     * by its insets and shifts all components accordingly, so that the
     * absolute coordinates set in the constructor refer to the client area.
     */
    public void addNotify()
    {
        // Record the size of the window prior to calling parents addNotify.
        Dimension d = getSize();
        super.addNotify();
        // Only do this once.
        if (fComponentsAdjusted)
            return;
        // Adjust components according to the insets
        Insets insets = getInsets();
        setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height);
        Component components[] = getComponents();
        for (int i = 0; i < components.length; i++)
        {
            Point p = components[i].getLocation();
            p.translate(insets.left, insets.top);
            components[i].setLocation(p);
        }
        // Used for addNotify check.
        fComponentsAdjusted = true;
    }

    /**
     * Shows or hides the dialog. Before it is shown, the dialog is centered
     * over its parent; without a parent the current location is kept.
     *
     * @param b  true to show the dialog, false to hide it
     */
    public void setVisible(boolean b)
    {
        if (b)
        {
            Container parent = getParent();
            // a dialog created without an owner has no parent to center on
            if (parent != null)
            {
                Rectangle bounds = parent.getBounds();
                Rectangle abounds = getBounds();
                setLocation(bounds.x + (bounds.width - abounds.width)/ 2,
                    bounds.y + (bounds.height - abounds.height)/2);
            }
        }
        super.setVisible(b);
    }

    //{{DECLARE_CONTROLS
    java.awt.Label label1 = new java.awt.Label();
    java.awt.Button okButton = new java.awt.Button();
    java.awt.Label label2 = new java.awt.Label();
    //}}

    // Used for addNotify check.
    boolean fComponentsAdjusted = false;

    /** Routes action events of the OK button to its handler. */
    class SymAction implements java.awt.event.ActionListener
    {
        public void actionPerformed(java.awt.event.ActionEvent event)
        {
            Object object = event.getSource();
            if (object == okButton)
                okButton_ActionPerformed(event);
        }
    }

    /**
     * Handles a click on the OK button.
     *
     * @param event  the action event
     */
    void okButton_ActionPerformed(java.awt.event.ActionEvent event)
    {
        okButton_ActionPerformed_Interaction1(event);
    }

    /**
     * Disposes the dialog. A failure is reported on standard error instead
     * of being dropped silently; it is not propagated to the event thread.
     *
     * @param event  the action event
     */
    void okButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            this.dispose();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Routes the window-closing event of this dialog to its handler. */
    class SymWindow extends java.awt.event.WindowAdapter
    {
        public void windowClosing(java.awt.event.WindowEvent event)
        {
            Object object = event.getSource();
            if (object == AboutDialog.this)
                AboutDialog_WindowClosing(event);
        }
    }

    /**
     * Handles the request to close the dialog window.
     *
     * @param event  the window event
     */
    void AboutDialog_WindowClosing(java.awt.event.WindowEvent event)
    {
        AboutDialog_WindowClosing_Interaction1(event);
    }

    /**
     * Disposes the dialog. A failure is reported on standard error instead
     * of being dropped silently; it is not propagated to the event thread.
     *
     * @param event  the window event
     */
    void AboutDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
    {
        try {
            this.dispose();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

View File

@ -0,0 +1,485 @@
package de.lanlab.larm.gui;
/*
This simple extension of the java.awt.Frame class
contains all the elements necessary to act as the
main window of an application.
*/
import java.awt.*;
import java.awt.event.ActionListener;
//import com.sun.java.swing.*;
/**
 * Main window of the fetcher GUI. It shows a start-URL and a host-restriction
 * text field with a start button, a log list and four lists for the URL
 * queue, URL threads, document queue and document threads, and offers
 * methods to fill those lists from outside.
 */
public class FetcherFrame extends Frame
{
    /**
     * Builds the window with all panels, lists and menus and registers the
     * listeners. The window is created hidden.
     */
    public FetcherFrame()
    {
        // This code is automatically generated by Visual Cafe when you add
        // components to the visual environment. It instantiates and initializes
        // the components. To modify the code, only use code syntax that matches
        // what Visual Cafe can generate, or Visual Cafe may be unable to back
        // parse your Java file into its visual environment.
        //{{INIT_CONTROLS
        setLayout(new BorderLayout(0,0));
        setSize(800,600);
        setVisible(false);
        openFileDialog1.setMode(FileDialog.LOAD);
        openFileDialog1.setTitle("Öffnen");
        //$$ openFileDialog1.move(24,312);
        mainPanelWithBorders.setLayout(new BorderLayout(0,0));
        add("Center", mainPanelWithBorders);
        mainPanelWithBorders.setBounds(0,0,800,600);
        northBorder.setLayout(null);
        mainPanelWithBorders.add("North", northBorder);
        northBorder.setBackground(java.awt.Color.lightGray);
        northBorder.setBounds(0,0,800,3);
        southBorder.setLayout(null);
        mainPanelWithBorders.add("South", southBorder);
        southBorder.setBackground(java.awt.Color.lightGray);
        southBorder.setBounds(0,597,800,3);
        westBorder.setLayout(null);
        mainPanelWithBorders.add("West", westBorder);
        westBorder.setBackground(java.awt.Color.lightGray);
        westBorder.setBounds(0,3,3,594);
        eastBorder.setLayout(null);
        mainPanelWithBorders.add("East", eastBorder);
        eastBorder.setBackground(java.awt.Color.lightGray);
        eastBorder.setBounds(797,3,3,594);
        mainPanel.setLayout(new BorderLayout(0,3));
        mainPanelWithBorders.add("Center", mainPanel);
        mainPanel.setBackground(java.awt.Color.lightGray);
        mainPanel.setBounds(3,3,794,594);
        upperPanel.setLayout(new GridLayout(1,2,0,0));
        mainPanel.add("North", upperPanel);
        upperPanel.setBounds(0,0,794,150);
        preferencesPanel.setLayout(null);
        upperPanel.add(preferencesPanel);
        preferencesPanel.setBounds(0,0,397,150);
        startURLlabel.setText("Start-URL");
        preferencesPanel.add(startURLlabel);
        startURLlabel.setBounds(12,0,121,24);
        startURL.setText("uni-muenchen.de");
        preferencesPanel.add(startURL);
        startURL.setBounds(132,0,133,24);
        startButton.setLabel("Start");
        preferencesPanel.add(startButton);
        startButton.setFont(new Font("Dialog", Font.BOLD, 12));
        startButton.setBounds(288,36,99,24);
        restrictToLabel.setText("Restrict host to");
        preferencesPanel.add(restrictToLabel);
        restrictToLabel.setBounds(12,36,121,28);
        preferencesPanel.add(restrictTo);
        restrictTo.setBounds(133,36,133,24);
        logPanel.setLayout(new BorderLayout(0,0));
        upperPanel.add(logPanel);
        logPanel.setBounds(397,0,397,150);
        logPanel.add("Center", logList);
        logList.setBackground(java.awt.Color.white);
        logList.setBounds(0,0,397,150);
        lowerPanel.setLayout(new GridLayout(1,3,3,3));
        mainPanel.add("Center", lowerPanel);
        lowerPanel.setBounds(0,153,794,441);
        urlQueuePanel.setLayout(new BorderLayout(0,0));
        lowerPanel.add(urlQueuePanel);
        urlQueuePanel.setBounds(0,0,196,441);
        urlQueueLabel.setText("URLQueue");
        urlQueuePanel.add("North", urlQueueLabel);
        urlQueueLabel.setBounds(0,0,196,23);
        urlQueuePanel.add("Center", urlQueueList);
        urlQueueList.setBackground(java.awt.Color.white);
        urlQueueList.setBounds(0,23,196,418);
        urlThreadPanel.setLayout(new BorderLayout(0,0));
        lowerPanel.add(urlThreadPanel);
        urlThreadPanel.setBounds(199,0,196,441);
        urlThreadLabel.setText("URLThreads");
        urlThreadPanel.add("North", urlThreadLabel);
        urlThreadLabel.setBounds(0,0,196,23);
        urlThreadPanel.add("Center", urlThreadList);
        urlThreadList.setBackground(java.awt.Color.white);
        urlThreadList.setBounds(0,23,196,418);
        docQueuePanel.setLayout(new BorderLayout(0,0));
        lowerPanel.add(docQueuePanel);
        docQueuePanel.setBounds(398,0,196,441);
        docQueueLabel.setText("DocQueue");
        docQueuePanel.add("North", docQueueLabel);
        docQueueLabel.setBounds(0,0,196,23);
        docQueuePanel.add("Center", docQueueList);
        docQueueList.setBackground(java.awt.Color.white);
        docQueueList.setBounds(0,23,196,418);
        docThreadPanel.setLayout(new BorderLayout(0,0));
        lowerPanel.add(docThreadPanel);
        docThreadPanel.setBounds(597,0,196,441);
        docThreadLabel.setText("DocThreads");
        docThreadPanel.add("North", docThreadLabel);
        docThreadLabel.setBounds(0,0,196,23);
        docThreadPanel.add("Center", docThreadList);
        docThreadList.setBackground(java.awt.Color.white);
        docThreadList.setBounds(0,23,196,418);
        setTitle("LARM - Fetcher");
        //}}
        //{{INIT_MENUS
        menu1.setLabel("Datei");
        menu1.add(newMenuItem);
        newMenuItem.setEnabled(false);
        newMenuItem.setLabel("Neu");
        newMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_N,false));
        menu1.add(openMenuItem);
        openMenuItem.setLabel("Öffnen...");
        openMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_O,false));
        menu1.add(saveMenuItem);
        saveMenuItem.setEnabled(false);
        saveMenuItem.setLabel("Speichern");
        saveMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_S,false));
        menu1.add(saveAsMenuItem);
        saveAsMenuItem.setEnabled(false);
        saveAsMenuItem.setLabel("Speichern unter...");
        menu1.add(separatorMenuItem);
        separatorMenuItem.setLabel("-");
        menu1.add(exitMenuItem);
        exitMenuItem.setLabel("Beenden");
        mainMenuBar.add(menu1);
        menu2.setLabel("Bearbeiten");
        menu2.add(cutMenuItem);
        cutMenuItem.setEnabled(false);
        cutMenuItem.setLabel("Ausschneiden");
        cutMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_X,false));
        menu2.add(copyMenuItem);
        copyMenuItem.setEnabled(false);
        copyMenuItem.setLabel("Kopieren");
        copyMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_C,false));
        menu2.add(pasteMenuItem);
        pasteMenuItem.setEnabled(false);
        pasteMenuItem.setLabel("Einfügen");
        pasteMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_V,false));
        mainMenuBar.add(menu2);
        menu3.setLabel("Hilfe");
        menu3.add(aboutMenuItem);
        aboutMenuItem.setLabel("Info...");
        mainMenuBar.add(menu3);
        //$$ mainMenuBar.move(0,312);
        setMenuBar(mainMenuBar);
        //}}
        //{{REGISTER_LISTENERS
        SymWindow aSymWindow = new SymWindow();
        this.addWindowListener(aSymWindow);
        SymAction lSymAction = new SymAction();
        openMenuItem.addActionListener(lSymAction);
        exitMenuItem.addActionListener(lSymAction);
        aboutMenuItem.addActionListener(lSymAction);
        startButton.addActionListener(lSymAction);
        //}}
    }

    /**
     * Builds the window and replaces the default title.
     *
     * @param title  the window title
     */
    public FetcherFrame(String title)
    {
        this();
        setTitle(title);
    }

    /**
     * Shows or hides the component depending on the boolean flag b. Before
     * the frame is shown it is moved to position (50, 50).
     * @param b if true, show the component; otherwise, hide the component.
     * @see java.awt.Component#isVisible
     */
    public void setVisible(boolean b)
    {
        if(b)
        {
            setLocation(50, 50);
        }
        super.setVisible(b);
    }

    /**
     * Opens a FetcherFrame. Any Throwable is printed and the VM exits with
     * status 1.
     *
     * @param args  not used
     */
    static public void main(String args[])
    {
        try
        {
            //Create a new instance of our application's frame, and make it visible.
            (new FetcherFrame()).setVisible(true);
        }
        catch (Throwable t)
        {
            System.err.println(t);
            t.printStackTrace();
            //Ensure the application exits with an error condition.
            System.exit(1);
        }
    }

    /**
     * Creates the native peer and, the first time only, enlarges the window
     * by its insets and shifts the top-level components accordingly.
     */
    public void addNotify()
    {
        // Record the size of the window prior to calling parents addNotify.
        Dimension d = getSize();
        super.addNotify();
        if (fComponentsAdjusted)
            return;
        // Adjust components according to the insets; query them once
        Insets insets = getInsets();
        setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height);
        Component components[] = getComponents();
        for (int i = 0; i < components.length; i++)
        {
            Point p = components[i].getLocation();
            p.translate(insets.left, insets.top);
            components[i].setLocation(p);
        }
        fComponentsAdjusted = true;
    }

    // Used for addNotify check.
    boolean fComponentsAdjusted = false;

    //{{DECLARE_CONTROLS
    java.awt.FileDialog openFileDialog1 = new java.awt.FileDialog(this);
    java.awt.Panel mainPanelWithBorders = new java.awt.Panel();
    java.awt.Panel northBorder = new java.awt.Panel();
    java.awt.Panel southBorder = new java.awt.Panel();
    java.awt.Panel westBorder = new java.awt.Panel();
    java.awt.Panel eastBorder = new java.awt.Panel();
    java.awt.Panel mainPanel = new java.awt.Panel();
    java.awt.Panel upperPanel = new java.awt.Panel();
    java.awt.Panel preferencesPanel = new java.awt.Panel();
    java.awt.Label startURLlabel = new java.awt.Label();
    java.awt.TextField startURL = new java.awt.TextField(30);
    java.awt.Button startButton = new java.awt.Button();
    java.awt.Label restrictToLabel = new java.awt.Label();
    java.awt.TextField restrictTo = new java.awt.TextField();
    java.awt.Panel logPanel = new java.awt.Panel();
    java.awt.List logList = new java.awt.List(8);
    java.awt.Panel lowerPanel = new java.awt.Panel();
    java.awt.Panel urlQueuePanel = new java.awt.Panel();
    java.awt.Label urlQueueLabel = new java.awt.Label();
    java.awt.List urlQueueList = new java.awt.List(5);
    java.awt.Panel urlThreadPanel = new java.awt.Panel();
    java.awt.Label urlThreadLabel = new java.awt.Label();
    java.awt.List urlThreadList = new java.awt.List(4);
    java.awt.Panel docQueuePanel = new java.awt.Panel();
    java.awt.Label docQueueLabel = new java.awt.Label();
    java.awt.List docQueueList = new java.awt.List(4);
    java.awt.Panel docThreadPanel = new java.awt.Panel();
    java.awt.Label docThreadLabel = new java.awt.Label();
    java.awt.List docThreadList = new java.awt.List(4);
    //}}

    //{{DECLARE_MENUS
    java.awt.MenuBar mainMenuBar = new java.awt.MenuBar();
    java.awt.Menu menu1 = new java.awt.Menu();
    java.awt.MenuItem newMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem openMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem saveMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem saveAsMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem separatorMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem exitMenuItem = new java.awt.MenuItem();
    java.awt.Menu menu2 = new java.awt.Menu();
    java.awt.MenuItem cutMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem copyMenuItem = new java.awt.MenuItem();
    java.awt.MenuItem pasteMenuItem = new java.awt.MenuItem();
    java.awt.Menu menu3 = new java.awt.Menu();
    java.awt.MenuItem aboutMenuItem = new java.awt.MenuItem();
    //}}

    /** Routes the window-closing event of this frame to its handler. */
    class SymWindow extends java.awt.event.WindowAdapter
    {
        public void windowClosing(java.awt.event.WindowEvent event)
        {
            Object object = event.getSource();
            if (object == FetcherFrame.this)
                FetcherFrame_WindowClosing(event);
        }
    }

    /**
     * Handles the request to close the frame.
     *
     * @param event  the window event
     */
    void FetcherFrame_WindowClosing(java.awt.event.WindowEvent event)
    {
        FetcherFrame_WindowClosing_Interaction1(event);
    }

    /**
     * Shows a modal QuitDialog. A failure is reported on standard error
     * instead of being dropped silently.
     *
     * @param event  the window event
     */
    void FetcherFrame_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
    {
        try {
            // QuitDialog Create and show as modal
            (new QuitDialog(this, true)).setVisible(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Routes action events of the menu items and the start button. */
    class SymAction implements java.awt.event.ActionListener
    {
        public void actionPerformed(java.awt.event.ActionEvent event)
        {
            Object object = event.getSource();
            if (object == openMenuItem)
                openMenuItem_ActionPerformed(event);
            else if (object == aboutMenuItem)
                aboutMenuItem_ActionPerformed(event);
            else if (object == exitMenuItem)
                exitMenuItem_ActionPerformed(event);
            else if (object == startButton)
                startButton_ActionPerformed(event);
        }
    }

    /**
     * Handles the "open" menu item.
     *
     * @param event  the action event
     */
    void openMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
    {
        openMenuItem_ActionPerformed_Interaction1(event);
    }

    /**
     * Shows a fresh file dialog that takes over mode, title, directory and
     * file of the previous one. The previous dialog is disposed so that its
     * native resources are released; before, every call left one behind.
     *
     * @param event  the action event
     */
    void openMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            // OpenFileDialog Create and show as modal
            java.awt.FileDialog previous = openFileDialog1;
            int defMode = previous.getMode();
            String defTitle = previous.getTitle();
            String defDirectory = previous.getDirectory();
            String defFile = previous.getFile();
            openFileDialog1 = new java.awt.FileDialog(this, defTitle, defMode);
            previous.dispose();
            openFileDialog1.setDirectory(defDirectory);
            openFileDialog1.setFile(defFile);
            openFileDialog1.setVisible(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Handles the "about" menu item.
     *
     * @param event  the action event
     */
    void aboutMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
    {
        aboutMenuItem_ActionPerformed_Interaction1(event);
    }

    /**
     * Shows a modal AboutDialog. A failure is reported on standard error
     * instead of being dropped silently.
     *
     * @param event  the action event
     */
    void aboutMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            // AboutDialog Create and show as modal
            (new AboutDialog(this, true)).setVisible(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Handles the "exit" menu item.
     *
     * @param event  the action event
     */
    void exitMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
    {
        exitMenuItem_ActionPerformed_Interaction1(event);
    }

    /**
     * Shows a modal QuitDialog. A failure is reported on standard error
     * instead of being dropped silently.
     *
     * @param event  the action event
     */
    void exitMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            // QuitDialog Create and show as modal
            (new QuitDialog(this, true)).setVisible(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Called when the start button is pressed. Does nothing here; listeners
     * added with {@link #addStartButtonListener(ActionListener)} are notified
     * by the button itself.
     *
     * @param event  the action event
     */
    public void startButton_ActionPerformed(java.awt.event.ActionEvent event)
    {
        // to do: code goes here.
    }

    /**
     * Appends an entry to the URL queue list.
     *
     * @param item  the text to show
     */
    public void addUrlQueueItem(String item)
    {
        urlQueueList.add(item);
    }

    /**
     * Removes an entry from the URL queue list.
     *
     * @param item  the text of the entry to remove
     */
    public void removeUrlQueueItem(String item)
    {
        urlQueueList.remove(item);
    }

    /**
     * Appends an entry to the document queue list.
     *
     * @param item  the text to show
     */
    public void addDocQueueItem(String item)
    {
        docQueueList.add(item);
    }

    /**
     * Removes an entry from the document queue list.
     *
     * @param item  the text of the entry to remove
     */
    public void removeDocQueueItem(String item)
    {
        docQueueList.remove(item);
    }

    /**
     * Appends an entry to the URL thread list.
     *
     * @param item  the text to show
     * @return      the number of entries in the list after the insertion
     */
    public synchronized int addUrlThreadItem(String item)
    {
        urlThreadList.add(item);
        return urlThreadList.getItemCount();
    }

    /**
     * Inserts an entry into the URL thread list.
     *
     * @param item  the text to show
     * @param pos   the index to insert at
     * @return      the number of entries in the list after the insertion
     */
    public synchronized int addUrlThreadItem(String item, int pos)
    {
        urlThreadList.add(item,pos);
        return urlThreadList.getItemCount();
    }

    /**
     * Replaces an entry of the URL thread list.
     *
     * @param item   the new text
     * @param index  the index of the entry to replace
     */
    public void replaceUrlThreadItem(String item, int index)
    {
        urlThreadList.replaceItem(item,index);
    }

    /**
     * Appends an entry to the document thread list.
     *
     * @param item  the text to show
     * @return      the number of entries in the list after the insertion
     */
    public synchronized int addDocThreadItem(String item)
    {
        docThreadList.add(item);
        return docThreadList.getItemCount();
    }

    /**
     * Replaces an entry of the document thread list.
     *
     * @param item   the new text
     * @param index  the index of the entry to replace
     */
    public void replaceDocThreadItem(String item, int index)
    {
        docThreadList.replaceItem(item,index);
    }

    /**
     * Appends an entry to the log list and scrolls it into view.
     *
     * @param entry  the text to log
     */
    public void addLogEntry(String entry)
    {
        logList.add(entry);
        logList.makeVisible(logList.getItemCount()-1);
    }

    /**
     * Removes all entries from the log list.
     */
    public void clearLog()
    {
        logList.removeAll();
    }

    /**
     * Registers an additional listener on the start button.
     *
     * @param a  the listener to notify when the button is pressed
     */
    public void addStartButtonListener(ActionListener a)
    {
        startButton.addActionListener(a);
    }

    /**
     * @return  the content of the "Restrict host to" text field
     */
    public String getRestrictTo()
    {
        return restrictTo.getText();
    }

    /**
     * @param restrictTo  the new content of the "Restrict host to" text field
     */
    public void setRestrictTo(String restrictTo)
    {
        this.restrictTo.setText(restrictTo);
    }

    /**
     * @return  the content of the "Start-URL" text field
     */
    public String getStartURL()
    {
        return startURL.getText();
    }

    /**
     * @param startURL  the new content of the "Start-URL" text field
     */
    public void setStartURL(String startURL)
    {
        this.startURL.setText(startURL);
    }
}

View File

@ -0,0 +1,332 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c) <p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.gui;
import javax.swing.*;
import java.awt.*;
import java.awt.event.*;
/**
 * Swing frame that summarises the state of a crawl: a property panel with
 * start URL, URL restriction and a start button on top, and below it three
 * panels with counters shown as progress bars (URL/filter statistics, thread
 * statistics) and as plain text labels (throughput and memory figures).
 * <p>
 * The progress bars are used as open-ended counters: their maximum is
 * rescaled by {@link #setCounterProgressBar(JProgressBar, int)} and the
 * painted string always shows the raw value.
 * <p>
 * The setters touch Swing components directly and do no thread hand-off of
 * their own.
 */
public class FetcherSummaryFrame extends JFrame
{
    JPanel lowerPanel = new JPanel();
    JPanel progressPanel = new JPanel();
    JPanel middlePanel = new JPanel();
    JPanel rightPanel = new JPanel();
    BorderLayout borderLayout1 = new BorderLayout();
    JPanel propertyPanel = new JPanel();
    JLabel hostLabel = new JLabel();
    JLabel urlRestrictionFrame = new JLabel();
    JTextField startURL = new JTextField();
    JTextField restrictTo = new JTextField();
    JButton startButton = new JButton();
    GridLayout gridLayout1 = new GridLayout();
    JProgressBar urlQueuedProgress = new JProgressBar(0,100);
    JLabel urlQueuedLabel = new JLabel();
    JLabel scopeFilteredLabel = new JLabel();
    JProgressBar scopeFilteredProgress = new JProgressBar(0,100);
    JLabel visitedFilteredLabel = new JLabel();
    JProgressBar visitedFilteredProgress = new JProgressBar(0,100);
    JLabel workingThreadsLabel = new JLabel();
    JProgressBar workingThreadsProgress = new JProgressBar(0,100);
    JLabel idleThreadsLabel = new JLabel();
    JProgressBar idleThreadsProgress = new JProgressBar(0,100);
    JLabel busyThreadsLabel = new JLabel();
    JProgressBar busyThreadsProgress = new JProgressBar(0,100);
    JLabel requestQueueLabel = new JLabel();
    JProgressBar requestQueueProgress = new JProgressBar();
    JLabel stalledThreadsLabel = new JLabel();
    JProgressBar stalledThreadsProgress = new JProgressBar();
    JLabel dnsLabel = new JLabel();
    JProgressBar dnsProgress = new JProgressBar(0,100);
    JLabel freeMemLabel = new JLabel();
    JLabel freeMemText = new JLabel();
    JLabel totalMemLabel = new JLabel();
    JLabel totalMemText = new JLabel();
    JLabel bpsLabel = new JLabel();
    JLabel bpsText = new JLabel();
    JLabel docsLabel = new JLabel();
    JLabel docsText = new JLabel();
    JLabel docsReadLabel = new JLabel();
    JLabel docsReadText = new JLabel();
    JProgressBar urlsCaughtProgress = new JProgressBar(0,100);
    JLabel urlsCaughtText = new JLabel();
    JLabel robotsTxtsText = new JLabel();
    JProgressBar robotsTxtsProgress = new JProgressBar(0,100);

    /**
     * Builds the frame: lays out all components, sets title and size and
     * initialises every counter bar to show the string "0". Exceptions
     * during set-up are printed to standard error and otherwise ignored.
     */
    public FetcherSummaryFrame()
    {
        try
        {
            jbInit();
            this.setTitle("LARM - LANLab Retrieval Machine");
            this.setSize(new Dimension(640,350));
            showZero(urlQueuedProgress);
            showZero(scopeFilteredProgress);
            showZero(visitedFilteredProgress);
            showZero(workingThreadsProgress);
            showZero(idleThreadsProgress);
            showZero(busyThreadsProgress);
            showZero(stalledThreadsProgress);
            showZero(requestQueueProgress);
            showZero(dnsProgress);
            showZero(urlsCaughtProgress);
            showZero(robotsTxtsProgress);
        }
        catch(Exception e)
        {
            e.printStackTrace();
        }
    }

    /** Makes a counter bar paint its own string and initialises it to "0". */
    private static void showZero(JProgressBar bar)
    {
        bar.setStringPainted(true);
        bar.setString("0");
    }

    /**
     * Designer-generated component set-up: texts, sizes, layouts and the
     * component hierarchy. The order of the {@code add} calls determines the
     * order in which the components are laid out.
     *
     * @throws Exception declared by the generator; nothing in the body is
     *                   known to throw a checked exception
     */
    private void jbInit() throws Exception
    {
        this.getContentPane().setLayout(borderLayout1);
        propertyPanel.setMinimumSize(new Dimension(10, 70));
        propertyPanel.setPreferredSize(new Dimension(10, 80));
        propertyPanel.setLayout(null);
        hostLabel.setText("Startseite");
        hostLabel.setBounds(new Rectangle(18, 15, 76, 17));
        urlRestrictionFrame.setText("URL-Restriction (regul. Ausdruck)");
        urlRestrictionFrame.setBounds(new Rectangle(18, 37, 208, 17));
        startURL.setBounds(new Rectangle(224, 14, 281, 21));
        restrictTo.setBounds(new Rectangle(224, 38, 281, 21));
        startButton.setActionCommand("start");
        startButton.setText("Start");
        startButton.setBounds(new Rectangle(528, 14, 79, 47));
        lowerPanel.setLayout(gridLayout1);
        urlQueuedLabel.setToolTipText("");
        urlQueuedLabel.setText("URLs queued");
        scopeFilteredLabel.setToolTipText("");
        scopeFilteredLabel.setText("Scope-gefiltert");
        visitedFilteredLabel.setText("Visited gefiltert");
        workingThreadsLabel.setText("Number of Working Threads");
        idleThreadsLabel.setText("Idle Threads");
        busyThreadsLabel.setText("Busy Threads");
        requestQueueLabel.setText("requests queued");
        stalledThreadsLabel.setText("stalled Threads");
        stalledThreadsProgress.setPreferredSize(new Dimension(190, 25));
        requestQueueProgress.setPreferredSize(new Dimension(190, 25));
        busyThreadsProgress.setPreferredSize(new Dimension(190, 25));
        idleThreadsProgress.setPreferredSize(new Dimension(190, 25));
        workingThreadsProgress.setPreferredSize(new Dimension(190, 25));
        urlQueuedProgress.setPreferredSize(new Dimension(190, 25));
        scopeFilteredProgress.setPreferredSize(new Dimension(190, 25));
        visitedFilteredProgress.setPreferredSize(new Dimension(190, 25));
        dnsLabel.setText("DNS Hosts cached");
        dnsProgress.setPreferredSize(new Dimension(190, 25));
        freeMemLabel.setText("Free Mem");
        freeMemLabel.setPreferredSize(new Dimension(60, 17));
        freeMemText.setText("0");
        freeMemText.setPreferredSize(new Dimension(120, 17));
        freeMemText.setMinimumSize(new Dimension(100, 17));
        totalMemLabel.setText("total Mem");
        totalMemLabel.setPreferredSize(new Dimension(60, 17));
        totalMemText.setText("0");
        totalMemText.setPreferredSize(new Dimension(120, 17));
        totalMemText.setMinimumSize(new Dimension(100, 17));
        bpsLabel.setPreferredSize(new Dimension(60, 17));
        bpsLabel.setText("Bytes/s");
        bpsText.setMinimumSize(new Dimension(100, 17));
        bpsText.setPreferredSize(new Dimension(120, 17));
        bpsText.setText("0");
        docsLabel.setText("Docs/s");
        docsLabel.setPreferredSize(new Dimension(60, 17));
        docsText.setText("0");
        docsText.setPreferredSize(new Dimension(120, 17));
        docsText.setMinimumSize(new Dimension(100, 17));
        docsReadLabel.setText("Docs read");
        docsReadLabel.setPreferredSize(new Dimension(60, 17));
        docsReadText.setText("0");
        docsReadText.setPreferredSize(new Dimension(120, 17));
        docsReadText.setMinimumSize(new Dimension(100, 17));
        urlsCaughtProgress.setPreferredSize(new Dimension(190, 25));
        urlsCaughtText.setText("URLs caught by Robots.txt");
        robotsTxtsText.setText("Robots.txts found");
        robotsTxtsProgress.setPreferredSize(new Dimension(190, 25));
        this.getContentPane().add(lowerPanel, BorderLayout.CENTER);
        lowerPanel.add(progressPanel, null);
        progressPanel.add(urlQueuedLabel, null);
        progressPanel.add(urlQueuedProgress, null);
        progressPanel.add(scopeFilteredLabel, null);
        progressPanel.add(scopeFilteredProgress, null);
        progressPanel.add(visitedFilteredLabel, null);
        progressPanel.add(visitedFilteredProgress, null);
        progressPanel.add(dnsLabel, null);
        progressPanel.add(dnsProgress, null);
        progressPanel.add(robotsTxtsText, null);
        progressPanel.add(robotsTxtsProgress, null);
        progressPanel.add(urlsCaughtText, null);
        progressPanel.add(urlsCaughtProgress, null);
        lowerPanel.add(middlePanel, null);
        middlePanel.add(workingThreadsLabel, null);
        middlePanel.add(workingThreadsProgress, null);
        middlePanel.add(idleThreadsLabel, null);
        middlePanel.add(idleThreadsProgress, null);
        middlePanel.add(busyThreadsLabel, null);
        middlePanel.add(busyThreadsProgress, null);
        middlePanel.add(requestQueueLabel, null);
        middlePanel.add(requestQueueProgress, null);
        middlePanel.add(stalledThreadsLabel, null);
        middlePanel.add(stalledThreadsProgress, null);
        lowerPanel.add(rightPanel, null);
        rightPanel.add(docsLabel, null);
        rightPanel.add(docsText, null);
        rightPanel.add(docsReadLabel, null);
        rightPanel.add(docsReadText, null);
        rightPanel.add(bpsLabel, null);
        rightPanel.add(bpsText, null);
        rightPanel.add(totalMemLabel, null);
        rightPanel.add(totalMemText, null);
        rightPanel.add(freeMemLabel, null);
        rightPanel.add(freeMemText, null);
        this.getContentPane().add(propertyPanel, BorderLayout.NORTH);
        propertyPanel.add(urlRestrictionFrame, null);
        propertyPanel.add(restrictTo, null);
        propertyPanel.add(hostLabel, null);
        propertyPanel.add(startButton, null);
        propertyPanel.add(startURL, null);
    }

    /**
     * Shows {@code value} on a progress bar that is used as an open-ended
     * counter. If the value exceeds the bar's maximum, the maximum is
     * doubled until the value fits (a single doubling would let the bar's
     * model clamp large jumps). If the value drops below half of the maximum
     * while the previous value was at or above it, the maximum is halved.
     * The painted string is always the raw value.
     *
     * @param p     the progress bar to update
     * @param value the counter value to display
     */
    public void setCounterProgressBar(JProgressBar p, int value)
    {
        int oldMax = p.getMaximum();
        int oldValue = p.getValue();
        if(value > oldMax)
        {
            // start from at least 1 so that a maximum of 0 can still grow
            int newMax = Math.max(oldMax, 1);
            while(newMax < value && newMax <= Integer.MAX_VALUE / 2)
            {
                newMax *= 2;
            }
            if(newMax < value)
            {
                // doubling would overflow; fall back to the value itself
                newMax = value;
            }
            p.setMaximum(newMax);
        }
        else if (value < oldMax / 2 && oldValue >= oldMax / 2)
        {
            p.setMaximum(oldMax / 2);
        }
        p.setValue(value);
        p.setString("" + value);
    }

    /** Displays the number of queued URLs. */
    public void setURLsQueued(int queued)
    {
        setCounterProgressBar(this.urlQueuedProgress, queued);
    }

    /** Displays the scope-filtered counter. */
    public void setScopeFiltered(int filtered)
    {
        setCounterProgressBar(this.scopeFilteredProgress, filtered);
    }

    /** Displays the visited-filtered counter. */
    public void setVisitedFiltered(int filtered)
    {
        setCounterProgressBar(this.visitedFilteredProgress, filtered);
    }

    /** Displays the number of working threads. */
    public void setWorkingThreadsCount(int threads)
    {
        setCounterProgressBar(this.workingThreadsProgress, threads);
    }

    /** Displays the number of idle threads. */
    public void setIdleThreadsCount(int threads)
    {
        setCounterProgressBar(this.idleThreadsProgress, threads);
    }

    /** Displays the number of busy threads. */
    public void setBusyThreadsCount(int threads)
    {
        setCounterProgressBar(this.busyThreadsProgress, threads);
    }

    /** Displays the number of queued requests. */
    public void setRequestQueueCount(int requests)
    {
        setCounterProgressBar(this.requestQueueProgress, requests);
    }

    /** Displays the "DNS Hosts cached" counter. */
    public void setDNSCount(int count)
    {
        setCounterProgressBar(this.dnsProgress, count);
    }

    /**
     * Displays the "URLs caught by Robots.txt" counter.
     * (Previously this wrote to the "URLs queued" bar by mistake.)
     */
    public void setURLsCaughtCount(int count)
    {
        setCounterProgressBar(this.urlsCaughtProgress, count);
    }

    /** Registers an action listener on the start button. */
    public void addStartButtonListener(ActionListener a)
    {
        startButton.addActionListener(a);
    }

    /** @return the text of the URL restriction field */
    public String getRestrictTo()
    {
        return restrictTo.getText();
    }

    /** Sets the text of the URL restriction field. */
    public void setRestrictTo(String restrictTo)
    {
        this.restrictTo.setText(restrictTo);
    }

    /** @return the text of the start URL field */
    public String getStartURL()
    {
        return startURL.getText();
    }

    /** Sets the text of the start URL field. */
    public void setStartURL(String startURL)
    {
        this.startURL.setText(startURL);
    }

    /**
     * Displays the number of stalled threads. Goes through
     * {@link #setCounterProgressBar(JProgressBar, int)} like the other
     * counters so that the painted string (initialised to "0") is updated
     * too; a plain {@code setValue} would leave it stuck at "0".
     */
    public void setStalledThreads(int stalled)
    {
        setCounterProgressBar(this.stalledThreadsProgress, stalled);
    }

    /** Displays the bytes-per-second figure. */
    public void setBytesPerSecond(double bps)
    {
        bpsText.setText("" + bps);
    }

    /**
     * Displays the documents-per-second figure next to the "Docs/s" label.
     * (Previously this overwrote the bytes-per-second text.)
     */
    public void setDocsPerSecond(double docs)
    {
        docsText.setText("" + docs);
    }

    /** Displays the free-memory figure. */
    public void setFreeMem(long freeMem)
    {
        freeMemText.setText("" + freeMem);
    }

    /** Displays the total-memory figure. */
    public void setTotalMem(long totalMem)
    {
        totalMemText.setText("" + totalMem);
    }

    /** Displays the "Robots.txts found" counter. */
    public void setRobotsTxtCount(int robotsTxtCount)
    {
        setCounterProgressBar(robotsTxtsProgress, robotsTxtCount);
    }

    /**
     * Displays the number of documents read next to the "Docs read" label.
     * (Previously this overwrote the bytes-per-second text.)
     */
    public void setDocsRead(int docs)
    {
        docsReadText.setText("" + docs);
    }
}

View File

@ -0,0 +1,184 @@
package de.lanlab.larm.gui;
/*
A basic extension of the java.awt.Dialog class
*/
import java.awt.*;
import java.awt.event.*;
/**
 * Confirmation dialog asking whether the application should be terminated.
 * It shows one label and a yes and a no button (German captions). "Yes" hides
 * and disposes the invoking frame and the dialog and then calls
 * {@code System.exit(0)}; "no" and closing the window just dispose the dialog.
 * <p>
 * The regions between {@code //{{...} and {@code //}}} markers were generated
 * by Visual Cafe; per the generator's own note below they should only be
 * edited in a form the tool can parse back.
 */
public class QuitDialog extends Dialog
{
/**
 * Creates the dialog with a fixed size, absolute layout and the default
 * title, and wires up the button and window listeners.
 *
 * @param parent the invoking frame; kept in {@code frame} so the yes button
 *               can hide and dispose it
 * @param modal  passed through to {@code Dialog}
 */
public QuitDialog(Frame parent, boolean modal)
{
super(parent, modal);
//Keep a local reference to the invoking frame
frame = parent;
// This code is automatically generated by Visual Cafe when you add
// components to the visual environment. It instantiates and initializes
// the components. To modify the code, only use code syntax that matches
// what Visual Cafe can generate, or Visual Cafe may be unable to back
// parse your Java file into its visual environment.
//{{INIT_CONTROLS
setLayout(null);
setSize(337,135);
setVisible(false);
// button caption is German for "Yes"
yesButton.setLabel(" Ja ");
add(yesButton);
yesButton.setFont(new Font("Dialog", Font.BOLD, 12));
yesButton.setBounds(72,80,79,22);
// button caption is German for "No"
noButton.setLabel(" Nein ");
add(noButton);
noButton.setFont(new Font("Dialog", Font.BOLD, 12));
noButton.setBounds(185,80,79,22);
// label text is German for "Do you want to quit LARM?"
label1.setText("Möchten Sie LARM beenden?");
label1.setAlignment(java.awt.Label.CENTER);
add(label1);
label1.setBounds(68,33,220,23);
// title is German for "LARM - Quit"
setTitle("LARM - Beenden");
//}}
//{{REGISTER_LISTENERS
SymWindow aSymWindow = new SymWindow();
this.addWindowListener(aSymWindow);
// one shared listener instance dispatches on the event source
SymAction lSymAction = new SymAction();
noButton.addActionListener(lSymAction);
yesButton.addActionListener(lSymAction);
//}}
}
/**
 * Grows the dialog by its insets and shifts every child component by the
 * left/top inset. This adjustment is done only once, guarded by
 * {@code fComponentsAdjusted}.
 */
public void addNotify()
{
// Record the size of the window prior to calling parents addNotify.
Dimension d = getSize();
super.addNotify();
if (fComponentsAdjusted)
return;
// Adjust components according to the insets
setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height);
Component components[] = getComponents();
for (int i = 0; i < components.length; i++)
{
Point p = components[i].getLocation();
p.translate(getInsets().left, getInsets().top);
components[i].setLocation(p);
}
fComponentsAdjusted = true;
}
/**
 * Same as {@link #QuitDialog(Frame, boolean)} but replaces the default
 * title with {@code title}.
 *
 * @param parent the invoking frame
 * @param title  the dialog title
 * @param modal  passed through to {@code Dialog}
 */
public QuitDialog(Frame parent, String title, boolean modal)
{
this(parent, modal);
setTitle(title);
}
/**
* Shows or hides the component depending on the boolean flag b.
* When showing, the dialog is first centered over the bounds of its parent
* and the system beep is sounded.
* @param b if true, show the component; otherwise, hide the component.
* @see java.awt.Component#isVisible
*/
public void setVisible(boolean b)
{
if(b)
{
// NOTE(review): getParent() is dereferenced unchecked - assumes the dialog always has a parent; confirm
Rectangle bounds = getParent().getBounds();
Rectangle abounds = getBounds();
setLocation(bounds.x + (bounds.width - abounds.width)/ 2,
bounds.y + (bounds.height - abounds.height)/2);
Toolkit.getDefaultToolkit().beep();
}
super.setVisible(b);
}
// Used for addNotify check.
boolean fComponentsAdjusted = false;
// Invoking frame
Frame frame = null;
//{{DECLARE_CONTROLS
java.awt.Button yesButton = new java.awt.Button();
java.awt.Button noButton = new java.awt.Button();
java.awt.Label label1 = new java.awt.Label();
//}}
/** Routes button action events to the matching handler method. */
class SymAction implements java.awt.event.ActionListener
{
public void actionPerformed(java.awt.event.ActionEvent event)
{
Object object = event.getSource();
if (object == yesButton)
yesButton_ActionPerformed(event);
else if (object == noButton)
noButton_ActionPerformed(event);
}
}
/** Handler for the yes button; delegates to the generated interaction. */
void yesButton_ActionPerformed(java.awt.event.ActionEvent event)
{
// to do: code goes here.
yesButton_ActionPerformed_Interaction1(event);
}
/**
 * Hides and disposes the invoking frame, disposes this dialog and exits the
 * JVM with status 0. Any {@code Exception} thrown on the way is swallowed,
 * in which case the exit call may never be reached.
 */
void yesButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
{
try {
frame.setVisible(false); // Hide the invoking frame
frame.dispose(); // Free system resources
this.dispose(); // Free system resources
System.exit(0); // close the application
} catch (Exception e) {
}
}
/** Handler for the no button; delegates to the generated interaction. */
void noButton_ActionPerformed(java.awt.event.ActionEvent event)
{
// to do: code goes here.
noButton_ActionPerformed_Interaction1(event);
}
/** Disposes the dialog without quitting; exceptions are swallowed. */
void noButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
{
try {
this.dispose();
} catch (Exception e) {
}
}
/** Routes the window-closing event of this dialog to its handler. */
class SymWindow extends java.awt.event.WindowAdapter
{
public void windowClosing(java.awt.event.WindowEvent event)
{
Object object = event.getSource();
if (object == QuitDialog.this)
QuitDialog_WindowClosing(event);
}
}
/** Handler for the window-closing event; delegates to the generated interaction. */
void QuitDialog_WindowClosing(java.awt.event.WindowEvent event)
{
// to do: code goes here.
QuitDialog_WindowClosing_Interaction1(event);
}
/** Disposes the dialog when its window is closed; exceptions are swallowed. */
void QuitDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
{
try {
this.dispose();
} catch (Exception e) {
}
}
}

View File

@ -0,0 +1,136 @@
package de.lanlab.larm.net;
// whatever package you want
import sun.net.www.http.HttpClient;
import sun.net.www.MessageHeader;
import sun.net.ProgressEntry;
import java.net.*;
import java.io.*;
/**
 * Extension of the JDK-internal {@code sun.net.www.http.HttpClient} that
 * lets the caller put a read timeout on the underlying server socket and
 * gives access to that socket.
 *
 *@author    cmarschn
 *@created   2. Mai 2001
 */
public class HttpClientTimeout extends HttpClient {
    /**
     * Timeout last requested through {@link #setTimeout(int)}, in the unit
     * expected by {@code Socket.setSoTimeout} (milliseconds); -1 means that
     * no timeout has been requested.
     */
    private int timeout = -1;


    /**
     * Creates a client for the given URL using the given proxy settings.
     *
     *@param  url              the URL to connect to
     *@param  proxy            proxy host, passed through to the superclass
     *@param  proxyPort        proxy port, passed through to the superclass
     *@exception  IOException  if the superclass constructor fails
     */
    public HttpClientTimeout(URL url, String proxy, int proxyPort) throws IOException {
        super(url, proxy, proxyPort);
    }


    /**
     * Creates a client for the given URL without a proxy
     * (proxy {@code null}, port -1).
     *
     *@param  url              the URL to connect to
     *@exception  IOException  if the superclass constructor fails
     */
    public HttpClientTimeout(URL url) throws IOException {
        super(url, null, -1);
    }


    /**
     * Remembers the timeout and applies it to the server socket if one is
     * open. The remembered value is re-applied by
     * {@link #parseHTTP(MessageHeader, ProgressEntry)}. (The original code
     * stored -1 here, so the value was never re-applied.)
     *
     *@param  i                    timeout as accepted by
     *      {@code Socket.setSoTimeout}
     *@exception  SocketException  if the socket rejects the timeout
     */
    public void setTimeout(int i) throws SocketException {
        this.timeout = i;
        if (serverSocket != null) {
            serverSocket.setSoTimeout(i);
        }
    }


    /**
     * Returns the server socket of this client.
     *
     *@return    the socket; may be {@code null} if none is open
     */
    public Socket getSocket() {
        return serverSocket;
    }


    /**
     * Re-applies the remembered timeout (if any) to the server socket and
     * then lets the superclass parse the HTTP response.
     *
     *@param  header                   passed through to the superclass
     *@param  entry                    passed through to the superclass
     *@return                          the superclass result
     *@exception  java.io.IOException  if the timeout cannot be set or the
     *      superclass fails
     */
    public boolean parseHTTP(MessageHeader header, ProgressEntry entry) throws java.io.IOException {
        if (this.timeout != -1 && serverSocket != null) {
            try {
                serverSocket.setSoTimeout(this.timeout);
            }
            catch (SocketException e) {
                throw new java.io.IOException("unable to set socket timeout!");
            }
        }
        return super.parseHTTP(header, entry);
    }


    /**
     * Closes the server socket directly; does nothing if no socket is open.
     *
     *@exception  IOException  if closing the socket fails
     */
    public void close() throws IOException {
        if (serverSocket != null) {
            serverSocket.close();
        }
    }


    /*
     *  This class has no public constructor for HTTP.  This method is used to
     *  get an HttpClient to the specified URL.  If there's currently an
     *  active HttpClient to that server/port, you'll get that one.
     *
     *  no longer synchronized -- it slows things down too much
     *  synchronize at a higher level
     */
    /**
     * Returns a client for the URL, re-using one from the superclass
     * keep-alive cache ({@code kac}) when it holds one of this type.
     *
     *@param  url              the URL to connect to
     *@return                  a cached or newly created client
     *@exception  IOException  if a new client cannot be created
     */
    public static HttpClientTimeout getNew(URL url) throws IOException {
        // see if one's already around
        Object cached = kac.get(url);
        HttpClientTimeout ret;
        if (cached instanceof HttpClientTimeout) {
            ret = (HttpClientTimeout) cached;
            ret.url = url;
        }
        else {
            // Nothing cached, or the cached client is of a different type
            // and cannot be cast.
            // NOTE(review): a cached client of another type is simply
            // dropped here - confirm that this cannot happen in practice.
            ret = new HttpClientTimeout(url);
            // CTOR called openServer()
        }
        // don't know if we're keeping alive until we parse the headers
        // for now, keepingAlive is false
        return ret;
    }
}

View File

@ -0,0 +1,50 @@
package de.lanlab.larm.net;
import java.net.*;
/**
 * {@code URLStreamHandlerFactory} that hands out {@code HttpTimeoutHandler}
 * instances configured with a fixed timeout value.
 *
 *@author    cmarschn
 *@created   2. Mai 2001
 */
public class HttpTimeoutFactory implements URLStreamHandlerFactory {
    /** Lazily created shared instance, see {@link #getInstance(int)}. */
    static HttpTimeoutFactory instance = null;

    /** Timeout value passed to every handler this factory creates. */
    int fiTimeoutVal;


    /**
     * Creates a factory for the given timeout value.
     *
     *@param  iT  timeout value handed to the created handlers
     */
    public HttpTimeoutFactory(int iT) {
        this.fiTimeoutVal = iT;
    }


    /**
     * Creates a new handler carrying this factory's timeout value. The
     * protocol name is not looked at.
     *
     *@param  str  the protocol name (ignored)
     *@return      a new {@code HttpTimeoutHandler}
     */
    public URLStreamHandler createURLStreamHandler(String str) {
        HttpTimeoutHandler handler = new HttpTimeoutHandler(fiTimeoutVal);
        return handler;
    }


    /**
     * Returns the shared factory, creating it on the first call. On later
     * calls the {@code iT} argument has no effect.
     *
     *@param  iT  timeout value used only when the instance is created
     *@return     the shared factory
     */
    public static HttpTimeoutFactory getInstance(int iT) {
        if (instance != null) {
            return instance;
        }
        instance = new HttpTimeoutFactory(iT);
        return instance;
    }
}

View File

@ -0,0 +1,80 @@
package de.lanlab.larm.net;
import java.net.*;
import java.io.IOException;
/**
 * HTTP protocol handler that opens {@code HttpURLConnectionTimeout}
 * connections with a fixed timeout value and remembers the most recently
 * opened one.
 *
 *@author    cmarschn
 *@created   2. Mai 2001
 */
public class HttpTimeoutHandler extends sun.net.www.protocol.http.Handler {
    /** Timeout value handed to every connection this handler opens. */
    int timeoutVal;

    /** The connection created by the latest {@code openConnection} call. */
    HttpURLConnectionTimeout fHUCT;


    /**
     * Creates a handler for the given timeout value.
     *
     *@param  iT  timeout value for the connections opened by this handler
     */
    public HttpTimeoutHandler(int iT) {
        this.timeoutVal = iT;
    }


    /**
     * Returns the socket of the most recently opened connection.
     *
     *@return    the socket as reported by that connection
     */
    public Socket getSocket() {
        Socket socket = fHUCT.getSocket();
        return socket;
    }


    /**
     * Closes the most recently opened connection.
     *
     *@exception  Exception  whatever the connection's {@code close} throws
     */
    public void close() throws Exception {
        fHUCT.close();
    }


    /**
     * Opens a timeout-aware connection to the URL and remembers it.
     *
     *@param  u                the URL to connect to
     *@return                  the new connection
     *@exception  IOException  if the connection cannot be created
     */
    protected java.net.URLConnection openConnection(URL u) throws IOException {
        HttpURLConnectionTimeout connection = new HttpURLConnectionTimeout(u, this, timeoutVal);
        fHUCT = connection;
        return connection;
    }


    /**
     * Exposes the inherited {@code proxy} field to this package
     * (breaking encapsulation on purpose).
     *
     *@return    the proxy value
     */
    String getProxy() {
        return proxy;
    }


    /**
     * Exposes the inherited {@code proxyPort} field to this package
     * (breaking encapsulation on purpose).
     *
     *@return    the proxy port value
     */
    int getProxyPort() {
        return proxyPort;
    }
}

View File

@ -0,0 +1,226 @@
package de.lanlab.larm.net;
import java.net.*;
import java.io.*;
import sun.net.www.http.HttpClient;
/**
 * HTTP URL connection built on the JDK-internal
 * {@code sun.net.www.protocol.http.HttpURLConnection} that talks through an
 * {@code HttpClientTimeout}, so that a socket timeout can be applied.
 *
 *@author    cmarschn
 *@created   2. Mai 2001
 */
public class HttpURLConnectionTimeout extends sun.net.www.protocol.http.HttpURLConnection {
    /** Timeout value applied to the client's socket on connect. */
    int fiTimeoutVal;
    /** Handler that created this connection; null for the host/port constructor. */
    HttpTimeoutHandler fHandler;
    /** Client created by {@link #connect()}; null until then. */
    HttpClientTimeout fClient;


    /**
     * Creates a connection on behalf of a timeout handler.
     *
     *@param  u                the URL to connect to
     *@param  handler          the handler creating this connection; also the
     *      source of the proxy settings
     *@param  iTimeout         timeout value applied on connect
     *@exception  IOException  if the superclass constructor fails
     */
    public HttpURLConnectionTimeout(URL u, HttpTimeoutHandler handler, int iTimeout) throws IOException {
        super(u, handler);
        fHandler = handler;
        fiTimeoutVal = iTimeout;
    }


    /**
     * Creates a connection without a handler. {@code fHandler} stays null
     * and the timeout value stays at its default of 0.
     *
     *@param  u                the URL to connect to
     *@param  host             passed through to the superclass
     *@param  port             passed through to the superclass
     *@exception  IOException  if the superclass constructor fails
     */
    public HttpURLConnectionTimeout(URL u, String host, int port) throws IOException {
        super(u, host, port);
    }


    /**
     * Opens the connection unless it is already connected. For "http" URLs
     * the client comes from {@code HttpClientTimeout.getNew}, otherwise a
     * fresh client is created with the handler's proxy settings. In both
     * cases the client is remembered in {@code fClient} and the timeout is
     * applied; the original code skipped both in the second case.
     *
     *@exception  IOException  if the client cannot be created or configured
     */
    public void connect() throws IOException {
        if (connected) {
            return;
        }
        HttpClientTimeout client;
        if ("http".equals(url.getProtocol())
        /*
         *  && !failedOnce <- PRIVATE
         */
                ) {
            // for safety's sake, as reported by KLGroup
            synchronized (url) {
                client = HttpClientTimeout.getNew(url);
            }
        }
        else {
            // make sure to construct new connection if first
            // attempt failed
            // fHandler is null when the (URL, host, port) constructor was
            // used; fall back to "no proxy" in that case
            String proxyHost = (fHandler != null) ? fHandler.getProxy() : null;
            int proxyPort = (fHandler != null) ? fHandler.getProxyPort() : -1;
            client = new HttpClientTimeout(url, proxyHost, proxyPort);
        }
        http = client;
        fClient = client;
        client.setTimeout(fiTimeoutVal);
        ps = (PrintStream) http.getOutputStream();
        // this was missing from the original version
        connected = true;
    }


    /**
     * Create a new HttpClient object, bypassing the cache of HTTP client
     * objects/connections. A failure to set the timeout is only reported on
     * standard output; the client is returned regardless.
     *
     *@param  url              the URL being accessed
     *@return                  the new client
     *@exception  IOException  if the client cannot be created
     */
    protected HttpClient getNewClient(URL url)
             throws IOException {
        HttpClientTimeout client = new HttpClientTimeout(url, (String) null, -1);
        try {
            client.setTimeout(fiTimeoutVal);
        }
        catch (Exception e) {
            System.out.println("Unable to set timeout value");
        }
        return (HttpClient) client;
    }


    /**
     * Returns the socket of the client created by {@link #connect()}.
     *
     *@return    the socket, or null if {@code connect()} has not created a
     *      client yet
     */
    Socket getSocket() {
        return (fClient != null) ? fClient.getSocket() : null;
    }


    /**
     * Closes the client created by {@link #connect()}; does nothing if there
     * is none.
     *
     *@exception  Exception  whatever the client's {@code close} throws
     */
    void close() throws Exception {
        if (fClient != null) {
            fClient.close();
        }
    }


    /**
     * Opens a stream, following redirects manually and allowing them only to
     * the same protocol, port and host, and at most five times. Any other
     * redirect results in a {@code SecurityException}.
     *
     *@param  c                the connection to open
     *@return                  the input stream of the final connection
     *@exception  IOException  if opening a stream fails
     */
    public static InputStream openConnectionCheckRedirects(URLConnection c)
             throws IOException {
        boolean redir;
        int redirects = 0;
        InputStream in = null;
        do {
            if (c instanceof HttpURLConnectionTimeout) {
                ((HttpURLConnectionTimeout) c).setInstanceFollowRedirects(false);
            }
            // We want to open the input stream before
            // getting headers, because getHeaderField()
            // et al swallow IOExceptions.
            in = c.getInputStream();
            redir = false;
            if (c instanceof HttpURLConnectionTimeout) {
                HttpURLConnectionTimeout http = (HttpURLConnectionTimeout) c;
                int stat = http.getResponseCode();
                if (stat >= 300 && stat <= 305 &&
                        stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
                    URL base = http.getURL();
                    String loc = http.getHeaderField("Location");
                    URL target = null;
                    if (loc != null) {
                        target = new URL(base, loc);
                    }
                    http.disconnect();
                    if (target == null
                             || !base.getProtocol().equals(target.getProtocol())
                             || base.getPort() != target.getPort()
                             || !HostsEquals(base, target)
                             || redirects >= 5) {
                        throw new SecurityException("illegal URL redirect");
                    }
                    redir = true;
                    c = target.openConnection();
                    redirects++;
                }
            }
        } while (redir);
        return in;
    }


    // Same as java.net.URL.hostsEqual

    /**
     * Tells whether two URLs point to the same host: either the host names
     * match ignoring case, or both names resolve to equal addresses. Lookup
     * and security failures count as "not equal".
     *
     *@param  u1  first URL
     *@param  u2  second URL
     *@return     true if the hosts are considered equal
     */
    static boolean HostsEquals(URL u1, URL u2) {
        final String h1 = u1.getHost();
        final String h2 = u2.getHost();

        if (h1 == null) {
            return h2 == null;
        }
        else if (h2 == null) {
            return false;
        }
        else if (h1.equalsIgnoreCase(h2)) {
            return true;
        }
        // Have to resolve addresses before comparing, otherwise
        // names like tachyon and tachyon.eng would compare different
        final boolean result[] = {false};

        java.security.AccessController.doPrivileged(
            new java.security.PrivilegedAction() {
                /**
                 * Resolves both host names and stores the comparison result.
                 *
                 *@return    always null
                 */
                public Object run() {
                    try {
                        InetAddress a1 = InetAddress.getByName(h1);
                        InetAddress a2 = InetAddress.getByName(h2);
                        result[0] = a1.equals(a2);
                    }
                    catch (UnknownHostException e) {
                        // unresolvable host: treated as not equal
                    }
                    catch (SecurityException e) {
                        // lookup not permitted: treated as not equal
                    }
                    return null;
                }
            });

        return result[0];
    }
}

View File

@ -0,0 +1,17 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.parser;
/**
 * Callback interface with one method each for links, base values and
 * titles. NOTE(review): presumably invoked by the parser in this package
 * while it scans a document - confirm against the caller.
 */
public interface LinkHandler {

    /**
     * Reports a link.
     *
     * @param value   the link value
     * @param isFrame flag named after frames; presumably true when the link
     *                comes from a frame element - confirm against the caller
     */
    void handleLink(String value, boolean isFrame);

    /**
     * Reports a base value.
     *
     * @param value the base value
     */
    void handleBase(String value);

    /**
     * Reports a title.
     *
     * @param value the title value
     */
    void handleTitle(String value);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
*
* Description: <p>
*
* Copyright: Copyright (c)<p>
*
* Company: <p>
*
*
*
* @author
* @version 1.0
*/
package de.lanlab.larm.storage;
import de.lanlab.larm.util.*;
/**
 * Sink for the documents provided by a fetcher task.
 *
 * @author Clemens Marschner
 */
public interface DocumentStorage {

    /**
     * Called once when the storage is supposed to be initialized.
     */
    void open();

    /**
     * Called to store a web document.
     *
     * @param doc the document to store
     */
    void store(WebDocument doc);
}

View File

@ -0,0 +1,165 @@
package de.lanlab.larm.storage;
import de.lanlab.larm.util.WebDocument;
import de.lanlab.larm.util.SimpleLogger;
import java.io.*;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @created 11. Januar 2002
* @version 1.0
*/
/**
 * Saves documents into page files and records their positions through a
 * logger. The log line of a stored document is the document's info string
 * followed by the page file number and the offset within that page file.
 * A new page file is started on the first write after the current one has
 * grown beyond {@link #MAXLENGTH} bytes, so a page file can exceed that
 * limit by up to one document.
 * <p>
 * All methods that touch the current page file are synchronized on this
 * object.
 */
public class LogStorage implements DocumentStorage
{

    SimpleLogger log;

    File pageFile;
    FileOutputStream out;
    /** Number of the current page file; incremented for every file opened. */
    int pageFileCount;
    String filePrefix;
    /** Number of bytes written to the current page file so far. */
    int offset;
    /** True if the last attempt to open a page file succeeded. */
    boolean isValid = false;
    /**
     * Size in bytes after which the next write starts a new page file.
     */
    public final static int MAXLENGTH = 50000000;
    boolean logContents = false;
    /** Name of the page file opened last. */
    String fileName;


    /**
     * Creates the storage and, if contents are to be stored, opens the
     * first page file.
     *
     * @param log          the logger where index information is saved to
     * @param logContents  whether all docs are to be stored in page files or not
     * @param filePrefix   the file name where the page file number is appended
     */
    public LogStorage(SimpleLogger log, boolean logContents, String filePrefix)
    {
        this.log = log;
        pageFileCount = 0;
        this.filePrefix = filePrefix;
        this.logContents = logContents;
        if (logContents)
        {
            openPageFile();
        }
    }


    /**
     * Does nothing; the page file is opened by the constructor.
     */
    public void open() { }


    /**
     * Opens the next page file, named
     * {@code <filePrefix>_<number>.pfl}, and resets the offset to 0. On
     * failure the error is logged and {@code isValid} is set to false.
     */
    public synchronized void openPageFile()
    {
        int id = ++pageFileCount;
        fileName = filePrefix + "_" + id + ".pfl";
        try
        {
            this.offset = 0;
            out = new FileOutputStream(fileName);
            isValid = true;
        }
        catch (IOException io)
        {
            log.logThreadSafe("**ERROR: IOException while opening pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
            isValid = false;
        }
    }


    /**
     * Returns the stream of the current page file, first rolling over to a
     * new page file if the current one has grown beyond {@link #MAXLENGTH}.
     *
     * @return   the stream to write to
     */
    public synchronized OutputStream getOutputStream()
    {
        if (offset > MAXLENGTH)
        {
            try
            {
                out.close();
            }
            catch (IOException io)
            {
                log.logThreadSafe("**ERROR: IOException while closing pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
            }
            openPageFile();
        }
        return out;
    }


    /**
     * Appends the bytes to the current page file.
     *
     * @param bytes  the data to append
     * @return       the offset within the page file at which the data
     *      starts, or -1 if writing failed (the error is logged)
     */
    public synchronized int writeToPageFile(byte[] bytes)
    {
        try
        {
            OutputStream out = getOutputStream();
            int oldOffset = this.offset;
            out.write(bytes);
            this.offset += bytes.length;
            return oldOffset;
        }
        catch (IOException io)
        {
            log.logThreadSafe("**ERROR: IOException while writing " + bytes.length + " bytes to pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
        }
        return -1;
    }


    /**
     * Sets the logger that receives the index lines and error messages.
     *
     * @param log  the new logger
     */
    public void setLogger(SimpleLogger log)
    {
        this.log = log;
    }


    /**
     * Logs the document's info string. If storing is enabled, a page file is
     * open and the document has bytes, the bytes are written to the page
     * file first and the page file number and offset are appended to the log
     * line (an offset of -1 marks a failed write).
     *
     * @param doc  the document to store
     */
    public void store(WebDocument doc)
    {
        String docInfo = doc.getInfo();
        byte[] bytes = doc.getDocumentBytes();
        if (logContents && bytes != null)
        {
            // Write and read the page number under one lock: a concurrent
            // store() could otherwise roll over to the next page file in
            // between, and the logged page number would not match the offset.
            synchronized (this)
            {
                if (isValid)
                {
                    int pos = writeToPageFile(bytes);
                    docInfo = docInfo + "\t" + pageFileCount + "\t" + pos;
                }
            }
        }
        log.logThreadSafe(docInfo);
    }
}

View File

@ -0,0 +1,26 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.storage;
import de.lanlab.larm.util.*;
/**
 * A {@code DocumentStorage} whose methods are all empty: opening it does
 * nothing and stored documents are discarded.
 */
public class NullStorage implements DocumentStorage {

    /** Creates the storage; there is nothing to set up. */
    public NullStorage() {
    }

    /** Does nothing. */
    public void open() {
    }

    /**
     * Does nothing with the document.
     *
     * @param doc ignored
     */
    public void store(WebDocument doc) {
    }
}

View File

@ -0,0 +1,176 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.storage;
import java.sql.*;
import de.lanlab.larm.util.*;
import java.util.*;
/**
* saves the document into an sql table. At this time only in MS SQL (and probably Sybase)
* a table "Document" with the columns DO_URL(varchar), DO_MimeType(varchar) and
* DO_Data2(BLOB) is created after start<br>
* notes: experimental; slow
*/
public class SQLServerStorage implements DocumentStorage
{
private Vector freeCons;
private Vector busyCons;
private Vector freeStatements;
private Vector busyStatements;
private PreparedStatement addDoc;
public SQLServerStorage(String driver, String connectionString, String account, String password, int nrConnections)
{
try
{
Class.forName(driver);
freeCons = new Vector(nrConnections);
busyCons = new Vector(nrConnections);
freeStatements = new Vector(nrConnections);
busyStatements = new Vector(nrConnections);
Connection sqlConn;
PreparedStatement statement;
for(int i=0; i<nrConnections; i++)
{
sqlConn = DriverManager.getConnection(connectionString, account, password);
statement = sqlConn.prepareStatement("INSERT INTO Document (DO_URL, DO_MimeType, DO_Data2) VALUES (?,?,?)");
freeCons.add(sqlConn);
freeStatements.add(statement);
}
}
catch(SQLException e)
{
synchronized(this)
{
System.out.println(/*"Task " + taskNr + ": */ "SQLException: " + e.getMessage());
System.err.println(" SQLState: " + e.getSQLState());
System.err.println(" VendorError: " + e.getErrorCode());
}
return;
}
catch(Exception e)
{
System.out.println("SQLServerStorage: " + e.getClass().getName() + ": " + e.getMessage());
e.printStackTrace();
System.exit(0);
}
}
public Connection getConnection()
{
synchronized(this)
{
Connection actual = (Connection)freeCons.firstElement();
freeCons.removeElementAt(0);
if(actual == null)
{
return null;
}
busyCons.add(actual);
return actual;
}
}
public void releaseConnection(Connection con)
{
synchronized(this)
{
busyCons.remove(con);
freeCons.add(con);
}
}
public PreparedStatement getStatement()
{
synchronized(this)
{
PreparedStatement actual = (PreparedStatement)freeStatements.firstElement();
freeStatements.removeElementAt(0);
if(actual == null)
{
return null;
}
busyStatements.add(actual);
return actual;
}
}
public void releaseStatement(PreparedStatement statement)
{
synchronized(this)
{
busyStatements.remove(statement);
freeStatements.add(statement);
}
}
/**
 * Prepares the database for a new run: an existing Document table is
 * dropped and an empty one is created. SQL errors are reported on the
 * console and not propagated.
 */
public void open()
{
    Connection conn = null;
    Statement delDoc = null;
    try
    {
        conn = getConnection();
        if(conn == null)
        {
            System.err.println("SQLServerStorage.open: no free database connection available");
            return;
        }
        delDoc = conn.createStatement();
        // delete the existing data by recreating the table (this is faster)
        delDoc.executeUpdate("if exists (select * from sysobjects where id = object_id(N'[dbo].[Document]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)drop table [dbo].[Document]");
        delDoc.executeUpdate("CREATE TABLE [dbo].[Document] ([DO_ID] [int] IDENTITY (1, 1) NOT NULL , [DA_CrawlPass] [int] NULL , [DO_URL] [varchar] (255) NULL , [DO_ContentType] [varchar] (50) NULL , [DO_Data] [text] NULL , [DO_Hashcode] [int] NULL , [DO_ContentLength] [int] NULL , [DO_ContentEncoding] [varchar] (20) NULL , [DO_Data2] [image] NULL, [DO_MimeType] [varchar] (255) NULL) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]");
    }
    catch(SQLException e)
    {
        System.out.println(/*"Task " + taskNr + ": */"SQLException: " + e.getMessage());
        System.err.println("       SQLState:     " + e.getSQLState());
        System.err.println("       VendorError:  " + e.getErrorCode());
    }
    finally
    {
        // the statement is not pooled, so it has to be closed here;
        // otherwise it stays open as long as the pooled connection lives
        if(delDoc != null)
        {
            try
            {
                delDoc.close();
            }
            catch(SQLException e)
            {
                System.err.println("SQLServerStorage.open: could not close statement: " + e.getMessage());
            }
        }
        if(conn != null)
        {
            releaseConnection(conn);
        }
    }
}
/**
 * Writes one document into the Document table, using a pooled INSERT
 * statement: URL, MIME type and the raw document bytes are bound to the
 * three statement parameters. SQL errors are reported on the console and
 * not propagated; the statement is always handed back to the pool.
 *
 * @param document  the document to be stored
 */
public void store(WebDocument document)
{
    PreparedStatement insert = null;
    try
    {
        insert = getStatement();
        insert.setString(1, document.getURLString());
        insert.setString(2, document.getMimeType());
        insert.setBytes(3, document.getDocumentBytes());
        insert.execute();
    }
    catch(SQLException sqlProblem)
    {
        System.out.println(/* "Task " + taskNr + ": */ "SQLException: " + sqlProblem.getMessage());
        System.err.println("       SQLState:     " + sqlProblem.getSQLState());
        System.err.println("       VendorError:  " + sqlProblem.getErrorCode());
    }
    finally
    {
        // nothing to give back if no statement could be obtained
        if(insert != null)
        {
            releaseStatement(insert);
        }
    }
}
}

View File

@ -0,0 +1,9 @@
package de.lanlab.larm.threads;
/**
 * A unit of work that can be executed by a ServerThread and that can be
 * asked to stop.
 */
public interface InterruptableTask
{
    /**
     * executes the task
     * @param thread  the server thread that runs this task
     */
    void run(ServerThread thread);

    /**
     * asks the task to stop what it is doing
     */
    void interrupt();

    /**
     * @return a textual description of the task
     */
    String getInfo();
}

View File

@ -0,0 +1,173 @@
package de.lanlab.larm.threads;
import java.util.Vector;
import java.util.Iterator;
import java.io.*;
import java.util.*;
import de.lanlab.larm.util.*;
/**
 * This thread class acts like a server. It's running idle within
 * a thread pool until "runTask" is called. The given task will then
 * be executed asynchronously. After each task all registered
 * TaskReadyListeners are notified.
 */
public class ServerThread extends Thread
{
    /**
     * the task that is to be executed. null in idle-mode.
     * volatile because it is assigned by the thread that calls runTask()
     * and read by this thread and by callers of getTask()/interruptTask()
     */
    protected volatile InterruptableTask task = null;

    /**
     * true between runTask() and the end of the task; read by other threads
     * through isBusy(), therefore volatile
     */
    private volatile boolean busy = false;

    private ArrayList listeners = new ArrayList();

    /**
     * set by interrupt() (called from another thread) and polled by run(),
     * therefore volatile
     */
    private volatile boolean isInterrupted = false;

    private int threadNumber;

    SimpleLogger log;
    SimpleLogger errorLog;

    public ServerThread(int threadNumber, String name, ThreadGroup threadGroup)
    {
        super(threadGroup, name);
        init(threadNumber);
    }

    public ServerThread(int threadNumber, String name)
    {
        super(name);
        init(threadNumber);
    }

    /**
     * stores the thread number, makes sure the "logs" directory exists and
     * creates the two loggers of this thread
     */
    void init(int threadNumber)
    {
        this.threadNumber = threadNumber;
        File logDir = new File("logs");
        logDir.mkdir();
        log = new SimpleLogger("thread" + threadNumber);
        errorLog = new SimpleLogger("thread" + threadNumber + "_errors");
    }

    /**
     * constructor
     * @param threadNumber assigns an arbitrary number to this thread
     *   used by ServerThreadFactory
     */
    public ServerThread(int threadNumber)
    {
        init(threadNumber);
    }

    /**
     * the run method runs asynchronously. It waits until runTask() is
     * called, executes the task, notifies the listeners and waits again,
     * until the thread is interrupted
     */
    public void run()
    {
        try
        {
            while(!isInterrupted)
            {
                InterruptableTask current;
                synchronized(this)
                {
                    while(task == null)
                    {
                        wait();
                    }
                    current = task;
                }
                try
                {
                    current.run(this);
                }
                catch(RuntimeException e)
                {
                    // a failing task must not kill the worker: the listeners
                    // still have to learn that the thread is free again
                    errorLog.log("** Task failed: " + e.getClass().getName() + ": " + e.getMessage() + " **");
                    e.printStackTrace();
                }
                taskReady();
            }
        }
        catch(InterruptedException e)
        {
            System.out.println("ServerThread " + threadNumber + " interrupted");
            log.log("** Thread Interrupted **");
        }
    }

    /**
     * this is the main method that will invoke a task to run.
     * @param t  the task; it is picked up by the waiting run() loop
     */
    public synchronized void runTask(InterruptableTask t)
    {
        busy = true;
        task = t;
        notify();
    }

    /**
     * it should be possible to interrupt a task with this function.
     * therefore, the task has to check its interrupted()-state.
     * Does nothing if the thread is idle
     */
    public void interruptTask()
    {
        // read the field once: the worker may reset it to null concurrently
        InterruptableTask current = task;
        if(current != null)
        {
            current.interrupt();
        }
    }

    /**
     * the server thread can either be in idle or busy mode
     */
    public boolean isBusy()
    {
        return busy;
    }

    public void addTaskReadyListener(TaskReadyListener l)
    {
        listeners.add(l);
    }

    public void removeTaskReadyListener(TaskReadyListener l)
    {
        listeners.remove(l);
    }

    /**
     * interrupts the thread and makes run() leave its loop after the
     * current task
     */
    public void interrupt()
    {
        // set the flag first, so that it is already visible when the
        // interrupted thread continues
        isInterrupted = true;
        super.interrupt();
    }

    public int getThreadNumber()
    {
        return this.threadNumber;
    }

    /**
     * @return the task that is currently assigned, or null in idle mode
     */
    public InterruptableTask getTask()
    {
        return task;
    }

    /**
     * this method will be called when the task ends. It notifies all
     * of its observers about its changed state. The task reference is
     * already reset to null when the observers are called
     */
    protected void taskReady()
    {
        task = null;
        busy = false;
        Iterator Ie = listeners.iterator();
        while(Ie.hasNext())
        {
            ((TaskReadyListener)Ie.next()).taskReady(this);
        }
    }

    public SimpleLogger getLog()
    {
        return log;
    }

    public SimpleLogger getErrorLog()
    {
        return errorLog;
    }
}

View File

@ -0,0 +1,80 @@
package de.lanlab.larm.threads;
import de.lanlab.larm.util.Queue;
import java.util.Collection;
/**
* Title: LARM Lanlab Retrieval Machine
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
import java.util.LinkedList;
import java.util.Iterator;
/**
 * A simple in-memory FIFO queue of tasks, backed by a LinkedList: elements
 * enter at the head of the list and leave at its tail.
 */
public class TaskQueue implements Queue
{
    LinkedList queue = new LinkedList();

    /**
     * creates an empty queue
     */
    public TaskQueue()
    {
    }

    /**
     * not supported by this queue
     * @param c ignored
     */
    public void insertMultiple(Collection c)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * push a task to the start of the queue
     * @param i the task
     */
    public void insert(Object i)
    {
        queue.addFirst(i);
    }

    /**
     * get the last element out of the queue
     * The element will be removed from the queue
     * @return the task, or null if the queue is empty
     */
    public Object remove()
    {
        if (queue.isEmpty())
        {
            return null;
        }
        InterruptableTask oldest = (InterruptableTask) queue.removeLast();
        return oldest;
    }

    /**
     * @return an iterator over the queued elements, starting with the
     *         element inserted last
     */
    public Iterator iterator()
    {
        return queue.iterator();
    }

    /**
     * removes all elements from the queue
     */
    public void clear()
    {
        queue.clear();
    }

    /**
     * @return true if the queue contains no elements
     */
    public boolean isEmpty()
    {
        return queue.isEmpty();
    }

    /**
     * @return the number of elements in the queue
     */
    public int size()
    {
        return queue.size();
    }
}

View File

@ -0,0 +1,9 @@
package de.lanlab.larm.threads;
import de.lanlab.larm.util.Observer;
/**
 * Listener that is informed by a ServerThread each time the thread has
 * finished a task.
 */
public interface TaskReadyListener extends Observer
{
    /**
     * called by a server thread after its task has ended
     * @param s  the calling thread
     */
    void taskReady(ServerThread s);
}

View File

@ -0,0 +1,20 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c)<p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.threads;
/**
 * Default factory for the worker threads of a ThreadPool.
 */
public class ThreadFactory
{
    /**
     * creates a new, not yet started server thread
     * @param count  the number that is assigned to the thread
     * @return the new thread
     */
    public ServerThread createServerThread(int count)
    {
        ServerThread created = new ServerThread(count);
        return created;
    }
}

View File

@ -0,0 +1,380 @@
package de.lanlab.larm.threads;
//import java.util.Vector;
import java.util.*;
/**
 * if you have many tasks to accomplish, you can do this with one of the
 * following strategies:
 * <ul>
 *   <li> do it one after another (single threaded). this may often be
 *   inefficient because most programs often wait for external resources
 *   <li> assign a new thread for each task (thread on demand). This will clog
 *   up the system if many tasks have to be accomplished synchronously
 *   <li> hold a number of tasks, and queue the requests if there are more
 *   tasks than threads (ThreadPool).
 * </ul>
 * This thread pool is based on an article in Java-Magazin 06/2000.
 * synchronizations were removed unless necessary
 */
public class ThreadPool implements ThreadingStrategy, TaskReadyListener {
    private int maxThreads = MAX_THREADS;

    /**
     * references to all threads are stored here
     */
    private HashMap allThreads = new HashMap();

    /**
     * this vector takes all idle threads
     */
    private Vector idleThreads = new Vector();

    /**
     * this vector takes all threads that are in operation (busy)
     */
    private Vector busyThreads = new Vector();

    /**
     * if there are no idleThreads, tasks will go here
     */
    private TaskQueue queue = new TaskQueue();

    /**
     * thread pool observers will be notified of status changes
     */
    private Vector threadPoolObservers = new Vector();

    /**
     * set by stop() and read by the worker threads in taskReady(),
     * therefore volatile
     */
    private volatile boolean isStopped = false;

    /**
     * default maximum number of threads, if not given by the user
     */
    public final static int MAX_THREADS = 5;

    /**
     * thread was created
     */
    public final static String THREAD_CREATE = "T_CREATE";

    /**
     * thread is about to start a task
     */
    public final static String THREAD_START = "T_START";

    /**
     * thread is running
     */
    public final static String THREAD_RUNNING = "T_RUNNING";

    /**
     * thread was stopped
     */
    public final static String THREAD_STOP = "T_STOP";

    /**
     * thread was destroyed
     */
    public final static String THREAD_END = "T_END";

    /**
     * thread is idle
     */
    public final static String THREAD_IDLE = "T_IDLE";

    /**
     * a task was added to the queue, because all threads were busy
     */
    public final static String THREADQUEUE_ADD = "TQ_ADD";

    /**
     * a task was removed from the queue, because a thread had finished and was
     * ready
     */
    public final static String THREADQUEUE_REMOVE = "TQ_REMOVE";

    /**
     * this factory will create the threads
     */
    ThreadFactory factory;

    /**
     * this constructor will create the pool with MAX_THREADS threads and the
     * default factory
     */
    public ThreadPool() {
        this(MAX_THREADS, new ThreadFactory());
    }

    /**
     * this constructor will create the pool with the default Factory
     *
     *@param max  the maximum number of threads
     */
    public ThreadPool(int max) {
        this(max, new ThreadFactory());
    }

    /**
     * constructor
     *
     *@param max      maximum number of threads
     *@param factory  the thread factory with which the threads will be created
     */
    public ThreadPool(int max, ThreadFactory factory) {
        maxThreads = max;
        this.factory = factory;
    }

    /**
     * this init method will create and start the threads. It must be called
     * by hand
     */
    public void init() {
        for (int i = 0; i < maxThreads; i++) {
            createThread(i);
        }
    }

    /**
     * creates a thread with the factory, registers it as idle, starts it and
     * informs the observers (THREAD_CREATE, then THREAD_IDLE)
     *
     *@param i  the number of the new thread
     */
    public void createThread(int i) {
        ServerThread s = factory.createServerThread(i);
        idleThreads.add(s);
        allThreads.put(new Integer(i), s);
        s.addTaskReadyListener(this);
        sendMessage(i, THREAD_CREATE, "");
        s.start();
        sendMessage(i, THREAD_IDLE, "");
    }

    // FIXME: buggy with regard to synchronization
    /**
     * removes the thread with the given number from the pool, interrupts its
     * task and the thread itself, and creates a new thread with the same
     * number. The old thread is not waited for
     *
     *@param i  the number of the thread
     */
    public void restartThread(int i) {
        sendMessage(i, THREAD_STOP, "");
        ServerThread t = (ServerThread) allThreads.get(new Integer(i));
        idleThreads.remove(t);
        busyThreads.remove(t);
        allThreads.remove(new Integer(i));
        t.interruptTask();
        t.interrupt();
        //t.join();
        t = null;
        createThread(i);
    }

    /**
     * runs the task on an idle thread if there is one; otherwise the task is
     * queued until a thread becomes ready
     *
     *@param t    the task
     *@param key  not used by this implementation
     */
    public synchronized void doTask(InterruptableTask t, Object key) {
        if (!idleThreads.isEmpty()) {
            ServerThread s = (ServerThread) idleThreads.firstElement();
            idleThreads.remove(s);
            busyThreads.add(s);
            sendMessage(s.getThreadNumber(), THREAD_START, t.getInfo());
            s.runTask(t);
            sendMessage(s.getThreadNumber(), THREAD_RUNNING, t.getInfo());
        } else {
            queue.insert(t);
            sendMessage(-1, THREADQUEUE_ADD, t.getInfo());
        }
    }

    /**
     * this will interrupt all queued and all running tasks and empty the
     * queue. Therefore the InterruptableTasks must attend on the
     * interrupted-flag
     */
    public void interrupt() {
        Iterator tasks = queue.iterator();
        while (tasks.hasNext()) {
            InterruptableTask t = (InterruptableTask) tasks.next();
            t.interrupt();
            sendMessage(-1, THREADQUEUE_REMOVE, t.getInfo());
            // hoping that everything works out...
        }
        queue.clear();
        Iterator threads = busyThreads.iterator();
        while (threads.hasNext()) {
            ((ServerThread) threads.next()).interruptTask();
        }
    }

    /**
     * this will interrupt the tasks and end all threads. Busy threads end
     * as soon as their current task has returned
     */
    public void stop() {
        isStopped = true;
        interrupt();
        Iterator threads = idleThreads.iterator();
        while (threads.hasNext()) {
            // an idle thread has no task, so interruptTask() would do
            // nothing; the thread itself has to be interrupted in order to
            // end its wait() and let it terminate
            ((ServerThread) threads.next()).interrupt();
        }
        idleThreads.clear();
    }

    /**
     * called by a ServerThread when it has finished its task. The thread is
     * either ended (pool stopped), gets the next queued task, or becomes idle
     *
     *@param s  the calling thread
     */
    public synchronized void taskReady(ServerThread s) {
        if (isStopped) {
            s.interrupt();
            // ServerThread resets its task before it notifies its listeners,
            // so there may be no task to describe anymore
            InterruptableTask finished = s.getTask();
            String info = (finished == null) ? "" : finished.getInfo();
            sendMessage(s.getThreadNumber(), THREAD_STOP, info);
            busyThreads.remove(s);
        } else if (!queue.isEmpty()) {
            InterruptableTask t = (InterruptableTask) queue.remove();
            //queue.remove(t);
            sendMessage(-1, THREADQUEUE_REMOVE, t.getInfo());
            sendMessage(s.getThreadNumber(), THREAD_START, "");
            s.runTask(t);
            sendMessage(s.getThreadNumber(), THREAD_RUNNING, t.getInfo());
        } else {
            sendMessage(s.getThreadNumber(), THREAD_IDLE, "");
            idleThreads.add(s);
            busyThreads.remove(s);
        }
        // wake up a caller of waitForFinish()
        synchronized (idleThreads) {
            idleThreads.notify();
        }
    }

    /**
     * blocks the caller until no thread is busy anymore
     */
    public void waitForFinish() {
        synchronized (idleThreads) {
            while (busyThreads.size() != 0) {
                //System.out.println("busyThreads: " + busyThreads.size());
                try {
                    idleThreads.wait();
                } catch (InterruptedException e) {
                    System.out.println("Interrupted: " + e.getMessage());
                }
            }
            //System.out.println("busyThreads: " + busyThreads.size());
        }
    }

    /**
     * registers an observer that will be informed about thread and queue
     * status changes
     *
     *@param o  the observer to be added
     */
    public void addThreadPoolObserver(ThreadPoolObserver o) {
        threadPoolObservers.add(o);
    }

    /**
     * informs all observers. Messages with a thread number of -1 concern the
     * queue and are delivered through queueUpdate(), all others through
     * threadUpdate()
     *
     *@param threadNr  number of the thread, or -1 for a queue message
     *@param action    one of the THREAD_... or THREADQUEUE_... constants
     *@param info      additional information, usually the task info
     */
    protected void sendMessage(int threadNr, String action, String info) {
        Iterator Ie = threadPoolObservers.iterator();
        //System.out.println("ThreadPool: sending " + action + " message to " + threadPoolObservers.size() + " observers");
        if (threadNr != -1) {
            while (Ie.hasNext()) {
                ((ThreadPoolObserver) Ie.next()).threadUpdate(threadNr, action, info);
            }
        } else {
            while (Ie.hasNext()) {
                ((ThreadPoolObserver) Ie.next()).queueUpdate(info, action);
            }
        }
    }

    /**
     * Gets the queueSize attribute of the ThreadPool object
     *
     *@return   the number of tasks that wait in the queue
     */
    public synchronized int getQueueSize() {
        return this.queue.size();
    }

    /**
     * Gets the idleThreadsCount attribute of the ThreadPool object
     *
     *@return   the number of idle threads
     */
    public synchronized int getIdleThreadsCount() {
        return this.idleThreads.size();
    }

    /**
     * Gets the busyThreadsCount attribute of the ThreadPool object
     *
     *@return   the number of busy threads
     */
    public synchronized int getBusyThreadsCount() {
        return this.busyThreads.size();
    }

    /**
     * Gets the threadCount attribute of the ThreadPool object
     *
     *@return   the number of idle plus busy threads
     */
    public synchronized int getThreadCount() {
        return this.idleThreads.size() + this.busyThreads.size();
    }

    /**
     * Gets the threadIterator attribute of the ThreadPool object
     *
     *@return   an iterator over all ServerThreads of the pool
     */
    public Iterator getThreadIterator() {
        return allThreads.values().iterator();
        // return allThreads.iterator();
    }

    /**
     * replaces the queue that holds the waiting tasks
     *
     *@param queue  the new queue
     */
    public void setQueue(TaskQueue queue) {
        this.queue = queue;
    }

    /**
     *@return   the queue that holds the waiting tasks
     */
    public TaskQueue getTaskQueue() {
        return queue;
    }
}

View File

@ -0,0 +1,12 @@
package de.lanlab.larm.threads;
import de.lanlab.larm.util.Observer;
/**
 * An observer of the thread pool. It is informed about status changes of
 * the task queue and of the individual threads.
 */
public interface ThreadPoolObserver extends Observer
{
    /**
     * the task queue has changed
     * @param info    additional information about the event
     * @param action  the kind of event
     */
    void queueUpdate(String info, String action);

    /**
     * the state of a thread has changed
     * @param threadNr  the number of the thread
     * @param action    the kind of event
     * @param info      additional information about the event
     */
    void threadUpdate(int threadNr, String action, String info);
}

View File

@ -0,0 +1,8 @@
package de.lanlab.larm.threads;
/**
 * A strategy that decides how tasks are executed.
 */
public interface ThreadingStrategy
{
    /**
     * hands a task over for execution
     * @param t    the task
     * @param key  an additional object accompanying the task
     */
    void doTask(InterruptableTask t, Object key);

    /**
     * interrupts the tasks
     */
    void interrupt();

    /**
     * stops the execution of tasks
     */
    void stop();
}

View File

@ -0,0 +1,721 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
*
* Description: <p>
*
* Copyright: Copyright (c)<p>
*
* Company: <p>
*
*
*
* @author
* @version 1.0
*/
package de.lanlab.larm.util;
import java.io.*;
import java.util.*;
/**
 * Unchecked wrapper around an exception that occurred while a queue block
 * was written to or read from disk. Message and stack trace are taken from
 * the wrapped exception.
 */
class StoreException extends RuntimeException
{
    Exception origException;

    /**
     * Constructor for the StoreException object
     *
     * @param e  the exception that caused the failure
     */
    public StoreException(Exception e)
    {
        this.origException = e;
    }

    /**
     * Gets the message attribute of the StoreException object
     *
     * @return   the message of the wrapped exception
     */
    public String getMessage()
    {
        String reason = this.origException.getMessage();
        return reason;
    }

    /**
     * prints a header line and then the stack trace of the wrapped
     * exception to System.err
     */
    public void printStackTrace()
    {
        String header = "StoreException occured with reason: " + this.origException.getMessage();
        System.err.println(header);
        this.origException.printStackTrace();
    }
}
/**
 * internal class that represents one block within a queue. A block holds
 * its elements in a LinkedList and can be swapped out to a file with Java
 * serialization
 *
 * @author    Clemens Marschner
 * @created   3. Januar 2002
 */
class QueueBlock
{
    /**
     * the elements section will be set to null if it is on disk. The elements
     * must be Serializable
     */
    LinkedList elements;

    /**
     * number of elements in the block. Copy of elements.size()
     */
    int size;

    /**
     * maximum block size
     */
    int maxSize;

    /**
     * if set, elements is null and block was written to file
     */
    boolean onDisk;

    /**
     * block name
     */
    String name;

    /**
     * initializes the block
     *
     * @param name     the block name (must be unique, otherwise blocks
     *      collide on file level)
     * @param maxSize  maximum block size. Overflows and underflows are
     *      signalled by exceptions
     */
    public QueueBlock(String name, int maxSize)
    {
        this.name = name;
        this.onDisk = false;
        this.elements = new LinkedList();
        this.maxSize = maxSize;
    }

    /**
     * serializes the block and stores it on disk. The elements are only
     * released from memory after the file was written and closed
     * successfully. Does nothing if the block is already on disk
     *
     * @exception StoreException  wraps the IOException if writing fails
     */
    public void store()
        throws StoreException
    {
        if (onDisk)
        {
            // elements is null in this state; writing it would overwrite
            // the stored block with an empty file content
            return;
        }
        FileOutputStream file = null;
        try
        {
            file = new FileOutputStream(getFileName());
            ObjectOutputStream o = new ObjectOutputStream(file);
            o.writeObject(elements);
            o.close();
            file = null;
            elements = null;
            onDisk = true;
            //System.out.println("CachingQueue.store: Block stored");
        }
        catch (IOException e)
        {
            System.err.println("CachingQueue.store: IOException");
            throw new StoreException(e);
        }
        finally
        {
            // only reached with an open file if writing has failed
            if (file != null)
            {
                try
                {
                    file.close();
                }
                catch (IOException ignored)
                {
                    // the original failure is already being reported
                }
            }
        }
    }

    /**
     * @return   the filename of the block
     */
    String getFileName()
    {
        // package protected!
        return "cachingqueue/" + name + ".cqb";
    }

    /**
     * load the block from disk and delete its file
     *
     * @exception StoreException  wraps any exception that occurs while
     *      the block is read
     */
    public void load()
        throws StoreException
    {
        FileInputStream file = null;
        try
        {
            file = new FileInputStream(getFileName());
            ObjectInputStream i = new ObjectInputStream(file);
            elements = (LinkedList) i.readObject();
            i.close();
            file = null;
            onDisk = false;
            size = elements.size();
            if (!(new File(getFileName()).delete()))
            {
                System.err.println("CachingQueue.load: file could not be deleted");
            }
            //System.out.println("CachingQueue.load: Block loaded");
        }
        catch (Exception e)
        {
            System.err.println("CachingQueue.load: Exception " + e.getClass().getName() + " occured");
            throw new StoreException(e);
        }
        finally
        {
            // only reached with an open file if reading has failed
            if (file != null)
            {
                try
                {
                    file.close();
                }
                catch (IOException ignored)
                {
                    // the original failure is already being reported
                }
            }
        }
    }

    /**
     * inserts an object at the start of the queue. Must be synchronized by
     * calling class to be thread safe. A block that is on disk is loaded
     * first
     *
     * @param o                   the object to be inserted
     * @exception StoreException  if the block could not be loaded
     */
    public void insert(Object o)
        throws StoreException
    {
        if (onDisk)
        {
            load();
        }
        if (size >= maxSize)
        {
            throw new OverflowException();
        }
        elements.addFirst(o);
        size++;
    }

    /**
     * returns the last element of the queue and removes it. Must be
     * made synchronized by calling class to be thread safe. A block that is
     * on disk is loaded first
     *
     * @return                        the removed element
     * @exception UnderflowException  if the block is empty
     * @exception StoreException      if the block could not be loaded
     */
    public Object remove()
        throws UnderflowException, StoreException
    {
        if (onDisk)
        {
            load();
        }
        if (size <= 0)
        {
            throw new UnderflowException();
        }
        size--;
        return elements.removeLast();
    }

    /**
     * @return   the number of elements in the block
     */
    public int size()
    {
        return size;
    }

    /**
     * destructor. Assures that all files are deleted, even if the queue was not
     * empty at the time when the program ended
     */
    public void finalize()
    {
        // System.err.println("finalize of " + name + " called");
        if (onDisk)
        {
            // delete the temp file. Happens e.g. if an exception has occurred
            // System.err.println("CachingQueue.finalize of block " + name + ": deleting file");
            if (!(new File(getFileName()).delete()))
            {
                // file errors are possible after an exception: ignore
                // System.err.println("CachingQueue.finalize: file could not be deleted although onDisk was true");
            }
        }
    }
}
/**
 * this class holds a queue whose data is kept on disk whenever possible.
 * It's a single ended queue, meaning data can only be added at the front and
 * taken from the back. the queue itself is divided into blocks. Only the first
 * and last blocks are kept in main memory, the rest is stored on disk. Only a
 * LinkedList entry is kept in memory then.
 * Blocks are swapped if an overflow (in case of insertions) or underflow (in case
 * of removals) occur.<br>
 *
 * <pre>
 *         +---+---+---+---+-+
 *  put -> | M | S | S | S |M| -> remove
 *         +---+---+---+---+-+
 * </pre>
 * the maximum number of entries can be specified with the blockSize parameter. Thus,
 * the queue actually holds a maximum number of 2 x blockSize objects in main memory,
 * plus a few bytes for each block.<br>
 * The objects contained in the blocks are stored with the standard Java
 * serialization mechanism.
 * The files are named "cachingqueue/Queuename_BlockNumber.cqb".
 * insert() and remove() are synchronized, size() is not
 * @author    Clemens Marschner
 * @created   3. Januar 2002
 */
public class CachingQueue implements Queue
{
    /**
     * the Blocks
     */
    LinkedList queueBlocks;

    /**
     * fast access to the first block
     */
    QueueBlock first = null;

    /**
     * fast access to the last block
     */
    QueueBlock last = null;

    /**
     * maximum block size
     */
    int blockSize;

    /**
     * "primary key" identity count for each block
     */
    int blockCount = 0;

    /**
     * active blocks
     */
    int numBlocks = 0;

    /**
     * queue name
     */
    String name;

    /**
     * total number of objects
     */
    int size;

    /**
     * init. Creates the directory "cachingqueue" if necessary
     *
     * @param name       the name of the queue, used in files names
     * @param blockSize  maximum number of objects stored in one block
     */
    public CachingQueue(String name, int blockSize)
    {
        queueBlocks = new LinkedList();
        this.name = name;
        this.blockSize = blockSize;
        File cq = new File("cachingqueue");
        cq.mkdir();
    }

    /**
     * inserts an object to the front of the queue
     *
     * @param o                   the object to be inserted. must implement Serializable
     * @exception StoreException  encapsulates Exceptions that occur when writing to hard disk
     */
    public synchronized void insert(Object o)
        throws StoreException
    {
        if (last == null && first == null)
        {
            first = last = newBlock();
            queueBlocks.addFirst(first);
            numBlocks++;
        }
        if (last == null && first != null)
        {
            // inconsistent state, should never happen
            // assert((last==null && first==null) || (last!= null && first!=null));
            System.err.println("Error in CachingQueue: last!=first==null");
        }

        if (first.size() >= blockSize)
        {
            // save block and create a new one
            QueueBlock newBlock = newBlock();
            numBlocks++;
            // the last block always stays in memory
            if (last != first)
            {
                first.store();
            }
            queueBlocks.addFirst(newBlock);
            first = newBlock;
        }
        first.insert(o);
        size++;
    }

    /**
     * returns the last object from the queue
     *
     * @return                        the object returned
     *
     * @exception StoreException      if a block could not be read from disk
     * @exception UnderflowException  if the queue was empty
     */
    public synchronized Object remove()
        throws StoreException, UnderflowException
    {
        if (last == null)
        {
            throw new UnderflowException();
        }
        if (last.size() <= 0)
        {
            // the last block is used up: drop it and continue with its
            // predecessor
            queueBlocks.removeLast();
            numBlocks--;
            if (numBlocks == 1)
            {
                last = first;
            }
            else if (numBlocks == 0)
            {
                first = last = null;
                throw new UnderflowException();
            }
            else if (numBlocks < 0)
            {
                // assert(numBlocks >= 0)
                System.err.println("CachingQueue.remove: numBlocks<0!");
                throw new UnderflowException();
            }
            else
            {
                last = (QueueBlock) queueBlocks.getLast();
            }
        }
        // count the element only after it was really taken out of the block;
        // if the block cannot be loaded, the size stays correct
        Object removed = last.remove();
        --size;
        return removed;
    }

    /**
     * not supported
     *
     * @param c  ignored
     */
    public void insertMultiple(java.util.Collection c)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * creates a new block with a unique name and the block size of this queue
     *
     * @return   the new, empty block
     */
    private QueueBlock newBlock()
    {
        return new QueueBlock(name + "_" + blockCount++, blockSize);
    }

    /**
     * total number of objects contained in the queue
     *
     * @return   the number of objects
     */
    public int size()
    {
        return size;
    }

    /**
     * testing
     *
     * @param args  The command line arguments (not used)
     */
    public static void main(String[] args)
    {
        System.out.println("Test1: " + CachingQueueTester.testUnderflow());
        System.out.println("Test2: " + CachingQueueTester.testInsert());
        System.out.println("Test3: " + CachingQueueTester.testBufReadWrite());
        System.out.println("Test4: " + CachingQueueTester.testBufReadWrite2());
        System.out.println("Test5: " + CachingQueueTester.testUnderflow2());
        System.out.println("Test6: " + CachingQueueTester.testBufReadWrite3());
        System.out.println("Test7: " + CachingQueueTester.testExceptions());
    }
}
/**
 * Exception of the test code in this file; it carries no message or state.
 * TODO: move the tests out of this file and run them with JUnit
 *
 * @author    Administrator
 * @created   3. Januar 2002
 */
class AssertionFailedException extends RuntimeException
{
}
/**
* Testklasse. Enthält einige Tests für die Funktionalität der CachingQueue
*
* @author Administrator
* @created 3. Januar 2002
*/
class CachingQueueTester
{
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testUnderflow()
{
CachingQueue cq = new CachingQueue("testQueue1", 10);
try
{
cq.remove();
}
catch (UnderflowException e)
{
return true;
}
catch (Exception e)
{
e.printStackTrace();
}
return false;
}
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testInsert()
{
CachingQueue cq = new CachingQueue("testQueue2", 10);
String test = "Test1";
assert(cq.size() == 0);
cq.insert(test);
assert(cq.size() == 1);
return (cq.remove() == test);
}
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testBufReadWrite()
{
CachingQueue cq = new CachingQueue("testQueue3", 2);
String test1 = "Test1";
String test2 = "Test2";
String test3 = "Test3";
cq.insert(test1);
cq.insert(test2);
cq.insert(test3);
assert(cq.size() == 3);
cq.remove();
cq.remove();
assert(cq.size() == 1);
return (cq.remove() == test3);
}
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testBufReadWrite2()
{
CachingQueue cq = new CachingQueue("testQueue4", 2);
String test1 = "Test1";
String test2 = "Test2";
String test3 = "Test3";
String test4 = "Test4";
String test5 = "Test5";
cq.insert(test1);
cq.insert(test2);
cq.insert(test3);
cq.insert(test4);
cq.insert(test5);
assert(cq.size() == 5);
String t = (String) cq.remove();
assert(t.equals(test1));
t = (String) cq.remove();
assert(t.equals(test2));
t = (String) cq.remove();
assert(t.equals(test3));
t = (String) cq.remove();
assert(t.equals(test4));
t = (String) cq.remove();
assert(cq.size() == 0);
return (t.equals(test5));
}
/**
* Description of the Method
*
* @param expr Description of the Parameter
*/
public static void assert(boolean expr)
{
if (!expr)
{
throw new AssertionFailedException();
}
}
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testUnderflow2()
{
CachingQueue cq = new CachingQueue("testQueue5", 2);
String test1 = "Test1";
String test2 = "Test2";
String test3 = "Test3";
String test4 = "Test4";
String test5 = "Test5";
cq.insert(test1);
cq.insert(test2);
cq.insert(test3);
cq.insert(test4);
cq.insert(test5);
assert(cq.remove().equals(test1));
assert(cq.remove().equals(test2));
assert(cq.remove().equals(test3));
assert(cq.remove().equals(test4));
assert(cq.remove().equals(test5));
try
{
cq.remove();
}
catch (UnderflowException e)
{
return true;
}
return false;
}
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testBufReadWrite3()
{
CachingQueue cq = new CachingQueue("testQueue4", 1);
String test1 = "Test1";
String test2 = "Test2";
String test3 = "Test3";
String test4 = "Test4";
String test5 = "Test5";
cq.insert(test1);
cq.insert(test2);
cq.insert(test3);
cq.insert(test4);
cq.insert(test5);
String t = (String) cq.remove();
assert(t.equals(test1));
t = (String) cq.remove();
assert(t.equals(test2));
t = (String) cq.remove();
assert(t.equals(test3));
t = (String) cq.remove();
assert(t.equals(test4));
t = (String) cq.remove();
return (t.equals(test5));
}
/**
* A unit test for JUnit
*
* @return Description of the Return Value
*/
public static boolean testExceptions()
{
System.gc();
CachingQueue cq = new CachingQueue("testQueue5", 1);
String test1 = "Test1";
String test2 = "Test2";
String test3 = "Test3";
String test4 = "Test4";
String test5 = "Test5";
cq.insert(test1);
cq.insert(test2);
cq.insert(test3);
cq.insert(test4);
cq.insert(test5);
try
{
if (!(new File("testQueue5_1.cqb").delete()))
{
System.err.println("CachingQueueTester.textExceptions: Store 1 nicht vorhanden. Filename geändert?");
}
if (!(new File("testQueue5_2.cqb").delete()))
{
System.err.println("CachingQueueTester.textExceptions: Store 2 nicht vorhanden. Filename geändert?");
}
String t = (String) cq.remove();
assert(t.equals(test1));
t = (String) cq.remove();
assert(t.equals(test2));
t = (String) cq.remove();
assert(t.equals(test3));
t = (String) cq.remove();
assert(t.equals(test4));
t = (String) cq.remove();
assert(t.equals(test5));
}
catch (StoreException e)
{
return true;
}
finally
{
cq = null;
System.gc();
// finalizer müssten aufgerufen werden
}
return false;
}
}

View File

@ -0,0 +1,273 @@
package de.lanlab.larm.util;
import java.lang.reflect.*;
import java.io.*;
import java.util.*;
/**
* Title: LARM Lanlab Retrieval Machine
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
/**
* prints class information with the reflection api
* for debugging only
*/
public class ClassInfo
{
/**
 * Creates a ClassInfo object. The constructor body is empty.
 */
public ClassInfo()
{
}
/**
 * Prints the source skeleton of a new class to System.out. The new class
 * extends (or, for an interface, implements) the class given as first
 * argument and contains a stub for each of its public, non-final methods
 * that are not declared in java.lang.Object. Each stub throws an
 * UnsupportedOperationException. Import statements are collected for the
 * parameter, return and exception types. Any error is reported with a
 * stack trace.
 * <p>
 * Usage: java ClassInfo PackageName.MyNewClassName PackageName.DerivedClassName
 *
 * @param args  args[0]: name of the class to derive from,
 *              args[1]: name of the class to generate
 */
public static void main(String[] args)
{
    if(args.length < 2)
    {
        System.err.println("Usage: java ClassInfo PackageName.MyNewClassName PackageName.DerivedClassName");
        return;
    }
    String name = args[0];
    String derivedName = args[1];
    try
    {
        Class cls = Class.forName(name);
        name = cls.getName();
        String clss = getClassName(name);
        // imports and class body are collected separately, because the
        // imports are only complete after the whole body was generated
        StringWriter importsWriter = new StringWriter();
        PrintWriter imports = new PrintWriter(importsWriter);
        StringWriter outWriter = new StringWriter();
        PrintWriter out = new PrintWriter(outWriter);
        // sorted and free of duplicates
        TreeSet importClasses = new TreeSet();
        importClasses.add(getImportStatement(name));
        out.println("/**\n * (class description here)\n */\npublic class " + derivedName + " " + (cls.isInterface() ? "implements " : "extends ") + clss + "\n{");
        Method[] m = cls.getMethods();
        for(int i= 0; i< m.length; i++)
        {
            Method thism = m[i];
            // Strings are compared with equals(); identity comparison only
            // works by accident if both Strings happen to be interned
            if((thism.getModifiers() & Modifier.PRIVATE) == 0 && ((thism.getModifiers() & Modifier.FINAL) == 0)
                 && !"java.lang.Object".equals(thism.getDeclaringClass().getName()))
            {
                out.println("    /**");
                out.println("     * (method description here)");
                out.println("     * defined in " + thism.getDeclaringClass().getName());
                Class[] parameters = thism.getParameterTypes();
                for(int j = 0; j < parameters.length; j ++)
                {
                    if(getPackageName(parameters[j].getName()).length() > 0)
                    {
                        importClasses.add(getImportStatement(parameters[j].getName()));
                    }
                    out.println("     * @param p" + j + " (parameter description here)");
                }
                if(!"void".equals(thism.getReturnType().getName()))
                {
                    String returnPackage = getPackageName(thism.getReturnType().getName());
                    if(returnPackage.length() > 0)
                    {
                        importClasses.add(getImportStatement(thism.getReturnType().getName()));
                    }
                    out.println("     * @return (return value description here)");
                }
                out.println("     */");
                out.print("    " + getModifierString(thism.getModifiers()) + getClassName(thism.getReturnType().getName()) + " ");
                out.print(thism.getName() + "(");
                for(int j = 0; j < parameters.length; j ++)
                {
                    if(j>0)
                    {
                        out.print(", ");
                    }
                    out.print(getClassName(parameters[j].getName()) + " p" + j);
                }
                out.print(")");
                Class[] exceptions = thism.getExceptionTypes();
                if (exceptions.length > 0)
                {
                    out.print(" throws ");
                }
                for(int k = 0; k < exceptions.length; k++)
                {
                    if(k > 0)
                    {
                        out.print(", ");
                    }
                    String exCompleteName = exceptions[k].getName();
                    String exName = getClassName(exCompleteName);
                    importClasses.add(getImportStatement(exCompleteName));
                    out.print(exName);
                }
                out.print("\n" +
                          "    {\n" +
                          "        /**@todo: Implement this " + thism.getName() + "() method */\n" +
                          "        throw new UnsupportedOperationException(\"Method " + thism.getName() + "() not yet implemented.\");\n" +
                          "    }\n\n");
            }
        }
        out.println("}");
        Iterator importIterator = importClasses.iterator();
        while(importIterator.hasNext())
        {
            String importName = (String)importIterator.next();
            // classes from java.lang are imported implicitly
            if(!importName.startsWith("java.lang"))
            {
                imports.println("import " + importName + ";");
            }
        }
        out.flush();
        imports.flush();
        if(getPackageName(derivedName).length() > 0)
        {
            System.out.println("package " + getPackageName(derivedName) + ";\n");
        }
        System.out.println( "/**\n" +
                            " * Title:        \n" +
                            " * Description:\n" +
                            " * Copyright:    Copyright (c)\n" +
                            " * Company:\n" +
                            " * @author\n" +
                            " * @version 1.0\n" +
                            " */\n");
        System.out.println(importsWriter.getBuffer());
        System.out.print(outWriter.getBuffer());
    }
    catch(Throwable t)
    {
        t.printStackTrace();
    }
}
/**
 * Extracts the package part of a class name as returned by
 * <code>Class.getName()</code>.
 * <p>
 * Array names are unwrapped first: all leading '[' (one per dimension) are
 * skipped, and for object arrays ("[Lpkg.Type;") the package of the element
 * type is returned. Primitive arrays and names without a '.' have no package.
 * <p>
 * Callers in this class test the result with <code>!= ""</code>, i.e. by
 * identity, so "no package" must always be reported with the <code>""</code>
 * literal and never with a freshly built empty string.
 *
 * @param className  a name in <code>Class.getName()</code> format
 * @return the package name, or <code>""</code> if there is none
 */
public static String getPackageName(String className)
{
    if(className.charAt(0) == '[')
    {
        // skip one '[' per array dimension
        int elementStart = 1;
        while(elementStart < className.length() && className.charAt(elementStart) == '[')
        {
            elementStart++;
        }
        if(elementStart >= className.length() || className.charAt(elementStart) != 'L')
        {
            // primitive element type (or malformed name): nothing to import
            return "";
        }
        // strip the 'L' marker in front and the ';' terminator at the end
        return getPackageName(className.substring(elementStart + 1, className.length() - 1));
    }
    int lastDot = className.lastIndexOf(".");
    if(lastDot == -1)
    {
        return "";
    }
    return className.substring(0, lastDot);
}
/**
 * Converts a class name as returned by <code>Class.getName()</code> into the
 * unqualified type name used in source code.
 * <p>
 * Array names ("[I", "[[Ljava.lang.String;", ...) are translated into the
 * element type followed by one "[]" per dimension; all eight primitive
 * element types are supported. For every other name the part after the last
 * '.' is returned, so nested classes keep their '$' (e.g. "Map$Entry").
 *
 * @param className  a name in <code>Class.getName()</code> format
 * @return the simple type name, e.g. "String", "int[]" or "URL[][]"
 */
public static String getClassName(String className)
{
    if(className.charAt(0) == '[')
    {
        // what follows the first '[' is either another array or an element descriptor
        String element = className.substring(1);
        switch(element.charAt(0))
        {
            case '[':
                // nested array: resolve the inner dimensions first
                return getClassName(element) + "[]";
            case 'L':
                // object array: "Lpkg.Type;" -> drop the 'L' and the ';'
                return getClassName(element.substring(1, element.length() - 1)) + "[]";
            case 'Z':
                return "boolean[]";
            case 'B':
                return "byte[]";
            case 'C':
                return "char[]";
            case 'S':
                return "short[]";
            case 'I':
                return "int[]";
            case 'J':
                return "long[]";
            case 'F':
                return "float[]";
            case 'D':
                return "double[]";
            // anything else is not a valid descriptor; fall through and treat
            // the name like an ordinary class name, as before
        }
    }
    int lastDot = className.lastIndexOf(".");
    return (lastDot > -1) ? className.substring(lastDot + 1) : className;
}
/**
 * Builds the fully qualified name that has to be imported for the given
 * type. For array types the element type is returned, because that is what
 * an import statement has to name.
 *
 * @param className  a name in <code>Class.getName()</code> format
 * @return package and simple name joined by '.', or just the simple name if
 *         the type has no package (instead of a name starting with '.')
 */
static String getImportStatement(String className)
{
    String pack = getPackageName(className);
    String clss = getClassName(className);
    // cut off the complete array suffix, however many dimensions it has
    int arraySuffix = clss.indexOf("[]");
    if(arraySuffix > -1)
    {
        clss = clss.substring(0, arraySuffix);
    }
    if(pack.length() == 0)
    {
        return clss;
    }
    return pack + "." + clss;
}
/**
 * Renders a modifier bit set (see <code>java.lang.reflect.Modifier</code>) as
 * source code keywords.
 * <p>
 * Every keyword is followed by a single blank, so the result can be put
 * directly in front of a type name. The keywords appear in the fixed order of
 * the table below, which is alphabetical and not the order conventionally
 * used in Java source.
 *
 * @param modifiers  the modifier flags, e.g. from <code>Method.getModifiers()</code>
 * @return the keywords for all flags that are set, or an empty string
 */
public static String getModifierString(int modifiers)
{
    // flags[i] is rendered as keywords[i]; the array order is the output order
    final int[] flags =
    {
        Modifier.ABSTRACT, Modifier.FINAL, Modifier.INTERFACE, Modifier.NATIVE,
        Modifier.PRIVATE, Modifier.PROTECTED, Modifier.PUBLIC, Modifier.STATIC,
        Modifier.STRICT, Modifier.SYNCHRONIZED, Modifier.TRANSIENT, Modifier.VOLATILE
    };
    final String[] keywords =
    {
        "abstract ", "final ", "interface ", "native ",
        "private ", "protected ", "public ", "static ",
        "strictfp ", "synchronized ", "transient ", "volatile "
    };
    StringBuffer result = new StringBuffer();
    for(int i = 0; i < flags.length; i++)
    {
        if((modifiers & flags[i]) != 0)
        {
            result.append(keywords[i]);
        }
    }
    return result.toString();
}
}

View File

@ -0,0 +1,319 @@
package de.lanlab.larm.util;
/**
* Title:
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
import java.util.*;
/**
 * Simple hashed linked list. It allows for inserting and removing elements like
 * in a hash table (in fact, it uses a HashMap), while still being able to easily
 * traverse the collection like a list. In addition, the traversal is circular:
 * {@link #next()} always returns an element as long as there are elements in
 * the list. In contrast to the iterators of the JDK collection classes, the
 * traversal can cope with inserts and removals while it is in progress.
 * <p>
 * Elements are always added to the end of the list. Every entry is reachable
 * through the list links and through the key map, so put, get, remove and next
 * each need one hash lookup and/or a constant number of link updates.
 * <p>
 * The list doesn't accept null keys. The class is not synchronized.
 *
 * @todo put the traversal function into an Iterator
 * @todo implement the class as a derivate from a Hash
 */
public class HashedCircularLinkedList
{

    /**
     * List node. Besides the links and the element it remembers the key under
     * which it is registered, so an entry found by traversal can also be
     * removed from the key map.
     */
    private static class Entry
    {
        Object key;
        Object element;
        Entry next;
        Entry previous;

        Entry(Object element, Entry next, Entry previous, Object key)
        {
            this.element = element;
            this.next = next;
            this.previous = previous;
            this.key = key;
        }
    }

    /**
     * sentinel of the doubly linked ring. It carries no element; header.next
     * is the first and header.previous the last real entry.
     */
    private transient Entry header = new Entry(null, null, null, null);

    /**
     * the hash. maps keys to entries, which by themselves map to objects
     */
    HashMap keys;

    /** number of real entries, i.e. not counting the header */
    private transient int size = 0;

    /**
     * the entry returned by the last call to next(), or null if the traversal
     * has not started yet or the list has become empty
     */
    Entry current = null;


    /**
     * Constructs an empty list.
     *
     * @param initialCapacity  initial capacity of the underlying HashMap
     * @param loadFactor       load factor of the underlying HashMap
     */
    public HashedCircularLinkedList(int initialCapacity, float loadFactor)
    {
        header.next = header.previous = header;
        keys = new HashMap(initialCapacity, loadFactor);
    }


    /**
     * Returns the number of elements in this list.
     *
     * @return the number of elements in this list.
     */
    public int size()
    {
        return size;
    }


    /**
     * Removes the entry that was stored under the given key. If that entry is
     * the current position of the traversal, the position moves one step back,
     * so the following next() returns the successor of the removed entry.
     *
     * @param o  key of the entry to be removed
     * @return <tt>true</tt> if an entry with this key existed and was removed
     */
    public boolean removeByKey(Object o)
    {
        Entry e = (Entry)keys.get(o);
        if(e == null)
        {
            return false;
        }
        removeEntry(e);
        return true;
    }


    /**
     * Removes all of the elements from this list and resets the traversal.
     */
    public void clear()
    {
        // list
        header.next = header.previous = header;
        // hash
        keys.clear();
        size = 0;
        current = null;
    }


    /**
     * links a new entry into the ring directly in front of e
     */
    private Entry addEntryBefore(Object key, Object o, Entry e)
    {
        Entry newEntry = new Entry(o, e, e.previous, key);
        newEntry.previous.next = newEntry;
        newEntry.next.previous = newEntry;
        return newEntry;
    }


    /**
     * unlinks e from the ring. Only the neighbours are changed; e keeps its
     * own links. The header itself can't be removed.
     */
    private void removeEntryFromList(Entry e)
    {
        if(e != null)
        {
            if (e == header)
            {
                throw new NoSuchElementException();
            }
            e.previous.next = e.next;
            e.next.previous = e.previous;
        }
    }


    /**
     * removes an entry from list and hash and keeps size and the traversal
     * position consistent. Shared by removeByKey() and removeCurrent().
     */
    private void removeEntry(Entry e)
    {
        if(e == current)
        {
            // never leave the cursor on an unlinked entry: step back, so that
            // next() continues with the successor of the removed entry
            current = (size > 1) ? previousEntry(e) : null;
        }
        removeEntryFromList(e);
        keys.remove(e.key);
        size--;
    }


    /**
     * Appends a value to the end of the list and registers it under the given
     * key. Existing keys are never overwritten.
     *
     * @param key    the key; must not be null
     * @param value  the element to store
     * @return true if the element was added, false if the key is null or
     *         already present (the list is unchanged in that case)
     */
    public boolean put(Object key, Object value)
    {
        if(key == null || keys.containsKey(key))
        {
            return false;
        }
        Entry e = addEntryBefore(key, value, header);  // add it as the last element
        keys.put(key, e);                              // link key to entry
        size++;
        return true;
    }


    /**
     * Because the traversal is circular, there is a next element whenever the
     * list is not empty.
     *
     * @return true if the list contains at least one element
     */
    public boolean hasNext()
    {
        return (size > 0);
    }


    /**
     * returns the entry after e, skipping the header. A null e means "start
     * at the beginning". Returns null if the list is empty.
     */
    private Entry nextEntry(Entry e)
    {
        if(size > 1)
        {
            if(e == null)
            {
                e = header;
            }
            Entry next = e.next;
            if(next == header)
            {
                next = next.next;
            }
            return next;
        }
        else if(size == 1)
        {
            return header.next;
        }
        else
        {
            return null;
        }
    }


    /**
     * returns the entry before e, skipping the header. A null e means "start
     * at the end". Returns null if the list is empty.
     */
    private Entry previousEntry(Entry e)
    {
        if(size > 1)
        {
            if(e == null)
            {
                e = header;
            }
            Entry previous = e.previous;
            if(previous == header)
            {
                previous = previous.previous;
            }
            return previous;
        }
        else if(size == 1)
        {
            return header.previous;
        }
        else
        {
            return null;
        }
    }


    /**
     * Advances the circular traversal by one step.
     *
     * @return the element at the new position, or null if the list is empty
     */
    public Object next()
    {
        current = nextEntry(current);
        if(current != null)
        {
            return current.element;
        }
        else
        {
            return null;
        }
    }


    /**
     * Removes the element that was returned by the last call to next(). The
     * following next() returns its successor. Does nothing if there is no
     * current element (traversal not started, or list empty).
     */
    public void removeCurrent()
    {
        if(current != null)
        {
            removeEntry(current);
        }
    }


    /**
     * Looks up an element by its key.
     *
     * @param key  the key the element was stored under
     * @return the element, or null if there is no entry for this key
     */
    public Object get(Object key)
    {
        Entry e = ((Entry)keys.get(key));
        if(e != null)
        {
            return e.element;
        }
        else
        {
            return null;
        }
    }


    /**
     * testing. Prints the expected value in brackets in front of each result.
     */
    public static void main(String[] args)
    {
        HashedCircularLinkedList h = new HashedCircularLinkedList(20, 0.75f);
        h.put("1", "a");
        h.put("2", "b");
        h.put("3", "c");
        String t;
        System.out.println("size [3]: " + h.size());
        t = (String)h.next();
        System.out.println("2nd element via get [b]: " + h.get("2"));
        System.out.println("next element [a]: " + t);
        t = (String)h.next();
        System.out.println("next element [b]: " + t);
        t = (String)h.next();
        System.out.println("next element [c]: " + t);
        t = (String)h.next();
        System.out.println("1st element after circular traversal [a]: " + t);
        h.removeByKey("1");
        System.out.println("1st element after remove [null]: " + h.get("1"));
        System.out.println("size after removal [2]: " + h.size());
        t = (String)h.next();
        System.out.println("next element [b]: " + t);
        t = (String)h.next();
        System.out.println("next element [c]: " + t);
        t = (String)h.next();
        System.out.println("next element [b]: " + t);
        h.removeCurrent();
        t = (String)h.next();
        System.out.println("next element after 1 removal [c]: " + t);
        t = (String)h.next();
        System.out.println("next element: [c]: " + t);
        h.removeByKey("3");
        System.out.println("size after 3 removals [0]: " + h.size());
        t = (String)h.next();
        System.out.println("next element [null]: " + t);
    }
}

View File

@ -0,0 +1,18 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c) <p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.util;
/**
 * Callback interface for the events reported by an
 * {@link ObservableInputStream}. In all callbacks <code>in</code> is the
 * reporting stream and <code>timeElapsed</code> is the time in milliseconds
 * since that stream was constructed.
 */
public interface InputStreamObserver
{
    /**
     * Called from the constructor of the stream.
     */
    void notifyOpened(ObservableInputStream in, long timeElapsed);

    /**
     * Called after the stream has been closed.
     */
    void notifyClosed(ObservableInputStream in, long timeElapsed);

    /**
     * Called when the byte count of the stream has passed the next reporting step.
     *
     * @param nrRead     size of the read operation that triggered the callback
     * @param totalRead  running byte total kept by the stream
     */
    void notifyRead(ObservableInputStream in, long timeElapsed, int nrRead, int totalRead);

    /**
     * Called when a read operation delivered no more data.
     *
     * @param totalRead  running byte total kept by the stream
     */
    void notifyFinished(ObservableInputStream in, long timeElapsed, int totalRead);
}

View File

@ -0,0 +1,19 @@
/*
*
*
*
*/
package de.lanlab.larm.util;
import java.io.*;
/**
 * Unfinished logger stub.
 * NOTE(review): the constructor ignores its argument and the output stream is
 * never opened, so instances of this class currently do nothing. Presumably
 * SimpleLogger is the class to use instead - confirm before relying on it.
 */
public class Logger
{
// never assigned anywhere in this class
private FileOutputStream out;
/**
 * Creates the logger. The file name is not used at the moment.
 *
 * @param fileName  name of the log file (currently ignored)
 */
public Logger(String fileName)
{
}
}

View File

@ -0,0 +1,101 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c) <p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.util;
import java.io.*;
/**
 * Input stream filter that reports what happens on the wrapped stream to an
 * {@link InputStreamObserver}: opening, closing, the end of the data and,
 * every time the byte count has passed another reporting step, the progress.
 * All reported times are milliseconds since this object was constructed.
 * <p>
 * Bytes passed over with skip() are not counted.
 */
public class ObservableInputStream extends FilterInputStream
{
    /** if false, read operations are neither counted nor reported */
    private boolean reporting = true;

    /** construction time of this stream (System.currentTimeMillis()) */
    private long startTime;

    /** number of bytes read so far while reporting was switched on */
    private int totalRead = 0;

    /** distance in bytes between two notifyRead() calls */
    private int step = 1;

    /** byte count that has to be exceeded for the next notifyRead() call */
    private int nextStep = 0;

    InputStreamObserver observer;

    /**
     * Wraps a stream and immediately calls notifyOpened() on the observer.
     *
     * @param in             the stream to read from
     * @param iso            the observer to report to; must not be null
     * @param reportingStep  number of bytes between two notifyRead() calls
     */
    public ObservableInputStream(InputStream in, InputStreamObserver iso, int reportingStep)
    {
        super(in);
        startTime = System.currentTimeMillis();
        observer = iso;
        observer.notifyOpened(this, System.currentTimeMillis() - startTime);
        nextStep = step = reportingStep;
    }

    /**
     * Closes the wrapped stream and then calls notifyClosed() on the observer.
     * The observer is not notified if closing fails.
     */
    public void close() throws IOException
    {
        super.close();
        observer.notifyClosed(this, System.currentTimeMillis() - startTime);
    }

    public void setReporting(boolean reporting)
    {
        this.reporting = reporting;
    }

    public boolean isReporting()
    {
        return reporting;
    }

    /**
     * Changes the distance between two notifyRead() calls. The threshold that
     * is already pending stays as it is; the new step is added from then on.
     */
    public void setReportingStep(int step)
    {
        this.step = step;
    }

    /**
     * Reads a single byte and reports it as a read of size 1, or as the end
     * of the data if the wrapped stream returned -1.
     */
    public int read() throws IOException
    {
        int readByte = super.read();
        if(reporting)
        {
            notifyObserver(readByte >= 0 ? 1 : -1);
        }
        return readByte;
    }

    /**
     * Same as <code>read(b, 0, b.length)</code>.
     */
    public int read(byte[] b) throws IOException
    {
        // Counting and reporting is done by the three-argument read().
        // FilterInputStream.read(byte[]) is itself implemented as a call to
        // read(b, 0, b.length) on this object, so calling super.read(b) and
        // notifying here as well would count every byte twice.
        return read(b, 0, b.length);
    }

    /**
     * does the bookkeeping for one read operation.
     *
     * @param nrRead  the result of the read operation: the number of bytes
     *                read, or a negative value if the end of the data was
     *                reached. 0 (a read with an empty buffer) is ignored
     */
    private void notifyObserver(int nrRead)
    {
        if(nrRead > 0)
        {
            totalRead += nrRead;
            if(totalRead > nextStep)
            {
                nextStep += step;
                observer.notifyRead(this, System.currentTimeMillis() - startTime, nrRead, totalRead);
            }
        }
        else if(nrRead < 0)
        {
            observer.notifyFinished(this, System.currentTimeMillis() - startTime, totalRead);
        }
    }

    /**
     * Reads into a part of the buffer and reports the result.
     */
    public int read(byte[] b, int offs, int size) throws IOException
    {
        int nrRead = super.read(b, offs, size);
        if(reporting)
        {
            notifyObserver(nrRead);
        }
        return nrRead;
    }
}

View File

@ -0,0 +1,9 @@
package de.lanlab.larm.util;
/**
 * Empty interface: it declares no methods or constants.
 * The original author marked it as "not used".
 */
public interface Observer
{
}

View File

@ -0,0 +1,15 @@
package de.lanlab.larm.util;
/**
 * Unchecked exception without message or cause (only the implicit no-argument
 * constructor exists).
 * NOTE(review): judging by the name it signals that something has exceeded its
 * capacity - check the throwing code for the exact meaning.
 * <p>
 * Title: LARM
 * Description:
 * Copyright: Copyright (c) 2001
 * Company: LMU-IP
 * @author Clemens Marschner
 * @version 1.0
 */
public class OverflowException extends RuntimeException
{
}

View File

@ -0,0 +1,20 @@
package de.lanlab.larm.util;
/**
* Title: LARM Lanlab Retrieval Machine
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
import java.util.Collection;
public interface Queue
{
public Object remove();
public void insert(Object o);
public void insertMultiple(Collection c);
public int size();
}

View File

@ -0,0 +1,285 @@
/*
* @(#)SimpleCharArrayReader.java 1.35 00/02/02
*
*/
package de.lanlab.larm.util;
import java.io.*;
/**
 * A <code>SimpleCharArrayReader</code> reads characters from a char array
 * that is handed over by the creator. An internal counter keeps track of the
 * next character to be supplied by the <code>read</code> methods.
 * <br>
 * In contrast to the original <code>CharArrayReader</code> this
 * version is not thread safe. The monitor on the read()-function caused programs
 * to slow down much, because this function is called for every character. This
 * class can thus only be used if only one thread is accessing the reader.
 * <p>
 * Closing the reader only sets a flag; reading from a closed reader is still
 * possible.
 *
 * @author    Clemens Marschner
 * @version   1.00
 * @see       java.io.CharArrayReader
 */
public
class SimpleCharArrayReader extends Reader
{

    /**
     * A flag that is set to true when this reader is closed.
     */
    private boolean isClosed = false;

    /**
     * The array of characters that was provided by the creator of the reader.
     * Elements <code>buf[0]</code> through <code>buf[count-1]</code> are the
     * only characters that can ever be read; element <code>buf[pos]</code> is
     * the next character to be read.
     */
    protected char buf[];

    /**
     * The index of the next character to read from the buffer.
     * This value should always be nonnegative
     * and not larger than the value of <code>count</code>.
     */
    protected int pos;

    /**
     * The currently marked position. It is the start position of the reader
     * (zero, or the offset given to the constructor) until another position
     * is marked with the <code>mark()</code> method. The current buffer
     * position is set to this point by the <code>reset()</code> method.
     */
    protected int mark = 0;

    /**
     * The index one greater than the last valid character in the buffer.
     * This value should always be nonnegative
     * and not larger than the length of <code>buf</code>.
     */
    protected int count;


    /**
     * Creates a <code>SimpleCharArrayReader</code> that reads the complete
     * array. The array is not copied. The initial value of <code>pos</code>
     * is <code>0</code> and the initial value of <code>count</code> is the
     * length of <code>buf</code>.
     *
     * @param buf   the input buffer.
     */
    public SimpleCharArrayReader(char buf[])
    {
        this.buf = buf;
        this.pos = 0;
        this.count = buf.length;
    }


    /**
     * Creates a <code>SimpleCharArrayReader</code> that reads a part of the
     * array. The array is not copied. The initial value of <code>pos</code>
     * and of the mark is <code>offset</code>; <code>count</code> is
     * <code>offset+length</code>, but never more than the length of
     * <code>buf</code>.
     *
     * @param buf      the input buffer.
     * @param offset   the offset in the buffer of the first character to read.
     * @param length   the maximum number of characters to read from the buffer.
     */
    public SimpleCharArrayReader(char buf[], int offset, int length)
    {
        this.buf = buf;
        this.pos = offset;
        this.count = Math.min(offset + length, buf.length);
        this.mark = offset;
    }


    /**
     * Reads the next character. The character is returned as an
     * <code>int</code> in the range <code>0</code> to <code>65535</code>,
     * as the contract of <code>Reader.read()</code> demands.
     *
     * @return   the next character, or <code>-1</code> if the end of the
     *           buffer has been reached.
     */
    public int read()
    {
        // no masking here: a char widens to 0..65535 by itself, and cutting
        // it down to 8 bits would destroy every character above U+00FF
        return (pos < count) ? buf[pos++] : -1;
    }


    /**
     * Reads up to <code>len</code> characters into an array.
     * If <code>pos</code> equals <code>count</code>,
     * then <code>-1</code> is returned to indicate the end of the data.
     * Otherwise, the number <code>k</code> of characters read is equal to the
     * smaller of <code>len</code> and <code>count-pos</code>.
     * If <code>k</code> is positive, then characters
     * <code>buf[pos]</code> through <code>buf[pos+k-1]</code>
     * are copied into <code>b[off]</code>  through
     * <code>b[off+k-1]</code>. The value <code>k</code> is added into
     * <code>pos</code> and <code>k</code> is returned.
     *
     * @param b     the buffer into which the data is read.
     * @param off   the start offset of the data.
     * @param len   the maximum number of characters read.
     * @return      the number of characters read into the buffer, or
     *              <code>-1</code> if there is no more data.
     * @throws NullPointerException       if <code>b</code> is null
     * @throws IndexOutOfBoundsException  if <code>off</code> and
     *              <code>len</code> don't describe a range inside <code>b</code>
     */
    public int read(char b[], int off, int len)
    {
        if (b == null)
        {
            throw new NullPointerException();
        }
        else if ((off < 0) || (off > b.length) || (len < 0) ||
                 ((off + len) > b.length) || ((off + len) < 0))
        {
            throw new IndexOutOfBoundsException();
        }
        if (pos >= count)
        {
            return -1;
        }
        if (pos + len > count)
        {
            len = count - pos;
        }
        if (len <= 0)
        {
            return 0;
        }
        System.arraycopy(buf, pos, b, off, len);
        pos += len;
        return len;
    }


    /**
     * Skips <code>n</code> characters. Fewer characters are skipped if the
     * end of the buffer is reached: the actual number <code>k</code> is the
     * smaller of <code>n</code> and <code>count-pos</code>.
     * The value <code>k</code> is added into <code>pos</code>
     * and <code>k</code> is returned. Negative values skip nothing.
     *
     * @param n   the number of characters to be skipped.
     * @return    the actual number of characters skipped.
     */
    public long skip(long n)
    {
        // compare against the remaining length instead of computing pos + n,
        // which overflows for very large n and would then move pos backwards
        long remaining = count - pos;
        if (n > remaining)
        {
            n = remaining;
        }
        if (n < 0)
        {
            return 0;
        }
        pos += n;
        return n;
    }


    /**
     * Returns the number of characters that are left to be read, which is
     * <code>count&nbsp;- pos</code>.
     *
     * @return   the number of characters remaining in the buffer.
     */
    public int available()
    {
        return count - pos;
    }


    /**
     * Tests if SimpleCharArrayReader supports mark/reset.
     *
     * @return   always true
     */
    public boolean markSupported()
    {
        return true;
    }


    /**
     * Marks the current position, so that a later call of
     * <code>reset()</code> returns to it.
     *
     * @param readAheadLimit   ignored; the complete buffer stays available
     */
    public void mark(int readAheadLimit)
    {
        mark = pos;
    }


    /**
     * Resets the reader to the marked position. The marked position is the
     * start position of the reader unless another position was marked.
     */
    public void reset()
    {
        pos = mark;
    }


    /**
     * Marks this reader as closed. There are no resources to release, and
     * the flag is not checked by the other methods.
     */
    public void close() throws IOException
    {
        isClosed = true;
    }


    /** Check to make sure that the reader has not been closed */
    private void ensureOpen()
    {
        /* This method does nothing for now.  Once we add throws clauses
         * to the I/O methods in this class, it will throw an IOException
         * if the reader has been closed.
         */
    }
}

View File

@ -0,0 +1,112 @@
package de.lanlab.larm.util;
/**
* Title: LARM Lanlab Retrieval Machine
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
import java.io.*;
import java.util.*;
import java.text.*;
/**
 * this class is only used for SPEED. Its log functions are not thread safe by
 * default; use the logThreadSafe() variants if several threads share a logger.
 * It writes through a BufferedWriter to the file "logs/&lt;name&gt;.log".
 * It registers with the SimpleLoggerManager, which can be used to flush
 * several loggers at once
 * @todo: including the date slows down a lot
 *
 */
public class SimpleLogger
{
    private SimpleDateFormat formatter = new SimpleDateFormat ("HH:mm:ss:SSSS");

    /** the log file; stays null if the file could not be opened */
    Writer logFile;

    /** reused for assembling each log line */
    StringBuffer buffer = new StringBuffer(1000);

    /** reference point for the elapsed time written in front of each line */
    long startTime = System.currentTimeMillis();

    /** if true, each line starts with the time of day and the elapsed time */
    boolean includeDate;

    /** if true, the file is flushed after every log call */
    boolean flushAtOnce = false;

    public void setStartTime(long startTime)
    {
        this.startTime = startTime;
    }

    /**
     * same as log(String), but synchronized on this logger
     */
    public synchronized void logThreadSafe(String text)
    {
        log(text);
    }

    /**
     * same as log(Throwable), but synchronized on this logger
     */
    public synchronized void logThreadSafe(Throwable t)
    {
        log(t);
    }

    /**
     * Writes one line to the log file. If the logger was created with
     * includeDate, the line starts with the time of day and the number of
     * milliseconds since the start time. A failing write is only reported on
     * System.out.
     *
     * @param text  the text to log; the line feed is added by this method
     */
    public void log(String text)
    {
        try
        {
            buffer.setLength(0);
            if(includeDate)
            {
                buffer.append(formatter.format(new Date())).append(": ").append(System.currentTimeMillis()-startTime).append(" ms: ");
            }
            buffer.append(text).append("\n");
            logFile.write(buffer.toString());
            if(flushAtOnce)
            {
                logFile.flush();
            }
        }
        catch(IOException e)
        {
            System.out.println("Couldn't write to logfile");
        }
    }

    /**
     * Writes the stack trace of a throwable to the log file.
     *
     * @param t  the throwable to log
     */
    public void log(Throwable t)
    {
        PrintWriter stackTraceWriter = new PrintWriter(logFile);
        t.printStackTrace(stackTraceWriter);
        if(flushAtOnce)
        {
            // same guarantee as in log(String): nothing stays in the buffer
            stackTraceWriter.flush();
        }
    }

    public void setFlushAtOnce(boolean flush)
    {
        this.flushAtOnce = flush;
    }

    /**
     * Creates a logger that includes the date in each line.
     *
     * @param name  name of the log file, without directory and extension
     */
    public SimpleLogger(String name)
    {
        init(name, true);
    }

    /**
     * @param name         name of the log file, without directory and extension
     * @param includeDate  whether each line is to start with the time of day
     *                     and the elapsed time
     */
    public SimpleLogger(String name, boolean includeDate)
    {
        init(name, includeDate);
    }

    public void flush() throws IOException
    {
        logFile.flush();
    }

    /**
     * opens the log file and registers this logger with the manager. If the
     * file can't be opened, the problem is printed and the logger stays
     * without a file.
     */
    private void init(String name, boolean includeDate)
    {
        // the parameter hides the field, so it has to be stored explicitly;
        // otherwise the setting chosen in the constructor is lost
        this.includeDate = includeDate;
        try
        {
           logFile = new BufferedWriter(new FileWriter("logs/" + name + ".log"));
           SimpleLoggerManager.getInstance().register(this);
        }
        catch(IOException e)
        {
           System.out.println("IOException while creating logfile " + name + ":");
           e.printStackTrace();
        }
    }
}

View File

@ -0,0 +1,65 @@
package de.lanlab.larm.util;
/**
* Title: LARM Lanlab Retrieval Machine
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
import java.util.*;
import java.io.IOException;
/**
 * Singleton that keeps a list of all registered SimpleLoggers, so that they
 * can be flushed with a single call. The instance is created on the first
 * call of getInstance(); neither that nor the list is synchronized.
 */
public class SimpleLoggerManager
{
    /** the one and only manager, created on demand */
    static SimpleLoggerManager instance = null;

    /** all loggers that have registered so far */
    ArrayList logs;

    private SimpleLoggerManager()
    {
        logs = new ArrayList();
    }

    /**
     * Adds a logger to the list of loggers handled by flush().
     *
     * @param logger  the logger to add
     */
    public void register(SimpleLogger logger)
    {
        logs.add(logger);
    }

    /**
     * Flushes all registered loggers. A logger that fails doesn't stop the
     * others from being flushed.
     *
     * @throws IOException  if at least one logger could not be flushed; it is
     *                      the exception of the last logger that failed
     */
    public void flush() throws IOException
    {
        IOException lastFailure = null;
        for(Iterator loggers = logs.iterator(); loggers.hasNext(); )
        {
            SimpleLogger logger = (SimpleLogger)loggers.next();
            try
            {
                logger.flush();
            }
            catch(IOException e)
            {
                // remember it and go on with the remaining loggers
                lastFailure = e;
            }
        }
        if(lastFailure != null)
        {
            throw lastFailure;
        }
    }

    /**
     * @return the manager instance, which is created if necessary
     */
    public static SimpleLoggerManager getInstance()
    {
        if(instance != null)
        {
            return instance;
        }
        instance = new SimpleLoggerManager();
        return instance;
    }
}

View File

@ -0,0 +1,21 @@
/**
* Title: LARM Lanlab Retrieval Machine<p>
* Description: <p>
* Copyright: Copyright (c) <p>
* Company: <p>
* @author
* @version 1.0
*/
package de.lanlab.larm.util;
import java.util.Observable;
/**
 * Observable whose setChanged() method is public.
 * In java.util.Observable the method is protected; this subclass widens the
 * access so that other objects can mark the observable as changed.
 */
public class SimpleObservable extends Observable
{
/**
 * Marks this object as changed; simply calls the inherited implementation.
 */
public void setChanged()
{
super.setChanged();
}
}

View File

@ -0,0 +1,91 @@
package de.lanlab.larm.util;
import java.io.Serializable;
/**
* Title: LARM Lanlab Retrieval Machine
* Description:
* Copyright: Copyright (c)
* Company:
* @author
* @version 1.0
*/
/**
 * State information: a state string, the time at which the state was
 * entered and an optional info object.
 * Setting the state and cloning are synchronized, the get methods are not.
 * So if the three values have to fit together, clone the state object first
 * and read the clone. If you use a state object in a class, always return a
 * clone:
 * <pre>public class MyClass {
 *     State state = new State("Running");
 *     public State getState() { return state.cloneState() }</pre>
 *
 * note on serialization: if you deserialize a state, the state string will be
 * newly created. That means you then have to compare the states via equals()
 * and not ==
 */
public class State implements Cloneable, Serializable
{
    /** the current state */
    private String state;

    /** System.currentTimeMillis() at the moment the state was set */
    private long stateSince;

    /** additional information that was given together with the state; may be null */
    private Object info;

    /**
     * Creates a state object that is in the given state since now.
     *
     * @param state  the initial state
     */
    public State(String state)
    {
        setState(state);
    }

    /** copy constructor without info object */
    private State(String state, long stateSince)
    {
        this(state, stateSince, null);
    }

    /** copy constructor; takes the values as they are */
    private State(String state, long stateSince, Object info)
    {
        this.state = state;
        this.stateSince = stateSince;
        this.info = info;
    }

    /**
     * Changes the state, restarts the clock and removes the info object.
     *
     * @param state  the new state
     */
    public void setState(String state)
    {
        setState(state, null);
    }

    /**
     * Changes the state and the info object and restarts the clock.
     *
     * @param state  the new state
     * @param info   additional information; may be null
     */
    public synchronized void setState(String state, Object info)
    {
        this.stateSince = System.currentTimeMillis();
        this.state = state;
        this.info = info;
    }

    public String getState()
    {
        return state;
    }

    /**
     * @return the time (System.currentTimeMillis()) at which the state was set
     */
    public long getStateSince()
    {
        return stateSince;
    }

    public Object getInfo()
    {
        return info;
    }

    /**
     * @return a new State with the same state, time and info object. The
     *         info object itself is not copied
     */
    public synchronized Object clone()
    {
        return new State(state, stateSince, info);
    }

    /**
     * same as clone(), but saves the caller the cast
     */
    public State cloneState()
    {
        return (State)clone();
    }
}

View File

@ -0,0 +1,60 @@
package de.lanlab.larm.util;
/**
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
* Company:
*
* @author
* @version 1.0
*/
import java.net.URL;
/**
 * Helper methods for java.net.URL objects.
 *
 * @author    Administrator
 * @created   27. Januar 2002
 */
public class URLUtils
{
    /**
     * Converts a URL into a string like URL.toExternalForm() does, with two
     * differences: the ref part (the text after '#') is left out, and a URL
     * without a file part gets "/" as its file. The string buffer is created
     * with the final size, so it never has to grow.
     * Only meaningful if the default URLStreamHandler is used (as is the case
     * with http, https, or shttp).
     *
     * @param u  the URL to be converted
     * @return   protocol, ":", "//" plus authority (if there is one) and the
     *           file part or "/"
     */
    public static String toExternalFormNoRef(URL u)
    {
        String protocol = u.getProtocol();
        String authority = u.getAuthority();
        String file = u.getFile();

        // 3 extra characters for ":" and "//"; a missing file becomes "/"
        int capacity = 3;
        if (protocol != null)
        {
            capacity += protocol.length();
        }
        if (authority != null)
        {
            capacity += authority.length();
        }
        capacity += (file == null) ? 1 : file.length();

        StringBuffer result = new StringBuffer(capacity);
        result.append(protocol);
        result.append(":");
        boolean hasAuthority = (authority != null) && (authority.length() > 0);
        if (hasAuthority)
        {
            result.append("//");
            result.append(authority);
        }
        boolean hasFile = (file != null) && (file.length() > 0);
        result.append(hasFile ? file : "/");
        return result.toString();
    }
}

View File

@ -0,0 +1,15 @@
package de.lanlab.larm.util;
/**
 * Unchecked exception without message or cause (only the implicit no-argument
 * constructor exists).
 * NOTE(review): judging by the name it signals that something was taken from
 * an empty source - check the throwing code for the exact meaning.
 * <p>
 * Title: LARM
 * Description:
 * Copyright: Copyright (c) 2001
 * Company: LMU-IP
 * @author Clemens Marschner
 * @version 1.0
 */
public class UnderflowException extends RuntimeException
{
}

View File

@ -0,0 +1,94 @@
package de.lanlab.larm.util;
import java.net.URL;
import de.lanlab.larm.fetcher.URLMessage;
/**
 * a web document of whatever type. generated by a fetcher task.
 * Adds the result of a fetch (MIME type, content, result code, size, title)
 * to the URL and referer that are kept by URLMessage.
 */
public class WebDocument extends URLMessage
{
    protected String mimeType;
    protected byte[] document;
    protected int resultCode;
    protected int size;
    protected String title;

    /**
     * @param url         the URL of the document
     * @param mimeType    the MIME type of the document
     * @param document    the content of the document
     * @param resultCode  the result code of the fetch
     * @param referer     the URL of the page that referred to this document
     * @param size        the size of the document
     * @param title       the title of the document
     */
    public WebDocument(URL url, String mimeType, byte[] document, int resultCode, URL referer, int size, String title)
    {
        super(url, referer, false);
        this.url = url;
        this.title = title;
        this.size = size;
        this.resultCode = resultCode;
        this.document = document;
        this.mimeType = mimeType;
    }

    public String getTitle()
    {
        return title;
    }

    public URL getUrl()
    {
        return url;
    }

    public void setUrl(URL url)
    {
        this.url = url;
    }

    public int getSize()
    {
        return this.size;
    }

    public void setSize(int size)
    {
        this.size = size;
    }

    /**
     * @return the content of the document; the array is not copied
     */
    public byte[] getDocumentBytes()
    {
        return this.document;
    }

    public void setDocument(byte[] document)
    {
        this.document = document;
    }

    public int getResultCode()
    {
        return resultCode;
    }

    public void setResultCode(int resultCode)
    {
        this.resultCode = resultCode;
    }

    public String getMimeType()
    {
        return mimeType;
    }

    public void setMimeType(String mimeType)
    {
        this.mimeType = mimeType;
    }

    /**
     * Extends the info string of the superclass by result code, MIME type,
     * size and title, separated by tabs. The title is put in quotation marks;
     * quotation marks inside the title are replaced by the character 0xff,
     * line breaks by blanks. The title must not be null.
     *
     * @return the tab separated info line
     */
    public String getInfo()
    {
        StringBuffer line = new StringBuffer();
        line.append(super.getInfo()).append("\t");
        line.append(this.resultCode).append("\t");
        line.append(this.mimeType).append("\t");
        line.append(this.size).append("\t");
        String quotedTitle = this.title.replace('\"', (char)0xff ).replace('\n',' ').replace('\r',' ');
        line.append("\"").append(quotedTitle).append("\"");
        return line.toString();
    }
}

View File

@ -0,0 +1,294 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.misc;
import java.io.*;
import java.net.*;
/**
* This class is a container for algorithms working on byte arrays - some
* of the algorithms are analogous to those in java.lang.String.
* @author Anders Kristensen
*/
public class ByteArray {
/** Returns copy of characters in s as a new byte array. */
public static final byte[] getBytes(String s) {
int len = s.length();
byte b[] = new byte[len];
s.getBytes(0, len, b, 0);
return b;
}
/** Returns contents of file as byte array. */
public static byte[] loadFromFile(String filename) throws IOException {
return loadFromFile(new File(filename));
}
/** Returns contents of file <i>file</i> as byte array. */
public static byte[] loadFromFile(File file) throws IOException {
int n, nread = 0, len = (int) file.length();
FileInputStream fin = new FileInputStream(file);
byte[] content = new byte[len];
while (nread < len) {
if ((n = fin.read(content, nread, len - nread)) == -1)
throw new IOException("Error loading Compound from file");
nread += n;
}
return content;
}
/**
* Reads n bytes from the specified input stream. It will return
* fewer bytes if fewer bytes are available on the stream.
* Hence the application should check the resulting arrays length.
*/
public static byte[] readn(InputStream in, int n) throws IOException {
byte[] buf = new byte[n];
int ntotal = 0;
int nread;
while (ntotal < n) {
nread = in.read(buf, ntotal, n - ntotal);
if (nread < 0) {
// we got less than expected - return what we got
byte[] newbuf = new byte[ntotal];
System.arraycopy(buf, 0, newbuf, 0, ntotal);
return newbuf;
}
ntotal += nread;
}
return buf;
}
/**
* Return contents of a WWW resource identified by a URL.
* @param url the resource to retrieve
* @return the resource contents as a byte array
*/
public static byte[] getContent(URL url) throws IOException {
URLConnection conn = url.openConnection();
InputStream in = conn.getInputStream();
int length;
/*
* N.B. URLConnection.getContentLength() is buggy for "http" resources
* (at least in JDK1.0.2) and won't work for "file" URLs either.
*/
length = length = conn.getContentLength();
if (length == -1)
length = conn.getHeaderFieldInt("Content-Length", -1);
if (length == -1)
return readAll(in);
return readn(in, length);
}
/**
* Read all input from an InputStream and return as a byte array.
* This method will not return before the end of the stream is reached.
* @return contents of the stream
*/
public static byte[] readAll(InputStream in) throws IOException {
byte[] buf = new byte[1024];
int nread, ntotal = 0;
while ((nread = in.read(buf, ntotal, buf.length - ntotal)) > -1) {
ntotal += nread;
if (ntotal == buf.length) {
// extend buffer
byte[] newbuf = new byte[buf.length * 2];
System.arraycopy(buf, 0, newbuf, 0, buf.length);
buf = newbuf;
}
}
if (ntotal < buf.length) {
// we cannot have excess space
byte[] newbuf = new byte[ntotal];
System.arraycopy(buf, 0, newbuf, 0, ntotal);
buf = newbuf;
}
return buf;
}
/**
* Copies data from the specified input stream to the output stream
* until end of file is met.
* @return the total number of bytes written to the output stream
*/
public static int cpybytes(InputStream in, OutputStream out)
throws IOException
{
byte[] buf = new byte[1024];
int n, ntotal = 0;
while ((n = in.read(buf)) > -1) {
out.write(buf, 0, n);
ntotal += n;
}
return ntotal;
}
/**
* Copies data from the specified input stream to the output stream
* until <em>n</em> bytes has been copied or end of file is met.
* @return the total number of bytes written to the output stream
*/
public static int cpybytes(InputStream in, OutputStream out, int n)
throws IOException
{
int sz = n < 1024 ? n : 1024;
byte[] buf = new byte[sz];
int chunk, nread, ntotal = 0;
chunk = sz;
while (ntotal < n && (nread = in.read(buf, 0, chunk)) > -1) {
out.write(buf, 0, nread);
ntotal += nread;
chunk = (n - ntotal < sz) ? n - ntotal : sz;
}
return ntotal;
}
/**
* Returns the index within this String of the first occurrence of the
* specified character or -1 if the character is not found.
* @params buf the buffer to search
* @params ch the character to search for
*/
public static final int indexOf(byte[] buf,
int ch) {
return indexOf(buf, ch, 0, buf.length);
}
/**
* Returns the index within this String of the first occurrence of the
* specified character, starting the search at fromIndex. This method
* returns -1 if the character is not found.
* @params buf the buffer to search
* @params ch the character to search for
* @params fromIndex the index to start the search from
* @params toIndex the highest possible index returned plus 1
*/
public static final int indexOf(byte[] buf,
int ch,
int fromIndex,
int toIndex) {
int i;
for (i = fromIndex; i < toIndex && buf[i] != ch; i++)
; // do nothing
if (i < toIndex)
return i;
else
return -1;
}
/**
* Returns the index of the first occurrence of s in the specified
* buffer or -1 if this is not found.
*/
public static final int indexOf(byte[] buf, String s) {
return indexOf(buf, s, 0);
}
/**
 * Returns the index of the first occurrence of <code>s</code> in the
 * specified buffer. The search starts from <code>fromIndex</code>.
 * This method returns -1 if <code>s</code> is not found.
 * <p>Bytes are compared one-to-one with the chars of <code>s</code>
 * (both widened to int); no character encoding is applied.
 * @param buf the buffer to search
 * @param s the string to look for
 * @param fromIndex the index to start the search from
 * @return the index of the first match, or -1 if there is none
 */
public static final int indexOf(byte[] buf, String s, int fromIndex) {
    final int bufLen = buf.length;
    final int patLen = s.length();
    int start = fromIndex;
    // Only try positions where the whole of s still fits in buf.
    while (start + patLen <= bufLen) {
        int matched = 0;
        while (matched < patLen && buf[start + matched] == s.charAt(matched)) {
            matched++;
        }
        if (matched == patLen) {
            return start;
        }
        start++;
    }
    return -1;
}
/*
// for testing indexOf(byte[], String, int)
public static void main(String[] args) {
byte[] buf = getBytes(args[0]);
System.out.println("IndexOf(arg0, arg1, 0) = " + indexOf(buf, args[1], 3));
}
*/
/**
 * Tells whether <code>ch</code> is one of the four characters this
 * class treats as whitespace: space, tab, line feed or carriage return.
 * @param ch the value to test
 * @return true for ' ', '\t', '\n' and '\r'; false for anything else
 */
public static final boolean isSpace(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
/**
 * Skips over whitespace (as defined by <code>isSpace</code>) in
 * <code>buf</code>, starting at <code>fromIndex</code>.
 * @param buf the buffer to scan
 * @param fromIndex the index to start scanning from
 * @param toIndex the index at which scanning stops (exclusive)
 * @return the index of the first non-whitespace byte at or after
 *         <code>fromIndex</code>, or the position where scanning
 *         stopped if every byte before <code>toIndex</code> is
 *         whitespace
 */
public static final int skipSpaces(byte[] buf, int fromIndex, int toIndex) {
    int pos = fromIndex;
    while (pos < toIndex && isSpace(buf[pos])) {
        pos++;
    }
    return pos;
}
/**
 * Find byte pattern ptrn in the region of buf that starts at
 * <code>off</code> and is <code>len</code> bytes long.
 * <p>Besides a complete match, this also reports a <em>partial</em>
 * match that runs into the end of the region: if the last bytes of
 * the region equal the first bytes of ptrn, the index where that
 * partial match starts is returned.
 * NOTE(review): presumably this lets a caller fetch more data and
 * retry -- confirm against the callers.
 * <p>ptrn must not be empty; its first byte is read unconditionally.
 * @param buf the buffer to search
 * @param off index of the first byte of the region to search
 * @param len number of bytes in the region
 * @param ptrn the pattern to look for
 * @return index of the first complete occurrence of ptrn, or of a
 *         partial occurrence cut off by the end of the region;
 *         -1 if there is neither
 */
public static final int findBytes(byte buf[],
                                  int off,
                                  int len,
                                  byte ptrn[]) {
    final int end = off + len;
    final int patLen = ptrn.length;
    // Never used below, but the read makes an empty pattern fail right
    // here with ArrayIndexOutOfBoundsException; kept for that reason.
    byte first = ptrn[0];

    int start = off;  // candidate position of the match in buf
    while (start < end) {
        int matched = 0;  // number of pattern bytes matched at start
        while (start + matched < end
               && matched < patLen
               && buf[start + matched] == ptrn[matched]) {
            matched++;
        }
        // Either the whole pattern matched, or we ran out of buffer
        // while everything compared so far was still matching.
        if (start + matched == end || matched == patLen) {
            return start;
        }
        // Mismatch: an overlapping match may begin one byte later.
        start++;
    }
    return -1;
}
/*
// for testing findBytes(byte[], int, int, byte[])
public static void main(String args[]) {
if (args.length < 4) {
System.err.println("Usage: s1 off len s2");
System.exit(1);
}
byte b1[] = new byte[args[0].length()];
byte b2[] = new byte[args[3].length()];
args[0].getBytes(0, args[0].length(), b1, 0);
args[3].getBytes(0, args[3].length(), b2, 0);
int off = Integer.parseInt(args[1]);
int len = Integer.parseInt(args[2]);
System.out.println("Index = " + findBytes(b1, off, len, b2));
}
*/
}

View File

@ -0,0 +1,20 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A named attribute. The value is exposed as a {@link Node} rather
 * than as a plain string, and the attribute carries a boolean
 * "specified" flag.
 * NOTE(review): the package name suggests this mirrors an early W3C
 * DOM draft -- confirm which revision before relying on its semantics.
 */
public interface Attribute {
    /** @return the name of this attribute */
    String getName();

    /** @return the node holding this attribute's value */
    Node getValue();

    /** @param arg the node to use as this attribute's value */
    void setValue(Node arg);

    /** @return the current state of the "specified" flag */
    boolean getSpecified();

    /** @param arg the new state of the "specified" flag */
    void setSpecified(boolean arg);

    /** @return a string form of this attribute; the format is up to the implementation */
    String toString();
}

View File

@ -0,0 +1,16 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A collection of {@link Attribute} objects that can be addressed both
 * by attribute name and by integer position.
 */
public interface AttributeList {
    /**
     * @param attrName the name of the attribute to look up
     * @return the attribute with that name; what is returned when there
     *         is no such attribute is up to the implementation
     */
    Attribute getAttribute(String attrName);

    /**
     * @param attr the attribute to store
     * @return an attribute; NOTE(review): presumably the one that was
     *         replaced, if any -- confirm against the implementation
     */
    Attribute setAttribute(Attribute attr);

    /**
     * @param attrName the name of the attribute to remove
     * @return an attribute; NOTE(review): presumably the removed one --
     *         confirm against the implementation
     */
    Attribute remove(String attrName);

    /**
     * @param index position of the wanted attribute
     * @return the attribute at that position
     */
    Attribute item(int index);

    /** @return the number of attributes in this list */
    int getLength();
}

View File

@ -0,0 +1,13 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * Represents the content of comments: &lt;!-- ... --&gt;
 * A comment is a {@link Node} whose text can be read and replaced.
 */
public interface Comment extends Node {
    /** @return the text of this comment */
    String getData();

    /** @param arg the new text of this comment */
    void setData(String arg);
}

View File

@ -0,0 +1,13 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * Entry point of the DOM API: hands out {@link Document} objects and
 * answers feature queries.
 */
public interface DOM {
    /**
     * @param type the kind of document wanted; the accepted values are
     *        not defined by this interface
     * @return a document of the requested type
     */
    Document createDocument(String type);

    /**
     * @param feature the name of the feature being asked about
     * @return whether this implementation reports having the feature
     */
    boolean hasFeature(String feature);
}

View File

@ -0,0 +1,28 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A whole document. Extends {@link DocumentFragment} with accessors
 * for the document type, the document element and the document
 * context, plus <code>create*</code> methods for the other node kinds.
 * What ownership, if any, the document keeps over objects returned by
 * the <code>create*</code> methods is not visible from this interface.
 */
public interface Document extends DocumentFragment {
    /** @return the node describing the document type */
    Node getDocumentType();

    /** @param arg the node describing the document type */
    void setDocumentType(Node arg);

    /** @return the document element */
    Element getDocumentElement();

    /** @param arg the element to use as document element */
    void setDocumentElement(Element arg);

    /** @return the context information attached to this document */
    DocumentContext getContextInfo();

    /** @param arg the context information to attach to this document */
    void setContextInfo(DocumentContext arg);

    /** @return a document context object */
    DocumentContext createDocumentContext();

    /**
     * @param tagName the tag name of the element
     * @param attributes the attributes of the element
     * @return an element with the given tag name and attributes
     */
    Element createElement(String tagName, AttributeList attributes);

    /**
     * @param data the text content
     * @return a text node for the given data
     */
    Text createTextNode(String data);

    /**
     * @param data the comment content
     * @return a comment node for the given data
     */
    Comment createComment(String data);

    /**
     * @param name the name of the processing instruction
     * @param data the data of the processing instruction
     * @return a processing instruction node
     */
    PI createPI(String name, String data);

    /**
     * @param name the attribute name
     * @param value the node holding the attribute value
     * @return an attribute with the given name and value
     */
    Attribute createAttribute(String name, Node value);

    /** @return an attribute list */
    AttributeList createAttributeList();

    /**
     * NOTE(review): takes no tag name despite its name, so which
     * elements the iterator covers cannot be told from here -- confirm
     * against the implementation.
     * @return an iterator over elements
     */
    NodeIterator getElementsByTagName();
}

View File

@ -0,0 +1,14 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * Context information attached to a {@link Document}. As declared
 * here it only holds a reference to the document itself.
 */
public interface DocumentContext {
    /** @return the document this context refers to */
    Document getDocument();

    /** @param arg the document this context refers to */
    void setDocument(Document arg);
}

View File

@ -0,0 +1,13 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A {@link Node} that records a "master" {@link Document}.
 */
public interface DocumentFragment extends Node {
    /** @return the master document of this fragment */
    Document getMasterDoc();

    /** @param arg the master document of this fragment */
    void setMasterDoc(Document arg);
}

View File

@ -0,0 +1,16 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A {@link Node} that has a tag name and a list of attributes.
 */
public interface Element extends Node {
    /** @return the tag name of this element */
    String getTagName();

    /** @return the attributes of this element */
    AttributeList attributes();

    /** @param newAttr the attribute to set on this element */
    void setAttribute(Attribute newAttr);

    /**
     * Normalizes this element.
     * NOTE(review): what "normalize" covers is not defined here; in
     * the W3C DOM it merges adjacent text nodes -- confirm against the
     * implementation.
     */
    void normalize();

    /**
     * NOTE(review): takes no tag name despite its name, so which
     * elements the iterator covers cannot be told from here -- confirm
     * against the implementation.
     * @return an iterator over elements
     */
    NodeIterator getElementsByTagName();
}

View File

@ -0,0 +1,38 @@
# This Makefile generated by hplb.util.jmkmf
# Java package is org.w3c.dom
# NOTE(review): the Java sources of this package declare
# "package hplb.org.w3c.dom", while JPACKAGE below has no "hplb."
# prefix -- confirm that the doc target still finds the package.
.SUFFIXES: .java .class .jj
JPACKAGE = org.w3c.dom
JAVA = java
JAVAC = javac
JAVACC = java COM.sun.labs.javacc.Main
JFLAGS =
# One .class file per interface in this package; "all" builds them.
OBJS = \
Attribute.class \
AttributeList.class \
Comment.class \
DOM.class \
Document.class \
DocumentContext.class \
DocumentFragment.class \
Element.class \
Node.class \
NodeIterator.class \
PI.class \
Text.class \
TreeIterator.class
JAVADOCFLAGS = -d ../../../doc/api -author -noindex -notree
# Default target: compile every class listed in OBJS.
all: $(OBJS)
# Generate API documentation for JPACKAGE with the flags above.
doc:
javadoc $(JAVADOCFLAGS) $(JPACKAGE)
# Suffix rule: run JavaCC on a .jj grammar to produce a .java file.
# NOTE(review): GNU make does not treat a rule that lists
# prerequisites as a suffix rule -- confirm this works with the make
# in use.
.jj.java: $*.jj
$(JAVACC) $<
# Suffix rule: compile a .java file to a .class file.
.java.class: $*.java
$(JAVAC) $(JFLAGS) $<
# Remove compiled classes and editor backup files.
clean:
rm -f *.class *~

View File

@ -0,0 +1,29 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * Base type of everything in the document tree. A node reports its
 * kind as one of the integer constants below and gives access to its
 * parent, its siblings and its children.
 */
public interface Node {
    // NodeType: the values returned by getNodeType().
    int DOCUMENT = 1;
    int ELEMENT = 2;
    int ATTRIBUTE = 3;
    int PI = 4;
    int COMMENT = 5;
    int TEXT = 6;

    /** @return the kind of this node; presumably one of the constants above */
    int getNodeType();

    /** @return the parent of this node */
    Node getParentNode();

    /** @return an iterator over the children of this node */
    NodeIterator getChildNodes();

    /** @return whether this node has any children */
    boolean hasChildNodes();

    /** @return the first child of this node */
    Node getFirstChild();

    /** @return the sibling immediately before this node */
    Node getPreviousSibling();

    /** @return the sibling immediately after this node */
    Node getNextSibling();

    /**
     * @param newChild the node to insert
     * @param refChild the existing child that newChild goes in front of
     * @return a node; which one is up to the implementation
     */
    Node insertBefore(Node newChild, Node refChild);

    /**
     * @param newChild the node to put in
     * @param oldChild the existing child to take out
     * @return a node; which one is up to the implementation
     */
    Node replaceChild(Node newChild, Node oldChild);

    /**
     * @param oldChild the child to remove
     * @return a node; which one is up to the implementation
     */
    Node removeChild(Node oldChild);
}

View File

@ -0,0 +1,19 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A cursor over a sequence of {@link Node}s. Each <code>to*</code>
 * method returns a node; by their names they also move the cursor, but
 * the exact positioning rules (including what happens at either end)
 * are left to the implementation.
 */
public interface NodeIterator {
    /** @return the number of nodes in the sequence */
    int getLength();

    /** @return the node at the current position */
    Node getCurrent();

    /** @return the next node */
    Node toNext();

    /** @return the previous node */
    Node toPrevious();

    /** @return the first node */
    Node toFirst();

    /** @return the last node */
    Node toLast();

    /**
     * @param Nth position of the wanted node; whether counting starts
     *        at 0 or 1 is not defined here
     * @return the node at that position
     */
    Node toNth(int Nth);

    /**
     * @param destNode the node to go to
     * @return a node; presumably destNode itself when it is part of
     *         the sequence -- confirm against the implementation
     */
    Node toNode(Node destNode);
}

View File

@ -0,0 +1,16 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * Processing Instruction: a {@link Node} made of a name and a data
 * string, both of which can be read and replaced.
 */
public interface PI extends Node {
    /** @return the name of this processing instruction */
    String getName();

    /** @param arg the new name of this processing instruction */
    void setName(String arg);

    /** @return the data of this processing instruction */
    String getData();

    /** @param arg the new data of this processing instruction */
    void setData(String arg);
}

View File

@ -0,0 +1,19 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A {@link Node} holding character data, with operations to edit that
 * data in place. Offsets and counts are plain ints; their unit and
 * valid range are not defined by this interface.
 */
public interface Text extends Node {
    /** @return the character data of this node */
    String getData();

    /** @param arg the new character data of this node */
    void setData(String arg);

    /** @param data the text to add at the end */
    void append(String data);

    /**
     * @param offset where to insert
     * @param data the text to insert
     */
    void insert(int offset, String data);

    /**
     * @param offset where the deleted range starts
     * @param count length of the deleted range
     */
    void delete(int offset, int count);

    /**
     * @param offset where the replaced range starts
     * @param count length of the replaced range
     * @param data the text that takes the place of the range
     */
    void replace(int offset, int count, String data);

    /**
     * NOTE(review): presumably moves the given range of text into
     * <code>element</code> -- confirm against the implementation.
     * @param element the element involved in the splice
     * @param offset where the spliced range starts
     * @param count length of the spliced range
     */
    void splice(Element element, int offset, int count);
}

View File

@ -0,0 +1,20 @@
/*
* $Id$
*/
package hplb.org.w3c.dom;
/**
 * A {@link NodeIterator} that can also travel along the tree
 * structure: up to the parent, sideways to siblings and down to
 * children.
 */
public interface TreeIterator extends NodeIterator {
    /** @return the number of children at the current position */
    int numChildren();

    /** @return the number of siblings before the current position */
    int numPreviousSiblings();

    /** @return the number of siblings after the current position */
    int numNextSiblings();

    /** @return the parent node */
    Node toParent();

    /** @return the previous sibling */
    Node toPreviousSibling();

    /** @return the next sibling */
    Node toNextSibling();

    /** @return the first child */
    Node toFirstChild();

    /** @return the last child */
    Node toLastChild();

    /**
     * NOTE(review): unlike {@link NodeIterator#toNth}, this takes no
     * index, so which child is meant cannot be told from here --
     * possibly an omission in the declaration; confirm.
     * @return a child node
     */
    Node toNthChild();
}

View File

@ -0,0 +1,146 @@
// $Id$
package hplb.org.xml.sax;
import java.util.Enumeration;
/**
 * A map of attributes for the current element.
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>The map is valid only while the <code>startElement</code>
 * callback is executing: copy whatever attribute information you need
 * to keep beyond that point.</p>
 * <p>Every method below applies to the current element and may be
 * called only during an invocation of <code>startElement</code>.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.DocumentHandler#startElement
 */
public interface AttributeMap {

    /**
     * List the names of all attributes available for the element.
     * @return An enumeration of zero or more Strings.
     * @see java.util.Enumeration
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    Enumeration getAttributeNames ();

    /**
     * Look up the value of an attribute as a String.
     * @param attributeName The name of the attribute.
     * @return The value as a String, or null if the attribute has no value.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    String getValue (String attributeName);

    /**
     * Tell whether an attribute value is the name of an entity.
     * @param aname The name of the attribute.
     * @return true if the attribute is an entity name.
     * @see #getEntityPublicID
     * @see #getEntitySystemID
     * @see #getNotationName
     * @see #getNotationPublicID
     * @see #getNotationSystemID
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isEntity (String aname);

    /**
     * Tell whether an attribute value is the name of a notation.
     * @param aname The name of the attribute.
     * @return true if the attribute is a notation name.
     * @see #getNotationPublicID
     * @see #getNotationSystemID
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isNotation (String aname);

    /**
     * Tell whether an attribute value is a unique identifier.
     * @param aname The name of the attribute.
     * @return true if the attribute is a unique identifier.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isId (String aname);

    /**
     * Tell whether an attribute value is a reference to an ID.
     * @param aname The name of the attribute.
     * @return true if the attribute is a reference to an ID.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isIdref (String aname);

    /**
     * Get the public identifier for an ENTITY attribute.
     * @param aname The name of the attribute.
     * @return The public identifier or null if there is none (or if
     *         the attribute value is not an entity name)
     * @see #isEntity
     */
    String getEntityPublicID (String aname);

    /**
     * Get the system identifier for an ENTITY attribute.
     * @param aname The name of the attribute.
     * @return The system identifier or null if there is none (or if
     *         the attribute value is not an entity name)
     * @see #isEntity
     */
    String getEntitySystemID (String aname);

    /**
     * Get the notation name for an ENTITY attribute.
     * @param aname The name of the attribute.
     * @return The notation name or null if there is none (or if
     *         the attribute value is not an entity name)
     * @see #isEntity
     */
    String getNotationName (String aname);

    /**
     * Get the notation public ID for an ENTITY or NOTATION attribute.
     * @param aname The name of the attribute.
     * @return The public identifier or null if there is none (or if
     *         the attribute value is not an entity or notation name)
     * @see #isEntity
     * @see #isNotation
     */
    String getNotationPublicID (String aname);

    /**
     * Get the notation system ID for an ENTITY or NOTATION attribute.
     * @param aname The name of the attribute.
     * @return The system identifier or null if there is none (or if
     *         the attribute value is not an entity or notation name)
     * @see #isEntity
     * @see #isNotation
     */
    String getNotationSystemID (String aname);
}

View File

@ -0,0 +1,129 @@
// $Id$
package hplb.org.xml.sax;
/**
 * A callback interface for basic XML document events.
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>This is the main handler for basic document events; it provides
 * information on roughly the same level as the ESIS in full SGML,
 * concentrating on logical structure rather than lexical
 * representation.</p>
 * <p>If you do not set a document handler, then by default all of these
 * events will simply be ignored.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.Parser#setDocumentHandler
 */
public interface DocumentHandler {

    /**
     * Handle the start of a document.
     * <p>A SAX-conformant parser calls this before any other event, so
     * it is the place to allocate and initialise per-document
     * objects.</p>
     * @exception java.lang.Exception You may throw any exception.
     */
    void startDocument ()
        throws Exception;

    /**
     * Handle the end of a document.
     * <p>A SAX-conformant parser calls this after every other event,
     * so it is the place to finalize and clean up per-document
     * objects.</p>
     * @exception java.lang.Exception You may throw any exception.
     */
    void endDocument ()
        throws Exception;

    /**
     * Handle the document type declaration.
     * <p>This will appear only if the XML document contains a
     * <code>DOCTYPE</code> declaration.</p>
     * @param name The document type name.
     * @param publicID The public identifier of the external DTD subset
     *        (if any), or null.
     * @param systemID The system identifier of the external DTD subset
     *        (if any), or null.
     * @exception java.lang.Exception You may throw any exception.
     */
    void doctype (String name, String publicID, String systemID)
        throws Exception;

    /**
     * Handle the start of an element.
     * <p>The contents of <code>attributes</code> are accurate only
     * while this handler is running: copy anything you need to use
     * afterwards.</p>
     * @param name The element type name.
     * @param attributes The available attributes.
     * @exception java.lang.Exception You may throw any exception.
     */
    void startElement (String name, AttributeMap attributes)
        throws Exception;

    /**
     * Handle the end of an element.
     * @param name The element type name.
     * @exception java.lang.Exception You may throw any exception.
     */
    void endElement (String name)
        throws Exception;

    /**
     * Handle significant character data.
     * <p>The contents of the array are accurate only while this
     * handler is running: if you need them afterwards, make your own
     * copy, possibly by constructing a string:</p>
     * <pre>
     * String data = new String(ch, start, length);
     * </pre>
     * @param ch An array of characters.
     * @param start The starting position in the array.
     * @param length The number of characters to use in the array.
     * @exception java.lang.Exception You may throw any exception.
     */
    void characters (char ch[], int start, int length)
        throws Exception;

    /**
     * Handle ignorable whitespace.
     * <p>The contents of the array are accurate only while this
     * handler is running: if you need them afterwards, make your own
     * copy, possibly by constructing a string:</p>
     * <pre>
     * String whitespace = new String(ch, start, length);
     * </pre>
     * @param ch An array of whitespace characters.
     * @param start The starting position in the array.
     * @param length The number of characters to use in the array.
     * @exception java.lang.Exception You may throw any exception.
     */
    void ignorable (char ch[], int start, int length)
        throws Exception;

    /**
     * Handle a processing instruction.
     * <p>XML processing instructions have two parts: a target, which
     * is a name, followed optionally by data.</p>
     * @param name The target of the processing instruction.
     * @param remainder The rest of the instruction after the target
     *        (its data, if any).
     * @exception java.lang.Exception You may throw any exception.
     */
    void processingInstruction (String name, String remainder)
        throws Exception;
}

View File

@ -0,0 +1,48 @@
// $Id$
package hplb.org.xml.sax;
/**
 * A callback interface for basic XML entity-related events.
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>If you do not set an entity handler, then a parser will
 * resolve all entities to the suggested system ID, and will take no
 * action for entity changes.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.Parser#setEntityHandler
 */
public interface EntityHandler {

    /**
     * Resolve a system identifier.
     * <p>SAX parsers pass the system identifier of every entity
     * (including the document entity) through this callback before
     * loading it. Return a different system identifier to redirect the
     * parser, or null to keep it from reading the entity at all.</p>
     * @param ename The name of the entity, "[document]" for the
     *        document entity, or "[external DTD]" for the external
     *        DTD subset.
     * @param publicID The public identifier, or null if there is none.
     * @param systemID The system identifier suggested in the XML document.
     * @return A system identifier, or null to skip the entity.
     * @exception java.lang.Exception You may throw any exception.
     */
    String resolveEntity (String ename, String publicID, String systemID)
        throws Exception;

    /**
     * Handle a change in the current entity.
     * <p>The parser calls this handler each time it switches to
     * reading from a different entity (URI).</p>
     * @param systemID The URI of the new entity.
     * @exception java.lang.Exception You may throw any exception.
     */
    void changeEntity (String systemID)
        throws Exception;
}

View File

@ -0,0 +1,52 @@
// $Id$
package hplb.org.xml.sax;
/**
 * A callback interface for basic XML error events.
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>If you do not set an error handler, then a parser will report
 * warnings to <code>System.err</code>, and will throw an (unspecified)
 * exception for fatal errors.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.Parser#setErrorHandler
 */
public interface ErrorHandler {

    /**
     * Handle a non-fatal warning.
     * <p>A SAX parser reports through this callback a condition that
     * is not serious enough to stop the parse (though you may still
     * stop the parse if you wish).</p>
     * @param message The warning message.
     * @param systemID The URI of the entity that caused the warning, or
     *        null if not available.
     * @param line The line number in the entity, or -1 if not available.
     * @param column The column number in the entity, or -1 if not available.
     * @exception java.lang.Exception You may throw any exception.
     */
    void warning (String message, String systemID, int line, int column)
        throws Exception;

    /**
     * Handle a fatal error.
     * <p>A SAX parser reports through this callback a condition that
     * is serious enough to invalidate the parse; after it, the parser
     * may not report all (or any) significant parse events. Ordinarily,
     * you should stop immediately with an exception, but you can
     * continue to try to collect more errors if you wish.</p>
     * @param message The error message.
     * @param systemID The URI of the entity that caused the error, or
     *        null if not available.
     * @param line The line number in the entity, or -1 if not available.
     * @param column The column number in the entity, or -1 if not available.
     * @exception java.lang.Exception You may throw any exception.
     */
    void fatal (String message, String systemID, int line, int column)
        throws Exception;
}

View File

@ -0,0 +1,201 @@
// $Id$
package hplb.org.xml.sax;
/**
 * A simple base class for deriving SAX event handlers.
 * <p><em>This class is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>This class implements the default behaviour when no handler
 * is specified (though parsers are not actually required to use
 * this class): entities resolve to the system ID they came with,
 * document events are ignored, warnings are printed to
 * <code>System.err</code> and fatal errors are thrown as
 * {@link XmlException}.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.XmlException
 * @see hplb.org.xml.sax.EntityHandler
 * @see hplb.org.xml.sax.DocumentHandler
 * @see hplb.org.xml.sax.ErrorHandler
 */
public class HandlerBase
    implements EntityHandler, DocumentHandler, ErrorHandler
{

    //////////////////////////////////////////////////////////////////////
    // Implementation of hplb.org.xml.sax.EntityHandler.
    //////////////////////////////////////////////////////////////////////

    /**
     * Resolve an external entity.
     * <p>The default hands back the system ID it was given.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.EntityHandler#resolveEntity
     */
    public String resolveEntity (String ename, String publicID, String systemID)
        throws Exception
    {
        return systemID;
    }

    /**
     * Handle an entity-change event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.EntityHandler#changeEntity
     */
    public void changeEntity (String systemID)
        throws Exception
    {
        // no-op by default
    }

    //////////////////////////////////////////////////////////////////////
    // Implementation of hplb.org.xml.sax.DocumentHandler.
    //////////////////////////////////////////////////////////////////////

    /**
     * Handle a start document event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#startDocument
     */
    public void startDocument ()
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle an end document event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#endDocument
     */
    public void endDocument ()
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle a document type declaration event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#doctype
     */
    public void doctype (String name, String publicID, String systemID)
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle a start element event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    public void startElement (String name, AttributeMap attributes)
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle an end element event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#endElement
     */
    public void endElement (String name)
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle a character data event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#characters
     */
    public void characters (char ch[], int start, int length)
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle an ignorable whitespace event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#ignorable
     */
    public void ignorable (char ch[], int start, int length)
        throws Exception
    {
        // no-op by default
    }

    /**
     * Handle a processing instruction event.
     * <p>The default does nothing.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.DocumentHandler#processingInstruction
     */
    public void processingInstruction (String name, String remainder)
        throws Exception
    {
        // no-op by default
    }

    //////////////////////////////////////////////////////////////////////
    // Implementation of ErrorHandler.
    //////////////////////////////////////////////////////////////////////

    /**
     * Handle a non-fatal error.
     * <p>The default prints one line to System.err, of the form
     * <code>Warning (systemID,line,column): message</code>.</p>
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.ErrorHandler#warning
     */
    public void warning (String message, String systemID, int line, int column)
        throws Exception
    {
        StringBuffer report = new StringBuffer("Warning (");
        report.append(systemID).append(',').append(line).append(',').append(column);
        report.append("): ").append(message);
        System.err.println(report.toString());
    }

    /**
     * Handle a fatal error.
     * <p>The default throws an XmlException carrying the message and
     * the location it was given.</p>
     * @exception hplb.org.xml.sax.XmlException A fatal parsing error
     *            has been found.
     * @exception java.lang.Exception When you override this method,
     *            you may throw any exception.
     * @see hplb.org.xml.sax.ErrorHandler#fatal
     */
    public void fatal (String message, String systemID, int line, int column)
        throws XmlException, Exception
    {
        throw new XmlException(message, systemID, line, column);
    }
}

View File

@ -0,0 +1,32 @@
# This Makefile generated by jmkmf
# Java package is org.xml.sax
# NOTE(review): the Java sources of this package declare
# "package hplb.org.xml.sax", while JPACKAGE below has no "hplb."
# prefix -- confirm that the doc target still finds the package.
.SUFFIXES: .java .class .jj
JPACKAGE = org.xml.sax
JAVA = java
JAVAC = javac
JAVACC = java COM.sun.labs.javacc.Main
JFLAGS =
# One .class file per type in this package; "all" builds them.
OBJS = \
AttributeMap.class \
DocumentHandler.class \
EntityHandler.class \
ErrorHandler.class \
HandlerBase.class \
Parser.class \
XmlException.class
JAVADOCFLAGS = -d ../../../doc/api -author -noindex -notree
# Default target: compile every class listed in OBJS.
all: $(OBJS)
# Generate API documentation for JPACKAGE with the flags above.
doc:
javadoc $(JAVADOCFLAGS) $(JPACKAGE)
# Suffix rule: run JavaCC on a .jj grammar to produce a .java file.
# NOTE(review): the prerequisite names a file "org.xml.sax.jj", and GNU
# make does not treat a rule that lists prerequisites as a suffix rule
# -- confirm whether this rule is used at all.
.jj.java: org.xml.sax.jj
$(JAVACC) $<
# Suffix rule: compile a .java file to a .class file.
.java.class: $*.java
$(JAVAC) $(JFLAGS) $<
# Remove compiled classes and editor backup files.
clean:
rm -f *.class *~

View File

@ -0,0 +1,71 @@
// $Id$
package hplb.org.xml.sax;
/**
 * A standard interface for event-driven XML parsers.
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>All SAX-conformant XML parsers (or their front-end SAX drivers)
 * <em>must</em> implement this interface, together with a zero-argument
 * constructor.</p>
 * <p>You can plug three different kinds of callback interfaces into
 * a basic SAX parser: one for entity handling, one for basic document
 * events, and one for error reporting. It is not an error to start
 * a parse without setting any handlers.</p>
 * @author David Megginson, Microstar Software Ltd.
 */
public interface Parser {

    /**
     * Register the handler for basic entity events.
     * <p>When a parse begins with no entity handler set, the parser
     * resolves all entities to their default system IDs.</p>
     * @param handler An object to receive callbacks for events.
     * @see hplb.org.xml.sax.EntityHandler
     */
    void setEntityHandler (EntityHandler handler);

    /**
     * Register the handler for basic document events.
     * <p>When a parse begins with no document handler set, no document
     * events are reported.</p>
     * @param handler An object to receive callbacks for events.
     * @see hplb.org.xml.sax.DocumentHandler
     */
    void setDocumentHandler (DocumentHandler handler);

    /**
     * Register the handler for errors and warnings.
     * <p>When a parse begins with no error handler set, warnings are
     * printed to System.err, and errors throw an unspecified
     * exception.</p>
     * @param handler An object to receive callbacks for errors.
     * @see hplb.org.xml.sax.ErrorHandler
     */
    void setErrorHandler (ErrorHandler handler);

    /**
     * Parse an XML document.
     * <p>Nothing exciting will happen unless you have set handlers.</p>
     * @param publicID The public identifier for the document, or null
     *        if none is available.
     * @param systemID The system identifier (URI) for the document.
     * @exception java.lang.Exception This method may throw any exception,
     *            but the parser itself
     *            will throw only exceptions derived from java.io.IOException;
     *            anything else will come from your handlers.
     * @see #setEntityHandler
     * @see #setDocumentHandler
     * @see #setErrorHandler
     */
    void parse (String publicID, String systemID) throws Exception;
}

View File

@ -0,0 +1,73 @@
// $Id$
package hplb.org.xml.sax;
/**
 * An exception for reporting XML parsing errors.
 * <p><em>This class is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 * <p>This exception is not a required part of SAX, and it is not
 * referenced in any of the core interfaces. It is used only in
 * the optional HandlerBase base class, as a means of signalling
 * parsing errors.</p>
 * <p>The location recorded at construction time never changes.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.HandlerBase#fatal
 */
public class XmlException extends Exception {

    //
    // Internal state: set once by the constructor, hence final.
    //
    private final String systemID;
    private final int line;
    private final int column;

    /**
     * Construct a new exception with information about the location.
     * @param message The error message.
     * @param systemID The URI where the error occurred, or null if
     *        none is available.
     * @param line The line number, or -1 if none is available.
     * @param column The column number, or -1 if none is available.
     */
    public XmlException (String message, String systemID, int line, int column)
    {
        super(message);
        this.systemID = systemID;
        this.line = line;
        this.column = column;
    }

    /**
     * Find the system identifier (URI) where the error occurred.
     * @return A string representing the URI, or null if none is available.
     */
    public String getSystemID ()
    {
        return systemID;
    }

    /**
     * Find the line number where the error occurred.
     * @return The line number, or -1 if none is available.
     */
    public int getLine ()
    {
        return line;
    }

    /**
     * Find the column number (line offset) where the error occurred.
     * @return The column number, or -1 if none is available.
     */
    public int getColumn ()
    {
        return column;
    }
}

View File

@ -0,0 +1,41 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.xml;
import java.util.Hashtable;
/**
* This class is responsible for maintaining strings as <em>atoms</em>,
* i.e. if two strings returned by getAtom() are equal in the sense of
* String.equal() then they are in fact the same Object. This is used to
* "intern" element and attribute names which can then be compared using
* the more efficient reference equality, a la "s1==s2".
*
* @author Anders Kristensen
*/
public final class Atom {
/** Holds atoms: element names (GIs), and attribute names. */
private static final Hashtable atoms = new Hashtable();
/**
* Return an atom corresponding to the argument.
*/
public static String getAtom(String s) {
synchronized (atoms) {
String a = (String) atoms.get(s);
if (a == null) {
atoms.put(s, s);
a = s;
}
return a;
}
}
}

View File

@ -0,0 +1,57 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.xml;
import hplb.org.w3c.dom.*;
/**
 * Plain {@link Attribute} implementation that keeps the name, the
 * value node and the "specified" flag in three fields.
 *
 * @author Anders Kristensen
 */
public final class AttrImpl implements Attribute {
    protected String name;
    protected Node value;
    protected boolean specified;

    /**
     * Creates an attribute whose value is a new text node wrapping the
     * given string; the "specified" flag is set to true.
     * @param name the attribute name
     * @param value the attribute value as a string
     */
    public AttrImpl(String name, String value) {
        this(name, new TextImpl(Node.TEXT, value), true);
    }

    /**
     * Creates an attribute from its three parts.
     * @param name the attribute name
     * @param value the node holding the attribute value
     * @param specified the initial state of the "specified" flag
     */
    public AttrImpl(String name, Node value, boolean specified) {
        this.specified = specified;
        this.value = value;
        this.name = name;
    }

    /** @return the attribute name */
    public String getName() {
        return this.name;
    }

    /** @return the node holding the attribute value */
    public Node getValue() {
        return this.value;
    }

    /** @param arg the node to use as attribute value from now on */
    public void setValue(Node arg) {
        this.value = arg;
    }

    /** @return the current state of the "specified" flag */
    public boolean getSpecified() {
        return this.specified;
    }

    /** @param arg the new state of the "specified" flag */
    public void setSpecified(boolean arg) {
        this.specified = arg;
    }

    /**
     * @return the string form of the value node; the name is not part
     *         of the result. Fails with a NullPointerException when
     *         the value node is null.
     */
    public String toString() {
        return this.value.toString();
    }
}

View File

@ -0,0 +1,183 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.xml;
import hplb.org.w3c.dom.*;
/**
 * An insertion-ordered list of attributes, looked up by name.
 * Attributes are stored linearly in an array, so {@link #getAttribute},
 * {@link #setAttribute} and {@link #remove} are linear in the number of
 * attributes held. Setting an attribute whose name is already present
 * replaces the existing entry in place, so names are unique.
 *
 * <p>Subclasses may access the underlying array ({@link #elms}) and count
 * ({@link #n}) directly for speed.
 *
 * <p>The public methods are synchronized on this instance.
 *
 * @author Anders Kristensen
 */
public class AttrListImpl implements AttributeList {
    /** Backing store; only the slots at indices 0 to n-1 hold attributes. */
    protected Attribute[] elms;

    /**
     * Number of elements. The elements are held at indices 0 to n-1 in elms.
     */
    protected int n = 0;

    /** Creates an empty list with an initial capacity of 2. */
    public AttrListImpl() {
        this(2);
    }

    /**
     * Creates an AttrListImpl with the specified initial capacity.
     *
     * @param size initial capacity; must be at least 1
     * @throws IllegalArgumentException if <code>size</code> is zero or negative
     */
    public AttrListImpl(int size) {
        if (size <= 0) {
            throw new IllegalArgumentException(
                    "Initial size must be at least 1");
        }
        elms = new Attribute[size];
    }

    /**
     * Returns the attribute with the given name.
     *
     * @param attrName the name to look up
     * @return the matching attribute, or null if there is none
     */
    public synchronized Attribute getAttribute(String attrName) {
        int i = getIndex(attrName);
        return (i < 0 ? null : elms[i]);
    }

    /**
     * Finds the position of the first attribute with the given name.
     * Not synchronized itself; the public methods call it while holding
     * the instance lock.
     *
     * @param name the name to search for
     * @return the index into elms, or -1 if no attribute has that name
     */
    protected int getIndex(String name) {
        for (int i = 0; i < n; i++) {
            if (elms[i].getName().equals(name)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Stores an attribute under its own name. An existing attribute with the
     * same name is replaced in place (keeping its position); otherwise the
     * attribute is appended.
     *
     * @param attr the attribute to store
     * @return the attribute previously stored under that name, or null if
     *         the attribute was appended
     */
    public synchronized Attribute setAttribute(Attribute attr) {
        int i = getIndex(attr.getName());
        if (i >= 0) {
            Attribute old = elms[i];
            elms[i] = attr;
            return old;
        }
        if (elms.length == n) {
            // Double the capacity. The new array must have component type
            // Attribute (not AttrImpl), otherwise copying or storing any
            // other Attribute implementation fails with ArrayStoreException.
            Attribute[] grown = new Attribute[elms.length * 2];
            System.arraycopy(elms, 0, grown, 0, n);
            elms = grown;
        }
        elms[n] = attr;
        n++;
        return null;
    }

    /**
     * Removes the attribute with the given name, shifting later attributes
     * down by one position.
     *
     * @param attrName the name of the attribute to remove
     * @return the removed attribute, or null if there was none
     */
    public synchronized Attribute remove(String attrName) {
        int i = getIndex(attrName);
        if (i < 0) {
            return null;
        }
        Attribute val = elms[i];
        System.arraycopy(elms, i + 1, elms, i, n - i - 1);
        n--;
        // Clear the vacated tail slot so the list does not keep a stale
        // reference to an attribute it no longer contains.
        elms[n] = null;
        return val;
    }

    /**
     * Returns the attribute at the given position.
     *
     * @param index position in insertion order, from 0 to getLength()-1
     * @return the attribute at that position
     * @throws IndexOutOfBoundsException if the index is out of range
     */
    public synchronized Attribute item(int index) {
        if (index < 0 || index >= n) {
            throw new IndexOutOfBoundsException("" + index);
        }
        return elms[index];
    }

    /** Returns the number of attributes in this list. */
    public synchronized int getLength() {
        return n;
    }

    /**
     * Returns the list formatted as <code>{ name=value, name=value }</code>,
     * in insertion order.
     */
    public synchronized String toString() {
        StringBuffer sb = new StringBuffer();
        int length = getLength();
        sb.append("{ ");
        for (int i = 0; i < length; i++) {
            if (i > 0) {
                sb.append(", ");
            }
            Attribute attr = item(i);
            sb.append(attr.getName()).append('=').append(attr);
        }
        sb.append(" }");
        return sb.toString();
    }

    /**
     * Interactive test driver: reads operations from standard input and
     * applies them to a list, printing the list after each one.
     *
     * @param args optionally, the initial capacity as the first argument
     */
    public static void main(String[] args) throws Exception {
        AttrListImpl alist;
        Attribute attr;
        java.io.BufferedReader r;
        java.util.StringTokenizer tok;
        String op;

        if (args.length > 0) {
            alist = new AttrListImpl(Integer.parseInt(args[0]));
        } else {
            alist = new AttrListImpl();
        }

        System.out.println(
            "Enter operations... op's are one of\n"+
            "put <key> <val>\n"+
            "get <key>\n"+
            "rem <key>\n"+
            "size\n"+
            "quit\n");

        r = new java.io.BufferedReader(
            new java.io.InputStreamReader(System.in));

        while (true) {
            System.out.print("doyourworst> ");
            String line = r.readLine();
            if (line == null) {
                // End of input: behave like "quit" instead of failing.
                break;
            }
            tok = new java.util.StringTokenizer(line);
            if (!tok.hasMoreTokens()) {
                // Blank line: nothing to do, prompt again.
                continue;
            }
            op = tok.nextToken();

            if ("put".equals(op)) {
                attr = new AttrImpl(tok.nextToken(), tok.nextToken());
                System.out.println("Value: " +
                    alist.setAttribute(attr));
            } else if ("get".equals(op)) {
                attr = alist.getAttribute(tok.nextToken());
                System.out.println("Value: " +
                    (attr == null ? "No such element" : attr.toString()));
            } else if ("rem".equals(op)) {
                attr = alist.remove(tok.nextToken());
                System.out.println("Value: " + attr);
            } else if (op.startsWith("s")) {
                System.out.println("Size: " + alist.getLength());
            } else if (op.startsWith("q")) {
                break;
            } else {
                System.out.println("Unrecognized op: " + op);
            }

            System.out.println("AttributeList: " + alist);
            System.out.println("Size: " + alist.getLength());
            System.out.println();
        }
    }
}

View File

@ -0,0 +1,46 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.xml;
/**
 * A java.io.CharArrayWriter with the additional property that users can get
 * to the actual underlying storage. Hence it's very fast (and dangerous):
 * callers see and can modify the writer's internal state directly.
 *
 * @author Anders Kristensen
 */
public final class CharBuffer extends java.io.CharArrayWriter {
    /** Creates a buffer with the superclass's default initial capacity. */
    public CharBuffer() {
        super();
    }

    /**
     * Creates a buffer with the given initial capacity.
     *
     * @param size initial capacity in chars
     */
    public CharBuffer(int size) {
        super(size);
    }

    /**
     * Truncates the buffer to the given number of chars. Use only to
     * <em>decrement</em> the size: a value greater than or equal to the
     * current length leaves the buffer unchanged.
     *
     * @param size the new length; must not be negative
     * @throws IllegalArgumentException if <code>size</code> is negative
     */
    public void setLength(int size) {
        // A negative count would corrupt the writer: subsequent writes and
        // toString() would fail with index exceptions.
        if (size < 0) {
            throw new IllegalArgumentException("Negative length: " + size);
        }
        synchronized (lock) {
            if (size < count) {
                count = size;
            }
        }
    }

    /**
     * Returns the internal storage array itself, not a copy. Only the first
     * {@link #getLength()} chars are meaningful. The writer may switch to a
     * different array when it grows, so do not hold on to the result across
     * writes.
     */
    public char[] getCharArray() {
        synchronized (lock) {
            return buf;
        }
    }

    /** Returns the number of valid chars currently in the buffer. */
    public int getLength() {
        return count;
    }
}

View File

@ -0,0 +1,23 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.xml;
import hplb.org.w3c.dom.DOM;
import hplb.org.w3c.dom.Document;
/**
 * {@link DOM} implementation that hands out {@link DocumentImpl} instances
 * and reports no features.
 */
public class DOMImpl implements DOM {
    /**
     * Creates a new document using the no-argument <code>DocumentImpl</code>
     * constructor.
     *
     * @param type ignored by this implementation
     * @return a newly created document
     */
    public Document createDocument(String type) {
        Document created = new DocumentImpl();
        return created;
    }

    /**
     * Reports whether the named feature is supported.
     *
     * @param feature ignored by this implementation
     * @return always false
     */
    public boolean hasFeature(String feature) {
        final boolean supported = false;
        return supported;
    }
}

View File

@ -0,0 +1,25 @@
/*
* $Id$
*
* Copyright 1997 Hewlett-Packard Company
*
* This file may be copied, modified and distributed only in
* accordance with the terms of the limited licence contained
* in the accompanying file LICENSE.TXT.
*/
package hplb.xml;
import hplb.org.w3c.dom.*;
/**
 * {@link DocumentContext} implementation that simply holds a reference to
 * a document.
 */
public class DocContextImpl implements DocumentContext {
    /** The document this context refers to; null until one is set. */
    Document doc;

    /** Returns the document last passed to {@link #setDocument(Document)}, or null if none was set. */
    public Document getDocument() {
        return this.doc;
    }

    /**
     * Replaces the document held by this context.
     *
     * @param arg the document to hold from now on
     */
    public void setDocument(Document arg) {
        this.doc = arg;
    }
}

Some files were not shown because too many files have changed in this diff Show More