mirror of https://github.com/apache/lucene.git
Initial revision
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
95eb8721c6
commit
cf2fa142c8
|
@ -0,0 +1,23 @@
|
|||
#!/bin/sh
# Rebuilds the crawler from scratch: removes the output of earlier builds and
# runs, unpacks lib/HTTPClient.zip into ./build, copies src/ on top of it and
# compiles everything into ./classes.
# Must be started from the project root (the directory holding lib/ and src/).

# clean
# -f keeps a first-time build quiet when the directories do not exist yet
echo cleaning
rm -rf build
rm -rf classes
rm -rf cachingqueue
rm -rf logs

# build
echo making build directory
mkdir build
echo extracting http client
# Run the extraction in a subshell: the working directory of the script is
# never changed, so a failing "cd build" can no longer make the following
# commands run one directory too high. The verbose file list is discarded.
(cd build && jar xvf ../lib/HTTPClient.zip >/dev/null)
cp -r src/* build
mkdir classes
echo compiling
javac -g -d classes -sourcepath build build/HTTPClient/*.java
javac -g -classpath ./lib/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java
|
||||
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/sh
# Removes everything a build and a crawl leave behind.

# per-run data is handled by the sibling script
./cleanlastrun.sh

# build output
for dir in build classes; do
    rm -r "$dir"
done
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
# Deletes the data directories written during the previous crawler run.
for dir in logs cachingqueue; do
    rm -r "$dir"
done
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/sh
# Build script variant that compiles only what is under src/; the HTTPClient
# extraction and compilation steps are disabled and kept below as comments.
# Must be started from the project root (the directory holding src/).

# clean
# -f keeps a first-time build quiet when the directories do not exist yet
echo cleaning
rm -rf build
rm -rf classes
rm -rf cachingqueue
rm -rf logs

# build
echo making build directory
mkdir build
# The former "cd build" / "cd .." pair around the disabled extraction was
# removed: it did nothing on success and left the project root when
# "cd build" failed.
#echo extracting http client
#(cd build && jar xvf ../lib/HTTPClient.zip >/dev/null)
cp -r src/* build
mkdir classes
echo compiling
#javac -g -d classes -sourcepath build build/HTTPClient/*.java
javac -g -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java
|
||||
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
# Starts the crawler (FetcherMain) with a fresh ./logs directory. The start
# URL, the scope regex and the thread count are passed on the command line.
rm -rf logs
mkdir logs
# -Xmx accepts only the k/m/g size suffixes; "400mb" is rejected by the JVM.
# The scope regex is single-quoted so that the shell neither strips the
# backslashes in front of the dots nor attempts to glob-expand the pattern.
java -server -Xmx400m -classpath classes:lib/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto 'http://[^/]*\.uni-muenchen\.de.*' -threads 15
|
|
@ -0,0 +1,278 @@
|
|||
/*
|
||||
* @(#)ContentEncodingModule.java 0.3-3 06/05/2001
|
||||
*
|
||||
* This file is part of the HTTPClient package
|
||||
* Copyright (C) 1996-2001 Ronald Tschalär
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free
|
||||
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
* MA 02111-1307, USA
|
||||
*
|
||||
* For questions, suggestions, bug-reports, enhancement-requests etc.
|
||||
* I may be contacted at:
|
||||
*
|
||||
* ronald@innovation.ch
|
||||
*
|
||||
* The HTTPClient's home page is located at:
|
||||
*
|
||||
* http://www.innovation.ch/java/HTTPClient/
|
||||
*
|
||||
*/
|
||||
package HTTPClient;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Vector;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
/**
|
||||
* This module handles the Content-Encoding response header. It currently
|
||||
* handles the "gzip", "deflate", "compress" and "identity" tokens.
|
||||
*
|
||||
* @author Ronald Tschalär
|
||||
* @created 29. Dezember 2001
|
||||
* @version 0.3-3 06/05/2001
|
||||
*/
|
||||
public class ContentEncodingModule implements HTTPClientModule
|
||||
{
|
||||
// Methods
|
||||
|
||||
/**
|
||||
* Invoked by the HTTPClient.
|
||||
*
|
||||
* @param req Description of the Parameter
|
||||
* @param resp Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
* @exception ModuleException Description of the Exception
|
||||
*/
|
||||
public int requestHandler(Request req, Response[] resp)
|
||||
throws ModuleException
|
||||
{
|
||||
// parse Accept-Encoding header
|
||||
|
||||
int idx;
|
||||
NVPair[] hdrs = req.getHeaders();
|
||||
for (idx = 0; idx < hdrs.length; idx++)
|
||||
{
|
||||
if (hdrs[idx].getName().equalsIgnoreCase("Accept-Encoding"))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Vector pae;
|
||||
if (idx == hdrs.length)
|
||||
{
|
||||
hdrs = Util.resizeArray(hdrs, idx + 1);
|
||||
req.setHeaders(hdrs);
|
||||
pae = new Vector();
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
pae = Util.parseHeader(hdrs[idx].getValue());
|
||||
}
|
||||
catch (ParseException pe)
|
||||
{
|
||||
throw new ModuleException(pe.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// done if "*;q=1.0" present
|
||||
|
||||
HttpHeaderElement all = Util.getElement(pae, "*");
|
||||
if (all != null)
|
||||
{
|
||||
NVPair[] params = all.getParams();
|
||||
for (idx = 0; idx < params.length; idx++)
|
||||
{
|
||||
if (params[idx].getName().equalsIgnoreCase("q"))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (idx == params.length)
|
||||
{
|
||||
// no qvalue, i.e. q=1.0
|
||||
return REQ_CONTINUE;
|
||||
}
|
||||
|
||||
if (params[idx].getValue() == null ||
|
||||
params[idx].getValue().length() == 0)
|
||||
{
|
||||
throw new ModuleException("Invalid q value for \"*\" in " +
|
||||
"Accept-Encoding header: ");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (Float.valueOf(params[idx].getValue()).floatValue() > 0.)
|
||||
{
|
||||
return REQ_CONTINUE;
|
||||
}
|
||||
}
|
||||
catch (NumberFormatException nfe)
|
||||
{
|
||||
throw new ModuleException("Invalid q value for \"*\" in " +
|
||||
"Accept-Encoding header: " + nfe.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// Add gzip, deflate and compress tokens to the Accept-Encoding header
|
||||
|
||||
if (!pae.contains(new HttpHeaderElement("deflate")))
|
||||
{
|
||||
pae.addElement(new HttpHeaderElement("deflate"));
|
||||
}
|
||||
if (!pae.contains(new HttpHeaderElement("gzip")))
|
||||
{
|
||||
pae.addElement(new HttpHeaderElement("gzip"));
|
||||
}
|
||||
if (!pae.contains(new HttpHeaderElement("x-gzip")))
|
||||
{
|
||||
pae.addElement(new HttpHeaderElement("x-gzip"));
|
||||
}
|
||||
if (!pae.contains(new HttpHeaderElement("compress")))
|
||||
{
|
||||
pae.addElement(new HttpHeaderElement("compress"));
|
||||
}
|
||||
if (!pae.contains(new HttpHeaderElement("x-compress")))
|
||||
{
|
||||
pae.addElement(new HttpHeaderElement("x-compress"));
|
||||
}
|
||||
|
||||
hdrs[idx] = new NVPair("Accept-Encoding", Util.assembleHeader(pae));
|
||||
|
||||
return REQ_CONTINUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Invoked by the HTTPClient.
|
||||
*
|
||||
* @param resp Description of the Parameter
|
||||
* @param req Description of the Parameter
|
||||
*/
|
||||
public void responsePhase1Handler(Response resp, RoRequest req)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Invoked by the HTTPClient.
|
||||
*
|
||||
* @param resp Description of the Parameter
|
||||
* @param req Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public int responsePhase2Handler(Response resp, Request req)
|
||||
{
|
||||
return RSP_CONTINUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Invoked by the HTTPClient.
|
||||
*
|
||||
* @param resp Description of the Parameter
|
||||
* @param req Description of the Parameter
|
||||
* @exception IOException Description of the Exception
|
||||
* @exception ModuleException Description of the Exception
|
||||
*/
|
||||
public void responsePhase3Handler(Response resp, RoRequest req)
|
||||
throws IOException, ModuleException
|
||||
{
|
||||
String ce = resp.getHeader("Content-Encoding");
|
||||
if (ce == null || req.getMethod().equals("HEAD") ||
|
||||
resp.getStatusCode() == 206)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Vector pce;
|
||||
try
|
||||
{
|
||||
pce = Util.parseHeader(ce);
|
||||
}
|
||||
catch (ParseException pe)
|
||||
{
|
||||
throw new ModuleException(pe.toString());
|
||||
}
|
||||
|
||||
if (pce.size() == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
String encoding = ((HttpHeaderElement) pce.firstElement()).getName();
|
||||
if (encoding.equalsIgnoreCase("gzip") ||
|
||||
encoding.equalsIgnoreCase("x-gzip"))
|
||||
{
|
||||
Log.write(Log.MODS, "CEM: pushing gzip-input-stream");
|
||||
|
||||
resp.inp_stream = new GZIPInputStream(resp.inp_stream);
|
||||
pce.removeElementAt(pce.size() - 1);
|
||||
resp.deleteHeader("Content-length");
|
||||
}
|
||||
else if (encoding.equalsIgnoreCase("deflate"))
|
||||
{
|
||||
Log.write(Log.MODS, "CEM: pushing inflater-input-stream");
|
||||
|
||||
resp.inp_stream = new InflaterInputStream(resp.inp_stream);
|
||||
pce.removeElementAt(pce.size() - 1);
|
||||
resp.deleteHeader("Content-length");
|
||||
}
|
||||
else if (encoding.equalsIgnoreCase("compress") ||
|
||||
encoding.equalsIgnoreCase("x-compress"))
|
||||
{
|
||||
Log.write(Log.MODS, "CEM: pushing uncompress-input-stream");
|
||||
|
||||
resp.inp_stream = new UncompressInputStream(resp.inp_stream);
|
||||
pce.removeElementAt(pce.size() - 1);
|
||||
resp.deleteHeader("Content-length");
|
||||
}
|
||||
else if (encoding.equalsIgnoreCase("identity"))
|
||||
{
|
||||
Log.write(Log.MODS, "CEM: ignoring 'identity' token");
|
||||
pce.removeElementAt(pce.size() - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
Log.write(Log.MODS, "CEM: Unknown content encoding '" +
|
||||
encoding + "'");
|
||||
}
|
||||
|
||||
if (pce.size() > 0)
|
||||
{
|
||||
resp.setHeader("Content-Encoding", Util.assembleHeader(pce));
|
||||
}
|
||||
else
|
||||
{
|
||||
resp.deleteHeader("Content-Encoding");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Invoked by the HTTPClient.
|
||||
*
|
||||
* @param resp Description of the Parameter
|
||||
* @param req Description of the Parameter
|
||||
*/
|
||||
public void trailerHandler(Response resp, RoRequest req)
|
||||
{
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,38 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c) <p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
 * Holder for the global constants shared by the classes of this package.
 */
public class Constants
{
    /** User-Agent string a fetcher task presents to the server it talks to. */
    public static final String USER_AGENT = "Mozilla/4.06 [en] (WinNT; I)";

    /** Identification string of the crawler itself. */
    public static final String CRAWLER_AGENT = "Fetcher/0.95";

    /** Size of the temporary buffer web documents are read into. */
    public static final int FETCHERTASK_READSIZE = 4096;

    /** Upper limit, in bytes, on how much of a document is read. */
    public static final int FETCHERTASK_MAXFILESIZE = 2000000;
}
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.util.*;
|
||||
import java.net.*;
|
||||
|
||||
/**
|
||||
* filter class; gets IP Adresses from host names and forwards them to
|
||||
* the other parts of the application
|
||||
* since URLs cache their IP addresses themselves, and HTTP 1.1 needs the
|
||||
* host names to be sent to the server, this class is not used anymore
|
||||
*/
|
||||
public class DNSResolver implements MessageListener
|
||||
{
|
||||
|
||||
HashMap ipCache = new HashMap();
|
||||
|
||||
|
||||
public DNSResolver()
|
||||
{
|
||||
}
|
||||
|
||||
public void notifyAddedToMessageHandler(MessageHandler m)
|
||||
{
|
||||
this.messageHandler = m;
|
||||
}
|
||||
|
||||
MessageHandler messageHandler;
|
||||
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
if(message instanceof URLMessage)
|
||||
{
|
||||
URL url = ((URLMessage)message).getUrl();
|
||||
String host = url.getHost();
|
||||
InetAddress ip;
|
||||
/*InetAddress ip = (InetAddress)ipCache.get(host);
|
||||
|
||||
if(ip == null)
|
||||
{
|
||||
*/
|
||||
|
||||
try
|
||||
{
|
||||
ip = InetAddress.getByName(host);
|
||||
/*
|
||||
ipCache.put(host, ip);
|
||||
//System.out.println("DNSResolver: new Cache Entry \"" + host + "\" = \"" + ip.getHostAddress() + "\"");*/
|
||||
}
|
||||
catch(UnknownHostException e)
|
||||
{
|
||||
ip = null;
|
||||
return null;
|
||||
//System.out.println("DNSResolver: unknown host \"" + host + "\"");
|
||||
}
|
||||
/*}
|
||||
else
|
||||
{
|
||||
//System.out.println("DNSResolver: Cache hit: " + ip.getHostAddress());
|
||||
}*/
|
||||
//((URLMessage)message).setIpAddress(ip);
|
||||
}
|
||||
return message;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
/*
|
||||
* LARM - LANLab Retrieval Machine
|
||||
*
|
||||
* $history: $
|
||||
*
|
||||
*/
|
||||
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import de.lanlab.larm.threads.ThreadPool;
|
||||
import de.lanlab.larm.threads.ThreadPoolObserver;
|
||||
import de.lanlab.larm.threads.InterruptableTask;
|
||||
import de.lanlab.larm.storage.*;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import de.lanlab.larm.fetcher.FetcherTask;
|
||||
|
||||
/**
|
||||
* filter class; the Fetcher is the main class which keeps the ThreadPool that
|
||||
* gets the documents. It should be placed at the very end of the MessageQueue,
|
||||
* so that all filtering can be made beforehand.
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
*
|
||||
*/
|
||||
|
||||
public class Fetcher implements MessageListener
|
||||
{
|
||||
/**
|
||||
* holds the threads
|
||||
*/
|
||||
ThreadPool fetcherPool;
|
||||
|
||||
/**
|
||||
* total number of docs read
|
||||
*/
|
||||
int docsRead = 0;
|
||||
|
||||
/**
|
||||
* the storage where the docs are saved to
|
||||
*/
|
||||
DocumentStorage storage;
|
||||
|
||||
/**
|
||||
* the host manager keeps track of host information
|
||||
*/
|
||||
HostManager hostManager;
|
||||
|
||||
|
||||
/**
|
||||
* initializes the fetcher with the given number of threads in the thread
|
||||
* pool and a document storage.
|
||||
*
|
||||
* @param maxThreads the number of threads in the ThreadPool
|
||||
* @param storage the storage where all documents are stored
|
||||
* @param hostManager the host manager
|
||||
*/
|
||||
public Fetcher(int maxThreads, DocumentStorage storage, HostManager hostManager)
|
||||
{
|
||||
this.storage = storage;
|
||||
FetcherTask.setStorage(storage);
|
||||
fetcherPool = new ThreadPool(maxThreads, new FetcherThreadFactory(hostManager));
|
||||
fetcherPool.setQueue(new FetcherTaskQueue());
|
||||
docsRead = 0;
|
||||
this.hostManager = hostManager;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* initializes the pool with default values (5 threads, NullStorage)
|
||||
*/
|
||||
public void init()
|
||||
{
|
||||
fetcherPool.init();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* initializes the pool with a NullStorage and the given number of threads
|
||||
*
|
||||
* @param maxThreads the number of threads in the thread pool
|
||||
*/
|
||||
public void init(int maxThreads)
|
||||
{
|
||||
fetcherPool.init();
|
||||
docsRead = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this function will be called by the message handler each time a URL
|
||||
* passes all filters and gets to the fetcher. From here, it will be
|
||||
* distributed to the FetcherPool, a thread pool which carries out the task,
|
||||
* that is to fetch the document from the web.
|
||||
*
|
||||
* @param message the message, which should actually be a URLMessage
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
URLMessage urlMessage = (URLMessage) message;
|
||||
|
||||
fetcherPool.doTask(new FetcherTask(urlMessage), "");
|
||||
docsRead++;
|
||||
|
||||
// eat the message
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* called by the message handler when this object is added to it
|
||||
*
|
||||
* @param handler the message handler
|
||||
*/
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler)
|
||||
{
|
||||
this.messageHandler = handler;
|
||||
FetcherTask.setMessageHandler(handler);
|
||||
}
|
||||
|
||||
|
||||
MessageHandler messageHandler;
|
||||
|
||||
|
||||
/**
|
||||
* the thread pool observer will be called each time a thread changes its
|
||||
* state, i.e. from IDLE to RUNNING, and each time the number of thread
|
||||
* queue entries change.
|
||||
* this just wraps the thread pool method
|
||||
*
|
||||
* @param t the class that implements the ThreadPoolObserver interface
|
||||
*/
|
||||
public void addThreadPoolObserver(ThreadPoolObserver t)
|
||||
{
|
||||
fetcherPool.addThreadPoolObserver(t);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns the number of tasks queued. Should return 0 if there are any idle
|
||||
* threads. this method just wraps the ThreadPool method
|
||||
*
|
||||
* @return The queueSize value
|
||||
*/
|
||||
public int getQueueSize()
|
||||
{
|
||||
return fetcherPool.getQueueSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the total number of threads.
|
||||
* this method just wraps the ThreadPool method
|
||||
*
|
||||
* @return The workingThreadsCount value
|
||||
*/
|
||||
public int getWorkingThreadsCount()
|
||||
{
|
||||
return fetcherPool.getIdleThreadsCount() + fetcherPool.getBusyThreadsCount();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the number of threads that are currently idle.
|
||||
* this method just wraps the ThreadPool method
|
||||
*
|
||||
* @return The idleThreadsCount value
|
||||
*/
|
||||
public int getIdleThreadsCount()
|
||||
{
|
||||
return fetcherPool.getIdleThreadsCount();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the number of threads that are currently busy.
|
||||
* this method just wraps the ThreadPool method
|
||||
*
|
||||
* @return The busyThreadsCount value
|
||||
*/
|
||||
public int getBusyThreadsCount()
|
||||
{
|
||||
return fetcherPool.getBusyThreadsCount();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the threadPool attribute of the Fetcher object
|
||||
* beware: the original object is returned
|
||||
*
|
||||
* @TODO remove this / make it private if possible
|
||||
* @return The threadPool value
|
||||
*/
|
||||
public ThreadPool getThreadPool()
|
||||
{
|
||||
return fetcherPool;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the total number of docs read
|
||||
*
|
||||
* @return number of docs read
|
||||
*/
|
||||
public int getDocsRead()
|
||||
{
|
||||
return docsRead;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns the (original) task queue
|
||||
* @TODO remove this if possible
|
||||
* @return The taskQueue value
|
||||
*/
|
||||
public FetcherTaskQueue getTaskQueue()
|
||||
{
|
||||
return (FetcherTaskQueue) this.fetcherPool.getTaskQueue();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.awt.event.ActionListener;
|
||||
import java.awt.event.ActionEvent;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import java.awt.event.*;
|
||||
import de.lanlab.larm.gui.*;
|
||||
import de.lanlab.larm.threads.*;
|
||||
|
||||
/**
|
||||
* this was used to connect the GUI to the fetcher
|
||||
* @TODO put this into the GUI package, probably?
|
||||
*/
|
||||
public class FetcherGUIController implements ActionListener
|
||||
{
|
||||
FetcherMain fetcherMain;
|
||||
FetcherSummaryFrame fetcherFrame;
|
||||
|
||||
|
||||
public FetcherGUIController(FetcherMain fetcherMainPrg, FetcherSummaryFrame fetcherFrameWin, String defaultStartURL)
|
||||
{
|
||||
this.fetcherMain = fetcherMainPrg;
|
||||
this.fetcherFrame = fetcherFrameWin;
|
||||
|
||||
fetcherFrame.setRestrictTo(fetcherMain.urlScopeFilter.getRexString());
|
||||
fetcherFrame.setStartURL(defaultStartURL);
|
||||
|
||||
fetcherMain.fetcher.addThreadPoolObserver(
|
||||
new ThreadPoolObserver()
|
||||
{
|
||||
public void threadUpdate(int threadNr, String action, String info)
|
||||
{
|
||||
String status = threadNr + ": " + action + ": " + info;
|
||||
fetcherFrame.setIdleThreadsCount(fetcherMain.fetcher.getIdleThreadsCount());
|
||||
fetcherFrame.setBusyThreadsCount(fetcherMain.fetcher.getBusyThreadsCount());
|
||||
fetcherFrame.setWorkingThreadsCount(fetcherMain.fetcher.getWorkingThreadsCount());
|
||||
}
|
||||
|
||||
public void queueUpdate(String info, String action)
|
||||
{
|
||||
fetcherFrame.setRequestQueueCount(fetcherMain.fetcher.getQueueSize());
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
fetcherMain.monitor.addObserver(new Observer()
|
||||
{
|
||||
public void update(Observable o, Object arg)
|
||||
{
|
||||
// der ThreadMonitor wurde geupdated
|
||||
//fetcherFrame.setStalledThreads(fetcherMain.monitor.getStalledThreadCount(10, 500.0));
|
||||
//fetcherFrame.setBytesPerSecond(fetcherMain.monitor.getAverageReadCount(5));
|
||||
// fetcherFrame.setDocsPerSecond(fetcherMain.monitor.getDocsPerSecond(5));
|
||||
// wir nutzen die Gelegenheit, den aktuellen Speicherbestand auszugeben
|
||||
fetcherFrame.setFreeMem(Runtime.getRuntime().freeMemory());
|
||||
fetcherFrame.setTotalMem(Runtime.getRuntime().totalMemory());
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
/* fetcherMain.reFilter.addObserver(
|
||||
new Observer()
|
||||
{
|
||||
public void update(Observable o, Object arg)
|
||||
{
|
||||
fetcherFrame.setRobotsTxtCount(fetcherMain.reFilter.getExcludingHostsCount());
|
||||
}
|
||||
}
|
||||
);*/
|
||||
|
||||
fetcherMain.messageHandler.addMessageQueueObserver(new Observer()
|
||||
{
|
||||
public void update(Observable o, Object arg)
|
||||
{
|
||||
// a message has been added or deleted
|
||||
|
||||
fetcherFrame.setURLsQueued(fetcherMain.messageHandler.getQueued());
|
||||
}
|
||||
|
||||
}
|
||||
);
|
||||
|
||||
// this observer will be called if a filter has decided to throw a
|
||||
// message away.
|
||||
fetcherMain.messageHandler.addMessageProcessorObserver(new Observer()
|
||||
{
|
||||
public void update(Observable o, Object arg)
|
||||
{
|
||||
if(arg == fetcherMain.urlScopeFilter)
|
||||
{
|
||||
fetcherFrame.setScopeFiltered(fetcherMain.urlScopeFilter.getFiltered());
|
||||
}
|
||||
else if(arg == fetcherMain.urlVisitedFilter)
|
||||
{
|
||||
fetcherFrame.setVisitedFiltered(fetcherMain.urlVisitedFilter.getFiltered());
|
||||
}
|
||||
else if(arg == fetcherMain.reFilter)
|
||||
{
|
||||
fetcherFrame.setURLsCaughtCount(fetcherMain.reFilter.getFiltered());
|
||||
}
|
||||
else // it's the fetcher
|
||||
{
|
||||
fetcherFrame.setDocsRead(fetcherMain.fetcher.getDocsRead());
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
fetcherFrame.addWindowListener(
|
||||
new WindowAdapter()
|
||||
{
|
||||
public void windowClosed(WindowEvent e)
|
||||
{
|
||||
System.out.println("window Closed");
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
);
|
||||
|
||||
fetcherFrame.addStartButtonListener((ActionListener)this);
|
||||
}
|
||||
|
||||
/**
|
||||
* will be called when the start button is pressed
|
||||
*/
|
||||
public void actionPerformed(ActionEvent e)
|
||||
{
|
||||
System.out.println("Füge Start-URL ein");
|
||||
try
|
||||
{
|
||||
// urlVisitedFilter.printAllURLs();
|
||||
// urlVisitedFilter.clearHashtable();
|
||||
fetcherMain.setRexString(fetcherFrame.getRestrictTo());
|
||||
fetcherMain.startMonitor();
|
||||
fetcherMain.putURL(new URL(fetcherFrame.getStartURL()), false);
|
||||
}
|
||||
catch(Exception ex)
|
||||
{
|
||||
System.out.println("actionPerformed: Exception: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,362 @@
|
|||
/*
|
||||
* LARM - LANLab Retrieval Machine
|
||||
*
|
||||
* $history: $
|
||||
*
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import de.lanlab.larm.threads.ThreadPoolObserver;
|
||||
import de.lanlab.larm.threads.ThreadPool;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import de.lanlab.larm.gui.*;
|
||||
import de.lanlab.larm.util.*;
|
||||
import de.lanlab.larm.storage.*;
|
||||
import javax.swing.UIManager;
|
||||
import HTTPClient.*;
|
||||
import org.apache.oro.text.regex.MalformedPatternException;
|
||||
|
||||
|
||||
/**
|
||||
* ENTRY POINT: this class contains the main()-method of the application, does
|
||||
* all the initializing and optionally connects the fetcher with the GUI.
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created December 16, 2000
|
||||
*/
|
||||
public class FetcherMain
|
||||
{
|
||||
|
||||
/**
|
||||
* the main message pipeline
|
||||
*/
|
||||
protected MessageHandler messageHandler;
|
||||
|
||||
/**
|
||||
* this filter records all incoming URLs and filters everything it already
|
||||
* knows
|
||||
*/
|
||||
protected URLVisitedFilter urlVisitedFilter;
|
||||
|
||||
/**
|
||||
* the scope filter filters URLs that fall out of the scope given by the
|
||||
* regular expression
|
||||
*/
|
||||
protected URLScopeFilter urlScopeFilter;
|
||||
|
||||
/*
|
||||
* The DNS resolver was supposed to hold the host addresses for all hosts
|
||||
* this is done by URL itself today
|
||||
*
|
||||
* protected DNSResolver dnsResolver;
|
||||
*/
|
||||
|
||||
/**
|
||||
* the robot exclusion filter looks if a robots.txt is present on a host
|
||||
* before it is first accessed
|
||||
*/
|
||||
protected RobotExclusionFilter reFilter;
|
||||
|
||||
/**
|
||||
* the host manager keeps track of all hosts and is used by the filters.
|
||||
*/
|
||||
protected HostManager hostManager;
|
||||
|
||||
/**
|
||||
* this rather flaky filter just filters out some URLs, i.e. different views
|
||||
* of the Apache DirIndex module. Has to be made
|
||||
* configurable in near future
|
||||
*/
|
||||
protected KnownPathsFilter knownPathsFilter;
|
||||
|
||||
/**
|
||||
* this is the main document fetcher. It contains a thread pool that fetches the
|
||||
* documents and stores them
|
||||
*/
|
||||
protected Fetcher fetcher;
|
||||
|
||||
|
||||
/**
|
||||
* the thread monitor once was only a monitoring tool, but now has become a
|
||||
* vital part of the system that computes statistics and
|
||||
* flushes the log file buffers
|
||||
*/
|
||||
|
||||
protected ThreadMonitor monitor;
|
||||
|
||||
/**
|
||||
* the storage is a central class that puts all fetched documents somewhere.
|
||||
* Several different implementations exist.
|
||||
*/
|
||||
protected DocumentStorage storage;
|
||||
|
||||
/**
|
||||
* the URL length filter filters URLs that are too long, i.e. because of errors
|
||||
* in the implementation of dynamic web sites
|
||||
*/
|
||||
protected URLLengthFilter urlLengthFilter;
|
||||
|
||||
/**
|
||||
* initializes all classes and registers anonymous adapter classes as
|
||||
* listeners for fetcher events.
|
||||
*
|
||||
* @param nrThreads number of fetcher threads to be created
|
||||
*/
|
||||
public FetcherMain(int nrThreads)
|
||||
{
|
||||
// to make things clear, this method is commented a bit better than
|
||||
// the rest of the program...
|
||||
|
||||
// this is the main message queue. handlers are registered with
|
||||
// the queue, and whenever a message is put in it, they are passed to the
|
||||
// filters in a "chain of responibility" manner. Every listener can decide
|
||||
// to throw the message away
|
||||
messageHandler = new MessageHandler();
|
||||
|
||||
// the storage is the class which saves a WebDocument somewhere, no
|
||||
// matter how it does it, whether it's in a file, in a database or
|
||||
// whatever
|
||||
|
||||
|
||||
// example for the (very slow) SQL Server storage:
|
||||
// this.storage = new SQLServerStorage("sun.jdbc.odbc.JdbcOdbcDriver","jdbc:odbc:search","sa","...",nrThreads);
|
||||
|
||||
// the LogStorage used here does extensive logging. It logs all links and
|
||||
// document information.
|
||||
// it also saves all documents to page files. Probably this single storage
|
||||
// could also be replaced by a pipeline; or even incorporated into the
|
||||
// existing message pipeline
|
||||
SimpleLogger log = new SimpleLogger("store", false);
|
||||
this.storage = new LogStorage(log, true, "logs/pagefile");
|
||||
|
||||
// a third example would be the NullStorage, which converts the documents into
|
||||
// heat, which evaporates above the processor
|
||||
// NullStorage();
|
||||
|
||||
// create the filters and add them to the message queue
|
||||
urlScopeFilter = new URLScopeFilter();
|
||||
|
||||
urlVisitedFilter = new URLVisitedFilter(100000, log);
|
||||
|
||||
// dnsResolver = new DNSResolver();
|
||||
hostManager = new HostManager(1000);
|
||||
|
||||
reFilter = new RobotExclusionFilter(hostManager);
|
||||
|
||||
fetcher = new Fetcher(nrThreads, storage, hostManager);
|
||||
|
||||
knownPathsFilter = new KnownPathsFilter();
|
||||
|
||||
urlLengthFilter = new URLLengthFilter(255);
|
||||
|
||||
// prevent message box popups
|
||||
HTTPConnection.setDefaultAllowUserInteraction(false);
|
||||
|
||||
// prevent GZipped files from being decoded
|
||||
HTTPConnection.removeDefaultModule(HTTPClient.ContentEncodingModule.class);
|
||||
|
||||
// initialize the threads
|
||||
fetcher.init();
|
||||
|
||||
// the thread monitor watches the thread pool.
|
||||
|
||||
monitor = new ThreadMonitor(urlLengthFilter,
|
||||
urlVisitedFilter,
|
||||
urlScopeFilter,
|
||||
/*dnsResolver,*/
|
||||
reFilter,
|
||||
messageHandler,
|
||||
fetcher.getThreadPool(),
|
||||
hostManager,
|
||||
5000 // wake up every 5 seconds
|
||||
);
|
||||
|
||||
|
||||
// add all filters to the handler.
|
||||
messageHandler.addListener(urlLengthFilter);
|
||||
messageHandler.addListener(urlScopeFilter);
|
||||
messageHandler.addListener(reFilter);
|
||||
messageHandler.addListener(urlVisitedFilter);
|
||||
messageHandler.addListener(knownPathsFilter);
|
||||
messageHandler.addListener(fetcher);
|
||||
|
||||
/* uncomment this to enable HTTPClient logging
|
||||
try
|
||||
{
|
||||
HTTPClient.Log.setLogWriter(new java.io.FileWriter("logs/HttpClient.log"),false);
|
||||
HTTPClient.Log.setLogging(HTTPClient.Log.ALL, true);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the RexString attribute of the FetcherMain object
|
||||
*
|
||||
* @param restrictTo The new RexString value
|
||||
*/
|
||||
public void setRexString(String restrictTo) throws MalformedPatternException
|
||||
{
|
||||
urlScopeFilter.setRexString(restrictTo);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param url Description of Parameter
|
||||
* @param isFrame Description of the Parameter
|
||||
* @exception java.net.MalformedURLException Description of Exception
|
||||
*/
|
||||
public void putURL(URL url, boolean isFrame)
|
||||
throws java.net.MalformedURLException
|
||||
{
|
||||
try
|
||||
{
|
||||
messageHandler.putMessage(new URLMessage(url, null, isFrame));
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
System.out.println("Exception: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
//System.out.println("URLs geschrieben");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*/
|
||||
public void startMonitor()
|
||||
{
|
||||
monitor.start();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* the GUI is not working at this time. It was used in the very beginning, but
|
||||
* synchronous updates turned out to slow down the program a lot, even if the
|
||||
* GUI would be turned off. Thus, a lot
|
||||
* of Observer messages were removed later. Nonetheless, it's quite cool to see
|
||||
* it working...
|
||||
*
|
||||
* @param f Description of Parameter
|
||||
* @param startURL Description of Parameter
|
||||
*/
|
||||
|
||||
/*
|
||||
public void initGui(FetcherMain f, String startURL)
|
||||
{
|
||||
// if we're on a windows platform, make it look a bit more convenient
|
||||
try
|
||||
{
|
||||
UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
// dann halt nicht...
|
||||
}
|
||||
System.out.println("Init FetcherFrame");
|
||||
|
||||
FetcherSummaryFrame fetcherFrame;
|
||||
fetcherFrame = new FetcherSummaryFrame();
|
||||
fetcherFrame.setSize(640, 450);
|
||||
fetcherFrame.setVisible(true);
|
||||
FetcherGUIController guiController = new FetcherGUIController(f, fetcherFrame, startURL);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* The main program. parsed
|
||||
*
|
||||
* @param args The command line arguments
|
||||
*/
|
||||
public static void main(String[] args)
|
||||
{
|
||||
int nrThreads = 10;
|
||||
|
||||
String startURL = "";
|
||||
String restrictTo = "http://141.84.120.82/ll/cmarschn/.*";
|
||||
boolean gui = false;
|
||||
boolean showInfo = false;
|
||||
System.out.println("LARM - LANLab Retrieval Machine - Fetcher - V 1.00 - (C) LANLab 2000-02");
|
||||
for (int i = 0; i < args.length; i++)
|
||||
{
|
||||
if (args[i].equals("-start"))
|
||||
{
|
||||
i++;
|
||||
startURL = args[i];
|
||||
System.out.println("Start-URL set to: " + startURL);
|
||||
}
|
||||
else if (args[i].equals("-restrictto"))
|
||||
{
|
||||
i++;
|
||||
restrictTo = args[i];
|
||||
System.out.println("Restricting URLs to " + restrictTo);
|
||||
}
|
||||
else if (args[i].equals("-threads"))
|
||||
{
|
||||
i++;
|
||||
nrThreads = Integer.parseInt(args[i]);
|
||||
System.out.println("Threads set to " + nrThreads);
|
||||
}
|
||||
else if (args[i].equals("-gui"))
|
||||
{
|
||||
gui = true;
|
||||
}
|
||||
else if (args[i].equals("-?"))
|
||||
{
|
||||
showInfo = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
System.out.println("Unknown option: " + args[i] + "; use -? to get syntax");
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
//URL.setURLStreamHandlerFactory(new HttpTimeoutFactory(500));
|
||||
// replaced by HTTPClient
|
||||
|
||||
FetcherMain f = new FetcherMain(nrThreads);
|
||||
if (showInfo || (startURL.equals("") && gui == false))
|
||||
{
|
||||
System.out.println("Usage: FetcherMain -start <URL> -restrictto <RegEx> [-threads <nr=10>]"); // [-gui]
|
||||
System.exit(0);
|
||||
}
|
||||
try
|
||||
{
|
||||
f.setRexString(restrictTo);
|
||||
|
||||
if (gui)
|
||||
{
|
||||
// f.initGui(f, startURL);
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
f.startMonitor();
|
||||
f.putURL(new URL(startURL), false);
|
||||
}
|
||||
catch (MalformedURLException e)
|
||||
{
|
||||
System.out.println("Malformed URL");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (MalformedPatternException e)
|
||||
{
|
||||
System.out.println("Wrong RegEx syntax. Must be a valid PERL RE");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,617 @@
|
|||
/*
|
||||
* LARM - LANLab Retrieval Machine
|
||||
*
|
||||
* $history: $
|
||||
*
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.net.URL;
|
||||
import de.lanlab.larm.threads.*;
|
||||
import de.lanlab.larm.util.InputStreamObserver;
|
||||
import de.lanlab.larm.util.ObservableInputStream;
|
||||
import de.lanlab.larm.util.WebDocument;
|
||||
import de.lanlab.larm.util.SimpleCharArrayReader;
|
||||
import de.lanlab.larm.storage.DocumentStorage;
|
||||
import de.lanlab.larm.util.State;
|
||||
import de.lanlab.larm.util.SimpleLogger;
|
||||
import de.lanlab.larm.net.HttpTimeoutFactory;
|
||||
import HTTPClient.*;
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.text.*;
|
||||
import de.lanlab.larm.parser.Tokenizer;
|
||||
import de.lanlab.larm.parser.LinkHandler;
|
||||
|
||||
/**
|
||||
* this class gets the documents from the web. It connects to the server given
|
||||
* by the IP address in the URLMessage, gets the document, and forwards it to
|
||||
* the storage. If it's an HTML document, it will be parsed and all links will
|
||||
* be put into the message handler again.
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
*
|
||||
*/
|
||||
public class FetcherTask
|
||||
implements InterruptableTask, LinkHandler, Serializable
|
||||
{
|
||||
|
||||
protected volatile boolean isInterrupted = false;
|
||||
|
||||
/**
|
||||
* each task has its own number. the class variable counts up if an instance
|
||||
* of a fetcher task is created
|
||||
*/
|
||||
static volatile int taskIdentity = 0;
|
||||
|
||||
/**
|
||||
* the number of this object
|
||||
*/
|
||||
int taskNr;
|
||||
|
||||
/**
|
||||
* the BASE Href (defaults to contextUrl, may be changed with a <base> tag
|
||||
* only valid within a doTask call
|
||||
*/
|
||||
private volatile URL base;
|
||||
|
||||
/**
|
||||
* the URL of the docuzment
|
||||
* only valid within a doTask call
|
||||
*/
|
||||
private volatile URL contextUrl;
|
||||
|
||||
/**
|
||||
* the message handler the URL message comes from; same for all tasks
|
||||
*/
|
||||
protected static volatile MessageHandler messageHandler;
|
||||
|
||||
/**
|
||||
* actual number of bytes read
|
||||
* only valid within a doTask call
|
||||
*/
|
||||
private volatile long bytesRead = 0;
|
||||
|
||||
/**
|
||||
* the storage this task will put the document to
|
||||
*/
|
||||
private static volatile DocumentStorage storage;
|
||||
|
||||
/**
|
||||
* task state IDs. comparisons will be done by their references, so always
|
||||
* use the IDs
|
||||
*/
|
||||
public final static String FT_IDLE = "idle";
|
||||
public final static String FT_STARTED = "started";
|
||||
public final static String FT_OPENCONNECTION = "opening connection";
|
||||
public final static String FT_CONNECTING = "connecting";
|
||||
public final static String FT_GETTING = "getting";
|
||||
public final static String FT_READING = "reading";
|
||||
public final static String FT_SCANNING = "scanning";
|
||||
public final static String FT_STORING = "storing";
|
||||
public final static String FT_READY = "ready";
|
||||
public final static String FT_CLOSING = "closing";
|
||||
public final static String FT_EXCEPTION = "exception";
|
||||
public final static String FT_INTERRUPTED = "interrupted";
|
||||
|
||||
private volatile State taskState = new State(FT_IDLE);
|
||||
|
||||
/**
|
||||
* the URLs found will be stored and only added to the message handler in the very
|
||||
* end, to avoid too many synchronizations
|
||||
*/
|
||||
private volatile LinkedList foundUrls;
|
||||
|
||||
/**
|
||||
* the URL to be get
|
||||
*/
|
||||
protected volatile URLMessage actURLMessage;
|
||||
|
||||
/**
|
||||
* the document title, if present
|
||||
*/
|
||||
private volatile String title;
|
||||
|
||||
/**
|
||||
* headers for HTTPClient
|
||||
*/
|
||||
private static volatile NVPair headers[] = new NVPair[1];
|
||||
|
||||
static
|
||||
{
|
||||
headers[0] = new HTTPClient.NVPair("User-Agent", Constants.CRAWLER_AGENT);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets a copy of the current taskState
|
||||
*
|
||||
* @return The taskState value
|
||||
*/
|
||||
public State getTaskState()
|
||||
{
|
||||
return taskState.cloneState();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the FetcherTask object
|
||||
*
|
||||
* @param urlMessage Description of the Parameter
|
||||
*/
|
||||
public FetcherTask(URLMessage urlMessage)
|
||||
{
|
||||
actURLMessage = urlMessage;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the uRLMessages attribute of the FetcherTask object
|
||||
*
|
||||
* @return The uRLMessages value
|
||||
*/
|
||||
public URLMessage getActURLMessage()
|
||||
{
|
||||
return this.actURLMessage;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the document storage
|
||||
*
|
||||
* @param storage The new storage
|
||||
*/
|
||||
public static void setStorage(DocumentStorage storage)
|
||||
{
|
||||
FetcherTask.storage = storage;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the messageHandler
|
||||
*
|
||||
* @param messageHandler The new messageHandler
|
||||
*/
|
||||
public static void setMessageHandler(MessageHandler messageHandler)
|
||||
{
|
||||
FetcherTask.messageHandler = messageHandler;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the URL as a string
|
||||
*/
|
||||
public String getInfo()
|
||||
{
|
||||
return actURLMessage.getURLString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the uRL attribute of the FetcherTask object
|
||||
*
|
||||
* @return The uRL value
|
||||
*/
|
||||
public URL getURL()
|
||||
{
|
||||
return actURLMessage.getUrl();
|
||||
}
|
||||
|
||||
SimpleLogger log;
|
||||
SimpleLogger errorLog;
|
||||
//private long startTime;
|
||||
|
||||
/**
|
||||
* this will be called by the fetcher thread and will do all the work
|
||||
*
|
||||
* @TODO probably split this up into different processing steps
|
||||
* @param thread Description of the Parameter
|
||||
*/
|
||||
public void run(ServerThread thread)
|
||||
{
|
||||
|
||||
taskState.setState(FT_STARTED); // state information is always set to make the thread monitor happy
|
||||
|
||||
log = thread.getLog();
|
||||
HostManager hm = ((FetcherThread)thread).getHostManager();
|
||||
|
||||
errorLog = thread.getErrorLog();
|
||||
|
||||
// startTime = System.currentTimeMillis();
|
||||
int threadNr = ((FetcherThread) thread).getThreadNumber();
|
||||
|
||||
log.log("start");
|
||||
base = contextUrl = actURLMessage.getUrl();
|
||||
String urlString = actURLMessage.getURLString();
|
||||
String host = contextUrl.getHost();
|
||||
int hostPos = urlString.indexOf(host);
|
||||
int hostLen = host.length();
|
||||
|
||||
HostInfo hi = hm.getHostInfo(host); // get and create
|
||||
|
||||
if(!hi.isHealthy())
|
||||
{
|
||||
// we make this check as late as possible to get the most current information
|
||||
log.log("Bad Host: " + contextUrl + "; returning");
|
||||
System.out.println("[" + threadNr + "] bad host: " + this.actURLMessage.getUrl());
|
||||
|
||||
taskState.setState(FT_READY, null);
|
||||
return;
|
||||
}
|
||||
|
||||
foundUrls = new java.util.LinkedList();
|
||||
|
||||
HTTPConnection conn = null;
|
||||
|
||||
title = "*untitled*";
|
||||
|
||||
int size = 1;
|
||||
|
||||
InputStream in = null;
|
||||
bytesRead = 0;
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
|
||||
URL ipURL = contextUrl;
|
||||
|
||||
taskState.setState(FT_OPENCONNECTION, urlString);
|
||||
|
||||
log.log("connecting to " + ipURL.getHost());
|
||||
taskState.setState(FT_CONNECTING, ipURL);
|
||||
conn = new HTTPConnection(host);
|
||||
|
||||
conn.setDefaultTimeout(75000);
|
||||
// 75 s
|
||||
conn.setDefaultAllowUserInteraction(false);
|
||||
|
||||
taskState.setState(this.FT_GETTING, ipURL);
|
||||
log.log("getting");
|
||||
|
||||
HTTPResponse response = conn.Get(ipURL.getFile(), "", headers);
|
||||
response.setReadIncrement(2720);
|
||||
int statusCode = response.getStatusCode();
|
||||
byte[] fullBuffer = null;
|
||||
String contentType = "";
|
||||
int contentLength = 0;
|
||||
|
||||
if (statusCode != 404 && statusCode != 403)
|
||||
{
|
||||
// read up to Constants.FETCHERTASK_MAXFILESIZE bytes into a byte array
|
||||
taskState.setState(FT_READING, ipURL);
|
||||
contentType = response.getHeader("Content-Type");
|
||||
String length = response.getHeader("Content-Length");
|
||||
if (length != null)
|
||||
{
|
||||
contentLength = Integer.parseInt(length);
|
||||
}
|
||||
log.log("reading");
|
||||
|
||||
fullBuffer = response.getData(Constants.FETCHERTASK_MAXFILESIZE); // max. 2 MB
|
||||
if (fullBuffer != null)
|
||||
{
|
||||
contentLength = fullBuffer.length;
|
||||
this.bytesRead += contentLength;
|
||||
}
|
||||
}
|
||||
//conn.stop(); // close connection. todo: Do some caching...
|
||||
|
||||
|
||||
/*
|
||||
* conn.disconnect();
|
||||
*/
|
||||
if (isInterrupted)
|
||||
{
|
||||
System.out.println("FetcherTask: interrupted while reading. File truncated");
|
||||
log.log("interrupted while reading. File truncated");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fullBuffer != null)
|
||||
{
|
||||
taskState.setState(FT_SCANNING, ipURL);
|
||||
|
||||
log.log("read file (" + fullBuffer.length + " bytes). Now scanning.");
|
||||
|
||||
if (contentType.startsWith("text/html"))
|
||||
{
|
||||
|
||||
// ouch. I haven't found a better solution yet. just slower ones.
|
||||
char[] fullCharBuffer = new char[contentLength];
|
||||
new InputStreamReader(new ByteArrayInputStream(fullBuffer)).read(fullCharBuffer);
|
||||
Tokenizer tok = new Tokenizer();
|
||||
tok.setLinkHandler(this);
|
||||
tok.parse(new SimpleCharArrayReader(fullCharBuffer));
|
||||
}
|
||||
else
|
||||
{
|
||||
// System.out.println("Discovered unknown content type: " + contentType + " at " + urlString);
|
||||
errorLog.log("[" + threadNr + "] Discovered unknown content type at " + urlString + ": " + contentType + ". just storing");
|
||||
}
|
||||
log.log("scanned");
|
||||
}
|
||||
taskState.setState(FT_STORING, ipURL);
|
||||
messageHandler.putMessages(foundUrls);
|
||||
storage.store(new WebDocument(contextUrl, contentType, fullBuffer, statusCode, actURLMessage.getReferer(), contentLength, title));
|
||||
log.log("stored");
|
||||
}
|
||||
}
|
||||
catch (InterruptedIOException e)
|
||||
{
|
||||
// timeout while reading this file
|
||||
System.out.println("[" + threadNr + "] FetcherTask: Timeout while opening: " + this.actURLMessage.getUrl());
|
||||
errorLog.log("error: Timeout: " + this.actURLMessage.getUrl());
|
||||
hi.badRequest();
|
||||
}
|
||||
catch (FileNotFoundException e)
|
||||
{
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] FetcherTask: File not Found: " + this.actURLMessage.getUrl());
|
||||
errorLog.log("error: File not Found: " + this.actURLMessage.getUrl());
|
||||
}
|
||||
catch(NoRouteToHostException e)
|
||||
{
|
||||
// router is down or firewall prevents to connect
|
||||
hi.setReachable(false);
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
|
||||
// e.printStackTrace();
|
||||
errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
}
|
||||
catch(ConnectException e)
|
||||
{
|
||||
// no server is listening at this port
|
||||
hi.setReachable(false);
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
|
||||
// e.printStackTrace();
|
||||
errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
}
|
||||
catch (SocketException e)
|
||||
{
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "]: SocketException:" + e.getMessage());
|
||||
errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
|
||||
}
|
||||
catch(UnknownHostException e)
|
||||
{
|
||||
// IP Address not to be determined
|
||||
hi.setReachable(false);
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
|
||||
// e.printStackTrace();
|
||||
errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
|
||||
// e.printStackTrace();
|
||||
errorLog.log("error: IOException: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
|
||||
}
|
||||
catch (OutOfMemoryError ome)
|
||||
{
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] Task " + this.taskNr + " OutOfMemory after " + size + " bytes");
|
||||
errorLog.log("error: OutOfMemory after " + size + " bytes");
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
taskState.setState(FT_EXCEPTION);
|
||||
System.out.println("[" + threadNr + "] " + e.getMessage() + " type: " + e.getClass().getName());
|
||||
e.printStackTrace();
|
||||
System.out.println("[" + threadNr + "]: stopping");
|
||||
errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage() + "; stopping");
|
||||
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
||||
if (isInterrupted)
|
||||
{
|
||||
System.out.println("Task was interrupted");
|
||||
log.log("interrupted");
|
||||
taskState.setState(FT_INTERRUPTED);
|
||||
}
|
||||
}
|
||||
if (isInterrupted)
|
||||
{
|
||||
System.out.println("Task: closed everything");
|
||||
}
|
||||
/*
|
||||
* }
|
||||
*/
|
||||
taskState.setState(FT_CLOSING);
|
||||
conn.stop();
|
||||
|
||||
taskState.setState(FT_READY);
|
||||
foundUrls = null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the interrupt method. not in use since the change to HTTPClient
|
||||
* @TODO decide if we need this anymore
|
||||
*/
|
||||
public void interrupt()
|
||||
{
|
||||
System.out.println("FetcherTask: interrupted!");
|
||||
this.isInterrupted = true;
|
||||
/*
|
||||
* try
|
||||
* {
|
||||
* if (conn != null)
|
||||
* {
|
||||
* ((HttpURLConnection) conn).disconnect();
|
||||
* System.out.println("FetcherTask: disconnected URL Connection");
|
||||
* conn = null;
|
||||
* }
|
||||
* if (in != null)
|
||||
* {
|
||||
* in.close();
|
||||
* / possibly hangs at close() .> KeepAliveStream.close() -> MeteredStream.skip()
|
||||
* System.out.println("FetcherTask: Closed Input Stream");
|
||||
* in = null;
|
||||
* }
|
||||
* }
|
||||
* catch (IOException e)
|
||||
* {
|
||||
* System.out.println("IOException while interrupting: ");
|
||||
* e.printStackTrace();
|
||||
* }
|
||||
* System.out.println("FetcherTask: Set all IOs to null");
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this is called whenever a links was found in the current document,
|
||||
* Don't create too many objects here, this will be called
|
||||
* millions of times
|
||||
*
|
||||
* @param link Description of the Parameter
|
||||
*/
|
||||
public void handleLink(String link, boolean isFrame)
|
||||
{
|
||||
try
|
||||
{
|
||||
// cut out Ref part
|
||||
|
||||
|
||||
int refPart = link.indexOf("#");
|
||||
//System.out.println(link);
|
||||
if (refPart == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
else if (refPart > 0)
|
||||
{
|
||||
link = link.substring(0, refPart);
|
||||
}
|
||||
|
||||
URL url = null;
|
||||
if (link.startsWith("http:"))
|
||||
{
|
||||
// distinguish between absolute and relative URLs
|
||||
|
||||
url = new URL(link);
|
||||
}
|
||||
else
|
||||
{
|
||||
// relative url
|
||||
url = new URL(base, link);
|
||||
}
|
||||
|
||||
URLMessage urlMessage = new URLMessage(url, contextUrl, isFrame);
|
||||
|
||||
String urlString = urlMessage.getURLString();
|
||||
|
||||
foundUrls.add(urlMessage);
|
||||
//messageHandler.putMessage(new actURLMessage(url)); // put them in the very end
|
||||
}
|
||||
catch (MalformedURLException e)
|
||||
{
|
||||
//log.log("malformed url: base:" + base + " -+- link:" + link);
|
||||
log.log("warning: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
log.log("warning: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
// e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* called when a BASE tag was found
|
||||
*
|
||||
* @param base the HREF attribute
|
||||
*/
|
||||
public void handleBase(String base)
|
||||
{
|
||||
try
|
||||
{
|
||||
this.base = new URL(base);
|
||||
}
|
||||
catch (MalformedURLException e)
|
||||
{
|
||||
log.log("warning: " + e.getClass().getName() + ": " + e.getMessage() + " while converting '" + base + "' to URL in document " + contextUrl);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* called when a TITLE tag was found
|
||||
*
|
||||
* @param title the string between <title> and >/title>
|
||||
*/
|
||||
public void handleTitle(String title)
|
||||
{
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* public void notifyOpened(ObservableInputStream in, long timeElapsed)
|
||||
* {
|
||||
* }
|
||||
* public void notifyClosed(ObservableInputStream in, long timeElapsed)
|
||||
* {
|
||||
* }
|
||||
* public void notifyRead(ObservableInputStream in, long timeElapsed, int nrRead, int totalRead)
|
||||
* {
|
||||
* if(totalRead / ((double)timeElapsed) < 0.3) // weniger als 300 bytes/s
|
||||
* {
|
||||
* System.out.println("Task " + this.taskNr + " stalled at pos " + totalRead + " with " + totalRead / (timeElapsed / 1000.0) + " bytes/s");
|
||||
* }
|
||||
* }
|
||||
* public void notifyFinished(ObservableInputStream in, long timeElapsed, int totalRead)
|
||||
* {
|
||||
* /System.out.println("Task " + this.taskNr + " finished (" + totalRead + " bytes in " + timeElapsed + " ms with " + totalRead / (timeElapsed / 1000.0) + " bytes/s)");
|
||||
* }
|
||||
*/
|
||||
public long getBytesRead()
|
||||
{
|
||||
return bytesRead;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* do nothing if a warning occurs within the html parser
|
||||
*
|
||||
* @param message Description of the Parameter
|
||||
* @param systemID Description of the Parameter
|
||||
* @param line Description of the Parameter
|
||||
* @param column Description of the Parameter
|
||||
* @exception java.lang.Exception Description of the Exception
|
||||
*/
|
||||
public void warning(String message, String systemID, int line, int column)
|
||||
throws java.lang.Exception { }
|
||||
|
||||
|
||||
/**
|
||||
* do nothing if a fatal error occurs...
|
||||
*
|
||||
* @param message Description of the Parameter
|
||||
* @param systemID Description of the Parameter
|
||||
* @param line Description of the Parameter
|
||||
* @param column Description of the Parameter
|
||||
* @exception Exception Description of the Exception
|
||||
*/
|
||||
public void fatal(String message, String systemID, int line, int column)
|
||||
throws Exception
|
||||
{
|
||||
System.out.println("fatal error: " + message);
|
||||
log.log("fatal error: " + message);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,198 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import de.lanlab.larm.threads.*;
|
||||
import de.lanlab.larm.util.*;
|
||||
import java.util.*;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* this special kind of task queue reorders the incoming tasks so that every subsequent
|
||||
* task is for a different host.
|
||||
* This is done by a "HashedCircularLinkedList" which allows random adding while
|
||||
* a differnet thread iterates through the collection circularly.
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 23. November 2001
|
||||
*/
|
||||
public class FetcherTaskQueue extends TaskQueue
|
||||
{
|
||||
/**
|
||||
* this is a hash that contains an entry for each server, which by itself is a
|
||||
* CachingQueue that stores all tasks for this server
|
||||
* @TODO probably link this to the host info structure
|
||||
*/
|
||||
HashedCircularLinkedList servers = new HashedCircularLinkedList(100, 0.75f);
|
||||
int size = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the FetcherTaskQueue object. Does nothing
|
||||
*/
|
||||
public FetcherTaskQueue() { }
|
||||
|
||||
|
||||
/**
|
||||
* true if no task is queued
|
||||
*
|
||||
* @return The empty value
|
||||
*/
|
||||
public boolean isEmpty()
|
||||
{
|
||||
return (size == 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* clear the queue. not synchronized.
|
||||
*/
|
||||
public void clear()
|
||||
{
|
||||
servers.clear();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* puts task into Queue.
|
||||
* Warning: not synchronized
|
||||
*
|
||||
* @param t the task to be added. must be a FetcherTask
|
||||
*/
|
||||
public void insert(Object t)
|
||||
{
|
||||
// assert (t != null && t.getURL() != null)
|
||||
|
||||
URLMessage um = ((FetcherTask)t).getActURLMessage();
|
||||
URL act = um.getUrl();
|
||||
String host = act.getHost();
|
||||
Queue q;
|
||||
q = ((Queue) servers.get(host));
|
||||
if (q == null)
|
||||
{
|
||||
// add a new host to the queue
|
||||
//String host2 = host.replace(':', '_').replace('/', '_').replace('\\', '_');
|
||||
// make it file system ready
|
||||
q = new CachingQueue(host, 100);
|
||||
servers.put(host, q);
|
||||
}
|
||||
// assert((q != null) && (q instanceof FetcherTaskQueue));
|
||||
q.insert(t);
|
||||
size++;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the size of the queue. make sure that insert() and size() calls are synchronized
|
||||
* if the exact number matters.
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public int size()
|
||||
{
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* the number of different hosts queued at the moment
|
||||
*/
|
||||
public int getNumHosts()
|
||||
{
|
||||
return servers.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the next task. warning: not synchronized
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public Object remove()
|
||||
{
|
||||
FetcherTask t = null;
|
||||
if (servers.size() > 0)
|
||||
{
|
||||
Queue q = (Queue) servers.next();
|
||||
// assert(q != null && q.size() > 0)
|
||||
t = (FetcherTask)q.remove();
|
||||
if (q.size() == 0)
|
||||
{
|
||||
servers.removeCurrent();
|
||||
q = null;
|
||||
}
|
||||
size--;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* tests
|
||||
*
|
||||
* @param args Description of the Parameter
|
||||
*/
|
||||
public static void main(String args[])
|
||||
{
|
||||
FetcherTaskQueue q = new FetcherTaskQueue();
|
||||
System.out.println("Test 1. put in 4 yahoos and 3 lmus. pull out LMU/Yahoo/LMU/Yahoo/LMU/Yahoo/Yahoo");
|
||||
try
|
||||
{
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false)));
|
||||
}
|
||||
catch (Throwable t)
|
||||
{
|
||||
t.printStackTrace();
|
||||
}
|
||||
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
|
||||
System.out.println("Test 2. new Queue");
|
||||
q = new FetcherTaskQueue();
|
||||
System.out.println("size [0]:");
|
||||
System.out.println(q.size());
|
||||
try
|
||||
{
|
||||
System.out.println("put 3 lmus.");
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false)));
|
||||
System.out.print("pull out 1st element [lmu/1]: ");
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("size now [2]: " + q.size());
|
||||
System.out.print("pull out 2nd element [lmu/2]: ");
|
||||
System.out.println(((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("size now [1]: " + q.size());
|
||||
System.out.println("put in 3 yahoos");
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false)));
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false)));
|
||||
System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("Size now [3]: " + q.size());
|
||||
System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("Size now [2]: " + q.size());
|
||||
System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("Size now [1]: " + q.size());
|
||||
System.out.println("put in another Yahoo");
|
||||
q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false)));
|
||||
System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("Size now [1]: " + q.size());
|
||||
System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
|
||||
System.out.println("Size now [0]: " + q.size());
|
||||
}
|
||||
catch (Throwable t)
|
||||
{
|
||||
t.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import de.lanlab.larm.threads.ServerThread;
|
||||
import de.lanlab.larm.util.State;
|
||||
|
||||
/**
|
||||
* a server thread for the thread pool that records the number
|
||||
* of bytes read and the number of tasks run
|
||||
* mainly for statistical purposes and to keep most of the information a task needs
|
||||
* static
|
||||
*/
|
||||
public class FetcherThread extends ServerThread
|
||||
{
|
||||
|
||||
long totalBytesRead = 0;
|
||||
long totalTasksRun = 0;
|
||||
|
||||
HostManager hostManager;
|
||||
|
||||
byte[] documentBuffer = new byte[Constants.FETCHERTASK_READSIZE];
|
||||
|
||||
public HostManager getHostManager()
|
||||
{
|
||||
return hostManager;
|
||||
}
|
||||
|
||||
public FetcherThread(int threadNumber, ThreadGroup threadGroup, HostManager hostManager)
|
||||
{
|
||||
super(threadNumber,"FetcherThread " + threadNumber, threadGroup);
|
||||
this.hostManager = hostManager;
|
||||
}
|
||||
|
||||
public static String STATE_IDLE = "Idle";
|
||||
|
||||
State idleState = new State(STATE_IDLE); // only set if task is finished
|
||||
|
||||
protected void taskReady()
|
||||
{
|
||||
totalBytesRead += ((FetcherTask)task).getBytesRead();
|
||||
totalTasksRun++;
|
||||
super.taskReady();
|
||||
idleState.setState(STATE_IDLE);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public long getTotalBytesRead()
|
||||
{
|
||||
if(task != null)
|
||||
{
|
||||
return totalBytesRead + ((FetcherTask)task).getBytesRead();
|
||||
}
|
||||
else
|
||||
{
|
||||
return totalBytesRead;
|
||||
}
|
||||
}
|
||||
|
||||
public long getTotalTasksRun()
|
||||
{
|
||||
return totalTasksRun;
|
||||
}
|
||||
|
||||
public byte[] getDocumentBuffer()
|
||||
{
|
||||
return documentBuffer;
|
||||
}
|
||||
|
||||
public State getTaskState()
|
||||
{
|
||||
if(task != null)
|
||||
{
|
||||
// task could be null here
|
||||
return ((FetcherTask)task).getTaskState();
|
||||
}
|
||||
else
|
||||
{
|
||||
return idleState.cloneState();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
import de.lanlab.larm.threads.*;
|
||||
|
||||
/**
|
||||
* this factory simply creates fetcher threads. It's passed
|
||||
* to the ThreadPool because the pool is creating the threads on its own
|
||||
*/
|
||||
public class FetcherThreadFactory extends ThreadFactory
|
||||
{
|
||||
|
||||
//static int count = 0;
|
||||
|
||||
ThreadGroup threadGroup = new ThreadGroup("FetcherThreads");
|
||||
|
||||
HostManager hostManager;
|
||||
|
||||
public FetcherThreadFactory(HostManager hostManager)
|
||||
{
|
||||
this.hostManager = hostManager;
|
||||
}
|
||||
|
||||
|
||||
public ServerThread createServerThread(int count)
|
||||
{
|
||||
ServerThread newThread = new FetcherThread(count, threadGroup, hostManager);
|
||||
newThread.setPriority(4);
|
||||
return newThread;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
|
||||
/**
 * Base class of all filter classes. It only keeps the count of filtered items.
 */
public abstract class Filter
{
    /**
     * Number of items filtered; incremented directly by the inheriting classes.
     * Declared volatile so that a thread reading the count through
     * getFiltered() sees the latest value written by the filtering thread.
     * Note that volatile does not make "filtered++" atomic, so increments
     * should still come from a single thread.
     */
    protected volatile int filtered = 0;

    /**
     * @return the number of items filtered so far
     */
    public int getFiltered()
    {
        return filtered;
    }
}
|
|
@ -0,0 +1,56 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
import java.util.zip.*;
|
||||
import java.net.*;
|
||||
|
||||
/**
 * Small stand-alone experiment: gzips a sample URL and prints the original
 * and the compressed length to standard out.
 *
 * @author Administrator
 * @created 28. Januar 2002
 */
public class GZipTest
{

    /**
     * Constructor for the GZipTest object
     */
    public GZipTest() { }


    /**
     * Gzips the ISO-8859-1 encoding of the given text.
     *
     * @param text the text to compress
     * @return the complete gzip stream (header, deflated data and trailer)
     * @exception IOException if encoding or compressing fails
     */
    public static byte[] compress(String text)
        throws IOException
    {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream(text.length());
        OutputStreamWriter writer = new OutputStreamWriter(new GZIPOutputStream(bytes), "ISO-8859-1");
        try
        {
            writer.write(text);
        }
        finally
        {
            // closing the writer flushes it and closes the gzip stream, which
            // writes the trailer; no separate finish() call is needed (the
            // original called finish() on the already closed stream)
            writer.close();
        }
        return bytes.toByteArray();
    }


    /**
     * The main program for the GZipTest class
     *
     * @param args The command line arguments (ignored)
     */
    public static void main(String[] args)
    {
        try
        {
            String url = "http://speechdat.phonetik.uni-muenchen.de/speechdt//speechDB/FIXED1SL/BLOCK00/SES0006/A10006O5.aif";

            byte[] array = compress(url);
            System.out.println("URL: " + url + " \n Length: " + url.length() + "\n zipped: " + array.length
                    );
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
|
|
@ -0,0 +1,121 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.net.*;
|
||||
import de.lanlab.larm.util.CachingQueue;
|
||||
import de.lanlab.larm.util.Queue;
|
||||
|
||||
/**
|
||||
* contains information about a host. If a host doesn't respond too often, it's
|
||||
* excluded from the crawl.
|
||||
* This class is used by the HostManager
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 16. Februar 2002
|
||||
*/
|
||||
public class HostInfo
|
||||
{
|
||||
static final String[] emptyKeepOutDirectories = new String[0];
|
||||
|
||||
int id;
|
||||
int healthyCount = 5; // five strikes, and you're out
|
||||
boolean isReachable = true;
|
||||
boolean robotTxtChecked = false;
|
||||
String[] disallows; // robot exclusion
|
||||
boolean isLoadingRobotsTxt = false;
|
||||
Queue queuedRequests = null; // robot exclusion
|
||||
String hostName;
|
||||
|
||||
public HostInfo(String hostName, int id)
|
||||
{
|
||||
this.id = id;
|
||||
this.disallows = HostInfo.emptyKeepOutDirectories;
|
||||
this.hostName = hostName;
|
||||
}
|
||||
|
||||
/**
|
||||
* is this host reachable and responding?
|
||||
*/
|
||||
public boolean isHealthy()
|
||||
{
|
||||
return (healthyCount > 0) && isReachable;
|
||||
}
|
||||
|
||||
/**
|
||||
* signals that the host returned with a bad request of whatever type
|
||||
*/
|
||||
public void badRequest()
|
||||
{
|
||||
healthyCount--;
|
||||
}
|
||||
|
||||
public void setReachable(boolean reachable)
|
||||
{
|
||||
isReachable = reachable;
|
||||
}
|
||||
|
||||
public boolean isReachable()
|
||||
{
|
||||
return isReachable;
|
||||
}
|
||||
|
||||
public boolean isRobotTxtChecked()
|
||||
{
|
||||
return robotTxtChecked;
|
||||
}
|
||||
|
||||
/**
|
||||
* must be synchronized externally
|
||||
*/
|
||||
public boolean isLoadingRobotsTxt()
|
||||
{
|
||||
return this.isLoadingRobotsTxt;
|
||||
}
|
||||
|
||||
public void setLoadingRobotsTxt(boolean isLoading)
|
||||
{
|
||||
this.isLoadingRobotsTxt = isLoading;
|
||||
if(isLoading)
|
||||
{
|
||||
this.queuedRequests = new CachingQueue("HostInfo_" + id + "_QueuedRequests", 100);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void setRobotsChecked(boolean isChecked, String[] disallows)
|
||||
{
|
||||
this.robotTxtChecked = isChecked;
|
||||
if(disallows != null)
|
||||
{
|
||||
this.disallows = disallows;
|
||||
}
|
||||
else
|
||||
{
|
||||
this.disallows = emptyKeepOutDirectories;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public synchronized boolean isAllowed(String path)
|
||||
{
|
||||
// assume keepOutDirectories is pretty short
|
||||
// assert disallows != null
|
||||
int length = disallows.length;
|
||||
for(int i=0; i<length; i++)
|
||||
{
|
||||
if(path.startsWith(disallows[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 16. Februar 2002
|
||||
*/
|
||||
public class HostManager
|
||||
{
|
||||
HashMap hosts;
|
||||
static int hostCount = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HostInfo object
|
||||
*
|
||||
* @param initialSize Description of the Parameter
|
||||
*/
|
||||
public HostManager(int initialCapacity)
|
||||
{
|
||||
hosts = new HashMap(initialCapacity);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param hostName Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public HostInfo put(String hostName)
|
||||
{
|
||||
if (!hosts.containsKey(hostName))
|
||||
{
|
||||
int hostID;
|
||||
synchronized (this)
|
||||
{
|
||||
hostID = hostCount++;
|
||||
}
|
||||
HostInfo hi = new HostInfo(hostName,hostID);
|
||||
hosts.put(hostName, hi);
|
||||
return hi;
|
||||
}
|
||||
return (HostInfo)hosts.get(hostName);
|
||||
/*else
|
||||
{
|
||||
hostID = hosts.get()
|
||||
}
|
||||
// assert hostID != -1;
|
||||
return hostID;*/
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the hostID attribute of the HostInfo object
|
||||
*
|
||||
* @param hostName Description of the Parameter
|
||||
* @return The hostID value
|
||||
*/
|
||||
public HostInfo getHostInfo(String hostName)
|
||||
{
|
||||
HostInfo hi = (HostInfo)hosts.get(hostName);
|
||||
if(hi == null)
|
||||
{
|
||||
return put(hostName);
|
||||
}
|
||||
return hi;
|
||||
}
|
||||
|
||||
public int getSize()
|
||||
{
|
||||
return hosts.size();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @created 17. Februar 2002
|
||||
* @version 1.0
|
||||
*/
|
||||
import java.net.*;
|
||||
|
||||
/**
|
||||
* this can be considered a hack
|
||||
* @TODO implement this as a fast way to filter out different URL endings or beginnings
|
||||
*/
|
||||
public class KnownPathsFilter extends Filter implements MessageListener
|
||||
{
|
||||
|
||||
MessageHandler messageHandler;
|
||||
|
||||
String[] pathsToFilter =
|
||||
{
|
||||
"/robots.txt"
|
||||
};
|
||||
|
||||
String[] hostFilter =
|
||||
{
|
||||
"www.nm.informatik.uni-muenchen.de",
|
||||
"cgi.cip.informatik.uni-muenchen.de"
|
||||
};
|
||||
|
||||
String[] filesToFilter =
|
||||
{
|
||||
// exclude Apache directory files
|
||||
"/?D=D",
|
||||
"/?S=D",
|
||||
"/?M=D",
|
||||
"/?N=D",
|
||||
"/?D=A",
|
||||
"/?S=A",
|
||||
"/?M=A",
|
||||
"/?N=A",
|
||||
};
|
||||
|
||||
int pathLength;
|
||||
int fileLength;
|
||||
int hostLength;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the KnownPathsFilter object
|
||||
*/
|
||||
public KnownPathsFilter()
|
||||
{
|
||||
pathLength = pathsToFilter.length;
|
||||
fileLength = filesToFilter.length;
|
||||
hostLength = hostFilter.length;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param message Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
URL url = ((URLMessage)message).getUrl();
|
||||
String file = url.getFile();
|
||||
String host = url.getHost();
|
||||
int i;
|
||||
for (i = 0; i < pathLength; i++)
|
||||
{
|
||||
if (file.startsWith(pathsToFilter[i]))
|
||||
{
|
||||
filtered++;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < fileLength; i++)
|
||||
{
|
||||
if (file.endsWith(filesToFilter[i]))
|
||||
{
|
||||
filtered++;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
for (i = 0; i<hostLength; i++)
|
||||
{
|
||||
if(hostFilter[i].equals(host))
|
||||
{
|
||||
filtered++;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* will be called as soon as the Listener is added to the Message Queue
|
||||
*
|
||||
* @param handler the Message Handler
|
||||
*/
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler)
|
||||
{
|
||||
this.messageHandler = messageHandler;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Marker interface for a simple message; it declares no methods.
 */
public interface Message {
    // intentionally empty
}
|
|
@ -0,0 +1,248 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.util.*;
|
||||
import de.lanlab.larm.util.SimpleObservable;
|
||||
import de.lanlab.larm.util.CachingQueue;
|
||||
import de.lanlab.larm.util.UnderflowException;
|
||||
|
||||
/**
|
||||
* this is a message handler that runs in its own thread.
|
||||
* Messages can be put via <code>putMessage</code> or <code>putMessages</code>
|
||||
* (use the latter whenever possible).<br>
|
||||
* The messages are passed to the filters in the order in which the filters where
|
||||
* added to the handler.<br>
|
||||
* They can consume the message by returning null. Otherwise, they return a Message
|
||||
* object, usually the one they got.<br>
|
||||
* The filters will run synchronously within the message handler thread<br>
|
||||
* This implements a chain of responsibility-style message handling
|
||||
*/
|
||||
public class MessageHandler implements Runnable
|
||||
{
|
||||
|
||||
/**
|
||||
* the queue where messages are put in.
|
||||
* Holds max. 2 x 5000 = 10.000 messages in RAM
|
||||
*/
|
||||
private CachingQueue messageQueue = new CachingQueue("fetcherURLMessageQueue", 5000);
|
||||
|
||||
/**
|
||||
* list of Observers
|
||||
*/
|
||||
private LinkedList listeners = new LinkedList();
|
||||
|
||||
/**
|
||||
* true as long as the thread is running
|
||||
*/
|
||||
private boolean running = true;
|
||||
|
||||
/**
|
||||
* the message handler thread
|
||||
*/
|
||||
private Thread t;
|
||||
|
||||
/**
|
||||
* flag for thread communication
|
||||
*/
|
||||
boolean messagesWaiting = false;
|
||||
|
||||
/**
|
||||
* true when a message is processed by the filters
|
||||
*/
|
||||
boolean workingOnMessage = false;
|
||||
|
||||
Object queueMonitor = new Object();
|
||||
|
||||
SimpleObservable messageQueueObservable = new SimpleObservable();
|
||||
SimpleObservable messageProcessorObservable = new SimpleObservable();
|
||||
|
||||
public boolean isWorkingOnMessage()
|
||||
{
|
||||
return workingOnMessage;
|
||||
}
|
||||
|
||||
/**
|
||||
* messageHandler-Thread erzeugen und starten
|
||||
*/
|
||||
MessageHandler()
|
||||
{
|
||||
t = new Thread(this,"MessageHandler Thread");
|
||||
t.setPriority(5); // higher priority to prevent starving when a lot of fetcher threads are used
|
||||
t.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* join messageHandler-Thread
|
||||
*/
|
||||
public void finalize()
|
||||
{
|
||||
if(t != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
t.join();
|
||||
t = null;
|
||||
}
|
||||
catch(InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* registers a filter to the message handler
|
||||
* @param MessageListener - the Listener
|
||||
*/
|
||||
public void addListener(MessageListener m)
|
||||
{
|
||||
m.notifyAddedToMessageHandler(this);
|
||||
listeners.addLast(m);
|
||||
}
|
||||
|
||||
/**
|
||||
* registers a MessageQueueObserver
|
||||
* It will be notified whenever a message is put into the Queue (Parameter is Int(1)) oder
|
||||
* removed (Parameter is Int(-1))
|
||||
* @param o the Observer
|
||||
*/
|
||||
public void addMessageQueueObserver(Observer o)
|
||||
{
|
||||
messageQueueObservable.addObserver(o);
|
||||
}
|
||||
|
||||
/**
|
||||
* adds a message processorObeserver
|
||||
* It will be notified when a message is consumed. In this case the parameter
|
||||
* is the filter that consumed the message
|
||||
* @param o the Observer
|
||||
*/
|
||||
public void addMessageProcessorObserver(Observer o)
|
||||
{
|
||||
messageProcessorObservable.addObserver(o);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* einen Event in die Schlange schreiben
|
||||
*/
|
||||
public void putMessage(Message msg)
|
||||
{
|
||||
messageQueue.insert(msg);
|
||||
messageQueueObservable.setChanged();
|
||||
messageQueueObservable.notifyObservers(new Integer(1));
|
||||
synchronized(queueMonitor)
|
||||
{
|
||||
messagesWaiting = true;
|
||||
queueMonitor.notify();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add a collection of events to the message queue
|
||||
*/
|
||||
public void putMessages(Collection msgs)
|
||||
{
|
||||
for(Iterator i = msgs.iterator(); i.hasNext();)
|
||||
{
|
||||
Message msg = (Message)i.next();
|
||||
messageQueue.insert(msg);
|
||||
}
|
||||
messageQueueObservable.setChanged();
|
||||
messageQueueObservable.notifyObservers(new Integer(1));
|
||||
synchronized(queueMonitor)
|
||||
{
|
||||
messagesWaiting = true;
|
||||
queueMonitor.notify();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* the main messageHandler-Thread.
|
||||
*/
|
||||
public void run()
|
||||
{
|
||||
while(running)
|
||||
{
|
||||
//System.out.println("MessageHandler-Thread started");
|
||||
|
||||
synchronized(queueMonitor)
|
||||
{
|
||||
// wait for new messages
|
||||
workingOnMessage=false;
|
||||
try
|
||||
{
|
||||
queueMonitor.wait();
|
||||
}
|
||||
catch(InterruptedException e)
|
||||
{
|
||||
System.out.println("MessageHandler: Caught InterruptedException");
|
||||
}
|
||||
workingOnMessage=true;
|
||||
}
|
||||
//messagesWaiting = false;
|
||||
Message m;
|
||||
try
|
||||
{
|
||||
while(messagesWaiting)
|
||||
{
|
||||
synchronized(this.queueMonitor)
|
||||
{
|
||||
m = (Message)messageQueue.remove();
|
||||
if(messageQueue.size() == 0)
|
||||
{
|
||||
messagesWaiting = false;
|
||||
}
|
||||
|
||||
}
|
||||
//System.out.println("MessageHandler:run: Entferne erstes Element");
|
||||
|
||||
messageQueueObservable.setChanged();
|
||||
messageQueueObservable.notifyObservers(new Integer(-1)); // Message processed
|
||||
|
||||
// und verteilen. Die Listener erhalten die Message in ihrer
|
||||
// Eintragungsreihenfolge und können die Message auch verändern
|
||||
|
||||
Iterator i = listeners.iterator();
|
||||
while(i.hasNext())
|
||||
{
|
||||
//System.out.println("Verteile...");
|
||||
try
|
||||
{
|
||||
MessageListener listener = (MessageListener)i.next();
|
||||
m = (Message)listener.handleRequest(m);
|
||||
if (m == null)
|
||||
{
|
||||
messageProcessorObservable.setChanged();
|
||||
messageProcessorObservable.notifyObservers(listener);
|
||||
break; // Handler hat die Message konsumiert
|
||||
}
|
||||
}
|
||||
catch(ClassCastException e)
|
||||
{
|
||||
System.out.println("MessageHandler:run: ClassCastException(2): " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (ClassCastException e)
|
||||
{
|
||||
System.out.println("MessageHandler:run: ClassCastException: " + e.getMessage());
|
||||
}
|
||||
catch (UnderflowException e)
|
||||
{
|
||||
messagesWaiting = false;
|
||||
// System.out.println("MessageHandler: messagesWaiting = true although nothing queued!");
|
||||
// @FIXME: here is still a multi threading issue. I don't get it why this happens.
|
||||
// does someone want to draw a petri net of this?
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
System.out.println("MessageHandler: " + e.getClass() + " " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public int getQueued()
|
||||
{
|
||||
return messageQueue.size();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* LARM - LANLab Retrieval Machine
|
||||
*
|
||||
* $history: $
|
||||
*
|
||||
*
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
|
||||
* A Message Listener works on messages in a message queue Usually it returns
|
||||
* the message back into the queue. But it can also change the message or create
|
||||
* a new object. If it returns null, the message handler stops
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 24. November 2001
|
||||
*/
|
||||
public interface MessageListener
|
||||
{
|
||||
/**
|
||||
* the handler
|
||||
*
|
||||
* @param message the message to be handled
|
||||
* @return Message usually the original message
|
||||
* null: the message was consumed
|
||||
*/
|
||||
public Message handleRequest(Message message);
|
||||
|
||||
|
||||
/**
|
||||
* will be called as soon as the Listener is added to the Message Queue
|
||||
*
|
||||
* @param handler the Message Handler
|
||||
*/
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler);
|
||||
}
|
|
@ -0,0 +1,429 @@
|
|||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
*
|
||||
* Description: <p>
|
||||
*
|
||||
* Copyright: Copyright (c)<p>
|
||||
*
|
||||
* Company: <p>
|
||||
*
|
||||
*
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import de.lanlab.larm.util.SimpleObservable;
|
||||
import de.lanlab.larm.util.State;
|
||||
import java.util.*;
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
import org.apache.oro.text.perl.Perl5Util;
|
||||
import de.lanlab.larm.util.*;
|
||||
import de.lanlab.larm.threads.*;
|
||||
import HTTPClient.*;
|
||||
|
||||
/**
|
||||
* this factory simply creates fetcher threads. It's gonna be passed to the
|
||||
* ThreadPool because the pool is creating the threads on its own
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 17. Februar 2002
|
||||
*/
|
||||
class REFThreadFactory extends ThreadFactory
|
||||
{
|
||||
|
||||
ThreadGroup threadGroup = new ThreadGroup("RobotExclusionFilter");
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param count Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public ServerThread createServerThread(int count)
|
||||
{
|
||||
ServerThread newThread = new ServerThread(count, "REF-" + count, threadGroup);
|
||||
newThread.setPriority(4);
|
||||
return newThread;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* the RE filter obeys the robot exclusion standard. If a new host name is supposed
|
||||
* to be accessed, it first loads a "/robots.txt" on the given server and records the
|
||||
* disallows stated in that file.
|
||||
* The REFilter has a thread pool on its own to prevent the message handler from being
|
||||
* clogged up if the server doesn't respond. Incoming messages are queued while the
|
||||
* robots.txt is loaded.
|
||||
* The information is stored in HostInfo records of the host manager class
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 17. Februar 2002
|
||||
*/
|
||||
public class RobotExclusionFilter extends Filter implements MessageListener
|
||||
{
|
||||
|
||||
|
||||
protected HostManager hostManager;
|
||||
|
||||
protected SimpleLogger log;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the RobotExclusionFilter object
|
||||
*
|
||||
* @param hm Description of the Parameter
|
||||
*/
|
||||
public RobotExclusionFilter(HostManager hm)
|
||||
{
|
||||
log = new SimpleLogger("RobotExclusionFilter");
|
||||
hostManager = hm;
|
||||
rePool = new ThreadPool(2, new REFThreadFactory());
|
||||
rePool.init();
|
||||
log.setFlushAtOnce(true);
|
||||
log.log("refilter: initialized");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* called by the message handler
|
||||
*/
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler)
|
||||
{
|
||||
this.messageHandler = handler;
|
||||
}
|
||||
|
||||
|
||||
MessageHandler messageHandler = null;
|
||||
ThreadPool rePool;
|
||||
|
||||
|
||||
/**
|
||||
* method that handles each URL request<p>
|
||||
*
|
||||
* This method will get the robots.txt file the first time a server is
|
||||
* requested. See the description above.
|
||||
*
|
||||
* @param message
|
||||
* the (URL)Message
|
||||
* @return
|
||||
* the original message or NULL if this host had a disallow on that URL
|
||||
* @link{http://info.webcrawler.com/mak/projects/robots/norobots.html})
|
||||
*/
|
||||
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
//log.logThreadSafe("handleRequest: got message: " + message);
|
||||
try
|
||||
{
|
||||
// assert message instanceof URLMessage;
|
||||
URLMessage urlMsg = ((URLMessage) message);
|
||||
URL url = urlMsg.getUrl();
|
||||
//assert url != null;
|
||||
HostInfo h = hostManager.getHostInfo(url.getHost());
|
||||
if (!h.isRobotTxtChecked() && !h.isLoadingRobotsTxt())
|
||||
{
|
||||
log.logThreadSafe("handleRequest: starting to get robots.txt");
|
||||
// probably this results in Race Conditions here
|
||||
|
||||
rePool.doTask(new RobotExclusionTask(h), new Integer(h.id));
|
||||
h.setLoadingRobotsTxt(true);
|
||||
}
|
||||
|
||||
synchronized (h)
|
||||
{
|
||||
// isLoading...() and queuedRequest.insert() must be atomic
|
||||
if (h.isLoadingRobotsTxt())
|
||||
{
|
||||
|
||||
//log.logThreadSafe("handleRequest: other thread is loading");
|
||||
// assert h.queuedRequests != null
|
||||
h.queuedRequests.insert(message);
|
||||
// not thread safe
|
||||
log.logThreadSafe("handleRequest: queued file " + url);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
//log.logThreadSafe("handleRequest: no thread is loading; robots.txt loaded");
|
||||
//log.logThreadSafe("handleRequest: checking if allowed");
|
||||
String path = url.getPath();
|
||||
if (path == null || path.equals(""))
|
||||
{
|
||||
path = "/";
|
||||
}
|
||||
|
||||
if (h.isAllowed(path))
|
||||
{
|
||||
// log.logThreadSafe("handleRequest: file " + urlMsg.getURLString() + " ok");
|
||||
return message;
|
||||
}
|
||||
log.logThreadSafe("handleRequest: file " + urlMsg.getURLString() + " filtered");
|
||||
this.filtered++;
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
private static volatile NVPair headers[] = new NVPair[1];
|
||||
|
||||
static
|
||||
{
|
||||
headers[0] = new HTTPClient.NVPair("User-Agent", Constants.CRAWLER_AGENT);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the task that actually loads and parses the robots.txt files
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 17. Februar 2002
|
||||
*/
|
||||
class RobotExclusionTask implements InterruptableTask
|
||||
{
|
||||
HostInfo hostInfo;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the RobotExclusionTask object
|
||||
*
|
||||
* @param hostInfo Description of the Parameter
|
||||
*/
|
||||
public RobotExclusionTask(HostInfo hostInfo)
|
||||
{
|
||||
this.hostInfo = hostInfo;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* dummy
|
||||
*
|
||||
* @return The info value
|
||||
*/
|
||||
public String getInfo()
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* not used
|
||||
*/
|
||||
public void interrupt() { }
|
||||
|
||||
|
||||
/**
|
||||
* gets a robots.txt file and adds the information to the hostInfo
|
||||
* structure
|
||||
*
|
||||
* @param thread the server thread (passed by the thread pool)
|
||||
*/
|
||||
public void run(ServerThread thread)
|
||||
{
|
||||
// assert hostInfo != null;
|
||||
String threadName = Thread.currentThread().getName();
|
||||
|
||||
log.logThreadSafe("task " + threadName + ": starting to load " + hostInfo.hostName);
|
||||
//hostInfo.setLoadingRobotsTxt(true);
|
||||
String[] disallows = null;
|
||||
boolean errorOccured = false;
|
||||
try
|
||||
{
|
||||
log.logThreadSafe("task " + threadName + ": getting connection");
|
||||
HTTPConnection conn = new HTTPConnection(hostInfo.hostName);
|
||||
conn.setTimeout(30000);
|
||||
// wait at most 20 secs
|
||||
|
||||
HTTPResponse res = conn.Get("/robots.txt", (String) null, headers);
|
||||
log.logThreadSafe("task " + threadName + ": got connection.");
|
||||
if (res.getStatusCode() != 200)
|
||||
{
|
||||
errorOccured = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
log.logThreadSafe("task " + threadName + ": reading");
|
||||
byte[] file = res.getData(40000);
|
||||
// max. 40 kb
|
||||
log.logThreadSafe("task " + threadName + ": reading done. parsing");
|
||||
disallows = parse(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(file))));
|
||||
log.logThreadSafe("task " + threadName + ": parsing done. found " + disallows.length + " disallows");
|
||||
// assert disallows != null
|
||||
// HostInfo hostInfo = hostManager.getHostInfo(this.hostName);
|
||||
// assert hostInfo != null
|
||||
log.logThreadSafe("task " + threadName + ": setting disallows");
|
||||
}
|
||||
}
|
||||
catch (java.net.UnknownHostException e)
|
||||
{
|
||||
hostInfo.setReachable(false);
|
||||
log.logThreadSafe("task " + threadName + ": unknown host. setting to unreachable");
|
||||
errorOccured = true;
|
||||
}
|
||||
catch (java.net.NoRouteToHostException e)
|
||||
{
|
||||
hostInfo.setReachable(false);
|
||||
log.logThreadSafe("task " + threadName + ": no route to. setting to unreachable");
|
||||
errorOccured = true;
|
||||
}
|
||||
catch (java.net.ConnectException e)
|
||||
{
|
||||
hostInfo.setReachable(false);
|
||||
log.logThreadSafe("task " + threadName + ": connect exception. setting to unreachable");
|
||||
errorOccured = true;
|
||||
}
|
||||
catch (java.io.InterruptedIOException e)
|
||||
{
|
||||
// time out. fatal in this case
|
||||
hostInfo.setReachable(false);
|
||||
log.logThreadSafe("task " + threadName + ": time out. setting to unreachable");
|
||||
errorOccured = true;
|
||||
}
|
||||
|
||||
catch (Throwable e)
|
||||
{
|
||||
errorOccured = true;
|
||||
log.log("task " + threadName + ": unknown exception: " + e.getClass().getName() + ": " + e.getMessage() + ". continuing");
|
||||
log.log(e);
|
||||
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (errorOccured)
|
||||
{
|
||||
synchronized (hostInfo)
|
||||
{
|
||||
hostInfo.setRobotsChecked(true, null);
|
||||
// crawl everything
|
||||
hostInfo.setLoadingRobotsTxt(false);
|
||||
log.logThreadSafe("task " + threadName + ": error occured");
|
||||
log.logThreadSafe("task " + threadName + ": now put " + hostInfo.queuedRequests.size() + " queueud requests back");
|
||||
hostInfo.isLoadingRobotsTxt = false;
|
||||
putBackURLs();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
synchronized (hostInfo)
|
||||
{
|
||||
hostInfo.setRobotsChecked(true, disallows);
|
||||
log.logThreadSafe("task " + threadName + ": done");
|
||||
log.logThreadSafe("task " + threadName + ": now put " + hostInfo.queuedRequests.size() + " queueud requests back");
|
||||
hostInfo.isLoadingRobotsTxt = false;
|
||||
putBackURLs();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* put back queued URLs
|
||||
*/
|
||||
private void putBackURLs()
|
||||
{
|
||||
while (hostInfo.queuedRequests.size() > 0)
|
||||
{
|
||||
messageHandler.putMessage((Message) hostInfo.queuedRequests.remove());
|
||||
}
|
||||
log.logThreadSafe("task " + Thread.currentThread().getName() + ": finished");
|
||||
hostInfo.queuedRequests = null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this parses the robots.txt file. It was taken from the PERL implementation
|
||||
* Since this is only rarely called, it's not optimized for speed
|
||||
*
|
||||
* @param r the robots.txt file
|
||||
* @return the disallows
|
||||
* @exception IOException any IOException
|
||||
*/
|
||||
public String[] parse(BufferedReader r)
|
||||
throws IOException
|
||||
{
|
||||
// taken from Perl
|
||||
Perl5Util p = new Perl5Util();
|
||||
String line;
|
||||
boolean isMe = false;
|
||||
boolean isAnon = false;
|
||||
ArrayList disallowed = new ArrayList();
|
||||
String ua = null;
|
||||
|
||||
while ((line = r.readLine()) != null)
|
||||
{
|
||||
if (p.match("/^#.*/", line))
|
||||
{
|
||||
// a comment
|
||||
continue;
|
||||
}
|
||||
line = p.substitute("s/\\s*\\#.* //", line);
|
||||
if (p.match("/^\\s*$/", line))
|
||||
{
|
||||
if (isMe)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (p.match("/^User-Agent:\\s*(.*)/i", line))
|
||||
{
|
||||
ua = p.group(1);
|
||||
ua = p.substitute("s/\\s+$//", ua);
|
||||
if (isMe)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (ua.equals("*"))
|
||||
{
|
||||
isAnon = true;
|
||||
}
|
||||
else if (Constants.CRAWLER_AGENT.startsWith(ua))
|
||||
{
|
||||
isMe = true;
|
||||
}
|
||||
}
|
||||
else if (p.match("/^Disallow:\\s*(.*)/i", line))
|
||||
{
|
||||
if (ua == null)
|
||||
{
|
||||
isAnon = true;
|
||||
// warn...
|
||||
}
|
||||
String disallow = p.group(1);
|
||||
if (disallow != null && disallow.length() > 0)
|
||||
{
|
||||
// assume we have a relative path
|
||||
;
|
||||
}
|
||||
else
|
||||
{
|
||||
disallow = "/";
|
||||
}
|
||||
if (isMe || isAnon)
|
||||
{
|
||||
disallowed.add(disallow);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// warn: unexpected line
|
||||
}
|
||||
}
|
||||
String[] disalloweds = new String[disallowed.size()];
|
||||
disallowed.toArray(disalloweds);
|
||||
return disalloweds;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,545 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.fetcher;
|
||||
|
||||
|
||||
import de.lanlab.larm.threads.*;
|
||||
import java.util.*;
|
||||
import java.text.*;
|
||||
import java.io.*;
|
||||
import de.lanlab.larm.util.State;
|
||||
import de.lanlab.larm.util.SimpleLoggerManager;
|
||||
|
||||
/**
|
||||
* this monitor takes a sample of every thread every x milliseconds,
|
||||
* and logs a lot of information. In the near past it has evolved into the multi
|
||||
* purpose monitoring and maintenance facility.
|
||||
* At the moment it prints status information
|
||||
* to log files and to the console
|
||||
* @TODO this can be done better. Probably with an agent where different services
|
||||
* can be registered to be called every X seconds
|
||||
*/
|
||||
public class ThreadMonitor extends Observable implements Runnable
|
||||
{
|
||||
/**
|
||||
* a reference to the thread pool that's gonna be observed
|
||||
*/
|
||||
private ThreadPool threadPool;
|
||||
|
||||
|
||||
class Sample
|
||||
{
|
||||
long bytesRead;
|
||||
long docsRead;
|
||||
long time;
|
||||
public Sample(long bytesRead, long docsRead, long time)
|
||||
{
|
||||
this.bytesRead = bytesRead;
|
||||
this.docsRead = docsRead;
|
||||
this.time = time;
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList bytesReadPerPeriod;
|
||||
|
||||
/**
|
||||
* Zeit zwischen den Messungen
|
||||
*/
|
||||
int sampleDelta;
|
||||
|
||||
/**
|
||||
* the thread where this monitor runs in. Will run with high priority
|
||||
*/
|
||||
Thread thread;
|
||||
|
||||
|
||||
URLVisitedFilter urlVisitedFilter;
|
||||
URLScopeFilter urlScopeFilter;
|
||||
// DNSResolver dnsResolver;
|
||||
RobotExclusionFilter reFilter;
|
||||
MessageHandler messageHandler;
|
||||
URLLengthFilter urlLengthFilter;
|
||||
HostManager hostManager;
|
||||
|
||||
public final static double KBYTE = 1024;
|
||||
public final static double MBYTE = 1024 * KBYTE;
|
||||
public final static double ONEGBYTE = 1024 * MBYTE;
|
||||
|
||||
|
||||
String formatBytes(long lbytes)
|
||||
{
|
||||
double bytes = (double)lbytes;
|
||||
if(bytes >= ONEGBYTE)
|
||||
{
|
||||
return fractionFormat.format((bytes/ONEGBYTE)) + " GB";
|
||||
}
|
||||
else if(bytes >= MBYTE)
|
||||
{
|
||||
return fractionFormat.format(bytes/MBYTE) + " MB";
|
||||
}
|
||||
else if(bytes >= KBYTE)
|
||||
{
|
||||
return fractionFormat.format(bytes/KBYTE) + " KB";
|
||||
}
|
||||
else
|
||||
{
|
||||
return fractionFormat.format(bytes) + " Bytes";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* a logfile where status information is posted
|
||||
* FIXME: put that in a seperate class (double code in FetcherTask)
|
||||
*/
|
||||
PrintWriter logWriter;
|
||||
private SimpleDateFormat formatter
|
||||
= new SimpleDateFormat ("hh:mm:ss:SSSS");
|
||||
private DecimalFormat fractionFormat = new DecimalFormat("0.00");
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
private void log(String text)
|
||||
{
|
||||
try
|
||||
{
|
||||
logWriter.println(formatter.format(new Date()) + ";" + (System.currentTimeMillis()-startTime) + ";" + text);
|
||||
logWriter.flush();
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
System.out.println("Couldn't write to logfile");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* construct the monitor gets a reference to all monitored filters
|
||||
* @param threadPool the pool to be observed
|
||||
* @param sampleDelta time in ms between samples
|
||||
*/
|
||||
public ThreadMonitor(URLLengthFilter urlLengthFilter,
|
||||
URLVisitedFilter urlVisitedFilter,
|
||||
URLScopeFilter urlScopeFilter,
|
||||
/*DNSResolver dnsResolver,*/
|
||||
RobotExclusionFilter reFilter,
|
||||
MessageHandler messageHandler,
|
||||
ThreadPool threadPool,
|
||||
HostManager hostManager,
|
||||
int sampleDelta)
|
||||
{
|
||||
this.urlLengthFilter = urlLengthFilter;
|
||||
this.urlVisitedFilter = urlVisitedFilter;
|
||||
this.urlScopeFilter = urlScopeFilter;
|
||||
/* this.dnsResolver = dnsResolver;*/
|
||||
this.hostManager = hostManager;
|
||||
this.reFilter = reFilter;
|
||||
this.messageHandler = messageHandler;
|
||||
|
||||
this.threadPool = threadPool;
|
||||
bytesReadPerPeriod = new ArrayList();
|
||||
this.sampleDelta = sampleDelta;
|
||||
this.thread = new Thread(this, "ThreadMonitor");
|
||||
this.thread.setPriority(7);
|
||||
|
||||
try
|
||||
{
|
||||
File logDir = new File("logs");
|
||||
logDir.mkdir();
|
||||
logWriter = new PrintWriter(new BufferedWriter(new FileWriter("logs/ThreadMonitor.log")));
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
System.out.println("Couldn't create logfile (ThreadMonitor)");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* java.lang.Threads run method. To be invoked via start()
|
||||
* the monitor's main thread takes the samples every sampleDelta ms
|
||||
* Since Java is not real time, it remembers
|
||||
*/
|
||||
public void run()
|
||||
{
|
||||
int nothingReadCount = 0;
|
||||
long lastPeriodBytesRead = -1;
|
||||
long monitorRunCount = 0;
|
||||
long startTime = System.currentTimeMillis();
|
||||
log("time;overallBytesRead;overallTasksRun;urlsQueued;urlsWaiting;isWorkingOnMessage;urlsScopeFiltered;urlsVisitedFiltered;urlsREFiltered;memUsed;memFree;totalMem;nrHosts;visitedSize;visitedStringSize;urlLengthFiltered");
|
||||
while(true)
|
||||
{
|
||||
try
|
||||
{
|
||||
try
|
||||
{
|
||||
thread.sleep(sampleDelta);
|
||||
}
|
||||
catch(InterruptedException e)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Iterator threadIterator = threadPool.getThreadIterator();
|
||||
int i=0;
|
||||
StringBuffer bytesReadString = new StringBuffer(200);
|
||||
StringBuffer rawBytesReadString = new StringBuffer(200);
|
||||
StringBuffer tasksRunString = new StringBuffer(200);
|
||||
long overallBytesRead = 0;
|
||||
long overallTasksRun = 0;
|
||||
long now = System.currentTimeMillis();
|
||||
boolean finished = false;
|
||||
//System.out.print("\f");
|
||||
/*while(!finished)
|
||||
{
|
||||
boolean restart = false;*/
|
||||
boolean allThreadsIdle = true;
|
||||
StringBuffer sb = new StringBuffer(500);
|
||||
|
||||
while(threadIterator.hasNext())
|
||||
{
|
||||
FetcherThread thread = (FetcherThread)threadIterator.next();
|
||||
long totalBytesRead = thread.getTotalBytesRead();
|
||||
overallBytesRead += totalBytesRead;
|
||||
bytesReadString.append(formatBytes(totalBytesRead)).append( "; ");
|
||||
rawBytesReadString.append(totalBytesRead).append("; ");
|
||||
long tasksRun = thread.getTotalTasksRun();
|
||||
overallTasksRun += tasksRun;
|
||||
tasksRunString.append(tasksRun).append("; ");
|
||||
|
||||
// check task status
|
||||
State state = thread.getTaskState();
|
||||
//StringBuffer sb = new StringBuffer(200);
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "[" + thread.getThreadNumber() + "] " + state.getState() + " for " +
|
||||
(now - state.getStateSince() ) + " ms " +
|
||||
(state.getInfo() != null ? "(" + state.getInfo() +")" : "")
|
||||
);
|
||||
if(!(state.getState().equals(FetcherThread.STATE_IDLE)))
|
||||
{
|
||||
//if(allThreadsIdle) System.out.println("(not all threads are idle, '"+state.getState()+"' != '"+FetcherThread.STATE_IDLE+"')");
|
||||
allThreadsIdle = false;
|
||||
}
|
||||
if (((state.equals(FetcherTask.FT_CONNECTING)) || (state.equals(FetcherTask.FT_GETTING)) || (state.equals(FetcherTask.FT_READING)) || (state.equals(FetcherTask.FT_CLOSING)))
|
||||
&& ((now - state.getStateSince()) > 160000))
|
||||
{
|
||||
System.out.println("****Restarting Thread " + thread.getThreadNumber());
|
||||
threadPool.restartThread(thread.getThreadNumber());
|
||||
break; // Iterator is invalid
|
||||
}
|
||||
|
||||
}
|
||||
/*if(restart)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
finished = true;
|
||||
}*/
|
||||
/*
|
||||
if(overallBytesRead == lastPeriodBytesRead)
|
||||
{
|
||||
*
|
||||
disabled kickout feature - cm
|
||||
|
||||
nothingReadCount ++;
|
||||
System.out.println("Anomaly: nothing read during the last period(s). " + (20-nothingReadCount+1) + " periods to exit");
|
||||
if(nothingReadCount > 20) // nothing happens anymore
|
||||
{
|
||||
log("Ending");
|
||||
System.out.println("End at " + new Date().toString());
|
||||
// print some information
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
nothingReadCount = 0;
|
||||
}*/
|
||||
|
||||
lastPeriodBytesRead = overallBytesRead;
|
||||
|
||||
//State reState = new State("hhh"); //reFilter.getState();
|
||||
sb.setLength(0);
|
||||
//System.out.println(sb + "Robot-Excl.Filter State: " + reState.getState() + " since " + (now-reState.getStateSince()) + " ms " + (reState.getInfo() != null ? " at " + reState.getInfo() : ""));
|
||||
|
||||
addSample(new Sample(overallBytesRead, overallTasksRun, System.currentTimeMillis()));
|
||||
int nrHosts = ((FetcherTaskQueue)threadPool.getTaskQueue()).getNumHosts();
|
||||
int visitedSize = urlVisitedFilter.size();
|
||||
int visitedStringSize = urlVisitedFilter.getStringSize();
|
||||
|
||||
double bytesPerSecond = getAverageBytesRead();
|
||||
double docsPerSecond = getAverageDocsRead();
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "\nBytes total: " + formatBytes(overallBytesRead) + " (" + formatBytes((long)(((double)overallBytesRead)*1000/(System.currentTimeMillis()-startTime))) + " per second since start)" +
|
||||
"\nBytes per Second: " + formatBytes((int)bytesPerSecond) + " (50 secs)" +
|
||||
"\nDocs per Second: " + docsPerSecond +
|
||||
"\nBytes per Thread: " + bytesReadString);
|
||||
double docsPerSecondTotal = ((double)overallTasksRun)*1000/(System.currentTimeMillis()-startTime);
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "Docs read total: " + overallTasksRun + " Docs/s: " + fractionFormat.format(docsPerSecondTotal) +
|
||||
"\nDocs p.thread: " + tasksRunString);
|
||||
|
||||
long memUsed = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
|
||||
long memFree = Runtime.getRuntime().freeMemory();
|
||||
long totalMem = Runtime.getRuntime().totalMemory();
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "Mem used: " + formatBytes(memUsed) + ", free: " + formatBytes(memFree) + " total VM: " + totalMem);
|
||||
int urlsQueued = messageHandler.getQueued();
|
||||
int urlsWaiting = threadPool.getQueueSize();
|
||||
boolean isWorkingOnMessage = messageHandler.isWorkingOnMessage();
|
||||
int urlsScopeFiltered = urlScopeFilter.getFiltered();
|
||||
int urlsVisitedFiltered = urlVisitedFilter.getFiltered();
|
||||
int urlsREFiltered = reFilter.getFiltered();
|
||||
int urlLengthFiltered = urlLengthFilter.getFiltered();
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "URLs queued: " + urlsQueued + " waiting: " + urlsWaiting);
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "Message is being processed: " + isWorkingOnMessage);
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "URLs Filtered: length: " + urlLengthFiltered + " scope: " + urlsScopeFiltered + " visited: " + urlsVisitedFiltered + " robot.txt: " + urlsREFiltered);
|
||||
sb.setLength(0);
|
||||
System.out.println(sb + "Visited size: " + visitedSize + "; String Size in VisitedFilter: " + visitedStringSize + "; Number of Hosts: " + nrHosts + "; hosts in Host Manager: " + hostManager.getSize() + "\n");
|
||||
sb.setLength(0);
|
||||
log(sb + "" + now + ";" + overallBytesRead + ";" + overallTasksRun + ";" + urlsQueued + ";" + urlsWaiting + ";" + isWorkingOnMessage + ";" + urlsScopeFiltered + ";" + urlsVisitedFiltered + ";" + urlsREFiltered + ";" + memUsed + ";" + memFree + ";" + totalMem + ";" + nrHosts + ";" + visitedSize + ";" + visitedStringSize + ";" + rawBytesReadString + ";" + urlLengthFiltered);
|
||||
|
||||
|
||||
if(!isWorkingOnMessage && (urlsQueued == 0) && (urlsWaiting == 0) && allThreadsIdle)
|
||||
{
|
||||
nothingReadCount++;
|
||||
if(nothingReadCount > 3)
|
||||
{
|
||||
SimpleLoggerManager.getInstance().flush();
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
nothingReadCount = 0;
|
||||
}
|
||||
|
||||
this.setChanged();
|
||||
this.notifyObservers();
|
||||
|
||||
// Request Garbage Collection
|
||||
monitorRunCount++;
|
||||
|
||||
if(monitorRunCount % 6 == 0)
|
||||
{
|
||||
System.runFinalization();
|
||||
}
|
||||
|
||||
if(monitorRunCount % 2 == 0)
|
||||
{
|
||||
System.gc();
|
||||
SimpleLoggerManager.getInstance().flush();
|
||||
}
|
||||
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
System.out.println("Monitor: Exception: " + e.getClass().getName());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* start the thread
|
||||
*/
|
||||
public void start()
|
||||
{
|
||||
this.clear();
|
||||
thread.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* interrupt the monitor thread
|
||||
*/
|
||||
public void interrupt()
|
||||
{
|
||||
thread.interrupt();
|
||||
}
|
||||
|
||||
|
||||
public synchronized void clear()
|
||||
{
|
||||
//sampleTimeStamps.clear();
|
||||
/*for(int i=0; i < timeSamples.length; i++)
|
||||
{
|
||||
timeSamples[i].clear();
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/* public synchronized double getAverageReadCount(int maxPeriods)
|
||||
{
|
||||
int lastPeriod = bytesReadPerPeriod.size()-1;
|
||||
int periods = Math.min(lastPeriod, maxPeriods);
|
||||
if(periods < 2)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
|
||||
long bytesLastPeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod)).bytesRead;
|
||||
long bytesBeforePeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod - periods)).bytesRead;
|
||||
long bytesRead = bytesLastPeriod - bytesBeforePeriod;
|
||||
|
||||
long endTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue();
|
||||
long startTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1 - periods)).longValue();
|
||||
long duration = endTime - startTime;
|
||||
System.out.println("bytes read: " + bytesRead + " duration in s: " + duration/1000.0 + " = " + ((double)bytesRead) / (duration/1000.0) + " per second");
|
||||
|
||||
return ((double)bytesRead) / (duration/1000.0);
|
||||
}
|
||||
*/
|
||||
|
||||
/*public synchronized double getDocsPerSecond(int maxPeriods)
|
||||
{
|
||||
int lastPeriod = bytesReadPerPeriod.size()-1;
|
||||
int periods = Math.min(lastPeriod, maxPeriods);
|
||||
if(periods < 2)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
|
||||
long docsLastPeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod)).docsRead;
|
||||
long docsBeforePeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod - periods)).docsRead;
|
||||
long docsRead = docsLastPeriod - docsBeforePeriod;
|
||||
|
||||
long endTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue();
|
||||
long startTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size() - periods)).longValue();
|
||||
long duration = endTime - startTime;
|
||||
System.out.println("docs read: " + docsRead + " duration in s: " + duration/1000.0 + " = " + ((double)docsRead) / (duration/1000.0) + " per second");
|
||||
|
||||
return ((double)docsRead) / (duration/1000.0);
|
||||
}*/
|
||||
|
||||
/**
|
||||
* retrieves the number of threads whose byteCount is below the threshold
|
||||
* @param maxPeriods the number of periods to look back
|
||||
* @param threshold the number of bytes per second that acts as the threshold for a stalled thread
|
||||
*/
|
||||
/*public synchronized int getStalledThreadCount(int maxPeriods, double threshold)
|
||||
{
|
||||
int periods = Math.min(sampleTimeStamps.size(), maxPeriods);
|
||||
int stalledThreads = 0;
|
||||
int j=0, i=0;
|
||||
if(periods > 1)
|
||||
{
|
||||
for(j=0; j<timeSamples.length; j++)
|
||||
{
|
||||
long threadByteCount = 0;
|
||||
ArrayList actArrayList = timeSamples[j];
|
||||
double bytesPerSecond = 0;
|
||||
try
|
||||
{
|
||||
for(i=0; i<periods; i++)
|
||||
{
|
||||
|
||||
Sample actSample = (Sample)(actArrayList.get(i));
|
||||
threadByteCount += actSample.bytesRead;
|
||||
}
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
System.out.println("getAverageReadCount: " + e.getClass().getName() + ": " + e.getMessage() + "(" + i + ";" + j + ")");
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
bytesPerSecond = ((double)threadByteCount) /
|
||||
((double)((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue()
|
||||
- ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-periods)).longValue()) * 1000.0;
|
||||
if(bytesPerSecond < threshold)
|
||||
{
|
||||
stalledThreads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stalledThreads;
|
||||
}
|
||||
*/
|
||||
|
||||
int samples=0;
|
||||
|
||||
public void addSample(Sample s)
|
||||
{
|
||||
if(samples < 10)
|
||||
{
|
||||
bytesReadPerPeriod.add(s);
|
||||
samples++;
|
||||
}
|
||||
else
|
||||
{
|
||||
bytesReadPerPeriod.set(samples % 10, s);
|
||||
}
|
||||
}
|
||||
|
||||
public double getAverageBytesRead()
|
||||
{
|
||||
Iterator i = bytesReadPerPeriod.iterator();
|
||||
Sample oldest = null;
|
||||
Sample newest = null;
|
||||
while(i.hasNext())
|
||||
{
|
||||
|
||||
Sample s = (Sample)i.next();
|
||||
if(oldest == null)
|
||||
{
|
||||
oldest = newest = s;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(s.time < oldest.time)
|
||||
{
|
||||
oldest = s;
|
||||
}
|
||||
else if(s.time > newest.time)
|
||||
{
|
||||
newest = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ((newest.bytesRead - oldest.bytesRead)/((newest.time - oldest.time)/1000.0));
|
||||
}
|
||||
public double getAverageDocsRead()
|
||||
{
|
||||
Iterator i = bytesReadPerPeriod.iterator();
|
||||
Sample oldest = null;
|
||||
Sample newest = null;
|
||||
while(i.hasNext())
|
||||
{
|
||||
|
||||
Sample s = (Sample)i.next();
|
||||
if(oldest == null)
|
||||
{
|
||||
oldest = newest = s;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(s.time < oldest.time)
|
||||
{
|
||||
oldest = s;
|
||||
}
|
||||
else if(s.time > newest.time)
|
||||
{
|
||||
newest = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ((newest.docsRead - oldest.docsRead)/((newest.time - oldest.time)/1000.0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @created 28. Januar 2002
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* kills URLs longer than X characters. Used to prevent endless loops where
|
||||
* the page contains the current URL + some extension
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 28. Januar 2002
|
||||
*/
|
||||
|
||||
public class URLLengthFilter extends Filter implements MessageListener
|
||||
{
|
||||
/**
|
||||
* called by the message handler
|
||||
*
|
||||
* @param handler the handler
|
||||
*/
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler)
|
||||
{
|
||||
this.messageHandler = handler;
|
||||
}
|
||||
|
||||
|
||||
MessageHandler messageHandler;
|
||||
|
||||
int maxLength;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the URLLengthFilter object
|
||||
*
|
||||
* @param maxLength max length of the _total_ URL (protocol+host+port+path)
|
||||
*/
|
||||
public URLLengthFilter(int maxLength)
|
||||
{
|
||||
this.maxLength = maxLength;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* handles the message
|
||||
*
|
||||
* @param message Description of the Parameter
|
||||
* @return the original message or NULL if the URL was too long
|
||||
*/
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
URLMessage m = (URLMessage) message;
|
||||
String file = m.getUrl().getFile();
|
||||
if (file != null && file.length() > maxLength) // path + query
|
||||
{
|
||||
filtered++;
|
||||
return null;
|
||||
}
|
||||
return message;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
import de.lanlab.larm.util.URLUtils;
|
||||
|
||||
/**
|
||||
* represents a URL which is passed around in the messageHandler
|
||||
*/
|
||||
public class URLMessage implements Message, Serializable
|
||||
{
|
||||
/**
|
||||
* the URL
|
||||
*/
|
||||
protected URL url;
|
||||
protected String urlString;
|
||||
|
||||
protected URL referer;
|
||||
protected String refererString;
|
||||
boolean isFrame;
|
||||
|
||||
public URLMessage(URL url, URL referer, boolean isFrame)
|
||||
{
|
||||
//super();
|
||||
this.url = url;
|
||||
this.urlString = url != null ? URLUtils.toExternalFormNoRef(url) : null;
|
||||
|
||||
this.referer = referer;
|
||||
this.refererString = referer != null ? URLUtils.toExternalFormNoRef(referer) : null;
|
||||
this.isFrame = isFrame;
|
||||
//System.out.println("" + refererString + " -> " + urlString);
|
||||
}
|
||||
|
||||
public URL getUrl()
|
||||
{
|
||||
return this.url;
|
||||
}
|
||||
|
||||
public URL getReferer()
|
||||
{
|
||||
return this.referer;
|
||||
}
|
||||
|
||||
|
||||
public String toString()
|
||||
{
|
||||
return urlString;
|
||||
}
|
||||
|
||||
public String getURLString()
|
||||
{
|
||||
return urlString;
|
||||
}
|
||||
|
||||
public String getRefererString()
|
||||
{
|
||||
return refererString;
|
||||
}
|
||||
|
||||
|
||||
public int hashCode()
|
||||
{
|
||||
return url.hashCode();
|
||||
}
|
||||
|
||||
private void writeObject(java.io.ObjectOutputStream out) throws IOException
|
||||
{
|
||||
out.writeObject(url);
|
||||
out.writeObject(referer);
|
||||
out.writeBoolean(isFrame);
|
||||
}
|
||||
|
||||
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException
|
||||
{
|
||||
url = (URL)in.readObject();
|
||||
referer = (URL)in.readObject();
|
||||
urlString = url.toExternalForm();
|
||||
refererString = referer.toExternalForm();
|
||||
isFrame = in.readBoolean();
|
||||
}
|
||||
|
||||
public String getInfo()
|
||||
{
|
||||
return (referer != null ? refererString : "<start>") + "\t" + urlString + "\t" + (isFrame ? "1" : "0");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import org.apache.oro.text.regex.Perl5Matcher;
|
||||
import org.apache.oro.text.regex.Perl5Compiler;
|
||||
import org.apache.oro.text.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Filter-Klasse; prüft eine eingegangene Message auf Einhaltung eines
|
||||
* regulären Ausdrucks. Wenn die URL diesem Ausdruck
|
||||
* nicht entspricht, wird sie verworfen
|
||||
* @author Clemens Marschner
|
||||
*/
|
||||
class URLScopeFilter extends Filter implements MessageListener
|
||||
{
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler)
|
||||
{
|
||||
this.messageHandler = handler;
|
||||
}
|
||||
MessageHandler messageHandler;
|
||||
|
||||
/**
|
||||
* the regular expression which describes a valid URL
|
||||
*/
|
||||
private Pattern pattern;
|
||||
private Perl5Matcher matcher;
|
||||
private Perl5Compiler compiler;
|
||||
|
||||
public URLScopeFilter()
|
||||
{
|
||||
matcher = new Perl5Matcher();
|
||||
compiler = new Perl5Compiler();
|
||||
}
|
||||
|
||||
public String getRexString()
|
||||
{
|
||||
return pattern.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* set the regular expression
|
||||
* @param rexString the expression
|
||||
*/
|
||||
public void setRexString(String rexString) throws org.apache.oro.text.regex.MalformedPatternException
|
||||
{
|
||||
this.pattern = compiler.compile(rexString, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK);
|
||||
//System.out.println("pattern set to: " + pattern);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this method will be called by the message handler. Tests the URL
|
||||
* and throws it out if it's not in the scope
|
||||
*/
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
if(message instanceof URLMessage)
|
||||
{
|
||||
String urlString = ((URLMessage)message).toString();
|
||||
int length = urlString.length();
|
||||
char buffer[] = new char[length];
|
||||
urlString.getChars(0,length,buffer,0);
|
||||
|
||||
//System.out.println("using pattern: " + pattern);
|
||||
boolean match = matcher.matches(buffer, pattern);
|
||||
if(!match)
|
||||
{
|
||||
//System.out.println("not in Scope: " + urlString);
|
||||
filtered++;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
package de.lanlab.larm.fetcher;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
|
||||
import de.lanlab.larm.util.SimpleLogger;
|
||||
|
||||
/**
|
||||
* contains a HashMap of all URLs already passed. Adds each URL to that list, or
|
||||
* consumes it if it is already present
|
||||
*
|
||||
* @todo find ways to reduce memory consumption here. the approach is somewhat naive
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 3. Januar 2002
|
||||
*/
|
||||
class URLVisitedFilter extends Filter implements MessageListener
|
||||
{
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param handler Description of the Parameter
|
||||
*/
|
||||
public void notifyAddedToMessageHandler(MessageHandler handler)
|
||||
{
|
||||
this.messageHandler = handler;
|
||||
}
|
||||
|
||||
|
||||
MessageHandler messageHandler;
|
||||
|
||||
SimpleLogger log;
|
||||
|
||||
HashSet urlHash;
|
||||
|
||||
static Boolean dummy = new Boolean(true);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the URLVisitedFilter object
|
||||
*
|
||||
* @param initialHashCapacity Description of the Parameter
|
||||
*/
|
||||
public URLVisitedFilter(int initialHashCapacity, SimpleLogger log)
|
||||
{
|
||||
urlHash = new HashSet(initialHashCapacity);
|
||||
this.log = log;
|
||||
//urlVector = new Vector(initialHashCapacity);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* clears everything
|
||||
*/
|
||||
public void clearHashtable()
|
||||
{
|
||||
urlHash.clear();
|
||||
// urlVector.clear();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @param message Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public Message handleRequest(Message message)
|
||||
{
|
||||
if (message instanceof URLMessage)
|
||||
{
|
||||
URLMessage urlMessage = ((URLMessage) message);
|
||||
URL url = urlMessage.getUrl();
|
||||
String urlString = urlMessage.getURLString();
|
||||
if (urlHash.contains(urlString))
|
||||
{
|
||||
//System.out.println("URLVisitedFilter: " + urlString + " already present.");
|
||||
filtered++;
|
||||
if(log != null)
|
||||
{
|
||||
log.logThreadSafe(urlMessage.getInfo());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// System.out.println("URLVisitedFilter: " + urlString + " not present yet.");
|
||||
urlHash.add(urlString);
|
||||
stringSize += urlString.length(); // see below
|
||||
//urlVector.add(urlString);
|
||||
}
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
|
||||
private int stringSize = 0;
|
||||
|
||||
/**
|
||||
* just a method to get a rough number of characters contained in the array
|
||||
* with that you see that the total memory is mostly used by this class
|
||||
*/
|
||||
public int getStringSize()
|
||||
{
|
||||
return stringSize;
|
||||
}
|
||||
|
||||
public int size()
|
||||
{
|
||||
return urlHash.size();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,875 @@
|
|||
package de.lanlab.larm.graph;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 30. Januar 2002
|
||||
*/
|
||||
class Node implements Comparable
|
||||
{
|
||||
LinkedList incoming;
|
||||
// 16 + 4 per entry
|
||||
//HashSet incomingNodes; // 16 + 16 per entry, 11 x 16 default size = 192
|
||||
LinkedList outgoing;
|
||||
// 16 + 4 per entry
|
||||
//Object o;
|
||||
//HashSet outgoingNodes; // 16 + 16 per entry, 11 x 16 default size = 192
|
||||
|
||||
//LinkedList shortestIncoming;
|
||||
int id;
|
||||
// 4
|
||||
float distance;
|
||||
// 8
|
||||
String name;
|
||||
// 4 + String object
|
||||
String title;
|
||||
// 4 + String object
|
||||
float nodeRank[] = new float[2];
|
||||
// 16
|
||||
// 470 bytes + 2 string objects
|
||||
/**
|
||||
* Description of the Field
|
||||
*/
|
||||
public static int sortType = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param n Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public int compareTo(Object n)
|
||||
{
|
||||
if (sortType < 2)
|
||||
{
|
||||
double diff = ((Node) n).nodeRank[sortType] - nodeRank[sortType];
|
||||
return diff < 0 ? -1 : diff > 0 ? 1 : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return (((Node) n).incoming.size() - incoming.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the Node object
|
||||
*
|
||||
* @param id Description of the Parameter
|
||||
* @param name Description of the Parameter
|
||||
* @param title Description of the Parameter
|
||||
*/
|
||||
public Node(int id, String name, String title)
|
||||
{
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
this.title = title;
|
||||
this.incoming = new LinkedList();
|
||||
this.outgoing = new LinkedList();
|
||||
//this.incomingNodes = new HashSet();
|
||||
//this.outgoingNodes = new HashSet();
|
||||
this.distance = Float.MAX_VALUE;
|
||||
this.nodeRank[0] = this.nodeRank[1] = 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a feature to the Incoming attribute of the Node object
|
||||
*
|
||||
* @param incomingT The feature to be added to the Incoming attribute
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public boolean addIncoming(Transition incomingT)
|
||||
{
|
||||
Integer id = new Integer(incomingT.getFrom().id);
|
||||
if (!incoming.contains(id))
|
||||
{
|
||||
// attn: doesn't scale well, but also saves memory
|
||||
|
||||
incoming.addLast(incomingT);
|
||||
//incomingNodes.add(id);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a feature to the Outgoing attribute of the Node object
|
||||
*
|
||||
* @param outgoingT The feature to be added to the Outgoing attribute
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public boolean addOutgoing(Transition outgoingT)
|
||||
{
|
||||
Integer id = new Integer(outgoingT.getTo().id);
|
||||
if (!outgoing.contains(id))
|
||||
{
|
||||
outgoing.addLast(outgoingT);
|
||||
//outgoingNodes.add(id);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the incoming attribute of the Node object
|
||||
*
|
||||
* @return The incoming value
|
||||
*/
|
||||
public LinkedList getIncoming()
|
||||
{
|
||||
return incoming;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the outgoing attribute of the Node object
|
||||
*
|
||||
* @return The outgoing value
|
||||
*/
|
||||
public LinkedList getOutgoing()
|
||||
{
|
||||
return outgoing;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the distance attribute of the Node object
|
||||
*
|
||||
* @param distance The new distance value
|
||||
*/
|
||||
public void setDistance(float distance)
|
||||
{
|
||||
this.distance = distance;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the distance attribute of the Node object
|
||||
*
|
||||
* @return The distance value
|
||||
*/
|
||||
public float getDistance()
|
||||
{
|
||||
return distance;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the name attribute of the Node object
|
||||
*
|
||||
* @return The name value
|
||||
*/
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the title attribute of the Node object
|
||||
*
|
||||
* @param title The new title value
|
||||
*/
|
||||
public void setTitle(String title)
|
||||
{
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the title attribute of the Node object
|
||||
*
|
||||
* @return The title value
|
||||
*/
|
||||
public String getTitle()
|
||||
{
|
||||
return title;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the nodeRank attribute of the Node object
|
||||
*
|
||||
* @param idx Description of the Parameter
|
||||
* @return The nodeRank value
|
||||
*/
|
||||
public float getNodeRank(int idx)
|
||||
{
|
||||
return nodeRank[idx];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the nodeRank attribute of the Node object
|
||||
*
|
||||
* @param nodeRank The new nodeRank value
|
||||
* @param idx The new nodeRank value
|
||||
*/
|
||||
public void setNodeRank(float nodeRank, int idx)
|
||||
{
|
||||
this.nodeRank[idx] = nodeRank;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 30. Januar 2002
|
||||
*/
|
||||
class Transition
|
||||
{
|
||||
|
||||
|
||||
Node from;
|
||||
Node to;
|
||||
float distance;
|
||||
float linkRank[] = new float[2];
|
||||
boolean isFrame;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the Transition object
|
||||
*
|
||||
* @param from Description of the Parameter
|
||||
* @param to Description of the Parameter
|
||||
* @param isFrame Description of the Parameter
|
||||
*/
|
||||
public Transition(Node from, Node to, boolean isFrame)
|
||||
{
|
||||
LinkedList l = from.getOutgoing();
|
||||
Iterator i = l.iterator();
|
||||
while(i.hasNext())
|
||||
{
|
||||
Transition t = (Transition)i.next();
|
||||
if(t.getTo() == to)
|
||||
{
|
||||
return; // schon enthalten
|
||||
}
|
||||
}
|
||||
this.from = from;
|
||||
this.to = to;
|
||||
from.addOutgoing(this);
|
||||
to.addIncoming(this);
|
||||
this.distance = Integer.MAX_VALUE;
|
||||
this.isFrame = isFrame;
|
||||
this.linkRank[0] = this.linkRank[1] = 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the to attribute of the Transition object
|
||||
*
|
||||
* @return The to value
|
||||
*/
|
||||
public Node getTo()
|
||||
{
|
||||
return to;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the from attribute of the Transition object
|
||||
*
|
||||
* @return The from value
|
||||
*/
|
||||
public Node getFrom()
|
||||
{
|
||||
return from;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the distance attribute of the Transition object
|
||||
*
|
||||
* @return The distance value
|
||||
*/
|
||||
public float getDistance()
|
||||
{
|
||||
return distance;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the distance attribute of the Transition object
|
||||
*
|
||||
* @param distance The new distance value
|
||||
*/
|
||||
public void setDistance(float distance)
|
||||
{
|
||||
this.distance = distance;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the frame attribute of the Transition object
|
||||
*
|
||||
* @return The frame value
|
||||
*/
|
||||
public boolean isFrame()
|
||||
{
|
||||
return isFrame;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the linkRank attribute of the Transition object
|
||||
*
|
||||
* @param idx Description of the Parameter
|
||||
* @return The linkRank value
|
||||
*/
|
||||
public float getLinkRank(int idx)
|
||||
{
|
||||
return linkRank[idx];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the linkRank attribute of the Transition object
|
||||
*
|
||||
* @param linkRank The new linkRank value
|
||||
* @param idx The new linkRank value
|
||||
*/
|
||||
public void setLinkRank(float linkRank, int idx)
|
||||
{
|
||||
this.linkRank[idx] = linkRank;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 30. Januar 2002
|
||||
*/
|
||||
public class DistanceCount
|
||||
{
|
||||
|
||||
|
||||
HashMap nodes = new HashMap(100000);
|
||||
LinkedList nodesToDo = new LinkedList();
|
||||
static int id = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Gets the orCreateNode attribute of the DistanceCount object
|
||||
*
|
||||
* @param name Description of the Parameter
|
||||
* @param title Description of the Parameter
|
||||
* @return The orCreateNode value
|
||||
*/
|
||||
Node getOrCreateNode(String name, String title)
|
||||
{
|
||||
Node node = (Node) nodes.get(name);
|
||||
if (node != null)
|
||||
{
|
||||
if (title != null)
|
||||
{
|
||||
node.setTitle(title);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
else
|
||||
{
|
||||
node = new Node(id++, name, title);
|
||||
nodes.put(name, node);
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the DistanceCount object
|
||||
*
|
||||
* @param filename Description of the Parameter
|
||||
* @exception IOException Description of the Exception
|
||||
*/
|
||||
public DistanceCount(String filename)
|
||||
throws IOException
|
||||
{
|
||||
System.out.println("reading file...");
|
||||
long t1 = System.currentTimeMillis();
|
||||
BufferedReader b = new BufferedReader(new FileReader(filename));
|
||||
String line;
|
||||
boolean firstNotFound = true;
|
||||
Node firstNode = null;
|
||||
int lines = 0;
|
||||
while ((line = b.readLine()) != null)
|
||||
{
|
||||
lines++;
|
||||
String title = null;
|
||||
try
|
||||
{
|
||||
//StringTokenizer st = new StringTokenizer(line, " ");
|
||||
StringTokenizer st = new StringTokenizer(line, "\t");
|
||||
String from = st.nextToken();
|
||||
if (from.endsWith("/"))
|
||||
{
|
||||
from = from.substring(0, from.length() - 1);
|
||||
}
|
||||
from = from.toLowerCase();
|
||||
String to = st.nextToken();
|
||||
if (to.endsWith("/"))
|
||||
{
|
||||
to = to.substring(0, to.length() - 1);
|
||||
}
|
||||
to = to.toLowerCase();
|
||||
boolean isFrame = (Integer.parseInt(st.nextToken()) == 1);
|
||||
if (st.countTokens() > 3)
|
||||
{
|
||||
title = "<untitled>";
|
||||
//StringBuffer sb = new StringBuffer();
|
||||
st.nextToken();
|
||||
// result
|
||||
st.nextToken();
|
||||
// Mime Type
|
||||
st.nextToken();
|
||||
// Size
|
||||
/*
|
||||
* while(st.hasMoreTokens())
|
||||
* {
|
||||
* sb.append(st.nextToken()).append(" ");
|
||||
* }
|
||||
*/
|
||||
title = st.nextToken();
|
||||
if (title.length() > 2)
|
||||
{
|
||||
|
||||
title = title.substring(1, title.length() - 1);
|
||||
int indexOfPara = title.indexOf("\"");
|
||||
if (indexOfPara > -1)
|
||||
{
|
||||
title = title.substring(0, indexOfPara);
|
||||
}
|
||||
}
|
||||
}
|
||||
Node fromNode = getOrCreateNode(from, null);
|
||||
Node toNode = getOrCreateNode(to, title);
|
||||
Transition t = new Transition(fromNode, toNode, isFrame);
|
||||
/*
|
||||
* if(firstNotFound && to.equals("http://127.0.0.1"))
|
||||
* {
|
||||
* firstNode = toNode;
|
||||
* firstNotFound = false;
|
||||
* }
|
||||
*/
|
||||
if (lines % 10000 == 0)
|
||||
{
|
||||
System.out.println("" + lines + " Lines; " + nodes.size() + " nodes");
|
||||
}
|
||||
}
|
||||
catch (NoSuchElementException e)
|
||||
{
|
||||
System.out.println("Malformed line " + lines + ": field number doesn't match");
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
System.out.println("Malformed line " + lines + ": NumberFormat wrong");
|
||||
}
|
||||
}
|
||||
System.out.println("finished; b" + lines + " Lines; " + nodes.size() + " nodes");
|
||||
long t2 = System.currentTimeMillis();
|
||||
System.out.println("" + (t2 - t1) + " ms");
|
||||
|
||||
/*
|
||||
* if(firstNotFound)
|
||||
* {
|
||||
* System.out.println("Couldn't find start page");
|
||||
* System.exit(-1);
|
||||
* }
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param firstNode Description of the Parameter
|
||||
*/
|
||||
public void calculateShortestDistance(Node firstNode)
|
||||
{
|
||||
clearDistances();
|
||||
firstNode.setDistance(0);
|
||||
nodesToDo.addLast(firstNode);
|
||||
int calculations = 0;
|
||||
while (!nodesToDo.isEmpty())
|
||||
{
|
||||
if (calculations % 100000 == 0)
|
||||
{
|
||||
System.out.println("Calculations: " + calculations + "; nodes to go: " + nodesToDo.size() + " total Mem: " + Runtime.getRuntime().totalMemory() + "; free mem: " + Runtime.getRuntime().freeMemory());
|
||||
}
|
||||
calculations++;
|
||||
|
||||
Node act = (Node) nodesToDo.removeFirst();
|
||||
LinkedList outTrans = act.getOutgoing();
|
||||
float distance = act.getDistance();
|
||||
Iterator i = outTrans.iterator();
|
||||
//distance++;
|
||||
|
||||
while (i.hasNext())
|
||||
{
|
||||
Transition t = (Transition) i.next();
|
||||
float transDistance = t.getDistance();
|
||||
/*if (t.isFrame())
|
||||
{
|
||||
System.out.println("Frame from " + t.from.getName() + " to " + t.to.getName());
|
||||
}*/
|
||||
float newDistance = distance + (t.isFrame() ? 0.25f : 1f);
|
||||
if (transDistance > newDistance)
|
||||
{
|
||||
t.setDistance(newDistance);
|
||||
Node to = t.getTo();
|
||||
if (to.distance > distance)
|
||||
{
|
||||
to.setDistance(newDistance);
|
||||
nodesToDo.addLast(to);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* if(looksGood)
|
||||
* {
|
||||
* System.out.println("Node " + act.id + " looks good");
|
||||
* }
|
||||
*/
|
||||
}
|
||||
System.out.println("Calculations: " + calculations );
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void clearDistances()
|
||||
{
|
||||
System.out.println("Clearing distance data...");
|
||||
Iterator it = nodes.values().iterator();
|
||||
int nr = 0;
|
||||
while (it.hasNext())
|
||||
{
|
||||
Node n = (Node) it.next();
|
||||
nr++;
|
||||
n.setDistance(Float.MAX_VALUE);
|
||||
}
|
||||
System.out.println("cleared " + nr + " nodes. done");
|
||||
|
||||
}
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param nodeFrom Description of the Parameter
|
||||
* @param nodeTo Description of the Parameter
|
||||
*/
|
||||
public void printDistance(String nodeFrom, String nodeTo)
|
||||
{
|
||||
|
||||
Node firstNode = (Node) nodes.get(nodeFrom);
|
||||
if (firstNode == null)
|
||||
{
|
||||
System.out.println("FROM node not found");
|
||||
return;
|
||||
}
|
||||
Node toNode = (Node) nodes.get(nodeTo);
|
||||
if (toNode == null)
|
||||
{
|
||||
System.out.println("TO node not found");
|
||||
return;
|
||||
}
|
||||
//System.out.println("resetting node distance...");
|
||||
//clearDistances();
|
||||
|
||||
System.out.println("calculating...");
|
||||
calculateShortestDistance(firstNode);
|
||||
|
||||
//t1 = System.currentTimeMillis();
|
||||
//System.out.println("" + (t1-t2) + " ms");
|
||||
|
||||
|
||||
System.out.println("\nSorting...");
|
||||
|
||||
/*
|
||||
* Collection nodeCollection = nodes.values();
|
||||
* Object[] nodeArray = nodeCollection.toArray();
|
||||
* Arrays.sort(nodeArray);
|
||||
* t2 = System.currentTimeMillis();
|
||||
* System.out.println("" + (t2-t1) + " ms");
|
||||
* int from = 0;
|
||||
* int to = 1;
|
||||
*/
|
||||
/*
|
||||
* /calculate page Rank
|
||||
* for(int i = 0; i< 1; i++)
|
||||
* {
|
||||
* from = i%2;
|
||||
* to = (i+1) % 2;
|
||||
* for(int j = 0; j<nodeArray.length; j++)
|
||||
* {
|
||||
* Node act = (Node)nodeArray[j];
|
||||
* LinkedList inc = act.getIncoming();
|
||||
* float pageRank = 0;
|
||||
* Iterator it = inc.iterator();
|
||||
* while(it.hasNext())
|
||||
* {
|
||||
* Transition t = (Transition)it.next();
|
||||
* pageRank += t.getLinkRank(from);
|
||||
* }
|
||||
* act.setNodeRank(pageRank, to);
|
||||
* LinkedList out = act.getOutgoing();
|
||||
* int size = out.size();
|
||||
* if(size > 0)
|
||||
* {
|
||||
* float linkRank = pageRank / size;
|
||||
* it = out.iterator();
|
||||
* while(it.hasNext())
|
||||
* {
|
||||
* Transition t = (Transition)it.next();
|
||||
* t.setLinkRank(linkRank, to);
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
/*
|
||||
* System.out.println("\nLink Count:");
|
||||
* for(int i=0; i<10; i++)
|
||||
* {
|
||||
* Node n = ((Node)nodeArray[i]);
|
||||
* System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
|
||||
* }
|
||||
* for(int i=nodeArray.length/2; i<nodeArray.length/2+10; i++)
|
||||
* {
|
||||
* Node n = ((Node)nodeArray[i]);
|
||||
* System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
|
||||
* }
|
||||
* for(int i=nodeArray.length-10; i<nodeArray.length; i++)
|
||||
* {
|
||||
* Node n = ((Node)nodeArray[i]);
|
||||
* System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
|
||||
* }
|
||||
* Node.sortType = to;
|
||||
* Arrays.sort(nodeArray);
|
||||
* System.out.println("\nPageRank Count:");
|
||||
* for(int i=0; i<10; i++)
|
||||
* {
|
||||
* Node n = ((Node)nodeArray[i]);
|
||||
* System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
|
||||
* }
|
||||
* for(int i=nodeArray.length/2; i<nodeArray.length/2+10; i++)
|
||||
* {
|
||||
* Node n = ((Node)nodeArray[i]);
|
||||
* System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
|
||||
* }
|
||||
* for(int i=nodeArray.length-10; i<nodeArray.length; i++)
|
||||
* {
|
||||
* Node n = ((Node)nodeArray[i]);
|
||||
* System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
|
||||
* }
|
||||
* System.out.println("\nStats...");
|
||||
* float distanceAccumulated=0;
|
||||
* float distanceMax = 0;
|
||||
* int notCounted = 0;
|
||||
* for(int j = 0; j<nodeArray.length; j++)
|
||||
* {
|
||||
* Node n = (Node)nodeArray[j];
|
||||
* if(n.distance != Integer.MAX_VALUE)
|
||||
* {
|
||||
* distanceAccumulated += n.distance;
|
||||
* distanceMax = Math.max(distanceMax, n.distance);
|
||||
* }
|
||||
* else
|
||||
* {
|
||||
* notCounted++;
|
||||
* }
|
||||
* }
|
||||
* System.out.println("Mean Distance: " + ((double)distanceAccumulated)/nodeArray.length);
|
||||
* System.out.println("Max Distance: " + (distanceMax));
|
||||
* System.out.println("Not reachable nodes(?): " + notCounted);
|
||||
* System.out.println("Referer Median: " + ((Node)(nodeArray[Math.round(nodeArray.length/2)])).incoming.size());
|
||||
* System.out.println("\nSamples:");
|
||||
*/
|
||||
|
||||
printShortestRoute(toNode, 0,0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*/
|
||||
public void printRandomRoute()
|
||||
{
|
||||
Random r = new java.util.Random(System.currentTimeMillis());
|
||||
Collection nodeColl = nodes.values();
|
||||
Object[] nodeArray = (Object[])nodeColl.toArray();
|
||||
int rnd = (int) (r.nextDouble() * nodeArray.length);
|
||||
Node from = (Node) nodeArray[rnd];
|
||||
rnd = (int) (r.nextDouble() * nodeArray.length);
|
||||
Node to = (Node) nodeArray[rnd];
|
||||
System.out.println("Calculating distance...");
|
||||
calculateShortestDistance(from);
|
||||
System.out.println("printing...");
|
||||
printShortestRoute(to, 0,0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param n Description of the Parameter
|
||||
* @param indent Description of the Parameter
|
||||
*/
|
||||
public void printShortestRoute(Node n, int indent, int linkCount)
|
||||
{
|
||||
String spaces = " ".substring(0, indent);
|
||||
|
||||
if (n.getIncoming().isEmpty())
|
||||
{
|
||||
System.out.println(spaces + "<start>");
|
||||
}
|
||||
else
|
||||
{
|
||||
System.out.print(spaces + "+- " + n.name + " (" + (n.getTitle() != null ? n.getTitle().substring(0,Math.min(n.getTitle().length(),25)) : "") + "\") D:" + n.distance + "; L:" + n.getIncoming().size() + "; C:" + linkCount);
|
||||
Iterator it = n.getIncoming().iterator();
|
||||
float dist = n.distance;
|
||||
if (dist > 10000000)
|
||||
{
|
||||
System.out.println(spaces + "\n--no link--");
|
||||
return;
|
||||
}
|
||||
while (it.hasNext())
|
||||
{
|
||||
Transition t = (Transition) it.next();
|
||||
if (t.distance <= dist)
|
||||
{
|
||||
if (t.isFrame())
|
||||
{
|
||||
System.out.println(" **F** ->");
|
||||
}
|
||||
else
|
||||
{
|
||||
System.out.println(" -> ");
|
||||
}
|
||||
printShortestRoute(t.getFrom(), indent + 1, linkCount + n.getIncoming().size());
|
||||
}
|
||||
}
|
||||
}
|
||||
//System.out.println("");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this class reads in store.log, constructs a graph of the crawled web and is able
|
||||
* to perform a breadth-first search for the shortest distance between two nodes<br>
|
||||
* Note: this is experimental stuff. get into the source code to see how it works
|
||||
* @param args args[0] must point to the store.log file
|
||||
*/
|
||||
public static void main(String[] args)
|
||||
{
|
||||
// Syntax: DistanceCount <store.log>
|
||||
try
|
||||
{
|
||||
DistanceCount dc = new DistanceCount(args[0]);
|
||||
boolean running = true;
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(System.in),400);
|
||||
while (running)
|
||||
{
|
||||
System.out.print("\n\nCommand (? for help) > ");
|
||||
String newL;
|
||||
String input = "";
|
||||
//while((newL = in.readLine()) != null)
|
||||
//{
|
||||
input = in.readLine();
|
||||
StringTokenizer st = new StringTokenizer(input," ");
|
||||
String command;
|
||||
boolean printHelp = false;
|
||||
|
||||
if (!st.hasMoreTokens())
|
||||
{
|
||||
printHelp = true;
|
||||
command = "?";
|
||||
}
|
||||
else
|
||||
{
|
||||
command = st.nextToken();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if ("?".equals(command))
|
||||
{
|
||||
printHelp = true;
|
||||
}
|
||||
else if ("d".equals(command))
|
||||
{
|
||||
String from = st.nextToken();
|
||||
String to = st.nextToken();
|
||||
dc.printDistance(from ,to);
|
||||
}
|
||||
else if ("q".equals(command))
|
||||
{
|
||||
running = false;
|
||||
}
|
||||
else if ("r".equals(command))
|
||||
{
|
||||
dc.printRandomRoute();
|
||||
}
|
||||
else
|
||||
{
|
||||
System.out.println("unknown command '" + command + "'");
|
||||
}
|
||||
}
|
||||
catch (java.util.NoSuchElementException e)
|
||||
{
|
||||
System.out.println("Syntax error");
|
||||
e.printStackTrace();
|
||||
printHelp = true;
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
if (printHelp)
|
||||
{
|
||||
System.out.println("\nSyntax\n" +
|
||||
"? print this help message\n" +
|
||||
"d <page1> <page2> print shortest route from page1 to page2\n" +
|
||||
"r print random walk\n" +
|
||||
"q quit");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
catch (ArrayIndexOutOfBoundsException e)
|
||||
{
|
||||
System.out.println("Syntax: java ... store.log");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,154 @@
|
|||
package de.lanlab.larm.gui;
|
||||
|
||||
/*
|
||||
A basic extension of the java.awt.Dialog class
|
||||
*/
|
||||
|
||||
import java.awt.*;
|
||||
|
||||
/**
 * "About" dialog of the application: two text labels and an OK button.
 * The dialog disposes itself when OK is pressed or the window is closed.
 */
public class AboutDialog extends Dialog {

    /**
     * Creates the dialog, lays out its controls and registers the listeners.
     *
     * @param parent  owner frame; the dialog is centered on it when shown
     * @param modal   passed on to the {@link Dialog} constructor
     */
    public AboutDialog(Frame parent, boolean modal)
    {
        super(parent, modal);

        // This code is automatically generated by Visual Cafe when you add
        // components to the visual environment. It instantiates and initializes
        // the components. To modify the code, only use code syntax that matches
        // what Visual Cafe can generate, or Visual Cafe may be unable to back
        // parse your Java file into its visual environment.

        //{{INIT_CONTROLS
        setLayout(null);
        setSize(249,150);
        setVisible(false);
        label1.setText("LARM - LANLab Retrieval Machine");
        add(label1);
        label1.setBounds(12,12,228,24);
        okButton.setLabel("OK");
        add(okButton);
        okButton.setBounds(95,85,66,27);
        label2.setText("(C) 2000 Clemens Marschner");
        add(label2);
        label2.setBounds(12,36,228,24);
        setTitle("AWT-Anwendung - Info");
        //}}

        //{{REGISTER_LISTENERS
        SymWindow aSymWindow = new SymWindow();
        this.addWindowListener(aSymWindow);
        SymAction lSymAction = new SymAction();
        okButton.addActionListener(lSymAction);
        //}}
    }

    /**
     * Creates the dialog and then replaces the default title.
     *
     * @param parent  owner frame
     * @param title   title to show instead of the default one
     * @param modal   passed on to the {@link Dialog} constructor
     */
    public AboutDialog(Frame parent, String title, boolean modal)
    {
        this(parent, modal);
        setTitle(title);
    }

    /**
     * On the first call, enlarges the window by its insets and shifts all
     * components by the left/top inset so the layout set up in the
     * constructor keeps its place.
     */
    public void addNotify()
    {
        // Record the size of the window prior to calling parents addNotify.
        Dimension d = getSize();

        super.addNotify();

        // Only do this once.
        if (fComponentsAdjusted)
        {
            return;
        }

        // Adjust components according to the insets
        Insets insets = getInsets();
        setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height);
        Component components[] = getComponents();
        for (int i = 0; i < components.length; i++)
        {
            Point p = components[i].getLocation();
            p.translate(insets.left, insets.top);
            components[i].setLocation(p);
        }

        // Used for addNotify check.
        fComponentsAdjusted = true;
    }

    /**
     * Shows or hides the dialog. When showing, the dialog is first centered
     * over its parent; if there is no parent the position is left unchanged.
     *
     * @param b  true to show the dialog, false to hide it
     */
    public void setVisible(boolean b)
    {
        Container parent = getParent();
        // a dialog created without an owner has no parent to center on
        if (b && parent != null)
        {
            Rectangle bounds = parent.getBounds();
            Rectangle abounds = getBounds();

            setLocation(bounds.x + (bounds.width - abounds.width)/ 2,
                bounds.y + (bounds.height - abounds.height)/2);
        }

        super.setVisible(b);
    }

    //{{DECLARE_CONTROLS
    java.awt.Label label1 = new java.awt.Label();
    java.awt.Button okButton = new java.awt.Button();
    java.awt.Label label2 = new java.awt.Label();
    //}}

    // Used for addNotify check.
    boolean fComponentsAdjusted = false;

    /** Forwards action events of the OK button to the handler method. */
    class SymAction implements java.awt.event.ActionListener
    {
        public void actionPerformed(java.awt.event.ActionEvent event)
        {
            Object object = event.getSource();
            if (object == okButton)
            {
                okButton_ActionPerformed(event);
            }
        }
    }

    /**
     * Handles a click on the OK button.
     *
     * @param event  the action event received from the button
     */
    void okButton_ActionPerformed(java.awt.event.ActionEvent event)
    {
        // to do: code goes here.

        okButton_ActionPerformed_Interaction1(event);
    }


    /**
     * Disposes the dialog; any exception raised while doing so is ignored.
     *
     * @param event  the action event that triggered the call (unused)
     */
    void okButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            this.dispose();
        } catch (Exception ignored) {
            // best effort: closing the about box must never fail the caller
        }
    }


    /** Forwards the window-closing event of this dialog to the handler method. */
    class SymWindow extends java.awt.event.WindowAdapter
    {
        public void windowClosing(java.awt.event.WindowEvent event)
        {
            Object object = event.getSource();
            if (object == AboutDialog.this)
            {
                AboutDialog_WindowClosing(event);
            }
        }
    }

    /**
     * Handles the user closing the dialog window.
     *
     * @param event  the window event received
     */
    void AboutDialog_WindowClosing(java.awt.event.WindowEvent event)
    {
        // to do: code goes here.

        AboutDialog_WindowClosing_Interaction1(event);
    }


    /**
     * Disposes the dialog; any exception raised while doing so is ignored.
     *
     * @param event  the window event that triggered the call (unused)
     */
    void AboutDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
    {
        try {
            this.dispose();
        } catch (Exception ignored) {
            // best effort: closing the about box must never fail the caller
        }
    }

}
|
|
@ -0,0 +1,485 @@
|
|||
package de.lanlab.larm.gui;
|
||||
|
||||
/*
|
||||
This simple extension of the java.awt.Frame class
|
||||
contains all the elements necessary to act as the
|
||||
main window of an application.
|
||||
*/
|
||||
|
||||
import java.awt.*;
|
||||
import java.awt.event.ActionListener;
|
||||
//import com.sun.java.swing.*;
|
||||
|
||||
public class FetcherFrame extends Frame
|
||||
{
|
||||
/**
 * Builds the main window: the bordered main panel with the preferences and
 * log area on top and the four queue/thread lists below, the menu bar, and
 * the listeners for the window, the menu items and the start button.
 * Non-ASCII label characters are written as Unicode escapes so they do not
 * depend on the encoding the source file is compiled with.
 */
public FetcherFrame()
{
    // This code is automatically generated by Visual Cafe when you add
    // components to the visual environment. It instantiates and initializes
    // the components. To modify the code, only use code syntax that matches
    // what Visual Cafe can generate, or Visual Cafe may be unable to back
    // parse your Java file into its visual environment.

    //{{INIT_CONTROLS
    setLayout(new BorderLayout(0,0));
    setSize(800,600);
    setVisible(false);
    openFileDialog1.setMode(FileDialog.LOAD);
    openFileDialog1.setTitle("\u00D6ffnen");
    //$$ openFileDialog1.move(24,312);
    mainPanelWithBorders.setLayout(new BorderLayout(0,0));
    add("Center", mainPanelWithBorders);
    mainPanelWithBorders.setBounds(0,0,800,600);
    northBorder.setLayout(null);
    mainPanelWithBorders.add("North", northBorder);
    northBorder.setBackground(java.awt.Color.lightGray);
    northBorder.setBounds(0,0,800,3);
    southBorder.setLayout(null);
    mainPanelWithBorders.add("South", southBorder);
    southBorder.setBackground(java.awt.Color.lightGray);
    southBorder.setBounds(0,597,800,3);
    westBorder.setLayout(null);
    mainPanelWithBorders.add("West", westBorder);
    westBorder.setBackground(java.awt.Color.lightGray);
    westBorder.setBounds(0,3,3,594);
    eastBorder.setLayout(null);
    mainPanelWithBorders.add("East", eastBorder);
    eastBorder.setBackground(java.awt.Color.lightGray);
    eastBorder.setBounds(797,3,3,594);
    mainPanel.setLayout(new BorderLayout(0,3));
    mainPanelWithBorders.add("Center", mainPanel);
    mainPanel.setBackground(java.awt.Color.lightGray);
    mainPanel.setBounds(3,3,794,594);
    upperPanel.setLayout(new GridLayout(1,2,0,0));
    mainPanel.add("North", upperPanel);
    upperPanel.setBounds(0,0,794,150);
    preferencesPanel.setLayout(null);
    upperPanel.add(preferencesPanel);
    preferencesPanel.setBounds(0,0,397,150);
    startURLlabel.setText("Start-URL");
    preferencesPanel.add(startURLlabel);
    startURLlabel.setBounds(12,0,121,24);
    startURL.setText("uni-muenchen.de");
    preferencesPanel.add(startURL);
    startURL.setBounds(132,0,133,24);
    startButton.setLabel("Start");
    preferencesPanel.add(startButton);
    startButton.setFont(new Font("Dialog", Font.BOLD, 12));
    startButton.setBounds(288,36,99,24);
    restrictToLabel.setText("Restrict host to");
    preferencesPanel.add(restrictToLabel);
    restrictToLabel.setBounds(12,36,121,28);
    preferencesPanel.add(restrictTo);
    restrictTo.setBounds(133,36,133,24);
    logPanel.setLayout(new BorderLayout(0,0));
    upperPanel.add(logPanel);
    logPanel.setBounds(397,0,397,150);
    logPanel.add("Center", logList);
    logList.setBackground(java.awt.Color.white);
    logList.setBounds(0,0,397,150);
    lowerPanel.setLayout(new GridLayout(1,3,3,3));
    mainPanel.add("Center", lowerPanel);
    lowerPanel.setBounds(0,153,794,441);
    urlQueuePanel.setLayout(new BorderLayout(0,0));
    lowerPanel.add(urlQueuePanel);
    urlQueuePanel.setBounds(0,0,196,441);
    urlQueueLabel.setText("URLQueue");
    urlQueuePanel.add("North", urlQueueLabel);
    urlQueueLabel.setBounds(0,0,196,23);
    urlQueuePanel.add("Center", urlQueueList);
    urlQueueList.setBackground(java.awt.Color.white);
    urlQueueList.setBounds(0,23,196,418);
    urlThreadPanel.setLayout(new BorderLayout(0,0));
    lowerPanel.add(urlThreadPanel);
    urlThreadPanel.setBounds(199,0,196,441);
    urlThreadLabel.setText("URLThreads");
    urlThreadPanel.add("North", urlThreadLabel);
    urlThreadLabel.setBounds(0,0,196,23);
    urlThreadPanel.add("Center", urlThreadList);
    urlThreadList.setBackground(java.awt.Color.white);
    urlThreadList.setBounds(0,23,196,418);
    docQueuePanel.setLayout(new BorderLayout(0,0));
    lowerPanel.add(docQueuePanel);
    docQueuePanel.setBounds(398,0,196,441);
    docQueueLabel.setText("DocQueue");
    docQueuePanel.add("North", docQueueLabel);
    docQueueLabel.setBounds(0,0,196,23);
    docQueuePanel.add("Center", docQueueList);
    docQueueList.setBackground(java.awt.Color.white);
    docQueueList.setBounds(0,23,196,418);
    docThreadPanel.setLayout(new BorderLayout(0,0));
    lowerPanel.add(docThreadPanel);
    docThreadPanel.setBounds(597,0,196,441);
    docThreadLabel.setText("DocThreads");
    docThreadPanel.add("North", docThreadLabel);
    docThreadLabel.setBounds(0,0,196,23);
    docThreadPanel.add("Center", docThreadList);
    docThreadList.setBackground(java.awt.Color.white);
    docThreadList.setBounds(0,23,196,418);
    setTitle("LARM - Fetcher");
    //}}

    //{{INIT_MENUS
    menu1.setLabel("Datei");
    menu1.add(newMenuItem);
    newMenuItem.setEnabled(false);
    newMenuItem.setLabel("Neu");
    newMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_N,false));
    menu1.add(openMenuItem);
    openMenuItem.setLabel("\u00D6ffnen...");
    openMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_O,false));
    menu1.add(saveMenuItem);
    saveMenuItem.setEnabled(false);
    saveMenuItem.setLabel("Speichern");
    saveMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_S,false));
    menu1.add(saveAsMenuItem);
    saveAsMenuItem.setEnabled(false);
    saveAsMenuItem.setLabel("Speichern unter...");
    menu1.add(separatorMenuItem);
    separatorMenuItem.setLabel("-");
    menu1.add(exitMenuItem);
    exitMenuItem.setLabel("Beenden");
    mainMenuBar.add(menu1);
    menu2.setLabel("Bearbeiten");
    menu2.add(cutMenuItem);
    cutMenuItem.setEnabled(false);
    cutMenuItem.setLabel("Ausschneiden");
    cutMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_X,false));
    menu2.add(copyMenuItem);
    copyMenuItem.setEnabled(false);
    copyMenuItem.setLabel("Kopieren");
    copyMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_C,false));
    menu2.add(pasteMenuItem);
    pasteMenuItem.setEnabled(false);
    pasteMenuItem.setLabel("Einf\u00FCgen");
    pasteMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_V,false));
    mainMenuBar.add(menu2);
    menu3.setLabel("Hilfe");
    menu3.add(aboutMenuItem);
    aboutMenuItem.setLabel("Info...");
    mainMenuBar.add(menu3);
    //$$ mainMenuBar.move(0,312);
    setMenuBar(mainMenuBar);
    //}}

    //{{REGISTER_LISTENERS
    SymWindow aSymWindow = new SymWindow();
    this.addWindowListener(aSymWindow);
    SymAction lSymAction = new SymAction();
    openMenuItem.addActionListener(lSymAction);
    exitMenuItem.addActionListener(lSymAction);
    aboutMenuItem.addActionListener(lSymAction);
    startButton.addActionListener(lSymAction);
    //}}
}
|
||||
|
||||
/**
 * Builds the window exactly like the no-argument constructor and then
 * replaces its title.
 *
 * @param title  text to show in the title bar
 */
public FetcherFrame(String title)
{
    this();
    this.setTitle(title);
}
|
||||
|
||||
/**
|
||||
* Shows or hides the component depending on the boolean flag b.
|
||||
* @param b if true, show the component; otherwise, hide the component.
|
||||
* @see java.awt.Component#isVisible
|
||||
*/
|
||||
public void setVisible(boolean b)
|
||||
{
|
||||
if(b)
|
||||
{
|
||||
setLocation(50, 50);
|
||||
}
|
||||
super.setVisible(b);
|
||||
}
|
||||
|
||||
static public void main(String args[])
|
||||
{
|
||||
try
|
||||
{
|
||||
//Create a new instance of our application's frame, and make it visible.
|
||||
(new FetcherFrame()).setVisible(true);
|
||||
}
|
||||
catch (Throwable t)
|
||||
{
|
||||
System.err.println(t);
|
||||
t.printStackTrace();
|
||||
//Ensure the application exits with an error condition.
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
public void addNotify()
|
||||
{
|
||||
// Record the size of the window prior to calling parents addNotify.
|
||||
Dimension d = getSize();
|
||||
|
||||
super.addNotify();
|
||||
|
||||
if (fComponentsAdjusted)
|
||||
return;
|
||||
|
||||
// Adjust components according to the insets
|
||||
setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height);
|
||||
Component components[] = getComponents();
|
||||
for (int i = 0; i < components.length; i++)
|
||||
{
|
||||
Point p = components[i].getLocation();
|
||||
p.translate(getInsets().left, getInsets().top);
|
||||
components[i].setLocation(p);
|
||||
}
|
||||
fComponentsAdjusted = true;
|
||||
}
|
||||
|
||||
// Used for addNotify check.
|
||||
boolean fComponentsAdjusted = false;
|
||||
|
||||
//{{DECLARE_CONTROLS
|
||||
java.awt.FileDialog openFileDialog1 = new java.awt.FileDialog(this);
|
||||
java.awt.Panel mainPanelWithBorders = new java.awt.Panel();
|
||||
java.awt.Panel northBorder = new java.awt.Panel();
|
||||
java.awt.Panel southBorder = new java.awt.Panel();
|
||||
java.awt.Panel westBorder = new java.awt.Panel();
|
||||
java.awt.Panel eastBorder = new java.awt.Panel();
|
||||
java.awt.Panel mainPanel = new java.awt.Panel();
|
||||
java.awt.Panel upperPanel = new java.awt.Panel();
|
||||
java.awt.Panel preferencesPanel = new java.awt.Panel();
|
||||
java.awt.Label startURLlabel = new java.awt.Label();
|
||||
java.awt.TextField startURL = new java.awt.TextField(30);
|
||||
java.awt.Button startButton = new java.awt.Button();
|
||||
java.awt.Label restrictToLabel = new java.awt.Label();
|
||||
java.awt.TextField restrictTo = new java.awt.TextField();
|
||||
java.awt.Panel logPanel = new java.awt.Panel();
|
||||
java.awt.List logList = new java.awt.List(8);
|
||||
java.awt.Panel lowerPanel = new java.awt.Panel();
|
||||
java.awt.Panel urlQueuePanel = new java.awt.Panel();
|
||||
java.awt.Label urlQueueLabel = new java.awt.Label();
|
||||
java.awt.List urlQueueList = new java.awt.List(5);
|
||||
java.awt.Panel urlThreadPanel = new java.awt.Panel();
|
||||
java.awt.Label urlThreadLabel = new java.awt.Label();
|
||||
java.awt.List urlThreadList = new java.awt.List(4);
|
||||
java.awt.Panel docQueuePanel = new java.awt.Panel();
|
||||
java.awt.Label docQueueLabel = new java.awt.Label();
|
||||
java.awt.List docQueueList = new java.awt.List(4);
|
||||
java.awt.Panel docThreadPanel = new java.awt.Panel();
|
||||
java.awt.Label docThreadLabel = new java.awt.Label();
|
||||
java.awt.List docThreadList = new java.awt.List(4);
|
||||
//}}
|
||||
|
||||
//{{DECLARE_MENUS
|
||||
java.awt.MenuBar mainMenuBar = new java.awt.MenuBar();
|
||||
java.awt.Menu menu1 = new java.awt.Menu();
|
||||
java.awt.MenuItem newMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem openMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem saveMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem saveAsMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem separatorMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem exitMenuItem = new java.awt.MenuItem();
|
||||
java.awt.Menu menu2 = new java.awt.Menu();
|
||||
java.awt.MenuItem cutMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem copyMenuItem = new java.awt.MenuItem();
|
||||
java.awt.MenuItem pasteMenuItem = new java.awt.MenuItem();
|
||||
java.awt.Menu menu3 = new java.awt.Menu();
|
||||
java.awt.MenuItem aboutMenuItem = new java.awt.MenuItem();
|
||||
//}}
|
||||
|
||||
class SymWindow extends java.awt.event.WindowAdapter
|
||||
{
|
||||
public void windowClosing(java.awt.event.WindowEvent event)
|
||||
{
|
||||
Object object = event.getSource();
|
||||
if (object == FetcherFrame.this)
|
||||
FetcherFrame_WindowClosing(event);
|
||||
}
|
||||
}
|
||||
|
||||
void FetcherFrame_WindowClosing(java.awt.event.WindowEvent event)
|
||||
{
|
||||
// to do: code goes here.
|
||||
|
||||
FetcherFrame_WindowClosing_Interaction1(event);
|
||||
}
|
||||
|
||||
|
||||
void FetcherFrame_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
|
||||
{
|
||||
try {
|
||||
// QuitDialog Create and show as modal
|
||||
(new QuitDialog(this, true)).setVisible(true);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class SymAction implements java.awt.event.ActionListener
|
||||
{
|
||||
public void actionPerformed(java.awt.event.ActionEvent event)
|
||||
{
|
||||
Object object = event.getSource();
|
||||
if (object == openMenuItem)
|
||||
openMenuItem_ActionPerformed(event);
|
||||
else if (object == aboutMenuItem)
|
||||
aboutMenuItem_ActionPerformed(event);
|
||||
else if (object == exitMenuItem)
|
||||
exitMenuItem_ActionPerformed(event);
|
||||
else if (object == startButton)
|
||||
startButton_ActionPerformed(event);
|
||||
}
|
||||
}
|
||||
|
||||
void openMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
|
||||
{
|
||||
// to do: code goes here.
|
||||
|
||||
openMenuItem_ActionPerformed_Interaction1(event);
|
||||
}
|
||||
|
||||
|
||||
void openMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
|
||||
{
|
||||
try {
|
||||
// OpenFileDialog Create and show as modal
|
||||
int defMode = openFileDialog1.getMode();
|
||||
String defTitle = openFileDialog1.getTitle();
|
||||
String defDirectory = openFileDialog1.getDirectory();
|
||||
String defFile = openFileDialog1.getFile();
|
||||
|
||||
openFileDialog1 = new java.awt.FileDialog(this, defTitle, defMode);
|
||||
openFileDialog1.setDirectory(defDirectory);
|
||||
openFileDialog1.setFile(defFile);
|
||||
openFileDialog1.setVisible(true);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void aboutMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
|
||||
{
|
||||
// to do: code goes here.
|
||||
|
||||
aboutMenuItem_ActionPerformed_Interaction1(event);
|
||||
}
|
||||
|
||||
|
||||
void aboutMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
|
||||
{
|
||||
try {
|
||||
// AboutDialog Create and show as modal
|
||||
(new AboutDialog(this, true)).setVisible(true);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void exitMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
|
||||
{
|
||||
// to do: code goes here.
|
||||
|
||||
exitMenuItem_ActionPerformed_Interaction1(event);
|
||||
}
|
||||
|
||||
|
||||
void exitMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
|
||||
{
|
||||
try {
|
||||
// QuitDialog Create and show as modal
|
||||
(new QuitDialog(this, true)).setVisible(true);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void startButton_ActionPerformed(java.awt.event.ActionEvent event)
|
||||
{
|
||||
// to do: code goes here.
|
||||
|
||||
}
|
||||
|
||||
public void addUrlQueueItem(String item)
|
||||
{
|
||||
urlQueueList.add(item);
|
||||
}
|
||||
|
||||
public void removeUrlQueueItem(String item)
|
||||
{
|
||||
urlQueueList.remove(item);
|
||||
}
|
||||
public void addDocQueueItem(String item)
|
||||
{
|
||||
docQueueList.add(item);
|
||||
}
|
||||
|
||||
public void removeDocQueueItem(String item)
|
||||
{
|
||||
docQueueList.remove(item);
|
||||
}
|
||||
|
||||
public synchronized int addUrlThreadItem(String item)
|
||||
{
|
||||
urlThreadList.add(item);
|
||||
return urlThreadList.getItemCount();
|
||||
}
|
||||
|
||||
public synchronized int addUrlThreadItem(String item, int pos)
|
||||
{
|
||||
urlThreadList.add(item,pos);
|
||||
return urlThreadList.getItemCount();
|
||||
}
|
||||
|
||||
public void replaceUrlThreadItem(String item, int index)
|
||||
{
|
||||
urlThreadList.replaceItem(item,index);
|
||||
}
|
||||
|
||||
public synchronized int addDocThreadItem(String item)
|
||||
{
|
||||
docThreadList.add(item);
|
||||
return docThreadList.getItemCount();
|
||||
}
|
||||
|
||||
public void replaceDocThreadItem(String item, int index)
|
||||
{
|
||||
docThreadList.replaceItem(item,index);
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void addLogEntry(String entry)
|
||||
{
|
||||
logList.add(entry);
|
||||
logList.makeVisible(logList.getItemCount()-1);
|
||||
}
|
||||
|
||||
public void clearLog()
|
||||
{
|
||||
logList.removeAll();
|
||||
}
|
||||
|
||||
public void addStartButtonListener(ActionListener a)
|
||||
{
|
||||
startButton.addActionListener(a);
|
||||
}
|
||||
|
||||
public String getRestrictTo()
|
||||
{
|
||||
return restrictTo.getText();
|
||||
}
|
||||
public void setRestrictTo(String restrictTo)
|
||||
{
|
||||
this.restrictTo.setText(restrictTo);
|
||||
}
|
||||
public String getStartURL()
|
||||
{
|
||||
return startURL.getText();
|
||||
}
|
||||
public void setStartURL(String startURL)
|
||||
{
|
||||
this.startURL.setText(startURL);
|
||||
}
|
||||
|
||||
//public void setInfoText(String text)
|
||||
//{
|
||||
// thi
|
||||
//}
|
||||
}
|
||||
|
|
@ -0,0 +1,332 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c) <p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.gui;
|
||||
|
||||
import javax.swing.*;
|
||||
import java.awt.*;
|
||||
import java.awt.event.*;
|
||||
|
||||
|
||||
/**
 * Swing frame that summarizes the state of a crawl: an input area with
 * start URL, URL restriction and a start button, three columns of progress
 * bars / counters, and a few textual statistics. The class only displays
 * values handed to its setters; it does not compute anything itself.
 */
public class FetcherSummaryFrame extends JFrame
{
    JPanel lowerPanel = new JPanel();
    JPanel progressPanel = new JPanel();
    JPanel middlePanel = new JPanel();
    JPanel rightPanel = new JPanel();
    BorderLayout borderLayout1 = new BorderLayout();
    JPanel propertyPanel = new JPanel();
    JLabel hostLabel = new JLabel();
    JLabel urlRestrictionFrame = new JLabel();
    JTextField startURL = new JTextField();
    JTextField restrictTo = new JTextField();
    JButton startButton = new JButton();
    GridLayout gridLayout1 = new GridLayout();
    JProgressBar urlQueuedProgress = new JProgressBar(0,100);
    JLabel urlQueuedLabel = new JLabel();
    JLabel scopeFilteredLabel = new JLabel();
    JProgressBar scopeFilteredProgress = new JProgressBar(0,100);
    JLabel visitedFilteredLabel = new JLabel();
    JProgressBar visitedFilteredProgress = new JProgressBar(0,100);
    JLabel workingThreadsLabel = new JLabel();
    JProgressBar workingThreadsProgress = new JProgressBar(0,100);
    JLabel idleThreadsLabel = new JLabel();
    JProgressBar idleThreadsProgress = new JProgressBar(0,100);
    JLabel busyThreadsLabel = new JLabel();
    JProgressBar busyThreadsProgress = new JProgressBar(0,100);
    JLabel requestQueueLabel = new JLabel();
    JProgressBar requestQueueProgress = new JProgressBar();
    JLabel stalledThreadsLabel = new JLabel();
    JProgressBar stalledThreadsProgress = new JProgressBar();
    JLabel dnsLabel = new JLabel();
    JProgressBar dnsProgress = new JProgressBar(0,100);
    JLabel freeMemLabel = new JLabel();
    JLabel freeMemText = new JLabel();
    JLabel totalMemLabel = new JLabel();
    JLabel totalMemText = new JLabel();
    JLabel bpsLabel = new JLabel();
    JLabel bpsText = new JLabel();
    JLabel docsLabel = new JLabel();
    JLabel docsText = new JLabel();
    JLabel docsReadLabel = new JLabel();
    JLabel docsReadText = new JLabel();
    JProgressBar urlsCaughtProgress = new JProgressBar(0,100);
    JLabel urlsCaughtText = new JLabel();
    JLabel robotsTxtsText = new JLabel();
    JProgressBar robotsTxtsProgress = new JProgressBar(0,100);

    /**
     * Builds the component tree, sets title and size, and switches every
     * progress bar to "painted string" mode with the initial text "0".
     * An exception raised while doing so is printed to standard error.
     */
    public FetcherSummaryFrame()
    {
        try
        {
            jbInit();
            this.setTitle("LARM - LANLab Retrieval Machine");
            this.setSize(new Dimension(640,350));

            JProgressBar[] bars = allProgressBars();
            for (int i = 0; i < bars.length; i++)
            {
                bars[i].setStringPainted(true);
                bars[i].setString("0");
            }
        }
        catch(Exception e)
        {
            e.printStackTrace();
        }
    }

    /** Returns every progress bar of this frame. */
    private JProgressBar[] allProgressBars()
    {
        return new JProgressBar[] {
            urlQueuedProgress, scopeFilteredProgress, visitedFilteredProgress,
            workingThreadsProgress, idleThreadsProgress, busyThreadsProgress,
            stalledThreadsProgress, requestQueueProgress, dnsProgress,
            urlsCaughtProgress, robotsTxtsProgress
        };
    }

    /** Adds the components to the container in array order, without layout constraints. */
    private static void addAll(Container target, Component[] items)
    {
        for (int i = 0; i < items.length; i++)
        {
            target.add(items[i], null);
        }
    }

    /** Configures one caption/value label pair of the statistics column. */
    private static void initStatistic(JLabel caption, String captionText, JLabel value)
    {
        caption.setText(captionText);
        caption.setPreferredSize(new Dimension(60, 17));
        value.setText("0");
        value.setPreferredSize(new Dimension(120, 17));
        value.setMinimumSize(new Dimension(100, 17));
    }

    /**
     * Configures all components and assembles the component tree: the
     * property panel goes to NORTH, the lower panel with its three columns
     * to CENTER of the content pane.
     */
    private void jbInit() throws Exception
    {
        this.getContentPane().setLayout(borderLayout1);

        // input area, positioned absolutely (null layout)
        propertyPanel.setMinimumSize(new Dimension(10, 70));
        propertyPanel.setPreferredSize(new Dimension(10, 80));
        propertyPanel.setLayout(null);
        hostLabel.setText("Startseite");
        hostLabel.setBounds(new Rectangle(18, 15, 76, 17));
        urlRestrictionFrame.setText("URL-Restriction (regul. Ausdruck)");
        urlRestrictionFrame.setBounds(new Rectangle(18, 37, 208, 17));
        startURL.setBounds(new Rectangle(224, 14, 281, 21));
        restrictTo.setBounds(new Rectangle(224, 38, 281, 21));
        startButton.setActionCommand("start");
        startButton.setText("Start");
        startButton.setBounds(new Rectangle(528, 14, 79, 47));

        lowerPanel.setLayout(gridLayout1);

        // captions of the progress bars
        urlQueuedLabel.setToolTipText("");
        urlQueuedLabel.setText("URLs queued");
        scopeFilteredLabel.setToolTipText("");
        scopeFilteredLabel.setText("Scope-gefiltert");
        visitedFilteredLabel.setText("Visited gefiltert");
        workingThreadsLabel.setText("Number of Working Threads");
        idleThreadsLabel.setText("Idle Threads");
        busyThreadsLabel.setText("Busy Threads");
        requestQueueLabel.setText("requests queued");
        stalledThreadsLabel.setText("stalled Threads");
        dnsLabel.setText("DNS Hosts cached");
        urlsCaughtText.setText("URLs caught by Robots.txt");
        robotsTxtsText.setText("Robots.txts found");

        // all progress bars share one preferred size
        JProgressBar[] bars = allProgressBars();
        for (int i = 0; i < bars.length; i++)
        {
            bars[i].setPreferredSize(new Dimension(190, 25));
        }

        // textual statistics
        initStatistic(freeMemLabel, "Free Mem", freeMemText);
        initStatistic(totalMemLabel, "total Mem", totalMemText);
        initStatistic(bpsLabel, "Bytes/s", bpsText);
        initStatistic(docsLabel, "Docs/s", docsText);
        initStatistic(docsReadLabel, "Docs read", docsReadText);

        // assemble; the order of the add calls determines the layout order
        this.getContentPane().add(lowerPanel, BorderLayout.CENTER);
        lowerPanel.add(progressPanel, null);
        addAll(progressPanel, new Component[] {
            urlQueuedLabel, urlQueuedProgress,
            scopeFilteredLabel, scopeFilteredProgress,
            visitedFilteredLabel, visitedFilteredProgress,
            dnsLabel, dnsProgress,
            robotsTxtsText, robotsTxtsProgress,
            urlsCaughtText, urlsCaughtProgress
        });
        lowerPanel.add(middlePanel, null);
        addAll(middlePanel, new Component[] {
            workingThreadsLabel, workingThreadsProgress,
            idleThreadsLabel, idleThreadsProgress,
            busyThreadsLabel, busyThreadsProgress,
            requestQueueLabel, requestQueueProgress,
            stalledThreadsLabel, stalledThreadsProgress
        });
        lowerPanel.add(rightPanel, null);
        addAll(rightPanel, new Component[] {
            docsLabel, docsText,
            docsReadLabel, docsReadText,
            bpsLabel, bpsText,
            totalMemLabel, totalMemText,
            freeMemLabel, freeMemText
        });
        this.getContentPane().add(propertyPanel, BorderLayout.NORTH);
        addAll(propertyPanel, new Component[] {
            urlRestrictionFrame, restrictTo, hostLabel, startButton, startURL
        });
    }

    /**
     * Shows a counter in a progress bar whose scale adapts to the value.
     * If the value exceeds the current maximum, the maximum is doubled
     * until the value fits (a maximum of 0 is treated as 1 so that the
     * scale can grow at all). If the value drops below half of the maximum
     * while the previous value was at or above that half, the maximum is
     * halved. The bar's text is set to the decimal value.
     *
     * @param p     progress bar to update
     * @param value counter value to display
     */
    public void setCounterProgressBar(JProgressBar p, int value)
    {
        int oldMax = p.getMaximum();
        int oldValue = p.getValue();

        if(value > oldMax)
        {
            int newMax = Math.max(oldMax, 1);
            while (newMax < value && newMax <= Integer.MAX_VALUE / 2)
            {
                newMax *= 2;
            }
            if (newMax < value)
            {
                // doubling would overflow: fall back to the value itself
                newMax = value;
            }
            p.setMaximum(newMax);
        }
        else if (value < oldMax / 2 && oldValue >= oldMax / 2)
        {
            p.setMaximum(oldMax / 2);
        }
        p.setValue(value);
        p.setString("" + value);
    }

    /** Displays the number of queued URLs. */
    public void setURLsQueued(int queued)
    {
        setCounterProgressBar(this.urlQueuedProgress, queued);
    }

    /** Displays the number of URLs removed by the scope filter. */
    public void setScopeFiltered(int filtered)
    {
        setCounterProgressBar(this.scopeFilteredProgress, filtered);
    }

    /** Displays the number of URLs removed by the visited filter. */
    public void setVisitedFiltered(int filtered)
    {
        setCounterProgressBar(this.visitedFilteredProgress, filtered);
    }

    /** Displays the number of working threads. */
    public void setWorkingThreadsCount(int threads)
    {
        setCounterProgressBar(this.workingThreadsProgress, threads);
    }

    /** Displays the number of idle threads. */
    public void setIdleThreadsCount(int threads)
    {
        setCounterProgressBar(this.idleThreadsProgress, threads);
    }

    /** Displays the number of busy threads. */
    public void setBusyThreadsCount(int threads)
    {
        setCounterProgressBar(this.busyThreadsProgress, threads);
    }

    /** Displays the number of queued requests. */
    public void setRequestQueueCount(int requests)
    {
        setCounterProgressBar(this.requestQueueProgress, requests);
    }

    /** Displays the number of cached DNS hosts. */
    public void setDNSCount(int count)
    {
        setCounterProgressBar(this.dnsProgress, count);
    }

    /**
     * Displays the number of URLs caught by robots.txt rules.
     * (Previously this overwrote the "URLs queued" bar.)
     */
    public void setURLsCaughtCount(int count)
    {
        setCounterProgressBar(this.urlsCaughtProgress, count);
    }

    /** Registers an additional action listener on the start button. */
    public void addStartButtonListener(ActionListener a)
    {
        startButton.addActionListener(a);
    }

    /** Returns the text of the URL restriction field. */
    public String getRestrictTo()
    {
        return restrictTo.getText();
    }

    /** Sets the text of the URL restriction field. */
    public void setRestrictTo(String restrictTo)
    {
        this.restrictTo.setText(restrictTo);
    }

    /** Returns the text of the start URL field. */
    public String getStartURL()
    {
        return startURL.getText();
    }

    /** Sets the text of the start URL field. */
    public void setStartURL(String startURL)
    {
        this.startURL.setText(startURL);
    }

    /**
     * Displays the number of stalled threads. Routed through
     * setCounterProgressBar like the other counters so that the painted
     * text follows the value instead of staying at "0".
     */
    public void setStalledThreads(int stalled)
    {
        setCounterProgressBar(this.stalledThreadsProgress, stalled);
    }

    /** Displays the transfer rate in the "Bytes/s" field. */
    public void setBytesPerSecond(double bps)
    {
        bpsText.setText("" + bps);
    }

    /**
     * Displays the document rate in the "Docs/s" field.
     * (Previously this overwrote the "Bytes/s" field.)
     */
    public void setDocsPerSecond(double docs)
    {
        docsText.setText("" + docs);
    }

    /** Displays the amount of free memory. */
    public void setFreeMem(long freeMem)
    {
        freeMemText.setText("" + freeMem);
    }

    /** Displays the amount of total memory. */
    public void setTotalMem(long totalMem)
    {
        totalMemText.setText("" + totalMem);
    }

    /** Displays the number of robots.txt files found. */
    public void setRobotsTxtCount(int robotsTxtCount)
    {
        setCounterProgressBar(robotsTxtsProgress, robotsTxtCount);
    }

    /**
     * Displays the number of documents read in the "Docs read" field.
     * (Previously this overwrote the "Bytes/s" field.)
     */
    public void setDocsRead(int docs)
    {
        docsReadText.setText("" + docs);
    }
}
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
package de.lanlab.larm.gui;
|
||||
/*
|
||||
A basic extension of the java.awt.Dialog class
|
||||
*/
|
||||
|
||||
import java.awt.*;
|
||||
import java.awt.event.*;
|
||||
|
||||
/**
 * Confirmation dialog shown before the application is closed. "Ja" hides
 * and disposes the invoking frame and terminates the JVM; "Nein" and the
 * window's close button only dispose the dialog.
 */
public class QuitDialog extends Dialog
{
    /**
     * Creates the dialog with its two buttons and the question label and
     * registers the listeners. The dialog is not shown.
     *
     * @param parent invoking frame; may be {@code null}, in which case no
     *               frame is closed on "Ja" and the dialog is not centered
     * @param modal  whether the dialog blocks input to other windows
     */
    public QuitDialog(Frame parent, boolean modal)
    {
        super(parent, modal);

        //Keep a local reference to the invoking frame
        frame = parent;

        // This code is automatically generated by Visual Cafe when you add
        // components to the visual environment. It instantiates and initializes
        // the components. To modify the code, only use code syntax that matches
        // what Visual Cafe can generate, or Visual Cafe may be unable to back
        // parse your Java file into its visual environment.
        //{{INIT_CONTROLS
        setLayout(null);
        setSize(337,135);
        setVisible(false);
        yesButton.setLabel(" Ja ");
        add(yesButton);
        yesButton.setFont(new Font("Dialog", Font.BOLD, 12));
        yesButton.setBounds(72,80,79,22);
        noButton.setLabel(" Nein ");
        add(noButton);
        noButton.setFont(new Font("Dialog", Font.BOLD, 12));
        noButton.setBounds(185,80,79,22);
        label1.setText("Möchten Sie LARM beenden?");
        label1.setAlignment(java.awt.Label.CENTER);
        add(label1);
        label1.setBounds(68,33,220,23);
        setTitle("LARM - Beenden");
        //}}

        //{{REGISTER_LISTENERS
        SymWindow aSymWindow = new SymWindow();
        this.addWindowListener(aSymWindow);
        SymAction lSymAction = new SymAction();
        noButton.addActionListener(lSymAction);
        yesButton.addActionListener(lSymAction);
        //}}
    }

    /**
     * Calls the inherited addNotify and, on the first invocation only,
     * grows the dialog by its insets and shifts every child component by
     * the left/top inset.
     */
    public void addNotify()
    {
        // Record the size of the window prior to calling parents addNotify.
        Dimension d = getSize();

        super.addNotify();

        if (fComponentsAdjusted)
            return;

        // Adjust components according to the insets
        Insets insets = getInsets();
        setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height);
        Component components[] = getComponents();
        for (int i = 0; i < components.length; i++)
        {
            Point p = components[i].getLocation();
            p.translate(insets.left, insets.top);
            components[i].setLocation(p);
        }
        fComponentsAdjusted = true;
    }

    /**
     * Same as {@link #QuitDialog(Frame, boolean)}, followed by setting the
     * given title.
     *
     * @param parent invoking frame, may be {@code null}
     * @param title  dialog title
     * @param modal  whether the dialog blocks input to other windows
     */
    public QuitDialog(Frame parent, String title, boolean modal)
    {
        this(parent, modal);
        setTitle(title);
    }

    /**
     * Shows or hides the dialog. Before it is shown, the dialog is
     * centered over its parent (if it has one) and the system beep is
     * emitted.
     *
     * @param b if true, show the component; otherwise, hide the component.
     * @see java.awt.Component#isVisible
     */
    public void setVisible(boolean b)
    {
        if(b)
        {
            Container owner = getParent();
            // A dialog created without an owner has no parent to center on.
            if (owner != null)
            {
                Rectangle bounds = owner.getBounds();
                Rectangle abounds = getBounds();

                setLocation(bounds.x + (bounds.width - abounds.width)/ 2,
                    bounds.y + (bounds.height - abounds.height)/2);
            }
            Toolkit.getDefaultToolkit().beep();
        }
        super.setVisible(b);
    }

    // Used for addNotify check.
    boolean fComponentsAdjusted = false;
    // Invoking frame
    Frame frame = null;

    //{{DECLARE_CONTROLS
    java.awt.Button yesButton = new java.awt.Button();
    java.awt.Button noButton = new java.awt.Button();
    java.awt.Label label1 = new java.awt.Label();
    //}}

    /** Dispatches button clicks to the matching *_ActionPerformed method. */
    class SymAction implements java.awt.event.ActionListener
    {
        public void actionPerformed(java.awt.event.ActionEvent event)
        {
            Object object = event.getSource();
            if (object == yesButton)
                yesButton_ActionPerformed(event);
            else if (object == noButton)
                noButton_ActionPerformed(event);
        }
    }

    /** Handles a click on "Ja". */
    void yesButton_ActionPerformed(java.awt.event.ActionEvent event)
    {
        yesButton_ActionPerformed_Interaction1(event);
    }

    /**
     * Hides and disposes the invoking frame (if there is one), disposes
     * this dialog and terminates the JVM with status 0. A failure on the
     * way is reported on standard error instead of being dropped silently.
     */
    void yesButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            if (frame != null)
            {
                frame.setVisible(false);    // Hide the invoking frame
                frame.dispose();            // Free system resources
            }
            this.dispose();                 // Free system resources
            System.exit(0);                 // close the application
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Handles a click on "Nein". */
    void noButton_ActionPerformed(java.awt.event.ActionEvent event)
    {
        noButton_ActionPerformed_Interaction1(event);
    }

    /** Disposes the dialog; a failure is reported on standard error. */
    void noButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
    {
        try {
            this.dispose();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Forwards the closing event of this dialog to QuitDialog_WindowClosing. */
    class SymWindow extends java.awt.event.WindowAdapter
    {
        public void windowClosing(java.awt.event.WindowEvent event)
        {
            Object object = event.getSource();
            if (object == QuitDialog.this)
                QuitDialog_WindowClosing(event);
        }
    }

    /** Handles the window's close button. */
    void QuitDialog_WindowClosing(java.awt.event.WindowEvent event)
    {
        QuitDialog_WindowClosing_Interaction1(event);
    }

    /** Disposes the dialog; a failure is reported on standard error. */
    void QuitDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
    {
        try {
            this.dispose();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
|
|
@ -0,0 +1,136 @@
|
|||
package de.lanlab.larm.net;
|
||||
|
||||
// whatever package you want
|
||||
import sun.net.www.http.HttpClient;
|
||||
import sun.net.www.MessageHeader;
|
||||
import sun.net.ProgressEntry;
|
||||
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
*@author cmarschn
|
||||
*@created 2. Mai 2001
|
||||
*/
|
||||
public class HttpClientTimeout extends HttpClient {
|
||||
private int timeout = -1;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HttpClientTimeout object
|
||||
*
|
||||
*@param url Description of Parameter
|
||||
*@param proxy Description of Parameter
|
||||
*@param proxyPort Description of Parameter
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public HttpClientTimeout(URL url, String proxy, int proxyPort) throws IOException {
|
||||
super(url, proxy, proxyPort);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HttpClientTimeout object
|
||||
*
|
||||
*@param url Description of Parameter
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public HttpClientTimeout(URL url) throws IOException {
|
||||
super(url, null, -1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the Timeout attribute of the HttpClientTimeout object
|
||||
*
|
||||
*@param i The new Timeout value
|
||||
*@exception SocketException Description of Exception
|
||||
*/
|
||||
public void setTimeout(int i) throws SocketException {
|
||||
this.timeout = -1;
|
||||
serverSocket.setSoTimeout(i);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the Socket attribute of the HttpClientTimeout object
|
||||
*
|
||||
*@return The Socket value
|
||||
*/
|
||||
public Socket getSocket() {
|
||||
return serverSocket;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param header Description of Parameter
|
||||
*@param entry Description of Parameter
|
||||
*@return Description of the Returned Value
|
||||
*@exception java.io.IOException Description of Exception
|
||||
*/
|
||||
public boolean parseHTTP(MessageHeader header, ProgressEntry entry) throws java.io.IOException {
|
||||
if (this.timeout != -1) {
|
||||
try {
|
||||
serverSocket.setSoTimeout(this.timeout);
|
||||
}
|
||||
catch (SocketException e) {
|
||||
throw new java.io.IOException("unable to set socket timeout!");
|
||||
}
|
||||
}
|
||||
return super.parseHTTP(header, entry);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
serverSocket.close();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* public void SetTimeout(int i) throws SocketException {
|
||||
* serverSocket.setSoTimeout(i);
|
||||
* }
|
||||
*/
|
||||
/*
|
||||
* This class has no public constructor for HTTP. This method is used to
|
||||
* get an HttpClient to the specifed URL. If there's currently an
|
||||
* active HttpClient to that server/port, you'll get that one.
|
||||
*
|
||||
* no longer syncrhonized -- it slows things down too much
|
||||
* synchronize at a higher level
|
||||
*/
|
||||
/**
|
||||
* Gets the New attribute of the HttpClientTimeout class
|
||||
*
|
||||
*@param url Description of Parameter
|
||||
*@return The New value
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public static HttpClientTimeout getNew(URL url) throws IOException {
|
||||
/*
|
||||
* see if one's already around
|
||||
*/
|
||||
HttpClientTimeout ret = (HttpClientTimeout) kac.get(url);
|
||||
if (ret == null) {
|
||||
ret = new HttpClientTimeout(url);
|
||||
// CTOR called openServer()
|
||||
}
|
||||
else {
|
||||
ret.url = url;
|
||||
}
|
||||
// don't know if we're keeping alive until we parse the headers
|
||||
// for now, keepingAlive is false
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
package de.lanlab.larm.net;
|
||||
|
||||
import java.net.*;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
*@author cmarschn
|
||||
*@created 2. Mai 2001
|
||||
*/
|
||||
public class HttpTimeoutFactory implements URLStreamHandlerFactory {
|
||||
int fiTimeoutVal;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HttpTimeoutFactory object
|
||||
*
|
||||
*@param iT Description of Parameter
|
||||
*/
|
||||
public HttpTimeoutFactory(int iT) {
|
||||
fiTimeoutVal = iT;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param str Description of Parameter
|
||||
*@return Description of the Returned Value
|
||||
*/
|
||||
public URLStreamHandler createURLStreamHandler(String str) {
|
||||
return new HttpTimeoutHandler(fiTimeoutVal);
|
||||
}
|
||||
|
||||
static HttpTimeoutFactory instance = null;
|
||||
|
||||
/**
|
||||
* gets an instance. only the first call will create it. In subsequent calls the iT
|
||||
* parameter doesn't have a meaning.
|
||||
*/
|
||||
public static HttpTimeoutFactory getInstance(int iT)
|
||||
{
|
||||
if(instance == null)
|
||||
{
|
||||
instance = new HttpTimeoutFactory(iT);
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
package de.lanlab.larm.net;
|
||||
|
||||
import java.net.*;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
*@author cmarschn
|
||||
*@created 2. Mai 2001
|
||||
*/
|
||||
public class HttpTimeoutHandler extends sun.net.www.protocol.http.Handler {
|
||||
int timeoutVal;
|
||||
HttpURLConnectionTimeout fHUCT;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HttpTimeoutHandler object
|
||||
*
|
||||
*@param iT Description of Parameter
|
||||
*/
|
||||
public HttpTimeoutHandler(int iT) {
|
||||
timeoutVal = iT;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the Socket attribute of the HttpTimeoutHandler object
|
||||
*
|
||||
*@return The Socket value
|
||||
*/
|
||||
public Socket getSocket() {
|
||||
return fHUCT.getSocket();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@exception Exception Description of Exception
|
||||
*/
|
||||
public void close() throws Exception {
|
||||
fHUCT.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param u Description of Parameter
|
||||
*@return Description of the Returned Value
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
protected java.net.URLConnection openConnection(URL u) throws IOException {
|
||||
return fHUCT = new HttpURLConnectionTimeout(u, this, timeoutVal);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the Proxy attribute of the HttpTimeoutHandler object
|
||||
*
|
||||
*@return The Proxy value
|
||||
*/
|
||||
String getProxy() {
|
||||
return proxy;
|
||||
// breaking encapsulation
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the ProxyPort attribute of the HttpTimeoutHandler object
|
||||
*
|
||||
*@return The ProxyPort value
|
||||
*/
|
||||
int getProxyPort() {
|
||||
return proxyPort;
|
||||
// breaking encapsulation
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
package de.lanlab.larm.net;
|
||||
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
import sun.net.www.http.HttpClient;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
*@author cmarschn
|
||||
*@created 2. Mai 2001
|
||||
*/
|
||||
public class HttpURLConnectionTimeout extends sun.net.www.protocol.http.HttpURLConnection {
|
||||
int fiTimeoutVal;
|
||||
HttpTimeoutHandler fHandler;
|
||||
HttpClientTimeout fClient;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HttpURLConnectionTimeout object
|
||||
*
|
||||
*@param u Description of Parameter
|
||||
*@param handler Description of Parameter
|
||||
*@param iTimeout Description of Parameter
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public HttpURLConnectionTimeout(URL u, HttpTimeoutHandler handler, int iTimeout) throws IOException {
|
||||
super(u, handler);
|
||||
fHandler = handler;
|
||||
fiTimeoutVal = iTimeout;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the HttpURLConnectionTimeout object
|
||||
*
|
||||
*@param u Description of Parameter
|
||||
*@param host Description of Parameter
|
||||
*@param port Description of Parameter
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public HttpURLConnectionTimeout(URL u, String host, int port) throws IOException {
|
||||
super(u, host, port);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public void connect() throws IOException {
|
||||
if (connected) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
if ("http".equals(url.getProtocol())
|
||||
/*
|
||||
* && !failedOnce <- PRIVATE
|
||||
*/
|
||||
) {
|
||||
// for safety's sake, as reported by KLGroup
|
||||
synchronized (url) {
|
||||
http = HttpClientTimeout.getNew(url);
|
||||
}
|
||||
fClient = (HttpClientTimeout) http;
|
||||
((HttpClientTimeout) http).setTimeout(fiTimeoutVal);
|
||||
}
|
||||
else {
|
||||
// make sure to construct new connection if first
|
||||
// attempt failed
|
||||
http = new HttpClientTimeout(url, fHandler.getProxy(), fHandler.getProxyPort());
|
||||
}
|
||||
ps = (PrintStream) http.getOutputStream();
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw e;
|
||||
}
|
||||
// this was missing from the original version
|
||||
connected = true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a new HttpClient object, bypassing the cache of HTTP client
|
||||
* objects/connections.
|
||||
*
|
||||
*@param url the URL being accessed
|
||||
*@return The NewClient value
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
protected HttpClient getNewClient(URL url)
|
||||
throws IOException {
|
||||
HttpClientTimeout client = new HttpClientTimeout(url, (String) null, -1);
|
||||
try {
|
||||
client.setTimeout(fiTimeoutVal);
|
||||
}
|
||||
catch (Exception e) {
|
||||
System.out.println("Unable to set timeout value");
|
||||
}
|
||||
return (HttpClient) client;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the Socket attribute of the HttpURLConnectionTimeout object
|
||||
*
|
||||
*@return The Socket value
|
||||
*/
|
||||
Socket getSocket() {
|
||||
return fClient.getSocket();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@exception Exception Description of Exception
|
||||
*/
|
||||
void close() throws Exception {
|
||||
fClient.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* opens a stream allowing redirects only to the same host.
|
||||
*
|
||||
*@param c Description of Parameter
|
||||
*@return Description of the Returned Value
|
||||
*@exception IOException Description of Exception
|
||||
*/
|
||||
public static InputStream openConnectionCheckRedirects(URLConnection c)
|
||||
throws IOException {
|
||||
boolean redir;
|
||||
int redirects = 0;
|
||||
InputStream in = null;
|
||||
|
||||
do {
|
||||
if (c instanceof HttpURLConnectionTimeout) {
|
||||
((HttpURLConnectionTimeout) c).setInstanceFollowRedirects(false);
|
||||
}
|
||||
|
||||
// We want to open the input stream before
|
||||
// getting headers, because getHeaderField()
|
||||
// et al swallow IOExceptions.
|
||||
in = c.getInputStream();
|
||||
redir = false;
|
||||
|
||||
if (c instanceof HttpURLConnectionTimeout) {
|
||||
HttpURLConnectionTimeout http = (HttpURLConnectionTimeout) c;
|
||||
int stat = http.getResponseCode();
|
||||
if (stat >= 300 && stat <= 305 &&
|
||||
stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
|
||||
URL base = http.getURL();
|
||||
String loc = http.getHeaderField("Location");
|
||||
URL target = null;
|
||||
if (loc != null) {
|
||||
target = new URL(base, loc);
|
||||
}
|
||||
http.disconnect();
|
||||
if (target == null
|
||||
|| !base.getProtocol().equals(target.getProtocol())
|
||||
|| base.getPort() != target.getPort()
|
||||
|| !HostsEquals(base, target)
|
||||
|| redirects >= 5) {
|
||||
throw new SecurityException("illegal URL redirect");
|
||||
}
|
||||
redir = true;
|
||||
c = target.openConnection();
|
||||
redirects++;
|
||||
}
|
||||
}
|
||||
} while (redir);
|
||||
return in;
|
||||
}
|
||||
|
||||
|
||||
// Same as java.net.URL.hostsEqual
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param u1 Description of Parameter
|
||||
*@param u2 Description of Parameter
|
||||
*@return Description of the Returned Value
|
||||
*/
|
||||
static boolean HostsEquals(URL u1, URL u2) {
|
||||
final String h1 = u1.getHost();
|
||||
final String h2 = u2.getHost();
|
||||
|
||||
if (h1 == null) {
|
||||
return h2 == null;
|
||||
}
|
||||
else if (h2 == null) {
|
||||
return false;
|
||||
}
|
||||
else if (h1.equalsIgnoreCase(h2)) {
|
||||
return true;
|
||||
}
|
||||
// Have to resolve addresses before comparing, otherwise
|
||||
// names like tachyon and tachyon.eng would compare different
|
||||
final boolean result[] = {false};
|
||||
|
||||
java.security.AccessController.doPrivileged(
|
||||
new java.security.PrivilegedAction() {
|
||||
/**
|
||||
* Main processing method for the HttpURLConnectionTimeout object
|
||||
*
|
||||
*@return Description of the Returned Value
|
||||
*/
|
||||
public Object run() {
|
||||
try {
|
||||
InetAddress a1 = InetAddress.getByName(h1);
|
||||
InetAddress a2 = InetAddress.getByName(h2);
|
||||
result[0] = a1.equals(a2);
|
||||
}
|
||||
catch (UnknownHostException e) {
|
||||
}
|
||||
catch (SecurityException e) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
});
|
||||
return result[0];
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.parser;
|
||||
|
||||
/**
 * Callback interface with one method each for a link, a base value and a
 * title value. (Presumably invoked by the parser in this package while it
 * scans a document; the caller is not part of this file.)
 */
public interface LinkHandler
{
    /**
     * Reports a link.
     *
     * @param value    the link value
     * @param isFrame  flag passed along with the link (by its name: whether
     *                 the link comes from a frame)
     */
    void handleLink(String value, boolean isFrame);

    /**
     * Reports a base value.
     *
     * @param value  the base value
     */
    void handleBase(String value);

    /**
     * Reports a title.
     *
     * @param value  the title value
     */
    void handleTitle(String value);
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,37 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
*
|
||||
* Description: <p>
|
||||
*
|
||||
* Copyright: Copyright (c)<p>
|
||||
*
|
||||
* Company: <p>
|
||||
*
|
||||
*
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.storage;
|
||||
import de.lanlab.larm.util.*;
|
||||
|
||||
/**
|
||||
* This interface stores documents provided by a fetcher task
|
||||
* @author Clemens Marschner
|
||||
*/
|
||||
public interface DocumentStorage
|
||||
{
|
||||
/**
|
||||
* called once when the storage is supposed to be initialized
|
||||
*/
|
||||
public void open();
|
||||
|
||||
|
||||
/**
|
||||
* called to store a web document
|
||||
*
|
||||
* @param doc the document
|
||||
*/
|
||||
public void store(WebDocument doc);
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
package de.lanlab.larm.storage;
|
||||
|
||||
import de.lanlab.larm.util.WebDocument;
|
||||
import de.lanlab.larm.util.SimpleLogger;
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @created 11. Januar 2002
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* this class saves the documents into page files of 50 MB and keeps a record of all
|
||||
* the positions into a Logger. the log file contains URL, page file number, and
|
||||
* index within the page file.
|
||||
*
|
||||
*/
|
||||
|
||||
public class LogStorage implements DocumentStorage
|
||||
{
|
||||
|
||||
SimpleLogger log;
|
||||
|
||||
File pageFile;
|
||||
FileOutputStream out;
|
||||
int pageFileCount;
|
||||
String filePrefix;
|
||||
int offset;
|
||||
boolean isValid = false;
|
||||
/**
|
||||
* Description of the Field
|
||||
*/
|
||||
public final static int MAXLENGTH = 50000000;
|
||||
boolean logContents = false;
|
||||
String fileName;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the LogStorage object
|
||||
*
|
||||
* @param log the logger where index information is saved to
|
||||
* @param logContents whether all docs are to be stored in page files or not
|
||||
* @param filePrefix the file name where the page file number is appended
|
||||
*/
|
||||
public LogStorage(SimpleLogger log, boolean logContents, String filePrefix)
|
||||
{
|
||||
this.log = log;
|
||||
pageFileCount = 0;
|
||||
this.filePrefix = filePrefix;
|
||||
this.logContents = logContents;
|
||||
if (logContents)
|
||||
{
|
||||
openPageFile();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*/
|
||||
public void open() { }
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*/
|
||||
public void openPageFile()
|
||||
{
|
||||
int id = ++pageFileCount;
|
||||
fileName = filePrefix + "_" + id + ".pfl";
|
||||
try
|
||||
{
|
||||
this.offset = 0;
|
||||
out = new FileOutputStream(fileName);
|
||||
isValid = true;
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
log.logThreadSafe("**ERROR: IOException while opening pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
|
||||
isValid = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the outputStream attribute of the LogStorage object
|
||||
*
|
||||
* @return The outputStream value
|
||||
*/
|
||||
public OutputStream getOutputStream()
|
||||
{
|
||||
if (offset > MAXLENGTH)
|
||||
{
|
||||
try
|
||||
{
|
||||
out.close();
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
log.logThreadSafe("**ERROR: IOException while closing pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
|
||||
}
|
||||
openPageFile();
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param bytes Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public synchronized int writeToPageFile(byte[] bytes)
|
||||
{
|
||||
try
|
||||
{
|
||||
OutputStream out = getOutputStream();
|
||||
int oldOffset = this.offset;
|
||||
out.write(bytes);
|
||||
this.offset += bytes.length;
|
||||
return oldOffset;
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
log.logThreadSafe("**ERROR: IOException while writing " + bytes.length + " bytes to pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the logger attribute of the LogStorage object
|
||||
*
|
||||
* @param log The new logger value
|
||||
*/
|
||||
public void setLogger(SimpleLogger log)
|
||||
{
|
||||
this.log = log;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* stores the document if storing is enabled
|
||||
*
|
||||
* @param doc Description of the Parameter
|
||||
*/
|
||||
public void store(WebDocument doc)
|
||||
{
|
||||
String docInfo = doc.getInfo();
|
||||
if (logContents && isValid && doc.getDocumentBytes() != null)
|
||||
{
|
||||
int offset = writeToPageFile(doc.getDocumentBytes());
|
||||
docInfo = docInfo + "\t" + pageFileCount + "\t" + offset;
|
||||
}
|
||||
log.logThreadSafe(docInfo);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.storage;
|
||||
import de.lanlab.larm.util.*;
|
||||
|
||||
/**
|
||||
* doesn't do a lot
|
||||
*/
|
||||
public class NullStorage implements DocumentStorage
|
||||
{
|
||||
|
||||
public NullStorage()
|
||||
{
|
||||
}
|
||||
|
||||
public void open() {}
|
||||
public void store(WebDocument doc) {}
|
||||
|
||||
}
|
|
@ -0,0 +1,176 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.storage;
|
||||
import java.sql.*;
|
||||
import de.lanlab.larm.util.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* saves the document into an sql table. At this time only in MS SQL (and probably Sybase)
|
||||
* a table "Document" with the columns DO_URL(varchar), DO_MimeType(varchar) and
|
||||
* DO_Data2(BLOB) is created after start<br>
|
||||
* notes: experimental; slow
|
||||
*/
|
||||
public class SQLServerStorage implements DocumentStorage
|
||||
{
|
||||
|
||||
private Vector freeCons;
|
||||
private Vector busyCons;
|
||||
|
||||
private Vector freeStatements;
|
||||
private Vector busyStatements;
|
||||
|
||||
private PreparedStatement addDoc;
|
||||
|
||||
public SQLServerStorage(String driver, String connectionString, String account, String password, int nrConnections)
|
||||
{
|
||||
try
|
||||
{
|
||||
Class.forName(driver);
|
||||
freeCons = new Vector(nrConnections);
|
||||
busyCons = new Vector(nrConnections);
|
||||
freeStatements = new Vector(nrConnections);
|
||||
busyStatements = new Vector(nrConnections);
|
||||
|
||||
Connection sqlConn;
|
||||
PreparedStatement statement;
|
||||
for(int i=0; i<nrConnections; i++)
|
||||
{
|
||||
sqlConn = DriverManager.getConnection(connectionString, account, password);
|
||||
statement = sqlConn.prepareStatement("INSERT INTO Document (DO_URL, DO_MimeType, DO_Data2) VALUES (?,?,?)");
|
||||
freeCons.add(sqlConn);
|
||||
freeStatements.add(statement);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
catch(SQLException e)
|
||||
{
|
||||
synchronized(this)
|
||||
{
|
||||
System.out.println(/*"Task " + taskNr + ": */ "SQLException: " + e.getMessage());
|
||||
System.err.println(" SQLState: " + e.getSQLState());
|
||||
System.err.println(" VendorError: " + e.getErrorCode());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
catch(Exception e)
|
||||
{
|
||||
System.out.println("SQLServerStorage: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
public Connection getConnection()
|
||||
{
|
||||
synchronized(this)
|
||||
{
|
||||
Connection actual = (Connection)freeCons.firstElement();
|
||||
freeCons.removeElementAt(0);
|
||||
if(actual == null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
busyCons.add(actual);
|
||||
return actual;
|
||||
}
|
||||
}
|
||||
|
||||
public void releaseConnection(Connection con)
|
||||
{
|
||||
synchronized(this)
|
||||
{
|
||||
busyCons.remove(con);
|
||||
freeCons.add(con);
|
||||
}
|
||||
}
|
||||
|
||||
public PreparedStatement getStatement()
|
||||
{
|
||||
synchronized(this)
|
||||
{
|
||||
PreparedStatement actual = (PreparedStatement)freeStatements.firstElement();
|
||||
freeStatements.removeElementAt(0);
|
||||
if(actual == null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
busyStatements.add(actual);
|
||||
return actual;
|
||||
}
|
||||
}
|
||||
|
||||
public void releaseStatement(PreparedStatement statement)
|
||||
{
|
||||
synchronized(this)
|
||||
{
|
||||
busyStatements.remove(statement);
|
||||
freeStatements.add(statement);
|
||||
}
|
||||
}
|
||||
|
||||
public void open()
|
||||
{
|
||||
Connection conn = null;
|
||||
try
|
||||
{
|
||||
conn = getConnection();
|
||||
Statement delDoc = conn.createStatement();
|
||||
|
||||
// bisherige Daten löschen, indem die Tabelle neu angelegt wird (geht schneller)
|
||||
|
||||
delDoc.executeUpdate("if exists (select * from sysobjects where id = object_id(N'[dbo].[Document]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)drop table [dbo].[Document]");
|
||||
delDoc.executeUpdate("CREATE TABLE [dbo].[Document] ([DO_ID] [int] IDENTITY (1, 1) NOT NULL , [DA_CrawlPass] [int] NULL , [DO_URL] [varchar] (255) NULL , [DO_ContentType] [varchar] (50) NULL , [DO_Data] [text] NULL , [DO_Hashcode] [int] NULL , [DO_ContentLength] [int] NULL , [DO_ContentEncoding] [varchar] (20) NULL , [DO_Data2] [image] NULL, [DO_MimeType] [varchar] (255) NULL) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]"); // löschen
|
||||
}
|
||||
catch(SQLException e)
|
||||
{
|
||||
System.out.println(/*"Task " + taskNr + ": */"SQLException: " + e.getMessage());
|
||||
System.err.println(" SQLState: " + e.getSQLState());
|
||||
System.err.println(" VendorError: " + e.getErrorCode());
|
||||
}
|
||||
finally
|
||||
{
|
||||
if(conn != null)
|
||||
{
|
||||
releaseConnection(conn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void store(WebDocument document)
|
||||
{
|
||||
|
||||
PreparedStatement addDoc = null;
|
||||
try
|
||||
{
|
||||
addDoc = getStatement();
|
||||
addDoc.setString(1, document.getURLString());
|
||||
addDoc.setString(2, document.getMimeType());
|
||||
addDoc.setBytes(3, document.getDocumentBytes());
|
||||
addDoc.execute();
|
||||
}
|
||||
catch(SQLException e)
|
||||
{
|
||||
System.out.println(/* "Task " + taskNr + ": */ "SQLException: " + e.getMessage());
|
||||
System.err.println(" SQLState: " + e.getSQLState());
|
||||
System.err.println(" VendorError: " + e.getErrorCode());
|
||||
}
|
||||
finally
|
||||
{
|
||||
if(addDoc != null)
|
||||
{
|
||||
releaseStatement(addDoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
|
||||
package de.lanlab.larm.threads;
|
||||
|
||||
public interface InterruptableTask
|
||||
{
|
||||
public void run(ServerThread thread);
|
||||
public void interrupt();
|
||||
public String getInfo();
|
||||
}
|
|
@ -0,0 +1,173 @@
|
|||
package de.lanlab.larm.threads;
|
||||
|
||||
import java.util.Vector;
|
||||
import java.util.Iterator;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import de.lanlab.larm.util.*;
|
||||
|
||||
/**
|
||||
* This thread class acts like a server. It's running idle within
|
||||
* a thread pool until "runTask" is called. The given task will then
|
||||
* be executed asynchronously
|
||||
*/
|
||||
public class ServerThread extends Thread
|
||||
{
|
||||
/**
|
||||
* the task that is to be executed. null in idle-mode
|
||||
*/
|
||||
protected InterruptableTask task = null;
|
||||
|
||||
private boolean busy = false;
|
||||
|
||||
private ArrayList listeners = new ArrayList();
|
||||
private boolean isInterrupted = false;
|
||||
private int threadNumber;
|
||||
|
||||
SimpleLogger log;
|
||||
SimpleLogger errorLog;
|
||||
|
||||
public ServerThread(int threadNumber, String name, ThreadGroup threadGroup)
|
||||
{
|
||||
super(threadGroup, name);
|
||||
init(threadNumber);
|
||||
}
|
||||
|
||||
|
||||
public ServerThread(int threadNumber, String name)
|
||||
{
|
||||
super(name);
|
||||
init(threadNumber);
|
||||
}
|
||||
|
||||
void init(int threadNumber)
|
||||
{
|
||||
this.threadNumber = threadNumber;
|
||||
File logDir = new File("logs");
|
||||
logDir.mkdir();
|
||||
log = new SimpleLogger("thread" + threadNumber);
|
||||
errorLog = new SimpleLogger("thread" + threadNumber + "_errors");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* constructor
|
||||
* @param threadNumber assigns an arbitrary number to this thread
|
||||
* used by ServerThreadFactory
|
||||
*/
|
||||
public ServerThread(int threadNumber)
|
||||
{
|
||||
init(threadNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* the run method runs asynchronously. It waits until runTask() is
|
||||
* called
|
||||
*/
|
||||
public void run()
|
||||
{
|
||||
try
|
||||
{
|
||||
|
||||
while(!isInterrupted)
|
||||
{
|
||||
synchronized(this)
|
||||
{
|
||||
while(task == null)
|
||||
{
|
||||
wait();
|
||||
}
|
||||
}
|
||||
task.run(this);
|
||||
taskReady();
|
||||
}
|
||||
}
|
||||
catch(InterruptedException e)
|
||||
{
|
||||
System.out.println("ServerThread " + threadNumber + " interrupted");
|
||||
log.log("** Thread Interrupted **");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this is the main method that will invoke a task to run.
|
||||
*/
|
||||
public synchronized void runTask(InterruptableTask t)
|
||||
{
|
||||
busy = true;
|
||||
task = t;
|
||||
notify();
|
||||
}
|
||||
|
||||
/**
|
||||
* it should be possible to interrupt a task with this function.
|
||||
* therefore, the task has to check its interrupted()-state
|
||||
*/
|
||||
public void interruptTask()
|
||||
{
|
||||
if(task != null)
|
||||
{
|
||||
task.interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* the server thread can either be in idle or busy mode
|
||||
*/
|
||||
public boolean isBusy()
|
||||
{
|
||||
return busy;
|
||||
}
|
||||
|
||||
public void addTaskReadyListener(TaskReadyListener l)
|
||||
{
|
||||
listeners.add(l);
|
||||
}
|
||||
|
||||
public void removeTaskReadyListener(TaskReadyListener l)
|
||||
{
|
||||
listeners.remove(l);
|
||||
}
|
||||
|
||||
public void interrupt()
|
||||
{
|
||||
super.interrupt();
|
||||
isInterrupted = true;
|
||||
}
|
||||
|
||||
public int getThreadNumber()
|
||||
{
|
||||
return this.threadNumber;
|
||||
}
|
||||
|
||||
public InterruptableTask getTask()
|
||||
{
|
||||
return task;
|
||||
}
|
||||
|
||||
/**
|
||||
* this method will be called when the task ends. It notifies all
|
||||
* of its observers about its changed state
|
||||
*/
|
||||
protected void taskReady()
|
||||
{
|
||||
task = null;
|
||||
busy = false;
|
||||
Iterator Ie = listeners.iterator();
|
||||
while(Ie.hasNext())
|
||||
{
|
||||
((TaskReadyListener)Ie.next()).taskReady(this);
|
||||
}
|
||||
}
|
||||
|
||||
public SimpleLogger getLog()
|
||||
{
|
||||
return log;
|
||||
}
|
||||
|
||||
public SimpleLogger getErrorLog()
|
||||
{
|
||||
return errorLog;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
package de.lanlab.larm.threads;
|
||||
import de.lanlab.larm.util.Queue;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class TaskQueue implements Queue
|
||||
{
|
||||
LinkedList queue = new LinkedList();
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public TaskQueue()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void insertMultiple(Collection c)
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* push a task to the start of the queue
|
||||
* @param i the task
|
||||
*/
|
||||
public void insert(Object i)
|
||||
{
|
||||
queue.addFirst(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the last element out of the queue
|
||||
* The element will be removed from the queue
|
||||
* @return the task
|
||||
*/
|
||||
public Object remove()
|
||||
{
|
||||
return queue.isEmpty() ? null : (InterruptableTask)queue.removeLast();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public Iterator iterator()
|
||||
{
|
||||
return queue.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void clear()
|
||||
{
|
||||
queue.clear();
|
||||
}
|
||||
|
||||
public boolean isEmpty()
|
||||
{
|
||||
return queue.isEmpty();
|
||||
}
|
||||
|
||||
public int size()
|
||||
{
|
||||
return queue.size();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
package de.lanlab.larm.threads;
|
||||
|
||||
import de.lanlab.larm.util.Observer;
|
||||
|
||||
public interface TaskReadyListener extends Observer
|
||||
{
|
||||
public void taskReady(ServerThread s);
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c)<p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.threads;
|
||||
|
||||
public class ThreadFactory
|
||||
{
|
||||
// static int count = 0;
|
||||
|
||||
public ServerThread createServerThread(int count)
|
||||
{
|
||||
return new ServerThread(count);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,380 @@
|
|||
|
||||
package de.lanlab.larm.threads;
|
||||
|
||||
//import java.util.Vector;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* if you have many tasks to accomplish, you can do this with one of the
|
||||
* following strategies:
|
||||
* <ul>
|
||||
* <li> do it one after another (single threaded). this may often be
|
||||
* inefficient because most programs often wait for external resources
|
||||
* <li> assign a new thread for each task (thread on demand). This will clog
|
||||
* up the system if many tasks have to be accomplished synchronously
|
||||
* <li> hold a number of tasks, and queue the requests if there are more
|
||||
* tasks than threads (ThreadPool).
|
||||
* </ul>
|
||||
* This thread pool is based on an article in Java-Magazin 06/2000.
|
||||
* synchronizations were removed unless necessary
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class ThreadPool implements ThreadingStrategy, TaskReadyListener {
|
||||
private int maxThreads = MAX_THREADS;
|
||||
/**
|
||||
* references to all threads are stored here
|
||||
*/
|
||||
private HashMap allThreads = new HashMap();
|
||||
/**
|
||||
* this vector takes all idle threads
|
||||
*/
|
||||
private Vector idleThreads = new Vector();
|
||||
/**
|
||||
* this vector takes all threads that are in operation (busy)
|
||||
*/
|
||||
private Vector busyThreads = new Vector();
|
||||
|
||||
/**
|
||||
* if there are no idleThreads, tasks will go here
|
||||
*/
|
||||
private TaskQueue queue = new TaskQueue();
|
||||
|
||||
/**
|
||||
* thread pool observers will be notified of status changes
|
||||
*/
|
||||
private Vector threadPoolObservers = new Vector();
|
||||
|
||||
private boolean isStopped = false;
|
||||
|
||||
/**
|
||||
* default maximum number of threads, if not given by the user
|
||||
*/
|
||||
public final static int MAX_THREADS = 5;
|
||||
|
||||
/**
|
||||
* thread was created
|
||||
*/
|
||||
public final static String THREAD_CREATE = "T_CREATE";
|
||||
/**
|
||||
* thread was started
|
||||
*/
|
||||
public final static String THREAD_START = "T_START";
|
||||
/**
|
||||
* thread is running
|
||||
*/
|
||||
public final static String THREAD_RUNNING = "T_RUNNING";
|
||||
/**
|
||||
* thread was stopped
|
||||
*/
|
||||
public final static String THREAD_STOP = "T_STOP";
|
||||
/**
|
||||
* thread was destroyed
|
||||
*/
|
||||
public final static String THREAD_END = "T_END";
|
||||
/**
|
||||
* thread is idle
|
||||
*/
|
||||
public final static String THREAD_IDLE = "T_IDLE";
|
||||
|
||||
/**
|
||||
* a task was added to the queue, because all threads were busy
|
||||
*/
|
||||
public final static String THREADQUEUE_ADD = "TQ_ADD";
|
||||
|
||||
/**
|
||||
* a task was removed from the queue, because a thread had finished and was
|
||||
* ready
|
||||
*/
|
||||
public final static String THREADQUEUE_REMOVE = "TQ_REMOVE";
|
||||
|
||||
/**
|
||||
* this factory will create the threads
|
||||
*/
|
||||
ThreadFactory factory;
|
||||
|
||||
|
||||
/**
|
||||
* this constructor will create the pool with MAX_THREADS threads and the
|
||||
* default factory
|
||||
*/
|
||||
public ThreadPool() {
    // no arguments given: default thread count and default thread factory
    this(MAX_THREADS, new ThreadFactory());
}
|
||||
|
||||
|
||||
/**
|
||||
* this constructor will create the pool with the default Factory
|
||||
*
|
||||
*@param max the maximum number of threads
|
||||
*/
|
||||
public ThreadPool(int max) {
    // only the thread count given: use the default thread factory
    this(max, new ThreadFactory());
}
|
||||
|
||||
|
||||
/**
|
||||
* constructor
|
||||
*
|
||||
*@param max maximum number of threads
|
||||
*@param factory the thread factory with which the threads will be created
|
||||
*/
|
||||
public ThreadPool(int max, ThreadFactory factory) {
    // only stores the settings; no thread exists until init() is called
    this.factory = factory;
    this.maxThreads = max;
}
|
||||
|
||||
|
||||
/**
|
||||
* this init method will create the threads. It must be called by hand
|
||||
*/
|
||||
public void init() {
|
||||
for (int i = 0; i < maxThreads; i++) {
|
||||
createThread(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param i Description of the Parameter
|
||||
*/
|
||||
public void createThread(int i) {
|
||||
ServerThread s = factory.createServerThread(i);
|
||||
idleThreads.add(s);
|
||||
allThreads.put(new Integer(i), s);
|
||||
s.addTaskReadyListener(this);
|
||||
sendMessage(i, THREAD_CREATE, "");
|
||||
s.start();
|
||||
sendMessage(i, THREAD_IDLE, "");
|
||||
}
|
||||
|
||||
|
||||
// FIXME: buggy with respect to synchronization
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param i Description of the Parameter
|
||||
*/
|
||||
public void restartThread(int i) {
|
||||
sendMessage(i, THREAD_STOP, "");
|
||||
ServerThread t = (ServerThread) allThreads.get(new Integer(i));
|
||||
idleThreads.remove(t);
|
||||
busyThreads.remove(t);
|
||||
allThreads.remove(new Integer(i));
|
||||
t.interruptTask();
|
||||
t.interrupt();
|
||||
//t.join();
|
||||
// deprecated, I know, but the only way to overcome SUN's bugs
|
||||
t = null;
|
||||
createThread(i);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param t Description of the Parameter
|
||||
*@param key Description of the Parameter
|
||||
*/
|
||||
public synchronized void doTask(InterruptableTask t, Object key) {
|
||||
if (!idleThreads.isEmpty()) {
|
||||
ServerThread s = (ServerThread) idleThreads.firstElement();
|
||||
idleThreads.remove(s);
|
||||
busyThreads.add(s);
|
||||
sendMessage(s.getThreadNumber(), THREAD_START, t.getInfo());
|
||||
s.runTask(t);
|
||||
sendMessage(s.getThreadNumber(), THREAD_RUNNING, t.getInfo());
|
||||
} else {
|
||||
|
||||
queue.insert(t);
|
||||
sendMessage(-1, THREADQUEUE_ADD, t.getInfo());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this will interrupt all threads. Therefore the InterruptableTasks must
|
||||
* attend on the interrupted-flag
|
||||
*/
|
||||
public void interrupt() {
|
||||
Iterator tasks = queue.iterator();
|
||||
while (tasks.hasNext()) {
|
||||
InterruptableTask t = (InterruptableTask) tasks.next();
|
||||
t.interrupt();
|
||||
sendMessage(-1, THREADQUEUE_REMOVE, t.getInfo());
|
||||
// In der Hoffnung, dass alles klappt...
|
||||
}
|
||||
queue.clear();
|
||||
Iterator threads = busyThreads.iterator();
|
||||
while (threads.hasNext()) {
|
||||
((ServerThread) threads.next()).interruptTask();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this will interrupt the tasks and end all threads
|
||||
*/
|
||||
public void stop() {
|
||||
isStopped = true;
|
||||
interrupt();
|
||||
Iterator threads = idleThreads.iterator();
|
||||
while (threads.hasNext()) {
|
||||
((ServerThread) threads.next()).interruptTask();
|
||||
}
|
||||
idleThreads.clear();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* called by a ServerThread when it has finished its task
|
||||
*
|
||||
*@param s Description of the Parameter
|
||||
* (s is the calling ServerThread)
|
||||
*/
|
||||
public synchronized void taskReady(ServerThread s) {
|
||||
if (isStopped) {
|
||||
s.interrupt();
|
||||
sendMessage(s.getThreadNumber(), THREAD_STOP, s.getTask().getInfo());
|
||||
busyThreads.remove(s);
|
||||
} else if (!queue.isEmpty()) {
|
||||
InterruptableTask t = (InterruptableTask) queue.remove();
|
||||
//queue.remove(t);
|
||||
sendMessage(-1, THREADQUEUE_REMOVE, t.getInfo());
|
||||
sendMessage(s.getThreadNumber(), THREAD_START, "");
|
||||
s.runTask(t);
|
||||
sendMessage(s.getThreadNumber(), THREAD_RUNNING, s.getTask().getInfo());
|
||||
} else {
|
||||
sendMessage(s.getThreadNumber(), THREAD_IDLE, "");
|
||||
idleThreads.add(s);
|
||||
busyThreads.remove(s);
|
||||
}
|
||||
synchronized (idleThreads) {
|
||||
idleThreads.notify();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*/
|
||||
public void waitForFinish() {
|
||||
synchronized (idleThreads) {
|
||||
while (busyThreads.size() != 0) {
|
||||
//System.out.println("busyThreads: " + busyThreads.size());
|
||||
try {
|
||||
idleThreads.wait();
|
||||
} catch (InterruptedException e) {
|
||||
System.out.println("Interrupted: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
//System.out.println("busyThreads: " + busyThreads.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a feature to the ThreadPoolObserver attribute of the ThreadPool
|
||||
* object
|
||||
*
|
||||
*@param o The feature to be added to the ThreadPoolObserver attribute
|
||||
*/
|
||||
public void addThreadPoolObserver(ThreadPoolObserver o) {
|
||||
threadPoolObservers.add(o);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param threadNr Description of the Parameter
|
||||
*@param action Description of the Parameter
|
||||
*@param info Description of the Parameter
|
||||
*/
|
||||
protected void sendMessage(int threadNr, String action, String info) {
|
||||
|
||||
Iterator Ie = threadPoolObservers.iterator();
|
||||
//System.out.println("ThreadPool: Sende " + action + " message an " + threadPoolObservers.size() + " Observers");
|
||||
if (threadNr != -1) {
|
||||
while (Ie.hasNext()) {
|
||||
((ThreadPoolObserver) Ie.next()).threadUpdate(threadNr, action, info);
|
||||
}
|
||||
} else {
|
||||
while (Ie.hasNext()) {
|
||||
((ThreadPoolObserver) Ie.next()).queueUpdate(info, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the queueSize attribute of the ThreadPool object
|
||||
*
|
||||
*@return The queueSize value
|
||||
*/
|
||||
public synchronized int getQueueSize() {
|
||||
return this.queue.size();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the idleThreadsCount attribute of the ThreadPool object
|
||||
*
|
||||
*@return The idleThreadsCount value
|
||||
*/
|
||||
public synchronized int getIdleThreadsCount() {
|
||||
return this.idleThreads.size();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the busyThreadsCount attribute of the ThreadPool object
|
||||
*
|
||||
*@return The busyThreadsCount value
|
||||
*/
|
||||
public synchronized int getBusyThreadsCount() {
|
||||
return this.busyThreads.size();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the threadCount attribute of the ThreadPool object
|
||||
*
|
||||
*@return The threadCount value
|
||||
*/
|
||||
public synchronized int getThreadCount() {
|
||||
return this.idleThreads.size() + this.busyThreads.size();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the threadIterator attribute of the ThreadPool object
|
||||
*
|
||||
*@return The threadIterator value
|
||||
*/
|
||||
public Iterator getThreadIterator() {
|
||||
return allThreads.values().iterator();
|
||||
// return allThreads.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
*@param queue Description of the Parameter
|
||||
*/
|
||||
public void setQueue(TaskQueue queue) {
|
||||
this.queue = queue;
|
||||
}
|
||||
|
||||
public TaskQueue getTaskQueue()
|
||||
{
|
||||
return queue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
package de.lanlab.larm.threads;
|
||||
|
||||
import de.lanlab.larm.util.Observer;
|
||||
|
||||
/**
|
||||
* an observer that observes the thread pool...
|
||||
*/
|
||||
public interface ThreadPoolObserver extends Observer
|
||||
{
|
||||
public void queueUpdate(String info, String action);
|
||||
public void threadUpdate(int threadNr, String action, String info);
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
package de.lanlab.larm.threads;
|
||||
|
||||
public interface ThreadingStrategy
|
||||
{
|
||||
public void doTask(InterruptableTask t, Object key);
|
||||
public void interrupt();
|
||||
public void stop();
|
||||
}
|
|
@ -0,0 +1,721 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
*
|
||||
* Description: <p>
|
||||
*
|
||||
* Copyright: Copyright (c)<p>
|
||||
*
|
||||
* Company: <p>
|
||||
*
|
||||
*
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.util;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
 * Unchecked wrapper for an exception that occurred while a queue block was
 * written to or read from disk.
 */
class StoreException extends RuntimeException
{
    /** the wrapped exception; null if the creator did not supply one */
    Exception origException;


    /**
     * Constructor for the StoreException object
     *
     * @param e  the exception to wrap; may be null
     */
    public StoreException(Exception e)
    {
        origException = e;
    }


    /**
     * Returns the message of the wrapped exception. If the wrapped exception
     * carries no message, its string form (which contains the class name) is
     * returned instead so that the kind of failure is not lost.
     *
     * @return   the message, or null if no exception was wrapped
     */
    public String getMessage()
    {
        if (origException == null)
        {
            return null;
        }
        String message = origException.getMessage();
        return (message != null) ? message : origException.toString();
    }


    /**
     * Exposes the wrapped exception as the cause, so that generic exception
     * handling and logging can follow the chain.
     *
     * @return   the wrapped exception, or null if there is none
     */
    public Throwable getCause()
    {
        return origException;
    }


    /**
     * Prints the reason to System.err, followed by the stack trace of the
     * wrapped exception (or this exception's own trace if nothing was wrapped).
     */
    public void printStackTrace()
    {
        System.err.println("StoreException occured with reason: " + getMessage());
        if (origException != null)
        {
            origException.printStackTrace();
        }
        else
        {
            super.printStackTrace();
        }
    }
}
|
||||
|
||||
/**
|
||||
* internal class that represents one block within a queue
|
||||
*
|
||||
* @author Clemens Marschner
|
||||
* @created 3. Januar 2002
|
||||
*/
|
||||
class QueueBlock
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* the elements section will be set to null if it is on disk Vector elements
|
||||
* must be Serializable
|
||||
*/
|
||||
LinkedList elements;
|
||||
|
||||
/**
|
||||
* Anzahl Elemente im Block. Kopie von elements.size()
|
||||
*/
|
||||
int size;
|
||||
|
||||
/**
|
||||
* maximale Blockgröße
|
||||
*/
|
||||
int maxSize;
|
||||
|
||||
/**
|
||||
* if set, elements is null and block was written to file
|
||||
*/
|
||||
boolean onDisk;
|
||||
|
||||
/**
|
||||
* Blockname
|
||||
*/
|
||||
String name;
|
||||
|
||||
|
||||
/**
|
||||
* initialisiert den Block
|
||||
*
|
||||
* @param name Der Blockname (muss eindeutig sein, sonst Kollision auf
|
||||
* Dateiebene)
|
||||
* @param maxSize maximale Blockgröße. Über- und Unterläufe werden durch
|
||||
* Exceptions behandelt
|
||||
*/
|
||||
public QueueBlock(String name, int maxSize)
|
||||
{
|
||||
this.name = name;
|
||||
this.onDisk = false;
|
||||
this.elements = new LinkedList();
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* serialisiert und speichert den Block auf Platte
|
||||
*
|
||||
* @exception StoreException Description of the Exception
|
||||
*/
|
||||
public void store()
|
||||
throws StoreException
|
||||
{
|
||||
try
|
||||
{
|
||||
ObjectOutputStream o = new ObjectOutputStream(new FileOutputStream(getFileName()));
|
||||
o.writeObject(elements);
|
||||
elements = null;
|
||||
o.close();
|
||||
onDisk = true;
|
||||
//System.out.println("CachingQueue.store: Block stored");
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
System.err.println("CachingQueue.store: IOException");
|
||||
throw new StoreException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the filename of the block
|
||||
*/
|
||||
String getFileName()
|
||||
{
|
||||
// package protected!
|
||||
|
||||
return "cachingqueue/" + name + ".cqb";
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* load the block from disk
|
||||
*
|
||||
* @exception StoreException Description of the Exception
|
||||
*/
|
||||
public void load()
|
||||
throws StoreException
|
||||
{
|
||||
try
|
||||
{
|
||||
ObjectInputStream i = new ObjectInputStream(new FileInputStream(getFileName()));
|
||||
elements = (LinkedList) i.readObject();
|
||||
i.close();
|
||||
onDisk = false;
|
||||
size = elements.size();
|
||||
if (!(new File(getFileName()).delete()))
|
||||
{
|
||||
System.err.println("CachingQueue.load: file could not be deleted");
|
||||
}
|
||||
//System.out.println("CachingQueue.load: Block loaded");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
System.err.println("CachingQueue.load: Exception " + e.getClass().getName() + " occured");
|
||||
throw new StoreException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* inserts an object at the start of the queue must be synchronized by
|
||||
* calling class to be thread safe
|
||||
*
|
||||
* @param o Description of the Parameter
|
||||
* @exception StoreException Description of the Exception
|
||||
*/
|
||||
public void insert(Object o)
|
||||
throws StoreException
|
||||
{
|
||||
if (onDisk)
|
||||
{
|
||||
load();
|
||||
}
|
||||
if (size >= maxSize)
|
||||
{
|
||||
throw new OverflowException();
|
||||
}
|
||||
elements.addFirst(o);
|
||||
size++;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* gibt das letzte Element aus der Queue zurück und löscht dieses must be
|
||||
* made synchronized by calling class to be thread safe
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
* @exception UnderflowException Description of the Exception
|
||||
* @exception StoreException Description of the Exception
|
||||
*/
|
||||
public Object remove()
|
||||
throws UnderflowException, StoreException
|
||||
{
|
||||
if (onDisk)
|
||||
{
|
||||
load();
|
||||
}
|
||||
if (size <= 0)
|
||||
{
|
||||
throw new UnderflowException();
|
||||
}
|
||||
size--;
|
||||
return elements.removeLast();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the number of elements in the block
|
||||
*/
|
||||
public int size()
|
||||
{
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* destructor. Assures that all files are deleted, even if the queue was not
|
||||
* empty at the time when the program ended
|
||||
*/
|
||||
public void finalize()
|
||||
{
|
||||
// System.err.println("finalize von " + name + " called");
|
||||
if (onDisk)
|
||||
{
|
||||
// temp-Datei löschen. Passiert, wenn z.B. eine Exception aufgetreten ist
|
||||
// System.err.println("CachingQueue.finalize von Block " + name + ": lösche Datei");
|
||||
if (!(new File(getFileName()).delete()))
|
||||
{
|
||||
// Dateifehler möglich durch Exception: ignorieren
|
||||
|
||||
// System.err.println("CachingQueue.finalize: file could not be deleted although onDisk was true");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this class holds a queue whose data is kept on disk whenever possible.
|
||||
* It's a single ended queue, meaning data can only be added at the front and
|
||||
* taken from the back. the queue itself is divided into blocks. Only the first
|
||||
* and last blocks are kept in main memory, the rest is stored on disk. Only a
|
||||
* LinkedList entry is kept in memory then.
|
||||
* Blocks are swapped if an overflow (in case of insertions) or underflow (in case
|
||||
* of removals) occur.<br>
|
||||
*
|
||||
* <pre>
|
||||
* +---+---+---+---+-+
|
||||
* put -> | M | S | S | S |M| -> remove
|
||||
* +---+---+---+---+-+
|
||||
* </pre>
|
||||
* the maximum number of entries can be specified with the blockSize parameter. Thus,
|
||||
* the queue actually holds a maximum number of 2 x blockSize objects in main memory,
|
||||
* plus a few bytes for each block.<br>
|
||||
* The objects contained in the blocks are stored with the standard Java
|
||||
* serialization mechanism
|
||||
* The files are named "cachingqueue\\Queuename_BlockNumber.cqb"
|
||||
* note that the class is not synchronized
|
||||
* @author Clemens Marschner
|
||||
* @created 3. Januar 2002
|
||||
*/
|
||||
|
||||
public class CachingQueue implements Queue
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* the Blocks
|
||||
*/
|
||||
LinkedList queueBlocks;
|
||||
|
||||
/**
|
||||
* fast access to the first block
|
||||
*/
|
||||
QueueBlock first = null;
|
||||
|
||||
/**
|
||||
* fast access to the last block
|
||||
*/
|
||||
QueueBlock last = null;
|
||||
|
||||
/**
|
||||
* maximum block size
|
||||
*/
|
||||
int blockSize;
|
||||
|
||||
/**
|
||||
* "primary key" identity count for each block
|
||||
*/
|
||||
int blockCount = 0;
|
||||
|
||||
/**
|
||||
* active blocks
|
||||
*/
|
||||
int numBlocks = 0;
|
||||
|
||||
/**
|
||||
* queue name
|
||||
*/
|
||||
String name;
|
||||
|
||||
/**
|
||||
* total number of objects
|
||||
*/
|
||||
int size;
|
||||
|
||||
|
||||
/**
|
||||
* init
|
||||
*
|
||||
* @param name the name of the queue, used in files names
|
||||
* @param blockSize maximum number of objects stored in one block
|
||||
*/
|
||||
public CachingQueue(String name, int blockSize)
|
||||
{
|
||||
queueBlocks = new LinkedList();
|
||||
this.name = name;
|
||||
this.blockSize = blockSize;
|
||||
File cq = new File("cachingqueue");
|
||||
cq.mkdir();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* inserts an object to the front of the queue
|
||||
*
|
||||
* @param o the object to be inserted. must implement Serializable
|
||||
* @exception StoreException encapsulates Exceptions that occur when writing to hard disk
|
||||
*/
|
||||
public synchronized void insert(Object o)
|
||||
throws StoreException
|
||||
{
|
||||
if (last == null && first == null)
|
||||
{
|
||||
first = last = newBlock();
|
||||
queueBlocks.addFirst(first);
|
||||
numBlocks++;
|
||||
}
|
||||
if (last == null && first != null)
|
||||
{
|
||||
// assert((last==null && first==null) || (last!= null && first!=null));
|
||||
System.err.println("Error in CachingQueue: last!=first==null");
|
||||
}
|
||||
|
||||
if (first.size() >= blockSize)
|
||||
{
|
||||
// save block and create a new one
|
||||
QueueBlock newBlock = newBlock();
|
||||
numBlocks++;
|
||||
if (last != first)
|
||||
{
|
||||
first.store();
|
||||
}
|
||||
queueBlocks.addFirst(newBlock);
|
||||
first = newBlock;
|
||||
}
|
||||
first.insert(o);
|
||||
size++;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns the last object from the queue
|
||||
*
|
||||
* @return the object returned
|
||||
*
|
||||
* @exception StoreException Description of the Exception
|
||||
* @exception UnderflowException if the queue was empty
|
||||
*/
|
||||
public synchronized Object remove()
|
||||
throws StoreException, UnderflowException
|
||||
{
|
||||
if (last == null)
|
||||
{
|
||||
throw new UnderflowException();
|
||||
}
|
||||
if (last.size() <= 0)
|
||||
{
|
||||
queueBlocks.removeLast();
|
||||
numBlocks--;
|
||||
if (numBlocks == 1)
|
||||
{
|
||||
last = first;
|
||||
}
|
||||
else if (numBlocks == 0)
|
||||
{
|
||||
first = last = null;
|
||||
throw new UnderflowException();
|
||||
}
|
||||
else if (numBlocks < 0)
|
||||
{
|
||||
// assert(numBlocks >= 0)
|
||||
System.err.println("CachingQueue.remove: numBlocks<0!");
|
||||
throw new UnderflowException();
|
||||
}
|
||||
else
|
||||
{
|
||||
last = (QueueBlock) queueBlocks.getLast();
|
||||
}
|
||||
}
|
||||
--size;
|
||||
return last.remove();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* not supported
|
||||
*
|
||||
* @param c Description of the Parameter
|
||||
*/
|
||||
public void insertMultiple(java.util.Collection c)
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* creates a new block
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
private QueueBlock newBlock()
|
||||
{
|
||||
return new QueueBlock(name + "_" + blockCount++, blockSize);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* total number of objects contained in the queue
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public int size()
|
||||
{
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* testing
|
||||
*
|
||||
* @param args The command line arguments
|
||||
*/
|
||||
public static void main(String[] args)
|
||||
{
|
||||
System.out.println("Test1: " + CachingQueueTester.testUnderflow());
|
||||
System.out.println("Test2: " + CachingQueueTester.testInsert());
|
||||
System.out.println("Test3: " + CachingQueueTester.testBufReadWrite());
|
||||
System.out.println("Test4: " + CachingQueueTester.testBufReadWrite2());
|
||||
System.out.println("Test5: " + CachingQueueTester.testUnderflow2());
|
||||
System.out.println("Test6: " + CachingQueueTester.testBufReadWrite3());
|
||||
System.out.println("Test7: " + CachingQueueTester.testExceptions());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Exception used by the test class. TODO: move the tests out and run them with JUnit
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 3. Januar 2002
|
||||
*/
|
||||
/**
 * Unchecked exception that signals a failed check in the CachingQueue tests.
 */
class AssertionFailedException extends RuntimeException
{
    AssertionFailedException()
    {
        super();
    }
}
|
||||
|
||||
/**
|
||||
* Test class. Contains some tests for the functionality of the CachingQueue
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 3. Januar 2002
|
||||
*/
|
||||
class CachingQueueTester
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testUnderflow()
|
||||
{
|
||||
CachingQueue cq = new CachingQueue("testQueue1", 10);
|
||||
try
|
||||
{
|
||||
cq.remove();
|
||||
}
|
||||
catch (UnderflowException e)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testInsert()
|
||||
{
|
||||
CachingQueue cq = new CachingQueue("testQueue2", 10);
|
||||
String test = "Test1";
|
||||
assert(cq.size() == 0);
|
||||
cq.insert(test);
|
||||
assert(cq.size() == 1);
|
||||
return (cq.remove() == test);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testBufReadWrite()
|
||||
{
|
||||
CachingQueue cq = new CachingQueue("testQueue3", 2);
|
||||
String test1 = "Test1";
|
||||
String test2 = "Test2";
|
||||
String test3 = "Test3";
|
||||
cq.insert(test1);
|
||||
cq.insert(test2);
|
||||
cq.insert(test3);
|
||||
assert(cq.size() == 3);
|
||||
cq.remove();
|
||||
cq.remove();
|
||||
assert(cq.size() == 1);
|
||||
return (cq.remove() == test3);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testBufReadWrite2()
|
||||
{
|
||||
CachingQueue cq = new CachingQueue("testQueue4", 2);
|
||||
String test1 = "Test1";
|
||||
String test2 = "Test2";
|
||||
String test3 = "Test3";
|
||||
String test4 = "Test4";
|
||||
String test5 = "Test5";
|
||||
cq.insert(test1);
|
||||
cq.insert(test2);
|
||||
cq.insert(test3);
|
||||
cq.insert(test4);
|
||||
cq.insert(test5);
|
||||
assert(cq.size() == 5);
|
||||
String t = (String) cq.remove();
|
||||
assert(t.equals(test1));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test2));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test3));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test4));
|
||||
t = (String) cq.remove();
|
||||
assert(cq.size() == 0);
|
||||
return (t.equals(test5));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param expr Description of the Parameter
|
||||
*/
|
||||
public static void assert(boolean expr)
|
||||
{
|
||||
if (!expr)
|
||||
{
|
||||
throw new AssertionFailedException();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testUnderflow2()
|
||||
{
|
||||
CachingQueue cq = new CachingQueue("testQueue5", 2);
|
||||
String test1 = "Test1";
|
||||
String test2 = "Test2";
|
||||
String test3 = "Test3";
|
||||
String test4 = "Test4";
|
||||
String test5 = "Test5";
|
||||
cq.insert(test1);
|
||||
cq.insert(test2);
|
||||
cq.insert(test3);
|
||||
cq.insert(test4);
|
||||
cq.insert(test5);
|
||||
assert(cq.remove().equals(test1));
|
||||
assert(cq.remove().equals(test2));
|
||||
assert(cq.remove().equals(test3));
|
||||
assert(cq.remove().equals(test4));
|
||||
assert(cq.remove().equals(test5));
|
||||
try
|
||||
{
|
||||
cq.remove();
|
||||
}
|
||||
catch (UnderflowException e)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testBufReadWrite3()
|
||||
{
|
||||
CachingQueue cq = new CachingQueue("testQueue4", 1);
|
||||
String test1 = "Test1";
|
||||
String test2 = "Test2";
|
||||
String test3 = "Test3";
|
||||
String test4 = "Test4";
|
||||
String test5 = "Test5";
|
||||
cq.insert(test1);
|
||||
cq.insert(test2);
|
||||
cq.insert(test3);
|
||||
cq.insert(test4);
|
||||
cq.insert(test5);
|
||||
String t = (String) cq.remove();
|
||||
assert(t.equals(test1));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test2));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test3));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test4));
|
||||
t = (String) cq.remove();
|
||||
return (t.equals(test5));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A unit test for JUnit
|
||||
*
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public static boolean testExceptions()
|
||||
{
|
||||
System.gc();
|
||||
CachingQueue cq = new CachingQueue("testQueue5", 1);
|
||||
String test1 = "Test1";
|
||||
String test2 = "Test2";
|
||||
String test3 = "Test3";
|
||||
String test4 = "Test4";
|
||||
String test5 = "Test5";
|
||||
cq.insert(test1);
|
||||
cq.insert(test2);
|
||||
cq.insert(test3);
|
||||
cq.insert(test4);
|
||||
cq.insert(test5);
|
||||
try
|
||||
{
|
||||
if (!(new File("testQueue5_1.cqb").delete()))
|
||||
{
|
||||
System.err.println("CachingQueueTester.textExceptions: Store 1 nicht vorhanden. Filename geändert?");
|
||||
}
|
||||
if (!(new File("testQueue5_2.cqb").delete()))
|
||||
{
|
||||
System.err.println("CachingQueueTester.textExceptions: Store 2 nicht vorhanden. Filename geändert?");
|
||||
}
|
||||
String t = (String) cq.remove();
|
||||
assert(t.equals(test1));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test2));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test3));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test4));
|
||||
t = (String) cq.remove();
|
||||
assert(t.equals(test5));
|
||||
}
|
||||
catch (StoreException e)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
finally
|
||||
{
|
||||
cq = null;
|
||||
System.gc();
|
||||
// finalizer müssten aufgerufen werden
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,273 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
import java.lang.reflect.*;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* prints class information with the reflection api
|
||||
* for debugging only
|
||||
*/
|
||||
/**
 * Debugging aid based on the reflection API: prints the source skeleton of a
 * class that extends (or implements) a given type, with a stub for every
 * public, non-final method that is not declared by java.lang.Object.
 */
public class ClassInfo
{

    /**
     * modifier bits that are not printed in front of a generated stub: the stub
     * has a body (so neither abstract nor native), "interface" is not a method
     * modifier, and for methods the transient/volatile bits are reused by the
     * JVM for varargs and bridge methods.
     */
    private static final int STUB_HIDDEN_MODIFIERS =
        Modifier.ABSTRACT | Modifier.NATIVE | Modifier.INTERFACE | Modifier.TRANSIENT | Modifier.VOLATILE;

    /** modifier bits in the order in which getModifierString() prints them */
    private static final int[] MODIFIER_BITS = {
        Modifier.ABSTRACT, Modifier.FINAL, Modifier.INTERFACE, Modifier.NATIVE,
        Modifier.PRIVATE, Modifier.PROTECTED, Modifier.PUBLIC, Modifier.STATIC,
        Modifier.STRICT, Modifier.SYNCHRONIZED, Modifier.TRANSIENT, Modifier.VOLATILE
    };

    /** the keywords belonging to MODIFIER_BITS, each followed by a blank */
    private static final String[] MODIFIER_NAMES = {
        "abstract ", "final ", "interface ", "native ",
        "private ", "protected ", "public ", "static ",
        "strictfp ", "synchronized ", "transient ", "volatile "
    };


    public ClassInfo()
    {
    }


    /**
     * Usage: java ClassInfo PackageName.BaseClassName PackageName.DerivedClassName
     * <p>
     * The first argument is loaded with Class.forName(); the second is only
     * used as the name (and package) of the generated class. The result is
     * printed to System.out; any failure is printed as a stack trace.
     *
     * @param args  base class name and derived class name
     */
    public static void main(String[] args)
    {
        if (args.length < 2)
        {
            System.err.println("Usage: java ClassInfo PackageName.BaseClassName PackageName.DerivedClassName");
            return;
        }
        String derivedName = args[1];

        try
        {
            Class cls = Class.forName(args[0]);
            String name = cls.getName();

            StringWriter importsWriter = new StringWriter();
            PrintWriter imports = new PrintWriter(importsWriter);
            StringWriter outWriter = new StringWriter();
            PrintWriter out = new PrintWriter(outWriter);

            TreeSet importClasses = new TreeSet();
            addImport(importClasses, name);

            // only the simple name may follow "class"; the package gets its own statement below
            out.println("/**\n * (class description here)\n */\npublic class " + getClassName(derivedName) + " " + (cls.isInterface() ? "implements " : "extends ") + getClassName(name) + "\n{");

            Method[] m = cls.getMethods();
            for (int i = 0; i < m.length; i++)
            {
                if (isStubCandidate(m[i]))
                {
                    printMethodStub(out, m[i], importClasses);
                }
            }
            out.println("}");

            Iterator importIterator = importClasses.iterator();
            while (importIterator.hasNext())
            {
                String importName = (String) importIterator.next();
                // only java.lang itself is imported implicitly, its subpackages are not
                if (!"java.lang".equals(getPackageName(importName)))
                {
                    imports.println("import " + importName + ";");
                }
            }

            out.flush();
            imports.flush();

            if (getPackageName(derivedName).length() > 0)
            {
                System.out.println("package " + getPackageName(derivedName) + ";\n");
            }
            System.out.println( "/**\n" +
                                " * Title: \n" +
                                " * Description:\n" +
                                " * Copyright: Copyright (c)\n" +
                                " * Company:\n" +
                                " * @author\n" +
                                " * @version 1.0\n" +
                                " */\n");
            System.out.println(importsWriter.getBuffer());
            System.out.print(outWriter.getBuffer());
        }
        catch (Throwable t)
        {
            t.printStackTrace();
        }
    }


    /** a stub is generated for methods that are neither private nor final nor declared by Object */
    private static boolean isStubCandidate(Method method)
    {
        int modifiers = method.getModifiers();
        return !Modifier.isPrivate(modifiers)
            && !Modifier.isFinal(modifiers)
            && !"java.lang.Object".equals(method.getDeclaringClass().getName());
    }


    /** prints the Javadoc skeleton and the stub of one method and collects the imports it needs */
    private static void printMethodStub(PrintWriter out, Method method, TreeSet importClasses)
    {
        Class[] parameters = method.getParameterTypes();
        Class returnType = method.getReturnType();

        out.println(" /**");
        out.println(" * (method description here)");
        out.println(" * defined in " + method.getDeclaringClass().getName());
        for (int j = 0; j < parameters.length; j++)
        {
            addImport(importClasses, parameters[j].getName());
            out.println(" * @param p" + j + " (parameter description here)");
        }
        if (!"void".equals(returnType.getName()))
        {
            addImport(importClasses, returnType.getName());
            out.println(" * @return (return value description here)");
        }
        out.println(" */");

        out.print(" " + getModifierString(method.getModifiers() & ~STUB_HIDDEN_MODIFIERS) + getClassName(returnType.getName()) + " ");
        out.print(method.getName() + "(");
        for (int j = 0; j < parameters.length; j++)
        {
            if (j > 0)
            {
                out.print(", ");
            }
            out.print(getClassName(parameters[j].getName()) + " p" + j);
        }
        out.print(")");

        Class[] exceptions = method.getExceptionTypes();
        if (exceptions.length > 0)
        {
            out.print(" throws ");
        }
        for (int k = 0; k < exceptions.length; k++)
        {
            if (k > 0)
            {
                out.print(", ");
            }
            String exCompleteName = exceptions[k].getName();
            addImport(importClasses, exCompleteName);
            out.print(getClassName(exCompleteName));
        }

        out.print("\n" +
                  " {\n" +
                  " /**@todo: Implement this " + method.getName() + "() method */\n" +
                  " throw new UnsupportedOperationException(\"Method " + method.getName() + "() not yet implemented.\");\n" +
                  " }\n\n");
    }


    /** remembers the import for a type, unless the type has no package (primitives, default package) */
    private static void addImport(TreeSet importClasses, String typeName)
    {
        if (getPackageName(typeName).length() > 0)
        {
            importClasses.add(getImportStatement(typeName));
        }
    }


    /** number of leading '[' characters, i.e. the array dimensions of a Class.getName() result */
    private static int arrayDimensions(String className)
    {
        int dims = 0;
        while (dims < className.length() && className.charAt(dims) == '[')
        {
            dims++;
        }
        return dims;
    }


    /**
     * translates the element part of an array name as returned by
     * Class.getName() ("Ljava.lang.String;", "I", ...) into a type name
     */
    private static String elementTypeName(String descriptor)
    {
        if (descriptor.length() == 0)
        {
            return descriptor;
        }
        switch (descriptor.charAt(0))
        {
            case 'L':
                int end = descriptor.endsWith(";") ? descriptor.length() - 1 : descriptor.length();
                return descriptor.substring(1, end);
            case 'Z':
                return "boolean";
            case 'B':
                return "byte";
            case 'C':
                return "char";
            case 'S':
                return "short";
            case 'I':
                return "int";
            case 'J':
                return "long";
            case 'F':
                return "float";
            case 'D':
                return "double";
            default:
                // unknown encoding: pass it through unchanged
                return descriptor;
        }
    }


    /**
     * Returns the package part of a name as returned by Class.getName(). For
     * arrays (of any dimension) the package of the element type is returned.
     *
     * @param className  the type name
     * @return           the package name, or "" for primitives, arrays of
     *      primitives and classes without a package
     */
    public static String getPackageName(String className)
    {
        if (className.length() == 0)
        {
            return "";
        }
        int dims = arrayDimensions(className);
        String plainName = (dims > 0) ? elementTypeName(className.substring(dims)) : className;
        int lastDot = plainName.lastIndexOf(".");
        return (lastDot != -1) ? plainName.substring(0, lastDot) : "";
    }


    /**
     * Returns the name without package of a name as returned by
     * Class.getName(). Array names are translated into source notation, e.g.
     * "[Ljava.lang.String;" becomes "String[]" and "[[I" becomes "int[][]".
     *
     * @param className  the type name
     * @return           the simple name, with one "[]" per array dimension
     */
    public static String getClassName(String className)
    {
        if (className.length() == 0)
        {
            return "";
        }
        int dims = arrayDimensions(className);
        String plainName = (dims > 0) ? elementTypeName(className.substring(dims)) : className;
        int lastDot = plainName.lastIndexOf(".");
        StringBuffer result = new StringBuffer((lastDot > -1) ? plainName.substring(lastDot + 1) : plainName);
        for (int i = 0; i < dims; i++)
        {
            result.append("[]");
        }
        return result.toString();
    }


    /**
     * Returns the name that has to follow "import" for the given type: the
     * fully qualified name of the class, or of the element type for arrays.
     *
     * @param className  the type name as returned by Class.getName()
     * @return           the name to import; just the simple name if the type
     *      has no package
     */
    static String getImportStatement(String className)
    {
        String pack = getPackageName(className);
        String clss = getClassName(className);
        // arrays cannot be imported: strip every dimension
        while (clss.endsWith("[]"))
        {
            clss = clss.substring(0, clss.length() - 2);
        }
        return (pack.length() == 0) ? clss : pack + "." + clss;
    }


    /**
     * Translates modifier bits into the corresponding keywords, each followed
     * by a blank, in the fixed order abstract, final, interface, native,
     * private, protected, public, static, strictfp, synchronized, transient,
     * volatile.
     *
     * @param modifiers  the bits as defined in java.lang.reflect.Modifier
     * @return           the keywords, or "" if no bit is set
     */
    public static String getModifierString(int modifiers)
    {
        StringBuffer mods = new StringBuffer();
        for (int i = 0; i < MODIFIER_BITS.length; i++)
        {
            if ((modifiers & MODIFIER_BITS[i]) != 0)
            {
                mods.append(MODIFIER_NAMES[i]);
            }
        }
        return mods.toString();
    }

}
|
|
@ -0,0 +1,319 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title:
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* simple hashed linked list. It allows for inserting and removing elements like
|
||||
* in a hash table (in fact, it uses a HashMap), while still being able to easily
|
||||
* traverse the collection like a list. In addition, the iterator is circular. It
|
||||
* always returns a next element as long as there are elements in the list. In
|
||||
* contrast to the iterator of Sun's collection classes, this class can cope with
|
||||
* inserts and removals while traversing the list.<p>
|
||||
* Elements are always added to the end of the list, that is, always at the same place<br>
|
||||
* All operations should work in near constant time as the list grows. Only the
|
||||
* trade-off costs of a hash (memory versus speed) have to be considered.
|
||||
* The List doesn't accept null elements
|
||||
* @todo put the traversal function into an Iterator
|
||||
* @todo implement the class as a subclass of a hash-based collection
|
||||
*/
|
||||
public class HashedCircularLinkedList
{

    /**
     * Node of the doubly linked ring. Each node also remembers the key it was
     * stored under, so that it can be taken out of the hash again.
     */
    private static class Entry
    {
        Object key;
        Object element;
        Entry next;
        Entry previous;

        Entry(Object element, Entry next, Entry previous, Object key)
        {
            this.element = element;
            this.next = next;
            this.previous = previous;
            this.key = key;
        }
    }

    /**
     * the list. The header is a sentinel that never carries an element:
     * header.next is the first and header.previous the last real entry.
     */
    private transient Entry header = new Entry(null, null, null, null);

    /**
     * the hash. maps keys to entries, which by themselves map to objects
     */
    HashMap keys;

    private transient int size = 0;

    /** the current entry in the traversal; null if there is none */
    Entry current = null;

    /**
     * Constructs an empty list.
     *
     * @param initialCapacity initial capacity of the underlying HashMap
     * @param loadFactor      load factor of the underlying HashMap
     */
    public HashedCircularLinkedList(int initialCapacity, float loadFactor)
    {
        header.next = header.previous = header;
        keys = new HashMap(initialCapacity, loadFactor);
    }

    /**
     * Returns the number of elements in this list.
     *
     * @return the number of elements in this list.
     */
    public int size()
    {
        return size;
    }

    /**
     * Removes the element stored under the given key. If that element is the
     * current traversal position, the position moves one step back, so the
     * following call to next() returns the successor of the removed element.
     *
     * @param o key of the element to be removed
     * @return <tt>true</tt> if an element was stored under the key and has
     *         been removed, <tt>false</tt> if the key was unknown
     */
    public boolean removeByKey(Object o)
    {
        Entry e = (Entry)keys.get(o);
        if(e == null)
        {
            return false;
        }
        removeEntry(e);
        return true;
    }

    /**
     * Removes all of the elements from this list.
     */
    public void clear()
    {
        // list
        header.next = header.previous = header;

        // hash
        keys.clear();

        size = 0;
        current = null;
    }

    /** Links a new entry into the ring directly before e and returns it. */
    private Entry addEntryBefore(Object key, Object o, Entry e)
    {
        Entry newEntry = new Entry(o, e, e.previous, key);
        newEntry.previous.next = newEntry;
        newEntry.next.previous = newEntry;
        return newEntry;
    }

    /**
     * Unlinks e from the ring; hash, size and traversal position are untouched.
     *
     * @throws NoSuchElementException if e is the sentinel header
     */
    private void removeEntryFromList(Entry e)
    {
        if(e != null)
        {
            if (e == header)
            {
                throw new NoSuchElementException();
            }

            e.previous.next = e.next;
            e.next.previous = e.previous;
        }
    }

    /**
     * Removes e from ring and hash, and keeps size and traversal position
     * consistent. Shared by removeByKey() and removeCurrent().
     */
    private void removeEntry(Entry e)
    {
        if(e == current)
        {
            // step back (must happen before unlinking, while the ring is intact)
            current = (size > 1) ? previousEntry(current) : null;
        }
        removeEntryFromList(e);
        keys.remove(e.key);
        size--;
    }

    /**
     * Appends a key/element pair at the end of the list.
     *
     * @param key   key of the new element; must not be null
     * @param value the element to store
     * @return <tt>true</tt> if the pair was added, <tt>false</tt> if the key
     *         is null or already present (the list is unchanged then)
     */
    public boolean put(Object key, Object value)
    {
        if(key != null && !keys.containsKey(key))
        {
            Entry e = addEntryBefore(key, value, header);  // add it as the last element
            keys.put(key, e);                              // link key to entry
            size++;
            return true;
        }
        else
        {
            return false;
        }
    }

    /**
     * The traversal is circular, so there is a next element whenever the
     * list is not empty.
     *
     * @return <tt>true</tt> if the list contains at least one element
     */
    public boolean hasNext()
    {
        return (size > 0);
    }

    /**
     * Returns the entry following e in the ring, skipping the header.
     * A null e means "start at the beginning".
     */
    private Entry nextEntry(Entry e)
    {
        if(size > 1)
        {
            if(e == null)
            {
                e = header;
            }
            Entry next = e.next;
            if(next == header)
            {
                next = next.next;
            }
            return next;
        }
        else if(size == 1)
        {
            return header.next;
        }
        else
        {
            return null;
        }
    }

    /**
     * Returns the entry preceding e in the ring, skipping the header.
     * A null e means "start at the end".
     */
    private Entry previousEntry(Entry e)
    {
        if(size > 1)
        {
            if(e == null)
            {
                e = header;
            }
            Entry previous = e.previous;
            if(previous == header)
            {
                previous = previous.previous;
            }
            return previous;
        }
        else if(size == 1)
        {
            return header.previous;
        }
        else
        {
            return null;
        }
    }

    /**
     * Advances the traversal by one element, wrapping around at the end.
     *
     * @return the element at the new position, or null if the list is empty
     */
    public Object next()
    {
        current = nextEntry(current);
        if(current != null)
        {
            return current.element;
        }
        else
        {
            return null;
        }
    }

    /**
     * Removes the element at the current traversal position, i.e. the one
     * returned by the last call to next(). Size and traversal position are
     * updated exactly as in removeByKey(). Does nothing if there is no
     * current element.
     */
    public void removeCurrent()
    {
        if(current != null)
        {
            removeEntry(current);
        }
    }

    /**
     * Looks up an element by key without touching the traversal position.
     *
     * @param key the key to look up
     * @return the element stored under the key, or null if there is none
     */
    public Object get(Object key)
    {
        Entry e = ((Entry)keys.get(key));
        if(e != null)
        {
            return e.element;
        }
        else
        {
            return null;
        }
    }

    /**
     * testing. Prints each result next to the expected value in brackets.
     */
    public static void main(String[] args)
    {
        HashedCircularLinkedList h = new HashedCircularLinkedList(20, 0.75f);
        h.put("1", "a");
        h.put("2", "b");
        h.put("3", "c");
        String t;
        System.out.println("size [3]: " + h.size());
        t = (String)h.next();
        System.out.println("2nd element via get [b]: " + h.get("2"));

        System.out.println("next element [a]: " + t);
        t = (String)h.next();
        System.out.println("next element [b]: " + t);
        t = (String)h.next();
        System.out.println("next element [c]: " + t);
        t = (String)h.next();
        System.out.println("1st element after circular traversal [a]: " + t);
        h.removeByKey("1");
        System.out.println("1st element after remove [null]: " + h.get("1"));
        System.out.println("size after removal [2]: " + h.size());
        t = (String)h.next();
        System.out.println("next element [b]: " + t);
        t = (String)h.next();
        System.out.println("next element [c]: " + t);
        t = (String)h.next();
        System.out.println("next element [b]: " + t);
        h.removeCurrent();
        t = (String)h.next();
        System.out.println("next element after 1 removal [c]: " + t);
        t = (String)h.next();
        System.out.println("next element: [c]: " + t);
        h.removeByKey("3");
        System.out.println("size after 3 removals [0]: " + h.size());
        t = (String)h.next();
        System.out.println("next element [null]: " + t);
    }
}
|
||||
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c) <p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.util;
|
||||
|
||||
public interface InputStreamObserver
|
||||
{
|
||||
public void notifyOpened(ObservableInputStream in, long timeElapsed);
|
||||
public void notifyClosed(ObservableInputStream in, long timeElapsed);
|
||||
public void notifyRead(ObservableInputStream in, long timeElapsed, int nrRead, int totalRead);
|
||||
public void notifyFinished(ObservableInputStream in, long timeElapsed, int totalRead);
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
package de.lanlab.larm.util;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Logger stub. Nothing is implemented yet: the constructor ignores its
 * argument and the output stream is never opened or written to.
 */
public class Logger {

    /** Output stream of the logger; never assigned in this class. */
    private FileOutputStream out;

    /**
     * Creates the logger. No file is opened.
     *
     * @param fileName currently unused
     */
    public Logger(String fileName) {
        // nothing to do here yet
    }
}
|
|
@ -0,0 +1,101 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c) <p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.util;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class ObservableInputStream extends FilterInputStream
|
||||
{
|
||||
private boolean reporting = true;
|
||||
private long startTime;
|
||||
private int totalRead = 0;
|
||||
private int step = 1;
|
||||
private int nextStep = 0;
|
||||
|
||||
InputStreamObserver observer;
|
||||
|
||||
public ObservableInputStream(InputStream in, InputStreamObserver iso, int reportingStep)
|
||||
{
|
||||
super(in);
|
||||
startTime = System.currentTimeMillis();
|
||||
observer = iso;
|
||||
observer.notifyOpened(this, System.currentTimeMillis() - startTime);
|
||||
nextStep = step = reportingStep;
|
||||
}
|
||||
|
||||
public void close() throws IOException
|
||||
{
|
||||
super.close();
|
||||
observer.notifyClosed(this, System.currentTimeMillis() - startTime);
|
||||
}
|
||||
|
||||
public void setReporting(boolean reporting)
|
||||
{
|
||||
this.reporting = reporting;
|
||||
}
|
||||
|
||||
public boolean isReporting()
|
||||
{
|
||||
return reporting;
|
||||
}
|
||||
|
||||
public void setReportingStep(int step)
|
||||
{
|
||||
this.step = step;
|
||||
}
|
||||
|
||||
public int read() throws IOException
|
||||
{
|
||||
int readByte = super.read();
|
||||
if(reporting)
|
||||
{
|
||||
notifyObserver(readByte>=0? 1 : 0);
|
||||
}
|
||||
return readByte;
|
||||
}
|
||||
|
||||
public int read(byte[] b) throws IOException
|
||||
{
|
||||
int nrRead = super.read(b);
|
||||
if(reporting)
|
||||
{
|
||||
notifyObserver(nrRead);
|
||||
}
|
||||
return nrRead;
|
||||
}
|
||||
|
||||
private void notifyObserver(int nrRead)
|
||||
{
|
||||
if(nrRead > 0)
|
||||
{
|
||||
totalRead += nrRead;
|
||||
if(totalRead > nextStep)
|
||||
{
|
||||
nextStep += step;
|
||||
observer.notifyRead(this, System.currentTimeMillis() - startTime, nrRead, totalRead);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
observer.notifyFinished(this, System.currentTimeMillis() - startTime, totalRead);
|
||||
}
|
||||
}
|
||||
|
||||
public int read(byte[] b, int offs, int size) throws IOException
|
||||
{
|
||||
int nrRead = super.read(b, offs, size);
|
||||
if(reporting)
|
||||
{
|
||||
notifyObserver(nrRead);
|
||||
}
|
||||
return nrRead;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
|
||||
/**
|
||||
* not used
|
||||
*/
|
||||
public interface Observer {
    // Deliberately without members.
}
|
|
@ -0,0 +1,15 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title: LARM
|
||||
* Description:
|
||||
* Copyright: Copyright (c) 2001
|
||||
* Company: LMU-IP
|
||||
* @author Clemens Marschner
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
|
||||
public class OverflowException extends RuntimeException {
    // Unchecked exception type without state of its own; only the implicit
    // no-argument constructor is available.
}
|
|
@ -0,0 +1,20 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * Minimal queue contract: elements are put in one at a time or in bulk and
 * taken out one at a time.
 */
public interface Queue {

    /** Takes one element out of the queue and returns it. */
    Object remove();

    /** Puts a single element into the queue. */
    void insert(Object o);

    /** Puts the elements of a collection into the queue. */
    void insertMultiple(Collection c);

    /** Returns the number of elements in the queue. */
    int size();
}
|
|
@ -0,0 +1,285 @@
|
|||
/*
|
||||
* @(#)SimpleCharArrayReader.java 1.35 00/02/02
|
||||
*
|
||||
*/
|
||||
|
||||
package de.lanlab.larm.util;
|
||||
import java.io.*;
|
||||
|
||||
/**
 * A <code>SimpleCharArrayReader</code> reads characters from a char array
 * supplied by its creator. An internal counter keeps track of the next
 * character to be supplied by the <code>read</code> methods.
 * <br>
 * In contrast to the original <code>CharArrayReader</code> this
 * version is not thread safe. The monitor on the read()-function caused programs
 * to slow down much, because this function is called for every character. This
 * class can thus only be used if only one thread is accessing the stream
 * @author    Clemens Marschner
 * @version   1.00
 * @see       java.io.CharArrayReader
 */
public
class SimpleCharArrayReader extends Reader
{

    /**
     * A flag that is set to true when this stream is closed.
     * It is recorded but not evaluated by any method yet.
     */
    private boolean isClosed = false;

    /**
     * The character array provided by the creator of the reader (not
     * copied). Elements <code>buf[0]</code> through <code>buf[count-1]</code>
     * are the only characters that can ever be read; <code>buf[pos]</code>
     * is the next character to be read.
     */
    protected char buf[];

    /**
     * The index of the next character to read from the buffer.
     * This value should always be nonnegative
     * and not larger than the value of <code>count</code>.
     */
    protected int pos;

    /**
     * The currently marked position. It is the start position given at
     * construction time (zero for the one-argument constructor) until
     * <code>mark()</code> is called; <code>reset()</code> moves the read
     * position back to it.
     */
    protected int mark = 0;

    /**
     * The index one greater than the last valid character in the buffer.
     * This value should always be nonnegative
     * and not larger than the length of <code>buf</code>.
     */
    protected int count;

    /**
     * Creates a reader over the whole array. The array is not copied.
     * Reading starts at index <code>0</code> and ends at the end of the array.
     *
     * @param   buf   the input buffer.
     */
    public SimpleCharArrayReader(char buf[])
    {
        this.buf = buf;
        this.pos = 0;
        this.count = buf.length;
    }

    /**
     * Creates a reader over a section of the array. The array is not copied.
     * Reading starts at <code>offset</code>, which is also the initial mark,
     * and ends at <code>offset+length</code> or the end of the array,
     * whichever comes first.
     *
     * @param   buf      the input buffer.
     * @param   offset   the index of the first character to read.
     * @param   length   the maximum number of characters to read from the buffer.
     */
    public SimpleCharArrayReader(char buf[], int offset, int length)
    {
        this.buf = buf;
        this.pos = offset;
        this.count = Math.min(offset + length, buf.length);
        this.mark = offset;
    }

    /**
     * Reads the next character. The character is returned unchanged as an
     * <code>int</code> in the range <code>0</code> to <code>65535</code>,
     * as required by <code>Reader.read()</code> and consistent with the
     * bulk read method.
     *
     * @return  the next character, or <code>-1</code> if the end of the
     *          buffer has been reached.
     */
    public int read()
    {
        if (pos < count)
        {
            // no "& 0xff" here: this is a character reader, not a byte stream
            return buf[pos++];
        }
        return -1;
    }

    /**
     * Reads up to <code>len</code> characters into <code>b</code>, starting
     * at <code>b[off]</code>. The number of characters copied is the smaller
     * of <code>len</code> and <code>count-pos</code>.
     * <p>
     * This <code>read</code> method cannot block.
     *
     * @param   b     the buffer into which the data is read.
     * @param   off   the start offset in <code>b</code>.
     * @param   len   the maximum number of characters read.
     * @return  the number of characters read, <code>0</code> if
     *          <code>len</code> is zero, or <code>-1</code> if the end of
     *          the buffer has been reached.
     * @throws  NullPointerException      if <code>b</code> is null
     * @throws  IndexOutOfBoundsException if <code>off</code> and
     *          <code>len</code> do not describe a range inside <code>b</code>
     */
    public int read(char b[], int off, int len)
    {
        if (b == null)
        {
            throw new NullPointerException();
        }
        else if ((off < 0) || (off > b.length) || (len < 0) ||
                 ((off + len) > b.length) || ((off + len) < 0))
        {
            throw new IndexOutOfBoundsException();
        }
        if (pos >= count)
        {
            return -1;
        }
        if (len > count - pos)
        {
            len = count - pos;
        }
        if (len <= 0)
        {
            return 0;
        }
        System.arraycopy(buf, pos, b, off, len);
        pos += len;
        return len;
    }

    /**
     * Skips up to <code>n</code> characters. Fewer characters are skipped
     * if the end of the buffer is reached; nothing is skipped for a
     * negative <code>n</code>.
     *
     * @param   n   the number of characters to be skipped.
     * @return  the actual number of characters skipped.
     */
    public long skip(long n)
    {
        // compare against the remainder instead of computing pos + n,
        // which overflows for very large n such as Long.MAX_VALUE
        long remaining = count - pos;
        if (n > remaining)
        {
            n = remaining;
        }
        if (n < 0)
        {
            return 0;
        }
        pos += n;
        return n;
    }

    /**
     * Returns the number of characters that are left to be read, which is
     * <code>count - pos</code>.
     *
     * @return  the number of characters remaining in the buffer.
     */
    public int available()
    {
        return count - pos;
    }

    /**
     * Tests if SimpleCharArrayReader supports mark/reset.
     *
     * @return  always <code>true</code>
     */
    public boolean markSupported()
    {
        return true;
    }

    /**
     * Marks the current read position; a later <code>reset()</code>
     * returns to it.
     *
     * @param   readAheadLimit   ignored, the mark never becomes invalid
     */
    public void mark(int readAheadLimit)
    {
        mark = pos;
    }

    /**
     * Moves the read position back to the marked position. That is the
     * start position of the reader unless <code>mark()</code> has been called.
     */
    public void reset()
    {
        pos = mark;
    }

    /**
     * Marks the reader as closed. Nothing is released and reading remains
     * possible, because the closed flag is not checked anywhere.
     */
    public void close() throws IOException
    {
        isClosed = true;
    }

    /** Check to make sure that the stream has not been closed */
    private void ensureOpen()
    {
        /* This method does nothing for now.  Once we add throws clauses
         * to the I/O methods in this class, it will throw an IOException
         * if the stream has been closed.
         */
    }

}
|
|
@ -0,0 +1,112 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.text.*;
|
||||
|
||||
/**
|
||||
* this class is only used for SPEED. Its log function is not thread safe by
|
||||
* default.
|
||||
* It uses a BufferedWriter.
|
||||
* It registers with a logger manager, which can be used to flush several loggers
|
||||
* at once
|
||||
* @todo: including the date slows down a lot
|
||||
*
|
||||
*/
|
||||
public class SimpleLogger
|
||||
{
|
||||
private SimpleDateFormat formatter = new SimpleDateFormat ("HH:mm:ss:SSSS");
|
||||
|
||||
Writer logFile;
|
||||
|
||||
StringBuffer buffer = new StringBuffer(1000);
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean includeDate;
|
||||
|
||||
public void setStartTime(long startTime)
|
||||
{
|
||||
this.startTime = startTime;
|
||||
}
|
||||
|
||||
public synchronized void logThreadSafe(String text)
|
||||
{
|
||||
log(text);
|
||||
}
|
||||
|
||||
public synchronized void logThreadSafe(Throwable t)
|
||||
{
|
||||
log(t);
|
||||
}
|
||||
|
||||
public void log(String text)
|
||||
{
|
||||
try
|
||||
{
|
||||
buffer.setLength(0);
|
||||
if(includeDate)
|
||||
{
|
||||
buffer.append(formatter.format(new Date())).append(": ").append(System.currentTimeMillis()-startTime).append(" ms: ");
|
||||
}
|
||||
buffer.append(text).append("\n");
|
||||
logFile.write(buffer.toString());
|
||||
if(flushAtOnce)
|
||||
{
|
||||
logFile.flush();
|
||||
}
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
System.out.println("Couldn't write to logfile");
|
||||
}
|
||||
}
|
||||
|
||||
public void log(Throwable t)
|
||||
{
|
||||
t.printStackTrace(new PrintWriter(logFile));
|
||||
}
|
||||
|
||||
boolean flushAtOnce = false;
|
||||
|
||||
public void setFlushAtOnce(boolean flush)
|
||||
{
|
||||
this.flushAtOnce = flush;
|
||||
}
|
||||
|
||||
public SimpleLogger(String name)
|
||||
{
|
||||
init(name, true);
|
||||
}
|
||||
|
||||
public SimpleLogger(String name, boolean includeDate)
|
||||
{
|
||||
init(name, includeDate);
|
||||
}
|
||||
|
||||
public void flush() throws IOException
|
||||
{
|
||||
logFile.flush();
|
||||
}
|
||||
|
||||
private void init(String name, boolean includeDate)
|
||||
{
|
||||
try
|
||||
{
|
||||
logFile = new BufferedWriter(new FileWriter("logs/" + name + ".log"));
|
||||
SimpleLoggerManager.getInstance().register(this);
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
System.out.println("IOException while creating logfile " + name + ":");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* this singleton manages all loggers. It can be used to flush all SimpleLoggers
|
||||
* at once
|
||||
*/
|
||||
public class SimpleLoggerManager
|
||||
{
|
||||
static SimpleLoggerManager instance = null;
|
||||
|
||||
ArrayList logs;
|
||||
|
||||
private SimpleLoggerManager()
|
||||
{
|
||||
logs = new ArrayList();
|
||||
}
|
||||
|
||||
public void register(SimpleLogger logger)
|
||||
{
|
||||
logs.add(logger);
|
||||
}
|
||||
|
||||
public void flush() throws IOException
|
||||
{
|
||||
Iterator it = logs.iterator();
|
||||
IOException ex = null;
|
||||
while(it.hasNext())
|
||||
{
|
||||
try
|
||||
{
|
||||
SimpleLogger logger = (SimpleLogger)it.next();
|
||||
logger.flush();
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
ex = e;
|
||||
}
|
||||
}
|
||||
if(ex != null)
|
||||
{
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
|
||||
public static SimpleLoggerManager getInstance()
|
||||
{
|
||||
if(instance == null)
|
||||
{
|
||||
instance = new SimpleLoggerManager();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine<p>
|
||||
* Description: <p>
|
||||
* Copyright: Copyright (c) <p>
|
||||
* Company: <p>
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
package de.lanlab.larm.util;
|
||||
|
||||
import java.util.Observable;
|
||||
|
||||
/**
 * An Observable whose "changed" flag can be set from outside. Observable
 * declares setChanged() as protected; this subclass widens it to public.
 */
public class SimpleObservable extends Observable {

    /** Marks this object as changed; simply forwards to Observable. */
    public void setChanged() {
        super.setChanged();
    }
}
|
|
@ -0,0 +1,91 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
import java.io.Serializable;
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine
|
||||
* Description:
|
||||
* Copyright: Copyright (c)
|
||||
* Company:
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* thread safe state information.
|
||||
* The get methods are not synchronized. Clone the state object before using them
|
||||
* If you use a state object in a class, always return a clone
|
||||
* <pre>public class MyClass {
|
||||
* State state = new State("Running");
|
||||
* public State getState() { return state.cloneState() }</pre>
|
||||
*
|
||||
* note on serialization: if you deserialize a state, the state string will be newly created.
|
||||
* that means you then have to compare the states via equals() and not ==
|
||||
*/
|
||||
public class State implements Cloneable, Serializable {

    /** name of the state */
    private String state;
    /** time in milliseconds at which the state was set */
    private long stateSince;
    /** optional additional information on the state; may be null */
    private Object info;

    /**
     * Creates a state that starts now and carries no additional information.
     *
     * @param state name of the state
     */
    public State(String state) {
        setState(state);
    }

    /** Creates a state with a given start time and no additional information. */
    private State(String state, long stateSince) {
        this(state, stateSince, null);
    }

    /** Creates a state from all three values; used for cloning. */
    private State(String state, long stateSince, Object info) {
        this.state = state;
        this.stateSince = stateSince;
        this.info = info;
    }

    /**
     * Changes the state, resets its start time to now and clears the
     * additional information.
     *
     * @param state name of the new state
     */
    public void setState(String state) {
        setState(state, null);
    }

    /**
     * Changes the state and its additional information, and resets the
     * start time to now. All three values change under the object's lock.
     *
     * @param state name of the new state
     * @param info  additional information; may be null
     */
    public synchronized void setState(String state, Object info) {
        this.state = state;
        this.stateSince = System.currentTimeMillis();
        this.info = info;
    }

    /** @return name of the state (read without synchronization) */
    public String getState() {
        return state;
    }

    /** @return time in milliseconds at which the state was set (read without synchronization) */
    public long getStateSince() {
        return stateSince;
    }

    /** @return the additional information, possibly null (read without synchronization) */
    public Object getInfo() {
        return info;
    }

    /**
     * Takes a consistent snapshot of name, start time and information.
     * The information object itself is shared, not copied.
     *
     * @return a new State with the same values
     */
    public synchronized Object clone() {
        State snapshot = new State(state, stateSince, info);
        return snapshot;
    }

    /**
     * Same as clone(), but already cast to State.
     *
     * @return a new State with the same values
     */
    public State cloneState() {
        return (State) clone();
    }
}
|
|
@ -0,0 +1,60 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 27. Januar 2002
|
||||
*/
|
||||
public class URLUtils
{
    /**
     * does the same as URL.toExternalForm(), but leaves out the Ref part (which we would
     * cut off anyway) and sizes the StringBuffer exactly, so that no call of
     * expandCapacity() will be necessary.
     * The result is protocol, ":", then "//" plus authority if there is one, then
     * the file part (path and query), or "/" if the file part is empty.
     * only meaningful if the default URLStreamHandler is used (as is the case with http, https, or shttp)
     *
     * @param u  the URL to be converted
     * @return   the URL as String
     */
    public static String toExternalFormNoRef(URL u)
    {
        String protocol = u.getProtocol();
        String authority = u.getAuthority();
        String file = u.getFile();

        boolean hasAuthority = authority != null && authority.length() > 0;
        boolean hasFile = file != null && file.length() > 0;

        // exact length of everything appended below; an empty file part
        // is replaced by "/" and therefore counts as one character
        StringBuffer result = new StringBuffer(
            (protocol == null ? 4 : protocol.length()) + 1 +
            (hasAuthority ? 2 + authority.length() : 0) +
            (hasFile ? file.length() : 1)
        );

        result.append(protocol);
        result.append(":");
        if (hasAuthority)
        {
            result.append("//");
            result.append(authority);
        }
        if (hasFile)
        {
            result.append(file);
        }
        else
        {
            result.append("/");
        }

        return result.toString();
    }

}
|
|
@ -0,0 +1,15 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
/**
|
||||
* Title: LARM
|
||||
* Description:
|
||||
* Copyright: Copyright (c) 2001
|
||||
* Company: LMU-IP
|
||||
* @author Clemens Marschner
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
|
||||
public class UnderflowException extends RuntimeException {
    // Unchecked exception type without state of its own; only the implicit
    // no-argument constructor is available.
}
|
|
@ -0,0 +1,94 @@
|
|||
package de.lanlab.larm.util;
|
||||
|
||||
|
||||
import java.net.URL;
|
||||
import de.lanlab.larm.fetcher.URLMessage;
|
||||
|
||||
/**
|
||||
* a web document of whatever type. generated by a fetcher task
|
||||
*/
|
||||
public class WebDocument extends URLMessage
|
||||
{
|
||||
protected String mimeType;
|
||||
protected byte[] document;
|
||||
protected int resultCode;
|
||||
protected int size;
|
||||
protected String title;
|
||||
|
||||
public WebDocument(URL url, String mimeType, byte[] document, int resultCode, URL referer, int size, String title)
|
||||
{
|
||||
super(url, referer, false);
|
||||
this.url = url;
|
||||
this.mimeType = mimeType;
|
||||
this.document = document;
|
||||
this.resultCode = resultCode;
|
||||
this.size = size;
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getTitle()
|
||||
{
|
||||
return title;
|
||||
}
|
||||
|
||||
public URL getUrl()
|
||||
{
|
||||
return url;
|
||||
}
|
||||
|
||||
public int getSize()
|
||||
{
|
||||
return this.size;
|
||||
}
|
||||
|
||||
public void setSize(int size)
|
||||
{
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
|
||||
public void setDocument(byte[] document)
|
||||
{
|
||||
this.document = document;
|
||||
}
|
||||
public int getResultCode()
|
||||
{
|
||||
return resultCode;
|
||||
}
|
||||
|
||||
public void setResultCode(int resultCode)
|
||||
{
|
||||
this.resultCode = resultCode;
|
||||
}
|
||||
|
||||
public byte[] getDocumentBytes()
|
||||
{
|
||||
return this.document;
|
||||
}
|
||||
|
||||
public void setUrl(URL url)
|
||||
{
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
public void setMimeType(String mimeType)
|
||||
{
|
||||
this.mimeType = mimeType;
|
||||
}
|
||||
|
||||
public String getMimeType()
|
||||
{
|
||||
return mimeType;
|
||||
}
|
||||
|
||||
public String getInfo()
|
||||
{
|
||||
return super.getInfo() + "\t" +
|
||||
this.resultCode + "\t" +
|
||||
this.mimeType + "\t" +
|
||||
this.size + "\t" +
|
||||
"\"" + this.title.replace('\"', (char)0xff ).replace('\n',' ').replace('\r',' ') + "\"";
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,294 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.misc;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.*;
|
||||
|
||||
/**
 * This class is a container for algorithms working on byte arrays and byte
 * streams - some of the algorithms are analogous to those in
 * java.lang.String.
 *
 * @author Anders Kristensen
 */
public class ByteArray {

    /**
     * Returns a copy of the characters in s as a new byte array.
     * Each byte receives the eight low-order bits of the corresponding
     * character; the high-order bits are discarded.
     *
     * @param s the string to convert
     * @return a new array of length {@code s.length()}
     */
    public static final byte[] getBytes(String s) {
        int len = s.length();
        byte[] b = new byte[len];
        // Same truncation as the deprecated String.getBytes(int,int,byte[],int).
        for (int i = 0; i < len; i++) {
            b[i] = (byte) s.charAt(i);
        }
        return b;
    }

    /**
     * Returns contents of file as byte array.
     *
     * @param filename path of the file to read
     * @return the complete file contents
     * @throws IOException if the file cannot be read completely
     */
    public static byte[] loadFromFile(String filename) throws IOException {
        return loadFromFile(new File(filename));
    }

    /**
     * Returns contents of file <i>file</i> as byte array.
     *
     * @param file the file to read
     * @return the complete file contents
     * @throws IOException if the file is larger than an array can hold or
     *                     ends before the expected number of bytes was read
     */
    public static byte[] loadFromFile(File file) throws IOException {
        long fileLength = file.length();
        if (fileLength > Integer.MAX_VALUE)
            throw new IOException("File too large to load into a byte array: " + file);

        int n, nread = 0, len = (int) fileLength;
        FileInputStream fin = new FileInputStream(file);
        try {
            byte[] content = new byte[len];

            while (nread < len) {
                if ((n = fin.read(content, nread, len - nread)) == -1)
                    throw new IOException("Error loading Compound from file");
                nread += n;
            }

            return content;
        } finally {
            // Release the file handle on success and on failure alike.
            fin.close();
        }
    }

    /**
     * Reads n bytes from the specified input stream. It will return
     * fewer bytes if fewer bytes are available on the stream.
     * Hence the application should check the resulting array's length.
     * The stream is not closed.
     *
     * @param in the stream to read from
     * @param n  the number of bytes wanted
     * @return an array of exactly the bytes read, at most n long
     * @throws IOException if reading fails
     */
    public static byte[] readn(InputStream in, int n) throws IOException {
        byte[] buf = new byte[n];
        int ntotal = 0;
        int nread;

        while (ntotal < n) {
            nread = in.read(buf, ntotal, n - ntotal);
            if (nread < 0) {
                // we got less than expected - return what we got
                byte[] newbuf = new byte[ntotal];
                System.arraycopy(buf, 0, newbuf, 0, ntotal);
                return newbuf;
            }
            ntotal += nread;
        }
        return buf;
    }

    /**
     * Return contents of a WWW resource identified by a URL.
     * The connection's input stream is closed before this method returns.
     *
     * @param url the resource to retrieve
     * @return the resource contents as a byte array
     * @throws IOException if the resource cannot be opened or read
     */
    public static byte[] getContent(URL url) throws IOException {
        URLConnection conn = url.openConnection();
        InputStream in = conn.getInputStream();
        try {
            /*
             * N.B. URLConnection.getContentLength() is buggy for "http"
             * resources (at least in JDK1.0.2) and won't work for "file"
             * URLs either, hence the fallbacks below.
             */
            int length = conn.getContentLength();
            if (length == -1)
                length = conn.getHeaderFieldInt("Content-Length", -1);
            if (length == -1)
                return readAll(in);
            return readn(in, length);
        } finally {
            in.close();
        }
    }

    /**
     * Read all input from an InputStream and return as a byte array.
     * This method will not return before the end of the stream is reached.
     * The stream is not closed.
     *
     * @param in the stream to drain
     * @return contents of the stream
     * @throws IOException if reading fails
     */
    public static byte[] readAll(InputStream in) throws IOException {
        byte[] buf = new byte[1024];
        int nread, ntotal = 0;

        while ((nread = in.read(buf, ntotal, buf.length - ntotal)) > -1) {
            ntotal += nread;
            if (ntotal == buf.length) {
                // extend buffer
                byte[] newbuf = new byte[buf.length * 2];
                System.arraycopy(buf, 0, newbuf, 0, buf.length);
                buf = newbuf;
            }
        }
        if (ntotal < buf.length) {
            // we cannot have excess space
            byte[] newbuf = new byte[ntotal];
            System.arraycopy(buf, 0, newbuf, 0, ntotal);
            buf = newbuf;
        }
        return buf;
    }

    /**
     * Copies data from the specified input stream to the output stream
     * until end of file is met. Neither stream is closed.
     *
     * @param in  the source stream
     * @param out the destination stream
     * @return the total number of bytes written to the output stream
     * @throws IOException if reading or writing fails
     */
    public static int cpybytes(InputStream in, OutputStream out)
        throws IOException
    {
        byte[] buf = new byte[1024];
        int n, ntotal = 0;
        while ((n = in.read(buf)) > -1) {
            out.write(buf, 0, n);
            ntotal += n;
        }
        return ntotal;
    }

    /**
     * Copies data from the specified input stream to the output stream
     * until <em>n</em> bytes have been copied or end of file is met.
     * Neither stream is closed.
     *
     * @param in  the source stream
     * @param out the destination stream
     * @param n   the maximum number of bytes to copy; values of zero or
     *            less copy nothing
     * @return the total number of bytes written to the output stream
     * @throws IOException if reading or writing fails
     */
    public static int cpybytes(InputStream in, OutputStream out, int n)
        throws IOException
    {
        if (n <= 0)
            return 0;   // nothing requested; also avoids a negative array size

        int sz = n < 1024 ? n : 1024;
        byte[] buf = new byte[sz];
        int chunk, nread, ntotal = 0;

        chunk = sz;

        while (ntotal < n && (nread = in.read(buf, 0, chunk)) > -1) {
            out.write(buf, 0, nread);
            ntotal += nread;
            // never ask for more than is still missing
            chunk = (n - ntotal < sz) ? n - ntotal : sz;
        }
        return ntotal;
    }

    /**
     * Returns the index within the buffer of the first occurrence of the
     * specified character or -1 if the character is not found.
     *
     * @param buf the buffer to search
     * @param ch  the character to search for
     * @return the index of the first match, or -1
     */
    public static final int indexOf(byte[] buf,
                                    int ch) {
        return indexOf(buf, ch, 0, buf.length);
    }

    /**
     * Returns the index within the buffer of the first occurrence of the
     * specified character, starting the search at fromIndex. This method
     * returns -1 if the character is not found.
     *
     * <p>A byte matches when either its signed value or its unsigned value
     * (0..255) equals ch, so both {@code -1} and {@code 0xff} find the
     * byte {@code (byte) 0xff}.</p>
     *
     * @param buf       the buffer to search
     * @param ch        the character to search for
     * @param fromIndex the index to start the search from
     * @param toIndex   the highest possible index returned plus 1
     * @return the index of the first match, or -1
     */
    public static final int indexOf(byte[] buf,
                                    int ch,
                                    int fromIndex,
                                    int toIndex) {
        for (int i = fromIndex; i < toIndex; i++) {
            if (buf[i] == ch || (buf[i] & 0xff) == ch)
                return i;
        }
        return -1;
    }

    /**
     * Returns the index of the first occurrence of s in the specified
     * buffer or -1 if this is not found.
     *
     * @param buf the buffer to search
     * @param s   the text to look for
     * @return the index of the first match, or -1
     */
    public static final int indexOf(byte[] buf, String s) {
        return indexOf(buf, s, 0);
    }

    /**
     * Returns the index of the first occurrence of s in the specified
     * buffer. The search starts from fromIndex. This method returns -1
     * if s is not found.
     *
     * <p>Bytes are compared by their unsigned value, so characters in the
     * range 128..255 match the bytes that {@link #getBytes(String)} produces
     * for them. Characters above 255 never match.</p>
     *
     * @param buf       the buffer to search
     * @param s         the text to look for
     * @param fromIndex the index to start the search from
     * @return the index of the first match, or -1
     */
    public static final int indexOf(byte[] buf, String s, int fromIndex) {
        int i;                  // index into buf
        int j;                  // index into s
        int max_i = buf.length;
        int max_j = s.length();

        for (i = fromIndex; i + max_j <= max_i; i++) {
            for (j = 0; j < max_j; j++) {
                if ((buf[j + i] & 0xff) != s.charAt(j))
                    break;
            }
            if (j == max_j) return i;
        }
        return -1;
    }

    /**
     * Tells whether ch is a blank, tab, line feed or carriage return.
     *
     * @param ch the character code to test
     * @return true for one of the four whitespace characters
     */
    public static final boolean isSpace(int ch) {
        return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
    }

    /**
     * Skips whitespace as defined by {@link #isSpace(int)}.
     *
     * @param buf       the buffer to scan
     * @param fromIndex the index to start at
     * @param toIndex   the index to stop at (exclusive)
     * @return the index of the first non-space byte at or after fromIndex,
     *         or toIndex if only whitespace was seen
     */
    public static final int skipSpaces(byte[] buf, int fromIndex, int toIndex) {
        int i = fromIndex;
        while (i < toIndex && isSpace(buf[i]))
            i++;
        return i;
    }

    /**
     * Find byte pattern ptrn in the region of buf that starts at off and
     * is len bytes long.
     *
     * <p>If the region ends while a match is still in progress, the start
     * index of that partial match is returned. NOTE(review): this looks
     * deliberate, presumably so callers can append more data and search
     * again - confirm against the callers before relying on it.</p>
     *
     * <p>An empty pattern matches at off when the region is not empty.</p>
     *
     * @param buf  the buffer to search
     * @param off  the index of the first byte of the region
     * @param len  the length of the region
     * @param ptrn the bytes to look for
     * @return index of first (possibly partial, see above) occurrence of
     *         ptrn in the region, -1 if there is none
     */
    public static final int findBytes(byte buf[],
                                      int off,
                                      int len,
                                      byte ptrn[]) {
        // Note: This code is completely incomprehensible without a drawing...

        int buf_len = off + len;
        int ptrn_len = ptrn.length;
        int i;                  // index into buf
        int j;                  // index into ptrn;

        for (i = off; i < buf_len; ) {
            j = 0;
            while (i < buf_len && j < ptrn_len && buf[i] == ptrn[j]) {
                i++;
                j++;
            }
            if (i == buf_len || j == ptrn_len)
                return i - j;
            else {
                // We have to go back a bit as there may be an overlapping
                // match starting a bit later in buf...
                i = i - j + 1;
            }
        }
        return -1;
    }
}
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface Attribute {
|
||||
|
||||
public String getName();
|
||||
public Node getValue();
|
||||
public void setValue(Node arg);
|
||||
|
||||
public boolean getSpecified();
|
||||
public void setSpecified(boolean arg);
|
||||
|
||||
public String toString();
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface AttributeList {
|
||||
public Attribute getAttribute(String attrName);
|
||||
public Attribute setAttribute(Attribute attr);
|
||||
public Attribute remove(String attrName);
|
||||
public Attribute item(int index);
|
||||
public int getLength();
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
* Represents the content of comments: <!-- ... -->
|
||||
*/
|
||||
public interface Comment extends Node {
|
||||
public String getData();
|
||||
public void setData(String arg);
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface DOM {
|
||||
public Document createDocument(String type);
|
||||
public boolean hasFeature(String feature);
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface Document extends DocumentFragment {
|
||||
public Node getDocumentType();
|
||||
public void setDocumentType(Node arg);
|
||||
|
||||
public Element getDocumentElement();
|
||||
public void setDocumentElement(Element arg);
|
||||
|
||||
public DocumentContext getContextInfo();
|
||||
public void setContextInfo(DocumentContext arg);
|
||||
|
||||
public DocumentContext createDocumentContext();
|
||||
public Element createElement(String tagName, AttributeList attributes);
|
||||
public Text createTextNode(String data);
|
||||
public Comment createComment(String data);
|
||||
public PI createPI(String name, String data);
|
||||
public Attribute createAttribute(String name, Node value);
|
||||
public AttributeList createAttributeList();
|
||||
public NodeIterator getElementsByTagName();
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface DocumentContext {
|
||||
|
||||
public Document getDocument();
|
||||
public void setDocument(Document arg);
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface DocumentFragment extends Node {
|
||||
public Document getMasterDoc();
|
||||
public void setMasterDoc(Document arg);
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface Element extends Node {
|
||||
public String getTagName();
|
||||
public AttributeList attributes();
|
||||
public void setAttribute(Attribute newAttr);
|
||||
public void normalize();
|
||||
public NodeIterator getElementsByTagName();
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
# This Makefile generated by hplb.util.jmkmf
# Java package is org.w3c.dom
# NOTE(review): the Java sources behind OBJS declare "package hplb.org.w3c.dom";
# confirm that JPACKAGE (used only by the doc target) and the relative
# JAVADOCFLAGS output path are still correct.

.SUFFIXES: .java .class .jj
JPACKAGE = org.w3c.dom
JAVA = java
JAVAC = javac
JAVACC = java COM.sun.labs.javacc.Main
JFLAGS =
OBJS = \
	Attribute.class \
	AttributeList.class \
	Comment.class \
	DOM.class \
	Document.class \
	DocumentContext.class \
	DocumentFragment.class \
	Element.class \
	Node.class \
	NodeIterator.class \
	PI.class \
	Text.class \
	TreeIterator.class
JAVADOCFLAGS = -d ../../../doc/api -author -noindex -notree

# These targets do not name files.
.PHONY: all doc clean

# Default target: compile every class listed in OBJS.
all: $(OBJS)

# Generate the API documentation for the package.
doc:
	javadoc $(JAVADOCFLAGS) $(JPACKAGE)

# Suffix rules must not list prerequisites; the source file is implied.
.jj.java:
	$(JAVACC) $<

.java.class:
	$(JAVAC) $(JFLAGS) $<

# Remove compiled classes and editor backup files.
clean:
	rm -f *.class *~
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface Node {
|
||||
// NodeType
|
||||
public static final int DOCUMENT = 1;
|
||||
public static final int ELEMENT = 2;
|
||||
public static final int ATTRIBUTE = 3;
|
||||
public static final int PI = 4;
|
||||
public static final int COMMENT = 5;
|
||||
public static final int TEXT = 6;
|
||||
|
||||
public int getNodeType();
|
||||
public Node getParentNode();
|
||||
public NodeIterator getChildNodes();
|
||||
public boolean hasChildNodes();
|
||||
public Node getFirstChild();
|
||||
public Node getPreviousSibling();
|
||||
public Node getNextSibling();
|
||||
public Node insertBefore(Node newChild, Node refChild);
|
||||
public Node replaceChild(Node newChild, Node oldChild);
|
||||
public Node removeChild(Node oldChild);
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface NodeIterator {
|
||||
public int getLength();
|
||||
public Node getCurrent();
|
||||
public Node toNext();
|
||||
public Node toPrevious();
|
||||
public Node toFirst();
|
||||
public Node toLast();
|
||||
public Node toNth(int Nth);
|
||||
public Node toNode(Node destNode);
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
* Processing Instruction
|
||||
*/
|
||||
public interface PI extends Node {
|
||||
public String getName();
|
||||
public void setName(String arg);
|
||||
|
||||
public String getData();
|
||||
public void setData(String arg);
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface Text extends Node {
|
||||
public String getData();
|
||||
public void setData(String arg);
|
||||
|
||||
public void append(String data);
|
||||
public void insert(int offset, String data);
|
||||
public void delete(int offset, int count);
|
||||
public void replace(int offset, int count, String data);
|
||||
public void splice(Element element, int offset, int count);
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
package hplb.org.w3c.dom;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface TreeIterator extends NodeIterator {
|
||||
public int numChildren();
|
||||
public int numPreviousSiblings();
|
||||
public int numNextSiblings();
|
||||
public Node toParent();
|
||||
public Node toPreviousSibling();
|
||||
public Node toNextSibling();
|
||||
public Node toFirstChild();
|
||||
public Node toLastChild();
|
||||
public Node toNthChild();
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
import java.util.Enumeration;
|
||||
|
||||
/**
 * A map of attributes for the current element.
 *
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 *
 * <p>The map is valid only while the <code>startElement</code> callback
 * is running: to use attribute information elsewhere, make your own
 * copies. Every method applies to the current element and may be called
 * only during an invocation of <code>startElement</code>.</p>
 *
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.DocumentHandler#startElement
 */
public interface AttributeMap {

    /**
     * Find the names of all available attributes for an element.
     *
     * @return An enumeration of zero or more Strings.
     * @see java.util.Enumeration
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    Enumeration getAttributeNames();

    /**
     * Get the value of an attribute as a String.
     *
     * @return The value as a String, or null if the attribute has no value.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    String getValue(String attributeName);

    /**
     * Check if an attribute value is the name of an entity.
     *
     * @return true if the attribute is an entity name.
     * @see #getEntityPublicID
     * @see #getEntitySystemID
     * @see #getNotationName
     * @see #getNotationPublicID
     * @see #getNotationSystemID
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isEntity(String aname);

    /**
     * Check if an attribute value is the name of a notation.
     *
     * @return true if the attribute is a notation name.
     * @see #getNotationPublicID
     * @see #getNotationSystemID
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isNotation(String aname);

    /**
     * Check if an attribute value is a unique identifier.
     *
     * @return true if the attribute is a unique identifier.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isId(String aname);

    /**
     * Check if an attribute value is a reference to an ID.
     *
     * @return true if the attribute is a reference to an ID.
     * @see hplb.org.xml.sax.DocumentHandler#startElement
     */
    boolean isIdref(String aname);

    /**
     * Get the public identifier for an ENTITY attribute.
     *
     * @return The public identifier or null if there is none (or if
     *         the attribute value is not an entity name)
     * @see #isEntity
     */
    String getEntityPublicID(String aname);

    /**
     * Get the system identifier for an ENTITY attribute.
     *
     * @return The system identifier or null if there is none (or if
     *         the attribute value is not an entity name)
     * @see #isEntity
     */
    String getEntitySystemID(String aname);

    /**
     * Get the notation name for an ENTITY attribute.
     *
     * @return The notation name or null if there is none (or if
     *         the attribute value is not an entity name)
     * @see #isEntity
     */
    String getNotationName(String aname);

    /**
     * Get the notation public ID for an ENTITY or NOTATION attribute.
     *
     * @return The public identifier or null if there is none (or if
     *         the attribute value is not an entity or notation name)
     * @see #isEntity
     * @see #isNotation
     */
    String getNotationPublicID(String aname);

    /**
     * Get the notation system ID for an ENTITY or NOTATION attribute.
     *
     * @return The system identifier or null if there is none (or if
     *         the attribute value is not an entity or notation name)
     * @see #isEntity
     * @see #isNotation
     */
    String getNotationSystemID(String aname);
}
|
|
@ -0,0 +1,129 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
|
||||
/**
|
||||
* A callback interface for basic XML document events.
|
||||
* <p><em>This interface is part of the Java implementation of SAX,
|
||||
* the Simple API for XML. It is free for both commercial and
|
||||
* non-commercial use, and is distributed with no warrantee, real
|
||||
* or implied.</em></p>
|
||||
* <p>This is the main handler for basic document events; it provides
|
||||
* information on roughly the same level as the ESIS in full SGML,
|
||||
* concentrating on logical structure rather than lexical
|
||||
* representation.</p>
|
||||
* <p>If you do not set a document handler, then by default all of these
|
||||
* events will simply be ignored.</p>
|
||||
* @author David Megginson, Microstar Software Ltd.
|
||||
* @see hplb.org.xml.sax.Parser@setDocumentHandler
|
||||
*/
|
||||
public interface DocumentHandler {
|
||||
|
||||
|
||||
/**
|
||||
* Handle the start of a document.
|
||||
* <p>This is the first event called by a
|
||||
* SAX-conformant parser, so you can use it to allocate and
|
||||
* initialise new objects for the document.</p>
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void startDocument ()
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle the end of a document.
|
||||
* <p>This is the last event called by a
|
||||
* SAX-conformant parser, so you can use it to finalize and
|
||||
* clean up objects for the document.</p>
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void endDocument ()
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle the document type declaration.
|
||||
* <p>This will appear only if the XML document contains a
|
||||
* <code>DOCTYPE</code> declaration.</p>
|
||||
* @param name The document type name.
|
||||
* @param publicID The public identifier of the external DTD subset
|
||||
* (if any), or null.
|
||||
* @param systemID The system identifier of the external DTD subset
|
||||
* (if any), or null.
|
||||
* @param name The document type name.
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void doctype (String name, String publicID, String systemID)
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle the start of an element.
|
||||
* <p>Please note that the information in the <code>attributes</code>
|
||||
* parameter will be accurate only for the duration of this handler:
|
||||
* if you need to use the information elsewhere, you should copy
|
||||
* it.</p>
|
||||
* @param name The element type name.
|
||||
* @param attributes The available attributes.
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void startElement (String name, AttributeMap attributes)
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle the end of an element.
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void endElement (String name)
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle significant character data.
|
||||
* <p>Please note that the contents of the array will be
|
||||
* accurate only for the duration of this handler: if you need to
|
||||
* use them elsewhere, you should make your own copy, possible
|
||||
* by constructing a string:</p>
|
||||
* <pre>
|
||||
* String data = new String(ch, start, length);
|
||||
* </pre>
|
||||
* @param ch An array of characters.
|
||||
* @param start The starting position in the array.
|
||||
* @param length The number of characters to use in the array.
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void characters (char ch[], int start, int length)
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle ignorable whitespace.
|
||||
* <p>Please note that the contents of the array will be
|
||||
* accurate only for the duration of this handler: if you need to
|
||||
* use them elsewhere, you should make your own copy, possible
|
||||
* by constructing a string:</p>
|
||||
* <pre>
|
||||
* String whitespace = new String(ch, start, length);
|
||||
* </pre>
|
||||
* @param ch An array of whitespace characters.
|
||||
* @param start The starting position in the array.
|
||||
* @param length The number of characters to use in the array.
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void ignorable (char ch[], int start, int length)
|
||||
throws Exception;
|
||||
|
||||
|
||||
/**
|
||||
* Handle a processing instruction.
|
||||
* <p>XML processing instructions have two parts: a target, which
|
||||
* is a name, followed optionally by data.</p>
|
||||
* @exception java.lang.Exception You may throw any exception.
|
||||
*/
|
||||
public void processingInstruction (String name, String remainder)
|
||||
throws Exception;
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
|
||||
/**
 * A callback interface for basic XML entity-related events.
 *
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 *
 * <p>If you do not set an entity handler, then a parser will
 * resolve all entities to the suggested system ID, and will take no
 * action for entity changes.</p>
 *
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.Parser#setEntityHandler
 */
public interface EntityHandler {

    /**
     * Resolve a system identifier.
     *
     * <p>Before loading any entity (including the document entity),
     * SAX parsers will filter the system identifier through this
     * callback, and you can return a different system identifier if you
     * wish, or null to prevent the parser from reading any entity.</p>
     *
     * @param ename The name of the entity, "[document]" for the
     *              document entity, or "[external DTD]" for the external
     *              DTD subset.
     * @param publicID The public identifier, or null if there is none.
     * @param systemID The system identifier suggested in the XML document.
     * @return A system identifier, or null to skip the entity.
     * @exception java.lang.Exception You may throw any exception.
     */
    String resolveEntity(String ename, String publicID, String systemID) throws Exception;

    /**
     * Handle a change in the current entity.
     *
     * <p>Whenever the parser switches the entity (URI) that it is reading
     * from, it will call this handler to report the change.</p>
     *
     * @param systemID The URI of the new entity.
     * @exception java.lang.Exception You may throw any exception.
     */
    void changeEntity(String systemID) throws Exception;
}
|
|
@ -0,0 +1,52 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
|
||||
/**
 * A callback interface for basic XML error events.
 *
 * <p><em>This interface is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warranty, real
 * or implied.</em></p>
 *
 * <p>If you do not set an error handler, then a parser will report
 * warnings to <code>System.err</code>, and will throw an (unspecified)
 * exception for fatal errors.</p>
 *
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.Parser#setErrorHandler
 */
public interface ErrorHandler {

    /**
     * Handle a non-fatal warning.
     *
     * <p>A SAX parser will use this callback to report a condition
     * that is not serious enough to stop the parse (though you may
     * still stop the parse if you wish).</p>
     *
     * @param message The warning message.
     * @param systemID The URI of the entity that caused the warning, or
     *                 null if not available.
     * @param line The line number in the entity, or -1 if not available.
     * @param column The column number in the entity, or -1 if not available.
     * @exception java.lang.Exception You may throw any exception.
     */
    void warning(String message, String systemID, int line, int column) throws Exception;

    /**
     * Handle a fatal error.
     *
     * <p>A SAX parser will use this callback to report a condition
     * that is serious enough to invalidate the parse, and may not
     * report all (or any) significant parse events after this. Ordinarily,
     * you should stop immediately with an exception, but you can continue
     * to try to collect more errors if you wish.</p>
     *
     * @param message The error message.
     * @param systemID The URI of the entity that caused the error, or
     *                 null if not available.
     * @param line The line number in the entity, or -1 if not available.
     * @param column The column number in the entity, or -1 if not available.
     * @exception java.lang.Exception You may throw any exception.
     */
    void fatal(String message, String systemID, int line, int column) throws Exception;
}
|
|
@ -0,0 +1,201 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
|
||||
/**
|
||||
* A simple base class for deriving SAX event handlers.
|
||||
* <p><em>This class is part of the Java implementation of SAX,
|
||||
* the Simple API for XML. It is free for both commercial and
|
||||
* non-commercial use, and is distributed with no warrantee, real
|
||||
* or implied.</em></p>
|
||||
* <p>This class implements the default behaviour when no handler
|
||||
* is specified (though parsers are not actually required to use
|
||||
* this class).</p>
|
||||
* @author David Megginson, Microstar Software Ltd.
|
||||
* @see hplb.org.xml.sax.XmlException
|
||||
* @see hplb.org.xml.sax.EntityHandler
|
||||
* @see hplb.org.xml.sax.DocumentHandler
|
||||
* @see hplb.org.xml.sax.ErrorHandler
|
||||
*/
|
||||
public class HandlerBase
|
||||
implements EntityHandler, DocumentHandler, ErrorHandler
|
||||
{
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Implementation of hplb.org.xml.sax.EntityHandler.
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Resolve an external entity.
|
||||
* <p>By default, simply return the system ID supplied.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.EntityHandler#resolveEntity
|
||||
*/
|
||||
public String resolveEntity (String ename, String publicID, String systemID)
|
||||
throws Exception
|
||||
{
|
||||
return systemID;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handle an entity-change event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.EntityHandler#changeEntity
|
||||
*/
|
||||
public void changeEntity (String systemID)
|
||||
throws Exception
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Implementation of hplb.org.xml.sax.DocumentHandler.
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/**
|
||||
* Handle a start document event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#startDocument
|
||||
*/
|
||||
public void startDocument ()
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a end document event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#endDocument
|
||||
*/
|
||||
public void endDocument ()
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a document type declaration event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#doctype
|
||||
*/
|
||||
public void doctype (String name, String publicID, String systemID)
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a start element event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#startElement
|
||||
*/
|
||||
public void startElement (String name, AttributeMap attributes)
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle an end element event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#endElement
|
||||
*/
|
||||
public void endElement (String name)
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a character data event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#characters
|
||||
*/
|
||||
public void characters (char ch[], int start, int length)
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle an ignorable whitespace event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#ignorable
|
||||
*/
|
||||
public void ignorable (char ch[], int start, int length)
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a processing instruction event.
|
||||
* <p>By default, do nothing.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.DocumentHandler#processingInstruction
|
||||
*/
|
||||
public void processingInstruction (String name, String remainder)
|
||||
throws Exception
|
||||
{}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Implementation of ErrorHandler.
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/**
|
||||
* Handle a non-fatal error.
|
||||
* <p>By default, report the warning to System.err.</p>
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.ErrorHandler#warning
|
||||
*/
|
||||
public void warning (String message, String systemID, int line, int column)
|
||||
throws Exception
|
||||
{
|
||||
System.err.println("Warning (" +
|
||||
systemID +
|
||||
',' +
|
||||
line +
|
||||
',' +
|
||||
column +
|
||||
"): " +
|
||||
message);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a fatal error.
|
||||
* <p>By default, throw an instance of XmlException.</p>
|
||||
* @exception hplb.org.xml.sax.XmlException A fatal parsing error
|
||||
* has been found.
|
||||
* @exception java.lang.Exception When you override this method,
|
||||
* you may throw any exception.
|
||||
* @see hplb.org.xml.sax.ErrorHandler#fatal
|
||||
*/
|
||||
public void fatal (String message, String systemID, int line, int column)
|
||||
throws XmlException, Exception
|
||||
{
|
||||
throw new XmlException(message, systemID, line, column);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
# This Makefile generated by jmkmf
|
||||
# Java package is org.xml.sax
# NOTE(review): the Java sources shown alongside this Makefile declare
# package hplb.org.xml.sax, not org.xml.sax -- confirm that JPACKAGE below
# is still what the `doc` target should pass to javadoc.
|
||||
|
||||
# Suffixes used by the suffix rules further down.
.SUFFIXES: .java .class .jj
|
||||
JPACKAGE = org.xml.sax
|
||||
JAVA = java
|
||||
JAVAC = javac
|
||||
JAVACC = java COM.sun.labs.javacc.Main
|
||||
JFLAGS =
|
||||
OBJS = \
|
||||
AttributeMap.class \
|
||||
DocumentHandler.class \
|
||||
EntityHandler.class \
|
||||
ErrorHandler.class \
|
||||
HandlerBase.class \
|
||||
Parser.class \
|
||||
XmlException.class
|
||||
JAVADOCFLAGS = -d ../../../doc/api -author -noindex -notree
|
||||
|
||||
# Default target: every class file listed in OBJS.
all: $(OBJS)
|
||||
|
||||
# Run javadoc on $(JPACKAGE) with $(JAVADOCFLAGS) (output directory
# ../../../doc/api).
doc:
|
||||
javadoc $(JAVADOCFLAGS) $(JPACKAGE)
|
||||
|
||||
# Suffix rule: run $(JAVACC) on a .jj file to produce a .java file.
.jj.java: org.xml.sax.jj
|
||||
$(JAVACC) $<
|
||||
|
||||
# Suffix rule: compile a .java file into a .class file with $(JAVAC).
.java.class: $*.java
|
||||
$(JAVAC) $(JFLAGS) $<
|
||||
|
||||
# Remove class files and editor backup files from this directory.
clean:
|
||||
rm -f *.class *~
|
|
@ -0,0 +1,71 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
|
||||
/**
|
||||
* A standard interface for event-driven XML parsers.
|
||||
* <p><em>This interface is part of the Java implementation of SAX,
|
||||
* the Simple API for XML. It is free for both commercial and
|
||||
* non-commercial use, and is distributed with no warrantee, real
|
||||
* or implied.</em></p>
|
||||
* <p>All SAX-conformant XML parsers (or their front-end SAX drivers)
|
||||
* <em>must</em> implement this interface, together with a zero-argument
|
||||
* constructor.</p>
|
||||
* <p>You can plug three different kinds of callback interfaces into
|
||||
* a basic SAX parser: one for entity handling, one for basic document
|
||||
* events, and one for error reporting. It is not an error to start
|
||||
* a parse without setting any handlers.</p>
|
||||
* @author David Megginson, Microstar Software Ltd.
|
||||
*/
|
||||
public interface Parser {
|
||||
|
||||
|
||||
/**
|
||||
* Register the handler for basic entity events.
|
||||
* <p>If you begin a parse without setting an entity handler,
|
||||
* the parser will by default resolve all entities to their
|
||||
* default system IDs.</p>
|
||||
* @param handler An object to receive callbacks for events.
|
||||
* @see hplb.org.xml.sax.EntityHandler
|
||||
*/
|
||||
public void setEntityHandler (EntityHandler handler);
|
||||
|
||||
|
||||
/**
|
||||
* Register the handler for basic document events.
|
||||
* <p>You may begin the parse without setting a handler, but
|
||||
* in that case no document events will be reported.</p>
|
||||
* @param handler An object to receive callbacks for events.
|
||||
* @see hplb.org.xml.sax.DocumentHandler
|
||||
*/
|
||||
public void setDocumentHandler (DocumentHandler handler);
|
||||
|
||||
|
||||
/**
|
||||
* Register the handler for errors and warnings.
|
||||
* <p>If you begin a parse without setting an error handlers,
|
||||
* warnings will be printed to System.err, and errors will
|
||||
* throw an unspecified exception.</p>
|
||||
* @param handler An object to receive callbacks for errors.
|
||||
* @see hplb.org.xml.sax.ErrorHandler
|
||||
*/
|
||||
public void setErrorHandler (ErrorHandler handler);
|
||||
|
||||
|
||||
/**
|
||||
* Parse an XML document.
|
||||
* <p>Nothing exciting will happen unless you have set handlers.</p>
|
||||
* @param publicID The public identifier for the document, or null
|
||||
* if none is available.
|
||||
* @param systemID The system identifier (URI) for the document.
|
||||
* @exception java.lang.Exception This method may throw any exception,
|
||||
* but the parser itself
|
||||
* will throw only exceptions derived from java.io.IOException;
|
||||
* anything else will come from your handlers.
|
||||
* @see #setEntityHandler
|
||||
* @see #setDocumentHandler
|
||||
* @see #setErrorHandler
|
||||
*/
|
||||
void parse (String publicID, String systemID) throws java.lang.Exception;
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
// $Id$
|
||||
|
||||
package hplb.org.xml.sax;
|
||||
|
||||
|
||||
/**
 * An exception for reporting XML parsing errors together with the
 * location at which they occurred.
 * <p><em>This class is part of the Java implementation of SAX,
 * the Simple API for XML. It is free for both commercial and
 * non-commercial use, and is distributed with no warrantee, real
 * or implied.</em></p>
 * <p>This exception is not a required part of SAX, and it is not
 * referenced in any of the core interfaces. It is used only in
 * the optional HandlerBase base class, as a means of signalling
 * parsing errors.</p>
 * <p>Instances are immutable: the location is fixed at construction.</p>
 * @author David Megginson, Microstar Software Ltd.
 * @see hplb.org.xml.sax.HandlerBase#fatal
 */
public class XmlException extends Exception {

  //
  // Internal state (set once by the constructor).
  //

  private final String systemID;
  private final int line;
  private final int column;


  /**
   * Construct a new exception with information about the location.
   * @param message  The error message, returned by getMessage().
   * @param systemID The URI of the entity where the error occurred, or
   *                 null if none is available.
   * @param line     The line number, or -1 if none is available.
   * @param column   The column number, or -1 if none is available.
   */
  public XmlException (String message, String systemID, int line, int column)
  {
    super(message);
    this.systemID = systemID;
    this.line = line;
    this.column = column;
  }


  /**
   * Find the system identifier (URI) where the error occurred.
   * @return A string representing the URI, or null if none is available.
   */
  public String getSystemID ()
  {
    return systemID;
  }


  /**
   * Find the line number where the error occurred.
   * @return The line number, or -1 if none is available.
   */
  public int getLine ()
  {
    return line;
  }


  /**
   * Find the column number (line offset) where the error occurred.
   * @return The column number, or -1 if none is available.
   */
  public int getColumn ()
  {
    return column;
  }

}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.xml;
|
||||
|
||||
import java.util.Hashtable;
|
||||
|
||||
/**
 * Maintains a table of canonical string instances (<em>atoms</em>).
 * Any two strings returned by getAtom() that are equal according to
 * String.equals() are guaranteed to be the very same object, so element
 * and attribute names obtained here can be compared with the cheaper
 * reference test "s1==s2".
 *
 * @author Anders Kristensen
 */
public final class Atom {
    /** Holds atoms: element names (GIs), and attribute names. */
    private static final Hashtable atoms = new Hashtable();

    /**
     * Return the atom for the argument: the first string equal to
     * <code>s</code> that was ever passed to this method (which is
     * <code>s</code> itself when no equal string has been seen before).
     */
    public static String getAtom(String s) {
        // Lookup and insert must happen as one step, hence the explicit lock.
        synchronized (atoms) {
            Object known = atoms.get(s);
            if (known != null) {
                return (String) known;
            }
            atoms.put(s, s);
            return s;
        }
    }
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.xml;
|
||||
|
||||
import hplb.org.w3c.dom.*;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Anders Kristensen
|
||||
*/
|
||||
public final class AttrImpl implements Attribute {
|
||||
protected String name;
|
||||
protected Node value;
|
||||
protected boolean specified;
|
||||
|
||||
public AttrImpl(String name, String value) {
|
||||
this(name, new TextImpl(Node.TEXT, value), true);
|
||||
}
|
||||
|
||||
public AttrImpl(String name, Node value, boolean specified) {
|
||||
this.name = name;
|
||||
this.value = value;
|
||||
this.specified = specified;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public Node getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(Node arg) {
|
||||
value = arg;
|
||||
}
|
||||
|
||||
public boolean getSpecified() {
|
||||
return specified;
|
||||
}
|
||||
|
||||
public void setSpecified(boolean arg) {
|
||||
specified = arg;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return value.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.xml;
|
||||
|
||||
import hplb.org.w3c.dom.*;
|
||||
|
||||
/**
|
||||
* An ordered Dictionary. keys() and elements() returns Enumerations
|
||||
* which enumerate over elements in the order they were inserted.
|
||||
* Elements are stored linearly. Operations put(), get(), and remove()
|
||||
* are linear in the number of elements in the Dictionary.
|
||||
*
|
||||
* <p>Allows direct access to elements (as an alternative to using
|
||||
* Enumerators) for speed.
|
||||
*
|
||||
* <p>Can function as a <em>bag</em>, i.e. it can be created with a mode
|
||||
* which allows the same key to map to multiple entries. In this case
|
||||
* operations get() and remove() operate on the <em>first</em> pair in
|
||||
* the map. Hence to get hold of all values associated with a key it is
|
||||
* necessary to use the direct access to underlying arrays.
|
||||
*
|
||||
* @author Anders Kristensen
|
||||
*/
|
||||
public class AttrListImpl implements AttributeList {
|
||||
protected Attribute[] elms;
|
||||
|
||||
/**
|
||||
* Number of elements. The elements are held at indices 0 to n in elms.
|
||||
*/
|
||||
protected int n = 0;
|
||||
|
||||
public AttrListImpl() {
|
||||
this(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an AttrListImpl with the specififed initial capacity.
|
||||
*/
|
||||
public AttrListImpl(int size) {
|
||||
if (size <= 0) throw new IllegalArgumentException(
|
||||
"Initial size must be at least 1");
|
||||
elms = new Attribute[size];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value to which the key is mapped in this dictionary.
|
||||
*/
|
||||
public synchronized Attribute getAttribute(String attrName) {
|
||||
int i = getIndex(attrName);
|
||||
return (i < 0 ? null : elms[i]);
|
||||
}
|
||||
|
||||
protected int getIndex(String name) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (elms[i].getName().equals(name)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// XXX: what if attrName != attr.getName()???
|
||||
public synchronized Attribute setAttribute(Attribute attr) {
|
||||
int i = getIndex(attr.getName());
|
||||
if (i >= 0) {
|
||||
Attribute old = elms[i];
|
||||
elms[i] = attr;
|
||||
return old;
|
||||
}
|
||||
|
||||
int len = elms.length;
|
||||
if (len == n) {
|
||||
// double size of key,elms arrays
|
||||
AttrImpl[] e;
|
||||
e = new AttrImpl[len * 2];
|
||||
System.arraycopy(elms, 0, e, 0, len);
|
||||
elms = e;
|
||||
}
|
||||
elms[n] = attr;
|
||||
n++;
|
||||
return null;
|
||||
}
|
||||
|
||||
public synchronized Attribute remove(String attrName) {
|
||||
int i = getIndex(attrName);
|
||||
if (i < 0) return null;
|
||||
Attribute val = elms[i];
|
||||
System.arraycopy(elms, i+1, elms, i, n-i-1);
|
||||
n--;
|
||||
return val;
|
||||
}
|
||||
|
||||
public synchronized Attribute item(int index) {
|
||||
if (index < 0 || index >= n) {
|
||||
throw new IndexOutOfBoundsException(""+index);
|
||||
}
|
||||
return elms[index];
|
||||
}
|
||||
|
||||
/** Returns the number of keys in this dictionary. */
|
||||
public synchronized int getLength() {
|
||||
return n;
|
||||
}
|
||||
|
||||
public synchronized String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
boolean f = true;
|
||||
int n = getLength();
|
||||
|
||||
sb.append("{ ");
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (f) { f = false; }
|
||||
else { sb.append(", "); }
|
||||
Attribute attr = item(i);
|
||||
sb.append(attr.getName() + '=' + attr);
|
||||
}
|
||||
sb.append(" }");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**/
|
||||
// for testing
|
||||
public static void main(String[] args) throws Exception {
|
||||
AttrListImpl alist;
|
||||
Attribute attr;
|
||||
java.io.BufferedReader r;
|
||||
java.util.StringTokenizer tok;
|
||||
String op;
|
||||
|
||||
if (args.length > 1) {
|
||||
alist = new AttrListImpl(Integer.parseInt(args[0]));
|
||||
} else {
|
||||
alist = new AttrListImpl();
|
||||
}
|
||||
|
||||
System.out.println(
|
||||
"Enter operations... op's are one of\n"+
|
||||
"put <key> <val>\n"+
|
||||
"get <key>\n"+
|
||||
"rem <key>\n"+
|
||||
"size\n"+
|
||||
"quit\n");
|
||||
|
||||
r = new java.io.BufferedReader(
|
||||
new java.io.InputStreamReader(System.in));
|
||||
while (true) {
|
||||
System.out.print("doyourworst> ");
|
||||
tok = new java.util.StringTokenizer(r.readLine());
|
||||
op = tok.nextToken();
|
||||
if ("put".equals(op)) {
|
||||
attr = new AttrImpl(tok.nextToken(), tok.nextToken());
|
||||
System.out.println("Value: " +
|
||||
alist.setAttribute(attr));
|
||||
} else if ("get".equals(op)) {
|
||||
attr = alist.getAttribute(tok.nextToken());
|
||||
System.out.println("Value: " +
|
||||
(attr == null ? "No such element" : attr.toString()));
|
||||
} else if ("rem".equals(op)) {
|
||||
attr = alist.remove(tok.nextToken());
|
||||
System.out.println("Value: " + attr);
|
||||
} else if (op.startsWith("s")) {
|
||||
System.out.println("Size: " + alist.getLength());
|
||||
} else if (op.startsWith("q")) {
|
||||
break;
|
||||
} else {
|
||||
System.out.println("Unrecognized op: " + op);
|
||||
}
|
||||
|
||||
System.out.println("AttributeList: " + alist);
|
||||
System.out.println("Size: " + alist.getLength());
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
//*/
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.xml;
|
||||
|
||||
/**
 * A java.io.CharArrayWriter with the additional property that users can get
 * to the actual underlying storage. Hence it's very fast (and dangerous).
 * @author Anders Kristensen
 */
public final class CharBuffer extends java.io.CharArrayWriter {
    public CharBuffer() {
        super();
    }

    public CharBuffer(int size) {
        super(size);
    }

    /**
     * Truncates the buffer to the given number of characters. Use only to
     * <em>decrement</em> the size: a value greater than or equal to the
     * current length leaves the buffer unchanged.
     *
     * @param size the new length; must not be negative.
     * @throws IllegalArgumentException if size is negative (accepting it
     *         would leave the writer with a negative character count).
     */
    public void setLength(int size) {
        if (size < 0) {
            throw new IllegalArgumentException("Negative length: " + size);
        }
        synchronized (lock) {
            if (size < count) count = size;
        }
    }

    /**
     * Returns the internal character array itself, not a copy. Only the
     * first getLength() characters hold written data.
     */
    public char[] getCharArray() {
        synchronized (lock) {
            return buf;
        }
    }

    /**
     * Returns the number of characters currently held, read under the
     * same lock that setLength() and getCharArray() use.
     */
    public int getLength()
    {
        synchronized (lock) {
            return count;
        }
    }

}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.xml;
|
||||
|
||||
import hplb.org.w3c.dom.DOM;
|
||||
import hplb.org.w3c.dom.Document;
|
||||
|
||||
public class DOMImpl implements DOM {
|
||||
public Document createDocument(String type) {
|
||||
return new DocumentImpl();
|
||||
}
|
||||
public boolean hasFeature(String feature) {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* $Id$
|
||||
*
|
||||
* Copyright 1997 Hewlett-Packard Company
|
||||
*
|
||||
* This file may be copied, modified and distributed only in
|
||||
* accordance with the terms of the limited licence contained
|
||||
* in the accompanying file LICENSE.TXT.
|
||||
*/
|
||||
|
||||
package hplb.xml;
|
||||
|
||||
import hplb.org.w3c.dom.*;
|
||||
|
||||
public class DocContextImpl implements DocumentContext {
|
||||
Document doc;
|
||||
|
||||
public Document getDocument() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
public void setDocument(Document arg) {
|
||||
doc = arg;
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue