diff --git a/sandbox/contributions/webcrawler-LARM/build.sh b/sandbox/contributions/webcrawler-LARM/build.sh
new file mode 100755
index 00000000000..384c3ab9e68
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/build.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Clean rebuild: unpack HTTPClient, overlay src/ on top, compile into classes/.
+# clean (-f: a missing directory on a fresh checkout is not an error)
+echo cleaning
+rm -rf build
+rm -rf classes
+rm -rf cachingqueue
+rm -rf logs
+
+# build
+echo making build directory
+mkdir build
+cd build || exit 1   # never extract into (or cd .. out of) the wrong directory
+echo extracting http client
+jar xvf ../lib/HTTPClient.zip >/dev/null
+cd ..
+cp -r src/* build
+mkdir classes
+echo compiling
+javac -g -d classes -sourcepath build build/HTTPClient/*.java
+javac -g -classpath ./lib/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java
+
+
diff --git a/sandbox/contributions/webcrawler-LARM/clean.sh b/sandbox/contributions/webcrawler-LARM/clean.sh
new file mode 100755
index 00000000000..65c222feba1
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/clean.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Remove last-run output and build artifacts; -f keeps repeated cleans quiet.
+./cleanlastrun.sh
+rm -rf build
+rm -rf classes
diff --git a/sandbox/contributions/webcrawler-LARM/cleanlastrun.sh b/sandbox/contributions/webcrawler-LARM/cleanlastrun.sh
new file mode 100755
index 00000000000..730d2165b55
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/cleanlastrun.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Remove the directories left by the last run; -f: already-clean is not an error.
+rm -rf logs
+rm -rf cachingqueue
diff --git a/sandbox/contributions/webcrawler-LARM/og-build.sh b/sandbox/contributions/webcrawler-LARM/og-build.sh
new file mode 100755
index 00000000000..5ce5c7dd214
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/og-build.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Clean rebuild with the HTTPClient extraction and compile steps commented out.
+# clean (-f: a missing directory on a fresh checkout is not an error)
+echo cleaning
+rm -rf build
+rm -rf classes
+rm -rf cachingqueue
+rm -rf logs
+
+# build
+echo making build directory
+mkdir build
+cd build || exit 1   # otherwise the following "cd .." would leave the project
+#echo extracting http client
+#jar xvf ../lib/HTTPClient.zip >/dev/null
+cd ..
+cp -r src/* build
+mkdir classes
+echo compiling
+#javac -g -d classes -sourcepath build build/HTTPClient/*.java
+javac -g -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java
+
+
diff --git a/sandbox/contributions/webcrawler-LARM/run.sh b/sandbox/contributions/webcrawler-LARM/run.sh
new file mode 100755
index 00000000000..4af92d2fed6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/run.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Start a crawl with a fresh logs/ dir; the regex is quoted so the shell keeps its backslashes.
+rm -rf logs; mkdir logs
+java -server -Xmx400m -classpath classes:lib/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto 'http://[^/]*\.uni-muenchen\.de.*' -threads 15
diff --git a/sandbox/contributions/webcrawler-LARM/src/HTTPClient/ContentEncodingModule.java b/sandbox/contributions/webcrawler-LARM/src/HTTPClient/ContentEncodingModule.java
new file mode 100644
index 00000000000..994caec61f6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/HTTPClient/ContentEncodingModule.java
@@ -0,0 +1,278 @@
+/*
+ *  @(#)ContentEncodingModule.java			0.3-3 06/05/2001
+ *
+ *  This file is part of the HTTPClient package
+ *  Copyright (C) 1996-2001 Ronald Tschalär
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *  MA 02111-1307, USA
+ *
+ *  For questions, suggestions, bug-reports, enhancement-requests etc.
+ *  I may be contacted at:
+ *
+ *  ronald@innovation.ch
+ *
+ *  The HTTPClient's home page is located at:
+ *
+ *  http://www.innovation.ch/java/HTTPClient/
+ *
+ */
+package HTTPClient;
+
+import java.io.IOException;
+import java.util.Vector;
+import java.util.zip.InflaterInputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * This module handles the Content-Encoding response header. It currently
+ * handles the "gzip", "deflate", "compress" and "identity" tokens.
+ *
+ * @author    Ronald Tschalär
+ * @created   29 December 2001
+ * @version   0.3-3 06/05/2001
+ */
+public class ContentEncodingModule implements HTTPClientModule
+{
+    // Methods
+
+    /**
+     * Invoked by the HTTPClient; advertises the encodings this module can decode.
+     *
+     * @param req                  the request whose Accept-Encoding header is extended
+     * @param resp                 not used by this module
+     * @return                     REQ_CONTINUE in every non-error case
+     * @exception ModuleException  if Accept-Encoding or its "*" q value cannot be parsed
+     */
+    public int requestHandler(Request req, Response[] resp)
+        throws ModuleException
+    {
+        // parse Accept-Encoding header
+
+        int idx;
+        NVPair[] hdrs = req.getHeaders();
+        for (idx = 0; idx < hdrs.length; idx++)
+        {
+            if (hdrs[idx].getName().equalsIgnoreCase("Accept-Encoding"))
+            {
+                break;
+            }
+        }
+
+        Vector pae;
+        if (idx == hdrs.length)
+        {
+            hdrs = Util.resizeArray(hdrs, idx + 1);
+            req.setHeaders(hdrs);
+            pae = new Vector();
+        }
+        else
+        {
+            try
+            {
+                pae = Util.parseHeader(hdrs[idx].getValue());
+            }
+            catch (ParseException pe)
+            {
+                throw new ModuleException(pe.toString());
+            }
+        }
+
+        // done if "*;q=1.0" present
+        int qIdx;   // separate index: idx must keep pointing at the header slot written below
+        HttpHeaderElement all = Util.getElement(pae, "*");
+        if (all != null)
+        {
+            NVPair[] params = all.getParams();
+            for (qIdx = 0; qIdx < params.length; qIdx++)
+            {
+                if (params[qIdx].getName().equalsIgnoreCase("q"))
+                {
+                    break;
+                }
+            }
+
+            if (qIdx == params.length)
+            {
+                // no qvalue, i.e. q=1.0
+                return REQ_CONTINUE;
+            }
+
+            if (params[qIdx].getValue() == null ||
+                    params[qIdx].getValue().length() == 0)
+            {
+                throw new ModuleException("Invalid q value for \"*\" in " +
+                        "Accept-Encoding header: ");
+            }
+
+            try
+            {
+                if (Float.valueOf(params[qIdx].getValue()).floatValue() > 0.)
+                {
+                    return REQ_CONTINUE;
+                }
+            }
+            catch (NumberFormatException nfe)
+            {
+                throw new ModuleException("Invalid q value for \"*\" in " +
+                        "Accept-Encoding header: " + nfe.getMessage());
+            }
+        }
+
+        // Add gzip, deflate and compress tokens to the Accept-Encoding header
+
+        if (!pae.contains(new HttpHeaderElement("deflate")))
+        {
+            pae.addElement(new HttpHeaderElement("deflate"));
+        }
+        if (!pae.contains(new HttpHeaderElement("gzip")))
+        {
+            pae.addElement(new HttpHeaderElement("gzip"));
+        }
+        if (!pae.contains(new HttpHeaderElement("x-gzip")))
+        {
+            pae.addElement(new HttpHeaderElement("x-gzip"));
+        }
+        if (!pae.contains(new HttpHeaderElement("compress")))
+        {
+            pae.addElement(new HttpHeaderElement("compress"));
+        }
+        if (!pae.contains(new HttpHeaderElement("x-compress")))
+        {
+            pae.addElement(new HttpHeaderElement("x-compress"));
+        }
+
+        hdrs[idx] = new NVPair("Accept-Encoding", Util.assembleHeader(pae));
+
+        return REQ_CONTINUE;
+    }
+
+
+    /**
+     * Invoked by the HTTPClient; this module does nothing in phase 1.
+     *
+     * @param resp  not used
+     * @param req   not used
+     */
+    public void responsePhase1Handler(Response resp, RoRequest req)
+    {
+    }
+
+
+    /**
+     * Invoked by the HTTPClient; this module does nothing in phase 2.
+     *
+     * @param resp  not used
+     * @param req   not used
+     * @return      always RSP_CONTINUE
+     */
+    public int responsePhase2Handler(Response resp, Request req)
+    {
+        return RSP_CONTINUE;
+    }
+
+
+    /**
+     * Invoked by the HTTPClient; wraps the response stream in a decoder for its outermost encoding.
+     *
+     * @param resp                 the response whose input stream and headers are adjusted
+     * @param req                  the request; responses to HEAD are left untouched
+     * @exception IOException      if the decoding stream cannot be created
+     * @exception ModuleException  if the Content-Encoding header cannot be parsed
+     */
+    public void responsePhase3Handler(Response resp, RoRequest req)
+        throws IOException, ModuleException
+    {
+        String ce = resp.getHeader("Content-Encoding");
+        if (ce == null || req.getMethod().equals("HEAD") ||
+                resp.getStatusCode() == 206)
+        {
+            return;
+        }
+
+        Vector pce;
+        try
+        {
+            pce = Util.parseHeader(ce);
+        }
+        catch (ParseException pe)
+        {
+            throw new ModuleException(pe.toString());
+        }
+
+        if (pce.size() == 0)
+        {
+            return;
+        }
+        // encodings are listed in the order applied, so undo the last one (the one removed below)
+        String encoding = ((HttpHeaderElement) pce.lastElement()).getName();
+        if (encoding.equalsIgnoreCase("gzip") ||
+                encoding.equalsIgnoreCase("x-gzip"))
+        {
+            Log.write(Log.MODS, "CEM:   pushing gzip-input-stream");
+
+            resp.inp_stream = new GZIPInputStream(resp.inp_stream);
+            pce.removeElementAt(pce.size() - 1);
+            resp.deleteHeader("Content-length");
+        }
+        else if (encoding.equalsIgnoreCase("deflate"))
+        {
+            Log.write(Log.MODS, "CEM:   pushing inflater-input-stream");
+
+            resp.inp_stream = new InflaterInputStream(resp.inp_stream);
+            pce.removeElementAt(pce.size() - 1);
+            resp.deleteHeader("Content-length");
+        }
+        else if (encoding.equalsIgnoreCase("compress") ||
+                encoding.equalsIgnoreCase("x-compress"))
+        {
+            Log.write(Log.MODS, "CEM:   pushing uncompress-input-stream");
+
+            resp.inp_stream = new UncompressInputStream(resp.inp_stream);
+            pce.removeElementAt(pce.size() - 1);
+            resp.deleteHeader("Content-length");
+        }
+        else if (encoding.equalsIgnoreCase("identity"))
+        {
+            Log.write(Log.MODS, "CEM:   ignoring 'identity' token");
+            pce.removeElementAt(pce.size() - 1);
+        }
+        else
+        {
+            Log.write(Log.MODS, "CEM:   Unknown content encoding '" +
+                    encoding + "'");
+        }
+
+        if (pce.size() > 0)
+        {
+            resp.setHeader("Content-Encoding", Util.assembleHeader(pce));
+        }
+        else
+        {
+            resp.deleteHeader("Content-Encoding");
+        }
+    }
+
+
+    /**
+     * Invoked by the HTTPClient; this module does nothing with trailers.
+     *
+     * @param resp  not used
+     * @param req   not used
+     */
+    public void trailerHandler(Response resp, RoRequest req)
+    {
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/HTTPClient/HTTPConnection.java b/sandbox/contributions/webcrawler-LARM/src/HTTPClient/HTTPConnection.java
new file mode 100644
index 00000000000..ba9309cb84c
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/HTTPClient/HTTPConnection.java
@@ -0,0 +1,4489 @@
+/*
+ *  @(#)HTTPConnection.java				0.3-3 06/05/2001
+ *
+ *  This file is part of the HTTPClient package
+ *  Copyright (C) 1996-2001 Ronald Tschalär
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *  MA 02111-1307, USA
+ *
+ *  For questions, suggestions, bug-reports, enhancement-requests etc.
+ *  I may be contacted at:
+ *
+ *  ronald@innovation.ch
+ *
+ *  The HTTPClient's home page is located at:
+ *
+ *  http://www.innovation.ch/java/HTTPClient/
+ *
+ */
+package HTTPClient;
+
+import java.io.OutputStream;
+import java.io.DataOutputStream;
+import java.io.FilterOutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.net.URL;
+import java.net.Socket;
+import java.net.InetAddress;
+import java.net.SocketException;
+import java.net.ConnectException;
+import java.net.UnknownHostException;
+import java.net.NoRouteToHostException;
+import java.util.Vector;
+import java.applet.Applet;
+
+/**
+ * This class implements http protocol requests; it contains most of HTTP/1.1
+ * and ought to be unconditionally compliant. Redirections are automatically
+ * handled, and authorizations requests are recognized and dealt with via an
+ * authorization handler. Only full HTTP/1.0 and HTTP/1.1 requests are
+ * generated. HTTP/1.1, HTTP/1.0 and HTTP/0.9 responses are recognized. <P>
+ *
+ * Using the HTTPClient should be quite simple. First add the import statement '
+ * <code>import HTTPClient.*;</code>' to your file(s). Request can then be sent
+ * using one of the methods <var>Head()</var> , <var>Get()</var> , <var>Post()
+ * </var>, etc in <var>HTTPConnection</var> . These methods all return an
+ * instance of <var>HTTPResponse</var> which has methods for accessing the
+ * response headers (<var>getHeader()</var> , <var>getHeaderAsInt()</var> ,
+ * etc), various response info (<var>getStatusCode()</var> , <var>
+ * getReasonLine()</var> , etc) and the response data (<var>getData()</var> ,
+ * <var>getText()</var> , and <var>getInputStream()</var> ). Following are some
+ * examples. <P>
+ *
+ * If this is in an applet you can retrieve files from your server as follows:
+ * <PRE>
+ *     try
+ *     {
+ *         HTTPConnection con = new HTTPConnection(this);
+ *         HTTPResponse   rsp = con.Get("/my_file");
+ *         if (rsp.getStatusCode() >= 300)
+ *         {
+ *             System.err.println("Received Error: "+rsp.getReasonLine());
+ *             System.err.println(rsp.getText());
+ *         }
+ *         else
+ *             data = rsp.getData();
+ *
+ *         rsp = con.Get("/another_file");
+ *         if (rsp.getStatusCode() >= 300)
+ *         {
+ *             System.err.println("Received Error: "+rsp.getReasonLine());
+ *             System.err.println(rsp.getText());
+ *         }
+ *         else
+ *             other_data = rsp.getData();
+ *     }
+ *     catch (IOException ioe)
+ *     {
+ *         System.err.println(ioe.toString());
+ *     }
+ *     catch (ModuleException me)
+ *     {
+ *         System.err.println("Error handling request: " + me.getMessage());
+ *     }
+ * </PRE> This will get the files "/my_file" and "/another_file" and put their
+ * contents into byte[]'s accessible via <code>getData()</code>. Note that you
+ * need to only create a new <var>HTTPConnection</var> when sending a request to
+ * a new server (different host or port); although you may create a new <var>
+ * HTTPConnection</var> for every request to the same server this is <strong>not
+ * </strong> recommended, as various information about the server is cached
+ * after the first request (to optimize subsequent requests) and persistent
+ * connections are used whenever possible. <P>
+ *
+ * To POST form data you would use something like this (assuming you have two
+ * fields called <var>name</var> and <var>e-mail</var> , whose contents are
+ * stored in the variables <var>name</var> and <var>email</var> ): <PRE>
+ *     try
+ *     {
+ *         NVPair form_data[] = new NVPair[2];
+ *         form_data[0] = new NVPair("name", name);
+ *         form_data[1] = new NVPair("e-mail", email);
+ *
+ *         HTTPConnection con = new HTTPConnection(this);
+ *         HTTPResponse   rsp = con.Post("/cgi-bin/my_script", form_data);
+ *         if (rsp.getStatusCode() >= 300)
+ *         {
+ *             System.err.println("Received Error: "+rsp.getReasonLine());
+ *             System.err.println(rsp.getText());
+ *         }
+ *         else
+ *             stream = rsp.getInputStream();
+ *     }
+ *     catch (IOException ioe)
+ *     {
+ *         System.err.println(ioe.toString());
+ *     }
+ *     catch (ModuleException me)
+ *     {
+ *         System.err.println("Error handling request: " + me.getMessage());
+ *     }
+ * </PRE> Here the response data is read at leisure via an <var>InputStream
+ * </var> instead of all at once into a <var>byte[]</var> . <P>
+ *
+ * As another example, if you have a URL you're trying to send a request to you
+ * would do something like the following: <PRE>
+ *     try
+ *     {
+ *         URL url = new URL("http://www.mydomain.us/test/my_file");
+ *         HTTPConnection con = new HTTPConnection(url);
+ *         HTTPResponse   rsp = con.Put(url.getFile(), "Hello World");
+ *         if (rsp.getStatusCode() >= 300)
+ *         {
+ *             System.err.println("Received Error: "+rsp.getReasonLine());
+ *             System.err.println(rsp.getText());
+ *         }
+ *         else
+ *             text = rsp.getText();
+ *     }
+ *     catch (IOException ioe)
+ *     {
+ *         System.err.println(ioe.toString());
+ *     }
+ *     catch (ModuleException me)
+ *     {
+ *         System.err.println("Error handling request: " + me.getMessage());
+ *     }
+ * </PRE> <P>
+ *
+ * There are a whole number of methods for each request type; however the
+ * general forms are ([...] means that the enclosed is optional):
+ * <ul>
+ *   <li> Head ( file [, form-data [, headers ] ] )
+ *   <li> Head ( file [, query [, headers ] ] )
+ *   <li> Get ( file [, form-data [, headers ] ] )
+ *   <li> Get ( file [, query [, headers ] ] )
+ *   <li> Post ( file [, form-data [, headers ] ] )
+ *   <li> Post ( file [, data [, headers ] ] )
+ *   <li> Post ( file [, stream [, headers ] ] )
+ *   <li> Put ( file , data [, headers ] )
+ *   <li> Put ( file , stream [, headers ] )
+ *   <li> Delete ( file [, headers ] )
+ *   <li> Options ( file [, headers [, data] ] )
+ *   <li> Options ( file [, headers [, stream] ] )
+ *   <li> Trace ( file [, headers ] )
+ * </ul>
+ *
+ *
+ * @author    Ronald Tschalär
+ * @created   29 December 2001
+ * @version   0.3-3 06/05/2001
+ */
+public class HTTPConnection implements GlobalConstants, HTTPClientModuleConstants
+{
+    /**
+     * The current version of this package.
+     */
+    public final static String version = "RPT-HTTPClient/0.3-3";
+
+    /**
+     * The default context
+     */
+    private final static Object dflt_context = new Object();
+
+    /**
+     * The current context
+     */
+    private Object Context = null;
+
+    /**
+     * The protocol used on this connection
+     */
+    private int Protocol;
+
+    /**
+     * The server's protocol version; M.m stored as (M<<16 | m)
+     */
+    int ServerProtocolVersion;
+
+    /**
+     * Have we gotten the server's protocol version yet?
+     */
+    boolean ServProtVersKnown;
+
+    /**
+     * The protocol version we send in a request; this is always HTTP/1.1 unless
+     * we're talking to a broken server in which case it's HTTP/1.0
+     */
+    private String RequestProtocolVersion;
+
+    /**
+     * The remote host this connection is associated with
+     */
+    private String Host;
+
+    /**
+     * The remote port this connection is attached to
+     */
+    private int Port;
+
+    /**
+     * The local address this connection is associated with
+     */
+    private InetAddress LocalAddr;
+
+    /**
+     * The local port this connection is attached to
+     */
+    private int LocalPort;
+
+    /**
+     * The current proxy host to use (if any)
+     */
+    private String Proxy_Host = null;
+
+    /**
+     * The current proxy port
+     */
+    private int Proxy_Port;
+
+    /**
+     * The default proxy host to use (if any); shared by all connections
+     */
+    private static String Default_Proxy_Host = null;
+
+    /**
+     * The default proxy port; shared by all connections
+     */
+    private static int Default_Proxy_Port;
+
+    /**
+     * The lists of hosts for which no proxy is to be used (this and the next three fields)
+     */
+    private static CIHashtable non_proxy_host_list = new CIHashtable();
+    private static Vector non_proxy_dom_list = new Vector();    // presumably domain entries - confirm in dontProxyFor()
+    private static Vector non_proxy_addr_list = new Vector();   // presumably addresses - confirm in dontProxyFor()
+    private static Vector non_proxy_mask_list = new Vector();   // presumably masks for the addresses - confirm
+
+    /**
+     * The socks server to use
+     */
+    private SocksClient Socks_client = null;
+
+    /**
+     * The default socks server to use; shared by all connections
+     */
+    private static SocksClient Default_Socks_client = null;
+
+    /**
+     * the current stream demultiplexor
+     */
+    private StreamDemultiplexor input_demux = null;
+
+    /**
+     * a list of active stream demultiplexors
+     */
+    LinkedList DemuxList = new LinkedList();
+
+    /**
+     * a list of active requests
+     */
+    private LinkedList RequestList = new LinkedList();
+
+    /**
+     * does the server support keep-alive's?
+     */
+    private boolean doesKeepAlive = false;
+
+    /**
+     * have we been able to determine the above yet?
+     */
+    private boolean keepAliveUnknown = true;
+
+    /**
+     * the maximum number of requests over a HTTP/1.0 keep-alive connection
+     */
+    private int keepAliveReqMax = -1;
+
+    /**
+     * the number of requests over a HTTP/1.0 keep-alive connection left
+     */
+    private int keepAliveReqLeft;
+
+    /**
+     * hack to force buffering of data instead of using chunked T-E (property HTTPClient.dontChunkRequests)
+     */
+    private static boolean no_chunked = false;
+
+    /**
+     * hack to force HTTP/1.0 requests (property HTTPClient.forceHTTP_1.0)
+     */
+    private static boolean force_1_0 = false;
+
+    /**
+     * hack to be able to disable pipelining (property HTTPClient.disable_pipelining)
+     */
+    private static boolean neverPipeline = false;
+
+    /**
+     * hack to be able to disable keep-alives (property HTTPClient.disableKeepAlives)
+     */
+    private static boolean noKeepAlives = false;
+
+    /**
+     * hack to work around a Microsoft large-writes bug (set on Windows with Java 1.1)
+     */
+    private static boolean haveMSLargeWritesBug = false;
+
+    /**
+     * hack to only enable deferred handling of streamed requests when configured
+     * to do so (property HTTPClient.deferStreamed).
+     */
+    static boolean deferStreamed = false;
+
+    /**
+     * the default timeout to use for new connections
+     */
+    private static int DefaultTimeout = 0;
+
+    /**
+     * the timeout to use for reading responses
+     */
+    private int Timeout;
+
+    /**
+     * The list of default http headers
+     */
+    private NVPair[] DefaultHeaders = new NVPair[0];
+
+    /**
+     * The default list of modules (as a Vector of Class objects); filled by the static initializer
+     */
+    private static Vector DefaultModuleList;
+
+    /**
+     * The list of modules (as a Vector of Class objects)
+     */
+    private Vector ModuleList;
+
+    /**
+     * class-wide default for whether modules are allowed to interact with user
+     */
+    private static boolean defaultAllowUI = true;
+
+    /**
+     * controls whether modules are allowed to interact with user on this connection
+     */
+    private boolean allowUI;
+
+    static
+    {
+        /*
+         *  One-time setup from system properties. First, try to figure out
+         *  whether any proxies are being used.
+         */
+        try
+        {
+            // JDK 1.1 naming
+
+            String host = System.getProperty("http.proxyHost");
+            if (host == null)
+            {
+                throw new Exception(); // not set: jump to the JDK 1.0.x lookup in the catch below
+            }
+            // host is set: read the matching port (-1 if the property is absent)
+            int port = Integer.getInteger("http.proxyPort", -1).intValue();
+
+            Log.write(Log.CONN, "Conn:  using proxy " + host + ":" + port);
+            setProxyServer(host, port);
+        }
+        catch (Exception e)
+        {
+            try
+            {
+                // JDK 1.0.x naming
+
+                if (Boolean.getBoolean("proxySet"))
+                {
+                    String host = System.getProperty("proxyHost");
+                    int port = Integer.getInteger("proxyPort", -1).intValue();
+                    Log.write(Log.CONN, "Conn:  using proxy " + host + ":" + port);
+                    setProxyServer(host, port);
+                }
+            }
+            catch (Exception ee)
+            {
+                Default_Proxy_Host = null; // any failure: run without a default proxy
+            }
+        }
+
+        /*
+         *  now check for the non-proxy list
+         */
+        try
+        {
+            String hosts = System.getProperty("HTTPClient.nonProxyHosts");
+            if (hosts == null)
+            {
+                hosts = System.getProperty("http.nonProxyHosts"); // fall back to the standard property name
+            }
+
+            String[] list = Util.splitProperty(hosts);
+            dontProxyFor(list);
+        }
+        catch (Exception e) // deliberately ignored (best effort)
+        {
+        }
+
+        /*
+         *  we can't turn the JDK SOCKS handling off, so we don't use the
+         *  properties 'socksProxyHost' and 'socksProxyPort'. Instead we
+         *  define 'HTTPClient.socksHost', 'HTTPClient.socksPort' and
+         *  'HTTPClient.socksVersion'.
+         */
+        try
+        {
+            String host = System.getProperty("HTTPClient.socksHost");
+            if (host != null && host.length() > 0)
+            {
+                int port = Integer.getInteger("HTTPClient.socksPort", -1).intValue();
+                int version = Integer.getInteger("HTTPClient.socksVersion", -1).intValue();
+                Log.write(Log.CONN, "Conn:  using SOCKS " + host + ":" + port);
+                if (version == -1) // no version configured: use the two-argument overload
+                {
+                    setSocksServer(host, port);
+                }
+                else
+                {
+                    setSocksServer(host, port, version);
+                }
+            }
+        }
+        catch (Exception e)
+        {
+            Default_Socks_client = null; // any failure: no default SOCKS server
+        }
+
+        // Set up module list ('|'-separated class names; overridable via HTTPClient.Modules)
+
+        String modules = "HTTPClient.RetryModule|" +
+                "HTTPClient.CookieModule|" +
+                "HTTPClient.RedirectionModule|" +
+                "HTTPClient.AuthorizationModule|" +
+                "HTTPClient.DefaultModule|" +
+                "HTTPClient.TransferEncodingModule|" +
+                "HTTPClient.ContentMD5Module|" +
+                "HTTPClient.ContentEncodingModule";
+
+        boolean in_applet = false;
+        try
+        {
+            modules = System.getProperty("HTTPClient.Modules", modules);
+        }
+        catch (SecurityException se)
+        {
+            in_applet = true; // property access denied: treat this as running inside an applet
+        }
+
+        DefaultModuleList = new Vector();
+        String[] list = Util.splitProperty(modules);
+        for (int idx = 0; idx < list.length; idx++)
+        {
+            try
+            {
+                DefaultModuleList.addElement(Class.forName(list[idx]));
+                Log.write(Log.CONN, "Conn:  added module " + list[idx]);
+            }
+            catch (ClassNotFoundException cnfe)
+            {
+                if (!in_applet) // outside an applet a missing module is a hard error
+                {
+                    throw new NoClassDefFoundError(cnfe.getMessage());
+                }
+
+                /*
+                 *  Just ignore it. This allows for example applets to just
+                 *  load the necessary modules - if you don't need a module
+                 *  then don't provide it, and it won't be added to the
+                 *  list. The disadvantage is that if you accidentally mistype
+                 *  a module name this will lead to a "silent" error.
+                 */
+            }
+        }
+
+        /*
+         *  Hack: disable pipelining
+         */
+        try
+        {
+            neverPipeline = Boolean.getBoolean("HTTPClient.disable_pipelining");
+            if (neverPipeline)
+            {
+                Log.write(Log.CONN, "Conn:  disabling pipelining");
+            }
+        }
+        catch (Exception e) // ignored: the flag keeps its initial value
+        {
+        }
+
+        /*
+         *  Hack: disable keep-alives
+         */
+        try
+        {
+            noKeepAlives = Boolean.getBoolean("HTTPClient.disableKeepAlives");
+            if (noKeepAlives)
+            {
+                Log.write(Log.CONN, "Conn:  disabling keep-alives");
+            }
+        }
+        catch (Exception e) // ignored: the flag keeps its initial value
+        {
+        }
+
+        /*
+         *  Hack: force HTTP/1.0 requests
+         */
+        try
+        {
+            force_1_0 = Boolean.getBoolean("HTTPClient.forceHTTP_1.0");
+            if (force_1_0)
+            {
+                Log.write(Log.CONN, "Conn:  forcing HTTP/1.0 requests");
+            }
+        }
+        catch (Exception e) // ignored: the flag keeps its initial value
+        {
+        }
+
+        /*
+         *  Hack: prevent chunking of request data
+         */
+        try
+        {
+            no_chunked = Boolean.getBoolean("HTTPClient.dontChunkRequests");
+            if (no_chunked)
+            {
+                Log.write(Log.CONN, "Conn:  never chunking requests");
+            }
+        }
+        catch (Exception e) // ignored: the flag keeps its initial value
+        {
+        }
+
+        /*
+         *  M$ bug: large writes hang the stuff (detected as Windows + Java 1.1)
+         */
+        try
+        {
+            if (System.getProperty("os.name").indexOf("Windows") >= 0 &&
+                    System.getProperty("java.version").startsWith("1.1"))
+            {
+                haveMSLargeWritesBug = true;
+            }
+            if (haveMSLargeWritesBug)
+            {
+                Log.write(Log.CONN, "Conn:  splitting large writes into 20K chunks (M$ bug)");
+            }
+        }
+        catch (Exception e) // ignored: the flag keeps its initial value
+        {
+        }
+
+        /*
+         *  Deferring the handling of responses to requests which used an output
+         *  stream is new in V0.3-3. Because it can cause memory leaks for apps
+         *  which aren't expecting this, we only enable this feature if
+         *  explicitly requested to do so.
+         */
+        try
+        {
+            deferStreamed = Boolean.getBoolean("HTTPClient.deferStreamed");
+            if (deferStreamed)
+            {
+                Log.write(Log.CONN, "Conn:  enabling defered handling of " +
+                        "responses to streamed requests");
+            }
+        }
+        catch (Exception e) // ignored: the flag keeps its initial value
+        {
+        }
+    }
+
+
+    // Constructors
+
+    /**
+     * Constructs a connection to the host from where the applet was loaded.
+     * Note that current security policies only let applets connect home.
+     * Protocol, host and port are all taken from the applet's code base URL.
+     * @param applet                        the current applet
+     * @exception ProtocolNotSuppException  if the delegated constructor rejects the code base's protocol
+     */
+    public HTTPConnection(Applet applet)
+        throws ProtocolNotSuppException
+    {
+        this(applet.getCodeBase().getProtocol(),
+                applet.getCodeBase().getHost(),
+                applet.getCodeBase().getPort());
+    }
+
+
+    /**
+     * Constructs an HTTP connection to the specified host on port 80
+     * (Setup is called with a null local address and local port -1).
+     * @param host  the host
+     */
+    public HTTPConnection(String host)
+    {
+        Setup(HTTP, host, 80, null, -1);
+    }
+
+
+    /**
+     * Constructs an HTTP connection to the specified host on the specified port
+     * (Setup is called with a null local address and local port -1).
+     * @param host  the host
+     * @param port  the port
+     */
+    public HTTPConnection(String host, int port)
+    {
+        Setup(HTTP, host, port, null, -1);
+    }
+
+
+    /**
+     * Constructs a connection to the specified host on the specified port,
+     * using the specified protocol (currently only "http" is supported).
+     *
+     * @param prot                          the protocol
+     * @param host                          the host
+     * @param port                          the port, or -1 for the default port
+     * @exception ProtocolNotSuppException  if the protocol is not HTTP
+     */
+    public HTTPConnection(String prot, String host, int port)
+        throws ProtocolNotSuppException
+    {
+        // Same as the five-argument constructor with no local address (null)
+        // and a local port of -1.
+        this(prot, host, port, null, -1);
+    }
+
+
+    /**
+     * Constructs a connection to the specified host on the specified port,
+     * using the specified protocol (currently only "http" is supported), local
+     * address, and local port.
+     *
+     * @param prot                          the protocol
+     * @param host                          the host
+     * @param port                          the port, or -1 for the default port
+     * @param localAddr                     the local address to bind to
+     * @param localPort                     the local port to bind to
+     * @exception ProtocolNotSuppException  if the protocol is not HTTP
+     */
+    public HTTPConnection(String prot, String host, int port,
+            InetAddress localAddr, int localPort)
+        throws ProtocolNotSuppException
+    {
+        // Normalize so that the comparison below ignores case and surrounding
+        // whitespace.
+        prot = prot.trim().toLowerCase();
+
+        // Only plain HTTP is accepted here. The https / shttp / http-ng
+        // dispatch that used to follow this check could never be reached
+        // (every protocol other than "http" has already been rejected), so it
+        // was removed. To support another protocol, widen this check and add
+        // the matching Setup() call.
+        if (!prot.equals("http"))
+        {
+            throw new ProtocolNotSuppException("Unsupported protocol '" + prot + "'");
+        }
+
+        Setup(HTTP, host, port, localAddr, localPort);
+    }
+
+
+    /**
+     * Constructs a connection to the host (port) as given in the url.
+     *
+     * @param url                           the url
+     * @exception ProtocolNotSuppException  if the protocol is not HTTP
+     */
+    public HTTPConnection(URL url)
+        throws ProtocolNotSuppException
+    {
+        // A port of -1 is passed through unchanged; Setup() later replaces it
+        // with the protocol's default port.
+        this(url.getProtocol(), url.getHost(), url.getPort());
+    }
+
+
+    /**
+     * Constructs a connection to the host (port) as given in the uri.
+     *
+     * @param uri                           the uri
+     * @exception ProtocolNotSuppException  if the protocol is not HTTP
+     */
+    public HTTPConnection(URI uri)
+        throws ProtocolNotSuppException
+    {
+        // The uri's scheme is used as the protocol name; a port of -1 is
+        // replaced with the protocol's default port by Setup().
+        this(uri.getScheme(), uri.getHost(), uri.getPort());
+    }
+
+
+    /**
+     * Sets the class variables. Must not be public.
+     *
+     * @param prot       the protocol
+     * @param host       the host
+     * @param port       the port
+     * @param localAddr  the local address to bind to; if null, it's ignored
+     * @param localPort  the local port to bind to
+     */
+    private void Setup(int prot, String host, int port, InetAddress localAddr,
+            int localPort)
+    {
+        Protocol = prot;
+        Host = host.trim().toLowerCase();
+        Port = port;
+        LocalAddr = localAddr;
+        LocalPort = localPort;
+
+        if (Port == -1)
+        {
+            Port = URI.defaultPort(getProtocol());
+        }
+
+        if (Default_Proxy_Host != null && !matchNonProxy(Host))
+        {
+            setCurrentProxy(Default_Proxy_Host, Default_Proxy_Port);
+        }
+        else
+        {
+            setCurrentProxy(null, 0);
+        }
+
+        Socks_client = Default_Socks_client;
+        Timeout = DefaultTimeout;
+        ModuleList = (Vector) DefaultModuleList.clone();
+        allowUI = defaultAllowUI;
+        if (noKeepAlives)
+        {
+            setDefaultHeaders(new NVPair[]{new NVPair("Connection", "close")});
+        }
+    }
+
+
+    /**
+     * Determines if the given host matches any entry in the non-proxy list.
+     *
+     * @param host  the host to match - must be trim()'d and lowercase
+     * @return      true if a match is found, false otherwise
+     * @see         #dontProxyFor(java.lang.String)
+     */
+    private boolean matchNonProxy(String host)
+    {
+        // Pass 1: exact entry in the host name list.
+        if (non_proxy_host_list.get(host) != null)
+        {
+            return true;
+        }
+
+        // Pass 2: the host ends with one of the listed domain suffixes.
+        for (int d = 0; d < non_proxy_dom_list.size(); d++)
+        {
+            String suffix = (String) non_proxy_dom_list.elementAt(d);
+            if (host.endsWith(suffix))
+            {
+                return true;
+            }
+        }
+
+        // Pass 3: IP-address and subnet list. The name lookup is skipped
+        // entirely when that list is empty.
+        if (non_proxy_addr_list.size() == 0)
+        {
+            return false;
+        }
+
+        InetAddress[] resolved;
+        try
+        {
+            resolved = InetAddress.getAllByName(host);
+        }
+        catch (UnknownHostException uhe)
+        {
+            // Not resolvable from here: report "no match" - maybe the proxy
+            // has better luck.
+            return false;
+        }
+
+        for (int entry = 0; entry < non_proxy_addr_list.size(); entry++)
+        {
+            byte[] net = (byte[]) non_proxy_addr_list.elementAt(entry);
+            byte[] mask = (byte[]) non_proxy_mask_list.elementAt(entry);
+
+            for (int a = 0; a < resolved.length; a++)
+            {
+                byte[] raw = resolved[a].getAddress();
+                if (raw.length == net.length)
+                {
+                    // Count the leading bytes that agree once the mask is
+                    // applied; a full-length run means the address matches.
+                    int same = 0;
+                    while (same < raw.length
+                            && (raw[same] & mask[same]) == (net[same] & mask[same]))
+                    {
+                        same++;
+                    }
+                    if (same == raw.length)
+                    {
+                        return true;
+                    }
+                }
+            }
+        }
+
+        return false;
+    }
+
+
+    // Methods
+
+    /**
+     * Sends the HEAD request. This request is just like the corresponding GET
+     * except that it only returns the headers and no data.
+     *
+     * @param file                 the absolute path of the file
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        #Get(java.lang.String)
+     */
+    public HTTPResponse Head(String file)
+        throws IOException, ModuleException
+    {
+        // The (String) cast selects the query-string overload; a bare null
+        // would be ambiguous with the NVPair[] form-data overload.
+        return Head(file, (String) null, null);
+    }
+
+
+    /**
+     * Sends the HEAD request. This request is just like the corresponding GET
+     * except that it only returns the headers and no data.
+     *
+     * @param file                 the absolute path of the file
+     * @param form_data            an array of Name/Value pairs
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        #Get(java.lang.String, HTTPClient.NVPair[])
+     */
+    public HTTPResponse Head(String file, NVPair form_data[])
+        throws IOException, ModuleException
+    {
+        // Same as the three-argument form-data variant, with no extra headers.
+        return Head(file, form_data, null);
+    }
+
+
+    /**
+     * Sends the HEAD request. This request is just like the corresponding GET
+     * except that it only returns the headers and no data.
+     *
+     * @param file                 the absolute path of the file
+     * @param form_data            an array of Name/Value pairs
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        #Get(java.lang.String, HTTPClient.NVPair[],
+     *      HTTPClient.NVPair[])
+     */
+    public HTTPResponse Head(String file, NVPair[] form_data, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // Request target = stripRef(file), followed by '?' and the query built
+        // from the form data whenever that query is non-empty.
+        final String path = stripRef(file);
+        final String encoded = Codecs.nv2query(form_data);
+        final boolean noQuery = (encoded == null) || (encoded.length() == 0);
+        final String target = noQuery ? path : path + "?" + encoded;
+
+        return setupRequest("HEAD", target, headers, null, null);
+    }
+
+
+    /**
+     * Sends the HEAD request. This request is just like the corresponding GET
+     * except that it only returns the headers and no data.
+     *
+     * @param file                 the absolute path of the file
+     * @param query                the query string; it will be urlencoded
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        #Get(java.lang.String, java.lang.String)
+     */
+    public HTTPResponse Head(String file, String query)
+        throws IOException, ModuleException
+    {
+        // Same as the three-argument query-string variant, with no extra
+        // headers.
+        return Head(file, query, null);
+    }
+
+
+    /**
+     * Sends the HEAD request. This request is just like the corresponding GET
+     * except that it only returns the headers and no data.
+     *
+     * @param file                 the absolute path of the file
+     * @param query                the query string; it will be urlencoded
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        #Get(java.lang.String, java.lang.String,
+     *      HTTPClient.NVPair[])
+     */
+    public HTTPResponse Head(String file, String query, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // Request target = stripRef(file), followed by '?' and the url-encoded
+        // query whenever a non-empty query was supplied.
+        final String path = stripRef(file);
+        final boolean noQuery = (query == null) || (query.length() == 0);
+        final String target =
+                noQuery ? path : path + "?" + Codecs.URLEncode(query);
+
+        return setupRequest("HEAD", target, headers, null, null);
+    }
+
+
+    /**
+     * GETs the file.
+     *
+     * @param file                 the absolute path of the file
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Get(String file)
+        throws IOException, ModuleException
+    {
+        // The (String) cast selects the query-string overload; a bare null
+        // would be ambiguous with the NVPair[] form-data overload.
+        return Get(file, (String) null, null);
+    }
+
+
+    /**
+     * GETs the file with a query consisting of the specified form-data. The
+     * data is urlencoded, turned into a string of the form
+     * "name1=value1&name2=value2" and then sent as a query string.
+     *
+     * @param file                 the absolute path of the file
+     * @param form_data            an array of Name/Value pairs
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Get(String file, NVPair form_data[])
+        throws IOException, ModuleException
+    {
+        // Same as the three-argument form-data variant, with no extra headers.
+        return Get(file, form_data, null);
+    }
+
+
+    /**
+     * GETs the file with a query consisting of the specified form-data. The
+     * data is urlencoded, turned into a string of the form
+     * "name1=value1&name2=value2" and then sent as a query string.
+     *
+     * @param file                 the absolute path of the file
+     * @param form_data            an array of Name/Value pairs
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Get(String file, NVPair[] form_data, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // Request target = stripRef(file), followed by '?' and the query built
+        // from the form data whenever that query is non-empty.
+        final String path = stripRef(file);
+        final String encoded = Codecs.nv2query(form_data);
+        final boolean noQuery = (encoded == null) || (encoded.length() == 0);
+        final String target = noQuery ? path : path + "?" + encoded;
+
+        return setupRequest("GET", target, headers, null, null);
+    }
+
+
+    /**
+     * GETs the file using the specified query string. The query string is first
+     * urlencoded.
+     *
+     * @param file                 the absolute path of the file
+     * @param query                the query
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Get(String file, String query)
+        throws IOException, ModuleException
+    {
+        // Same as the three-argument query-string variant, with no extra
+        // headers.
+        return Get(file, query, null);
+    }
+
+
+    /**
+     * GETs the file using the specified query string. The query string is first
+     * urlencoded.
+     *
+     * @param file                 the absolute path of the file
+     * @param query                the query string
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Get(String file, String query, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // Request target = stripRef(file), followed by '?' and the url-encoded
+        // query whenever a non-empty query was supplied.
+        final String path = stripRef(file);
+        final boolean noQuery = (query == null) || (query.length() == 0);
+        final String target =
+                noQuery ? path : path + "?" + Codecs.URLEncode(query);
+
+        return setupRequest("GET", target, headers, null, null);
+    }
+
+
+    /**
+     * POSTs to the specified file. No data is sent.
+     *
+     * @param file                 the absolute path of the file
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file)
+        throws IOException, ModuleException
+    {
+        // The (byte[]) cast selects the raw-data overload, which substitutes
+        // an empty byte array for the null body.
+        return Post(file, (byte[]) null, null);
+    }
+
+
+    /**
+     * POSTs form-data to the specified file. The data is first urlencoded and
+     * then turned into a string of the form "name1=value1&name2=value2". A
+     * <var>Content-type</var> header with the value <var>
+     * application/x-www-form-urlencoded</var> is added.
+     *
+     * @param file                 the absolute path of the file
+     * @param form_data            an array of Name/Value pairs
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file, NVPair form_data[])
+        throws IOException, ModuleException
+    {
+        // The only header sent with the form is the urlencoded content type.
+        final NVPair contentType =
+                new NVPair("Content-type", "application/x-www-form-urlencoded");
+        final NVPair[] formHeaders = new NVPair[]{contentType};
+
+        // Body is the query-string form of the name/value pairs.
+        final String body = Codecs.nv2query(form_data);
+        return Post(file, body, formHeaders);
+    }
+
+
+    /**
+     * POSTs form-data to the specified file using the specified headers. The
+     * data is first urlencoded and then turned into a string of the form
+     * "name1=value1&name2=value2". If no <var>Content-type</var> header is
+     * given then one is added with a value of <var>
+     * application/x-www-form-urlencoded</var> .
+     *
+     * @param file                 the absolute path of the file
+     * @param form_data            an array of Name/Value pairs
+     * @param headers              additional headers
+     * @return                     a HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file, NVPair form_data[], NVPair headers[])
+        throws IOException, ModuleException
+    {
+        // The other request methods accept null for "no additional headers";
+        // treat it as an empty array here instead of failing with a
+        // NullPointerException on headers.length.
+        if (headers == null)
+        {
+            headers = new NVPair[0];
+        }
+
+        // Look for a caller-supplied Content-type header (case-insensitive).
+        int idx;
+        for (idx = 0; idx < headers.length; idx++)
+        {
+            if (headers[idx].getName().equalsIgnoreCase("Content-type"))
+            {
+                break;
+            }
+        }
+
+        // None given: append the default form content type. The array returned
+        // by Util.resizeArray() is used from here on.
+        if (idx == headers.length)
+        {
+            headers = Util.resizeArray(headers, idx + 1);
+            headers[idx] =
+                    new NVPair("Content-type", "application/x-www-form-urlencoded");
+        }
+
+        return Post(file, Codecs.nv2query(form_data), headers);
+    }
+
+
+    /**
+     * POSTs the data to the specified file. The data is converted to an array
+     * of bytes using the default character converter. The request is sent using
+     * the content-type "application/octet-stream".
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        java.lang.String#getBytes()
+     */
+    public HTTPResponse Post(String file, String data)
+        throws IOException, ModuleException
+    {
+        // Same as the (file, String data, headers) variant, with no extra
+        // headers.
+        return Post(file, data, null);
+    }
+
+
+    /**
+     * POSTs the data to the specified file using the specified headers.
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        java.lang.String#getBytes()
+     */
+    public HTTPResponse Post(String file, String data, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // A null or empty string is forwarded as a null body; anything else
+        // is converted with String.getBytes().
+        final boolean hasData = (data != null) && (data.length() > 0);
+        final byte[] raw = hasData ? data.getBytes() : null;
+
+        return Post(file, raw, headers);
+    }
+
+
+    /**
+     * POSTs the raw data to the specified file. The request is sent using the
+     * content-type "application/octet-stream"
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file, byte data[])
+        throws IOException, ModuleException
+    {
+        // Same as the (file, byte[] data, headers) variant, with no extra
+        // headers.
+        return Post(file, data, null);
+    }
+
+
+    /**
+     * POSTs the raw data to the specified file using the specified headers.
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file, byte data[], NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // POST must always have a CL, so a missing body becomes an empty one.
+        final byte[] body = (data != null) ? data : new byte[0];
+        final String target = stripRef(file);
+
+        return setupRequest("POST", target, headers, body, null);
+    }
+
+
+    /**
+     * POSTs the data written to the output stream to the specified file. The
+     * request is sent using the content-type "application/octet-stream"
+     *
+     * @param file                 the absolute path of the file
+     * @param stream               the output stream on which the data is
+     *      written
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file, HttpOutputStream stream)
+        throws IOException, ModuleException
+    {
+        // Same as the (file, stream, headers) variant, with no extra headers.
+        return Post(file, stream, null);
+    }
+
+
+    /**
+     * POSTs the data written to the output stream to the specified file using
+     * the specified headers.
+     *
+     * @param file                 the absolute path of the file
+     * @param stream               the output stream on which the data is
+     *      written
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Post(String file, HttpOutputStream stream,
+            NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // The body comes from the output stream, so no byte[] data is passed.
+        final String target = stripRef(file);
+        return setupRequest("POST", target, headers, null, stream);
+    }
+
+
+    /**
+     * PUTs the data into the specified file. The data is converted to an array
+     * of bytes using the default character converter. The request is sent
+     * using the content-type "application/octet-stream".
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        java.lang.String#getBytes()
+     */
+    public HTTPResponse Put(String file, String data)
+        throws IOException, ModuleException
+    {
+        // Same as the (file, String data, headers) variant, with no extra
+        // headers.
+        return Put(file, data, null);
+    }
+
+
+    /**
+     * PUTs the data into the specified file using the additional headers for
+     * the request.
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     * @see                        java.lang.String#getBytes()
+     */
+    public HTTPResponse Put(String file, String data, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // A null or empty string is forwarded as a null body; anything else
+        // is converted with String.getBytes().
+        final boolean hasData = (data != null) && (data.length() > 0);
+        final byte[] raw = hasData ? data.getBytes() : null;
+
+        return Put(file, raw, headers);
+    }
+
+
+    /**
+     * PUTs the raw data into the specified file. The request is sent using the
+     * content-type "application/octet-stream".
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Put(String file, byte data[])
+        throws IOException, ModuleException
+    {
+        // Same as the (file, byte[] data, headers) variant, with no extra
+        // headers.
+        return Put(file, data, null);
+    }
+
+
+    /**
+     * PUTs the raw data into the specified file using the additional headers.
+     *
+     * @param file                 the absolute path of the file
+     * @param data                 the data
+     * @param headers              any additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Put(String file, byte data[], NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // PUT must always have a CL, so a missing body becomes an empty one.
+        final byte[] body = (data != null) ? data : new byte[0];
+        final String target = stripRef(file);
+
+        return setupRequest("PUT", target, headers, body, null);
+    }
+
+
+    /**
+     * PUTs the data written to the output stream into the specified file. The
+     * request is sent using the content-type "application/octet-stream".
+     *
+     * @param file                 the absolute path of the file
+     * @param stream               the output stream on which the data is
+     *      written
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Put(String file, HttpOutputStream stream)
+        throws IOException, ModuleException
+    {
+        // Same as the (file, stream, headers) variant, with no extra headers.
+        return Put(file, stream, null);
+    }
+
+
+    /**
+     * PUTs the data written to the output stream into the specified file using
+     * the additional headers.
+     *
+     * @param file                 the absolute path of the file
+     * @param stream               the output stream on which the data is
+     *      written
+     * @param headers              any additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Put(String file, HttpOutputStream stream,
+            NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // The body comes from the output stream, so no byte[] data is passed.
+        final String target = stripRef(file);
+        return setupRequest("PUT", target, headers, null, stream);
+    }
+
+
+    /**
+     * Request OPTIONS from the server. If <var>file</var> is "*" then the
+     * request applies to the server as a whole; otherwise it applies only to
+     * that resource.
+     *
+     * @param file                 the absolute path of the resource, or "*"
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Options(String file)
+        throws IOException, ModuleException
+    {
+        // The (byte[]) cast selects the raw-data overload; a bare null would
+        // be ambiguous with the HttpOutputStream overload.
+        return Options(file, null, (byte[]) null);
+    }
+
+
+    /**
+     * Request OPTIONS from the server. If <var>file</var> is "*" then the
+     * request applies to the server as a whole; otherwise it applies only to
+     * that resource.
+     *
+     * @param file                 the absolute path of the resource, or "*"
+     * @param headers              the headers containing optional info.
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Options(String file, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // No body is sent; the (byte[]) cast selects the raw-data overload
+        // rather than the HttpOutputStream one.
+        return Options(file, headers, (byte[]) null);
+    }
+
+
+    /**
+     * Request OPTIONS from the server. If <var>file</var> is "*" then the
+     * request applies to the server as a whole; otherwise it applies only to
+     * that resource.
+     *
+     * @param file                 the absolute path of the resource, or "*"
+     * @param headers              the headers containing optional info.
+     * @param data                 any data to be sent in the optional body
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Options(String file, NVPair[] headers, byte[] data)
+        throws IOException, ModuleException
+    {
+        // The data array is passed on as given (it may be null); no output
+        // stream is used.
+        final String target = stripRef(file);
+        return setupRequest("OPTIONS", target, headers, data, null);
+    }
+
+
+    /**
+     * Request OPTIONS from the server. If <var>file</var> is "*" then the
+     * request applies to the server as a whole; otherwise it applies only to
+     * that resource.
+     *
+     * @param file                 the absolute path of the resource, or "*"
+     * @param headers              the headers containing optional info.
+     * @param stream               an output stream for sending the optional
+     *      body
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Options(String file, NVPair[] headers,
+            HttpOutputStream stream)
+        throws IOException, ModuleException
+    {
+        // The body comes from the output stream, so no byte[] data is passed.
+        final String target = stripRef(file);
+        return setupRequest("OPTIONS", target, headers, null, stream);
+    }
+
+
+    /**
+     * Requests that <var>file</var> be DELETEd from the server.
+     *
+     * @param file                 the absolute path of the resource
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Delete(String file)
+        throws IOException, ModuleException
+    {
+        // Same as the two-argument variant, with no extra headers.
+        return Delete(file, null);
+    }
+
+
+    /**
+     * Requests that <var>file</var> be DELETEd from the server.
+     *
+     * @param file                 the absolute path of the resource
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if an I/O error occurs
+     */
+    public HTTPResponse Delete(String file, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        // DELETE is issued without a body: neither data nor a stream.
+        final String target = stripRef(file);
+        return setupRequest("DELETE", target, headers, null, null);
+    }
+
+
+    /**
+     * Issues a TRACE request. The headers most likely to matter for this
+     * method are "Via" and "Max-Forwards". No request body is sent.
+     *
+     * @param file                 the absolute path of the resource
+     * @param headers              additional headers
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if <code>setupRequest()</code> reports an
+     *      I/O error
+     */
+    public HTTPResponse Trace(String file, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        final String resource = stripRef(file);
+        return setupRequest("TRACE", resource, headers, null, null);
+    }
+
+
+    /**
+     * Issues a TRACE request without any additional headers. Delegates to the
+     * two-argument <code>Trace()</code> with <code>null</code> headers.
+     *
+     * @param file                 the absolute path of the resource
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if the delegated request reports an I/O
+     *      error
+     */
+    public HTTPResponse Trace(String file)
+        throws IOException, ModuleException
+    {
+        final NVPair[] noHeaders = null;
+        return Trace(file, noHeaders);
+    }
+
+
+    /**
+     * Sends an arbitrary, non-standard request whose optional body is given as
+     * a byte array. The caller is assumed to know what it is doing; the only
+     * massaging done here is trimming the method name and passing the file
+     * through <code>stripRef()</code>.
+     *
+     * @param method               the extension method; must not be null
+     * @param file                 the absolute path of the resource, or null
+     * @param data                 optional data, or null
+     * @param headers              optional headers, or null
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if <code>setupRequest()</code> reports an
+     *      I/O error
+     */
+    public HTTPResponse ExtensionMethod(String method, String file,
+            byte[] data, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        final String verb = method.trim();
+        final String resource = stripRef(file);
+        return setupRequest(verb, resource, headers, data, null);
+    }
+
+
+    /**
+     * Sends an arbitrary, non-standard request whose optional body is written
+     * through an output stream. The caller is assumed to know what it is
+     * doing; the only massaging done here is trimming the method name and
+     * passing the file through <code>stripRef()</code>.
+     *
+     * @param method               the extension method; must not be null
+     * @param file                 the absolute path of the resource, or null
+     * @param headers              optional headers, or null
+     * @param os                   the output stream handed to
+     *      <code>setupRequest()</code> for the request body
+     * @return                     an HTTPResponse structure containing the
+     *      response
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if <code>setupRequest()</code> reports an
+     *      I/O error
+     */
+    public HTTPResponse ExtensionMethod(String method, String file,
+            HttpOutputStream os, NVPair[] headers)
+        throws IOException, ModuleException
+    {
+        final String verb = method.trim();
+        final String resource = stripRef(file);
+        return setupRequest(verb, resource, headers, null, os);
+    }
+
+
+    /**
+     * Aborts every request currently in progress on this connection and closes
+     * all associated sockets. Calling this is normally <em>not</em> necessary;
+     * it exists for abruptly stopping things, for example when the stop button
+     * of a browser is pressed. <P>
+     *
+     * Note: there is a small window in which a request method such as <code>Get()</code>
+     * has been invoked but the request has not yet been built and added to the
+     * list. A request caught in that window is not aborted.
+     *
+     * @since   V0.2-3
+     */
+    public void stop()
+    {
+        // flag every request on the list as aborted ...
+        Request pending = (Request) RequestList.enumerate();
+        while (pending != null)
+        {
+            pending.aborted = true;
+            pending = (Request) RequestList.next();
+        }
+
+        // ... then abort each stream demultiplexor
+        StreamDemultiplexor mux = (StreamDemultiplexor) DemuxList.enumerate();
+        while (mux != null)
+        {
+            mux.abort();
+            mux = (StreamDemultiplexor) DemuxList.next();
+        }
+    }
+
+
+    /**
+     * Installs the default http headers sent with each request. When the same
+     * header is specified in several places, a value given with the request
+     * wins over a default set here, which in turn wins over any built-in
+     * default. Put differently: the header list starts with those specified
+     * with the request, then gains any defaults from this method that are not
+     * yet present, and finally any built-in headers that are not yet present.
+     * <P>
+     *
+     * Exceptions to this rule: a "Content-length" default is always ignored
+     * (this method filters it out, together with <code>null</code> entries);
+     * and when posting form-data a default "Content-type" is ignored in favor
+     * of the built-in "application/x-www-form-urlencoded" (which is itself
+     * overridden by a content-type header specified with the request). <P>
+     *
+     * Typical headers to set here are "Accept" and its "Accept-*" relatives,
+     * "Connection", "From", "User-Agent", etc.
+     *
+     * @param headers  an array of header-name/value pairs (do not give the
+     *      separating ':'); <code>null</code> results in an empty default
+     *      list.
+     */
+    public void setDefaultHeaders(NVPair[] headers)
+    {
+        final int total = (headers != null) ? headers.length : 0;
+        NVPair[] kept = new NVPair[total];
+        int used = 0;
+
+        // copy everything except null slots and Content-length
+        for (int i = 0; i < total; i++)
+        {
+            NVPair candidate = headers[i];
+            if (candidate != null &&
+                    !candidate.getName().trim().equalsIgnoreCase("Content-length"))
+            {
+                kept[used++] = candidate;
+            }
+        }
+
+        // shrink the array if anything was filtered out
+        if (used < total)
+        {
+            kept = Util.resizeArray(kept, used);
+        }
+
+        synchronized (DefaultHeaders)
+        {
+            DefaultHeaders = kept;
+        }
+    }
+
+
+    /**
+     * Gets the current list of default http headers.
+     *
+     * @return   an array of header/value pairs.
+     */
+    public NVPair[] getDefaultHeaders()
+    {
+        synchronized (DefaultHeaders)
+        {
+            return (NVPair[]) DefaultHeaders.clone();
+        }
+    }
+
+
+    /**
+     * Names the protocol this connection is talking.
+     *
+     * @return   one of "http", "https", "shttp" or "http-ng"; an
+     *      <code>Error</code> is thrown if the internal protocol code is none
+     *      of the known values.
+     */
+    public String getProtocol()
+    {
+        String name;
+
+        switch (Protocol)
+        {
+            case HTTP:
+                name = "http";
+                break;
+            case HTTPS:
+                name = "https";
+                break;
+            case SHTTP:
+                name = "shttp";
+                break;
+            case HTTP_NG:
+                name = "http-ng";
+                break;
+            default:
+                throw new Error("HTTPClient Internal Error: invalid protocol " +
+                        Protocol);
+        }
+
+        return name;
+    }
+
+
+    /**
+     * Reports the host this connection is talking to.
+     *
+     * @return   a string containing the (lowercased) host name.
+     */
+    public String getHost()
+    {
+        return this.Host;
+    }
+
+
+    /**
+     * Reports the port this connection connects to. The value is always the
+     * actual port number, never -1.
+     *
+     * @return   the port number
+     */
+    public int getPort()
+    {
+        return this.Port;
+    }
+
+
+    /**
+     * Reports the host of the proxy this connection is using.
+     *
+     * @return   a string containing the (lowercased) proxy host name, or
+     *      <code>null</code> if the proxy has been disabled via
+     *      <code>setCurrentProxy()</code>.
+     */
+    public String getProxyHost()
+    {
+        return this.Proxy_Host;
+    }
+
+
+    /**
+     * Reports the port of the proxy this connection is using.
+     *
+     * @return   the proxy port number
+     */
+    public int getProxyPort()
+    {
+        return this.Proxy_Port;
+    }
+
+
+    /**
+     * Checks whether the given uri is compatible with this connection, i.e.
+     * whether it can be retrieved using this connection object. The scheme
+     * must equal this connection's protocol, the host must match ignoring
+     * case, and the port (or the scheme's default port when the uri carries
+     * none) must equal this connection's port.
+     *
+     * @param uri  the URI to check
+     * @return     true if they're compatible, false otherwise
+     * @since      V0.3-2
+     */
+    public boolean isCompatibleWith(URI uri)
+    {
+        if (!uri.getScheme().equals(getProtocol()))
+        {
+            return false;
+        }
+        if (!uri.getHost().equalsIgnoreCase(Host))
+        {
+            return false;
+        }
+
+        // -1 means "no explicit port": fall back to the scheme's default
+        final int given = uri.getPort();
+        final int effective =
+                (given == -1) ? URI.defaultPort(uri.getScheme()) : given;
+        return effective == Port;
+    }
+
+
+    /**
+     * Switches raw mode on or off. In raw mode all modules are bypassed, which
+     * turns off the automatic handling of authorization requests,
+     * redirections, cookies, etc. <P>
+     *
+     * The default is false. Module classes that cannot be loaded are silently
+     * skipped.
+     *
+     * @param raw    if true removes all modules (except for the retry module);
+     *      if false adds them back at the end of the module list
+     * @deprecated   This is not really needed anymore; in V0.2 request were
+     *      synchronous and therefore to do pipelining you needed to disable the
+     *      processing of responses.
+     * @see          #removeModule(java.lang.Class)
+     */
+    public void setRawMode(boolean raw)
+    {
+        // every standard module except the retry module, which always stays
+        final String[] optional = {
+                "HTTPClient.CookieModule",
+                "HTTPClient.RedirectionModule",
+                "HTTPClient.AuthorizationModule",
+                "HTTPClient.DefaultModule",
+                "HTTPClient.TransferEncodingModule",
+                "HTTPClient.ContentMD5Module",
+                "HTTPClient.ContentEncodingModule"};
+
+        for (int i = 0; i < optional.length; i++)
+        {
+            Class moduleClass;
+            try
+            {
+                moduleClass = Class.forName(optional[i]);
+            }
+            catch (ClassNotFoundException cnfe)
+            {
+                // module not available: nothing to add or remove
+                continue;
+            }
+
+            if (raw)
+            {
+                removeModule(moduleClass);
+            }
+            else
+            {
+                addModule(moduleClass, -1);
+            }
+        }
+    }
+
+
+    /**
+     * Sets the timeout that each newly created HTTPConnection starts out
+     * with. The default is 0.
+     *
+     * @param time  the timeout in milliseconds.
+     * @see         #setTimeout(int)
+     */
+    public static void setDefaultTimeout(int time)
+    {
+        HTTPConnection.DefaultTimeout = time;
+    }
+
+
+    /**
+     * Reports the timeout that each newly created HTTPConnection starts out
+     * with.
+     *
+     * @return   the timeout in milliseconds.
+     * @see      #setTimeout(int)
+     */
+    public static int getDefaultTimeout()
+    {
+        return HTTPConnection.DefaultTimeout;
+    }
+
+
+    /**
+     * Sets the timeout used for creating connections and for reading
+     * responses. When the timeout expires the operation throws an
+     * InterruptedIOException; the operation may be restarted afterwards. If a
+     * read operation (i.e. HTTPResponse.xxxx()) is not restarted then <code>resp.getInputStream().close()</code>
+     * <strong>should</strong> be invoked. <P>
+     *
+     * For new sockets the timeout limits the time spent on host name
+     * translation and on establishing the connection with the server. <P>
+     *
+     * The timeout also affects the reading of the response headers. It does
+     * not, however, bound how long a call such as getStatusCode() may take;
+     * it bounds how long a single read on the socket may take. If the response
+     * dribbles in slowly but packets keep arriving faster than the timeout,
+     * the method completes normally; the exception is thrown only when
+     * nothing arrives on the socket for the specified time. Furthermore, the
+     * timeout only affects the reading of the headers, not of the body. <P>
+     *
+     * Read timeouts are associated with responses, so this value may be
+     * changed before each request without affecting the reading of responses
+     * to previous requests.
+     *
+     * @param time  the time in milliseconds. A time of 0 means wait
+     *      indefinitely.
+     * @see         #stop()
+     */
+    public void setTimeout(int time)
+    {
+        this.Timeout = time;
+    }
+
+
+    /**
+     * Reports the timeout used for reading response data.
+     *
+     * @return   the current timeout value
+     * @see      #setTimeout(int)
+     */
+    public int getTimeout()
+    {
+        return this.Timeout;
+    }
+
+
+    /**
+     * Controls whether modules may prompt the user or pop up dialogs when
+     * necessary.
+     *
+     * @param allow  if true allows modules to interact with user.
+     */
+    public void setAllowUserInteraction(boolean allow)
+    {
+        this.allowUI = allow;
+    }
+
+
+    /**
+     * Reports whether modules may prompt the user or pop up dialogs when
+     * necessary.
+     *
+     * @return   true if modules are allowed to interact with user.
+     */
+    public boolean getAllowUserInteraction()
+    {
+        return this.allowUI;
+    }
+
+
+    /**
+     * Sets the default allow-user-interaction flag.
+     *
+     * @param allow  if true allows modules to interact with user.
+     */
+    public static void setDefaultAllowUserInteraction(boolean allow)
+    {
+        HTTPConnection.defaultAllowUI = allow;
+    }
+
+
+    /**
+     * Reports the default allow-user-interaction flag.
+     *
+     * @return   true if modules are allowed to interact with user.
+     */
+    public static boolean getDefaultAllowUserInteraction()
+    {
+        return HTTPConnection.defaultAllowUI;
+    }
+
+
+    /**
+     * Returns a snapshot of the default list of modules.
+     *
+     * @return   an array of classes
+     */
+    public static Class[] getDefaultModules()
+    {
+        return getModules(HTTPConnection.DefaultModuleList);
+    }
+
+
+    /**
+     * Adds a module to the default list. The module must implement the <var>
+     * HTTPClientModule</var> interface. If it is already in the list nothing
+     * happens. Only instances of HTTPConnection created after this call are
+     * affected; existing instances are not. <P>
+     *
+     * Example: <PRE>
+     * HTTPConnection.addDefaultModule(Class.forName("HTTPClient.CookieModule"), 1);
+     * </PRE> adds the cookie module as the second module in the list. <P>
+     *
+     * The default list is created at class initialization time from the
+     * property <var>HTTPClient.Modules</var> , which must contain a "|"
+     * separated list of classes in the order they are to be invoked. If the
+     * property is not set it defaults to: "HTTPClient.RetryModule |
+     * HTTPClient.CookieModule | HTTPClient.RedirectionModule |
+     * HTTPClient.AuthorizationModule | HTTPClient.DefaultModule |
+     * HTTPClient.TransferEncodingModule | HTTPClient.ContentMD5Module |
+     * HTTPClient.ContentEncodingModule"
+     *
+     * @param module  the module's Class object
+     * @param pos     the position of this module in the list; if <var>pos</var>
+     *      &gt;= 0 then this is the absolute position in the list (0 is the
+     *      first position); if <var>pos</var> &lt; 0 then this is the position
+     *      relative to the end of the list (-1 means the last element, -2 the
+     *      second to last element, etc).
+     * @return        true if module was successfully added; false if the module
+     *      is already in the list.
+     * @see           HTTPClientModule
+     */
+    public static boolean addDefaultModule(Class module, int pos)
+    {
+        return addModule(HTTPConnection.DefaultModuleList, module, pos);
+    }
+
+
+    /**
+     * Removes a module from the default list; does nothing if the module is
+     * not in the list. Only instances of HTTPConnection created after this
+     * call are affected; existing instances are not.
+     *
+     * @param module  the module's Class object
+     * @return        true if module was successfully removed; false otherwise
+     */
+    public static boolean removeDefaultModule(Class module)
+    {
+        return removeModule(HTTPConnection.DefaultModuleList, module);
+    }
+
+
+    /**
+     * Returns a snapshot of the list of modules currently used by this
+     * connection.
+     *
+     * @return   an array of classes
+     */
+    public Class[] getModules()
+    {
+        return getModules(this.ModuleList);
+    }
+
+
+    /**
+     * Adds a module to this connection's current list. The module must
+     * implement the <var>HTTPClientModule</var> interface. If it is already in
+     * the list nothing happens.
+     *
+     * @param module  the module's Class object
+     * @param pos     the position of this module in the list; if <var>pos</var>
+     *      &gt;= 0 then this is the absolute position in the list (0 is the
+     *      first position); if <var>pos</var> &lt; 0 then this is the position
+     *      relative to the end of the list (-1 means the last element, -2 the
+     *      second to last element, etc).
+     * @return        true if module was successfully added; false if the module
+     *      is already in the list.
+     * @see           HTTPClientModule
+     */
+    public boolean addModule(Class module, int pos)
+    {
+        return addModule(this.ModuleList, module, pos);
+    }
+
+
+    /**
+     * Removes a module from this connection's current list; does nothing if
+     * the module is not in the list.
+     *
+     * @param module  the module's Class object
+     * @return        true if module was successfully removed; false otherwise
+     */
+    public boolean removeModule(Class module)
+    {
+        return removeModule(this.ModuleList, module);
+    }
+
+
+    /**
+     * Copies the contents of a module list into a freshly allocated array. The
+     * copy is taken while holding the list's lock, so the array size and the
+     * copied contents are consistent.
+     *
+     * @param list  the module list to copy
+     * @return      a new array holding the list's elements in list order
+     */
+    private final static Class[] getModules(Vector list)
+    {
+        synchronized (list)
+        {
+            final int count = list.size();
+            Class[] snapshot = new Class[count];
+            list.copyInto(snapshot);
+            return snapshot;
+        }
+    }
+
+
+    /**
+     * Inserts a module class into the given module list unless it is already
+     * present. The class is first instantiated and cast to <var>
+     * HTTPClientModule</var> to verify that it is usable as a module.
+     *
+     * @param list    the module list to modify
+     * @param module  the module's Class object; <code>null</code> is rejected
+     * @param pos     the insert position; if &gt;= 0 it is the absolute
+     *      position in <var>list</var>; if &lt; 0 it is relative to the end of
+     *      <var>list</var> (-1 means the last element, -2 the second to last
+     *      element, etc).
+     * @return        true if the module was added; false if <var>module</var>
+     *      is null or already in the list
+     * @exception RuntimeException  if the class cannot be instantiated or does
+     *      not implement HTTPClientModule; checked exceptions are wrapped
+     */
+    private final static boolean addModule(Vector list, Class module, int pos)
+    {
+        if (module == null)
+        {
+            return false;
+        }
+
+        // check if module implements HTTPClientModule: instantiate it and
+        // cast; the instance itself is discarded
+        try
+        {
+            HTTPClientModule probe = (HTTPClientModule) module.newInstance();
+        }
+        catch (RuntimeException re)
+        {
+            throw re;
+        }
+        catch (Exception e)
+        {
+            throw new RuntimeException(e.toString());
+        }
+
+        synchronized (list)
+        {
+            // check if module already in list
+            if (list.contains(module))
+            {
+                return false;
+            }
+
+            // add module to list; a negative position counts back from the
+            // end of the list being modified, not of the default list, whose
+            // size may differ
+            if (pos < 0)
+            {
+                list.insertElementAt(module, list.size() + pos + 1);
+            }
+            else
+            {
+                list.insertElementAt(module, pos);
+            }
+        }
+
+        Log.write(Log.CONN, "Conn:  Added module " + module.getName() +
+                " to " +
+                ((list == DefaultModuleList) ? "default " : "") +
+                "list");
+
+        return true;
+    }
+
+
+    /**
+     * Removes a module class from the given module list and logs the removal.
+     *
+     * @param list    the module list to modify
+     * @param module  the module's Class object; <code>null</code> is ignored
+     * @return        true if the module was in the list and has been removed;
+     *      false otherwise
+     */
+    private final static boolean removeModule(Vector list, Class module)
+    {
+        if (module == null || !list.removeElement(module))
+        {
+            return false;
+        }
+
+        final String which = (list == DefaultModuleList) ? "default " : "";
+        Log.write(Log.CONN, "Conn:  Removed module " + module.getName() +
+                " from " + which + "list");
+
+        return true;
+    }
+
+
+    /**
+     * Sets the current context. Modules such as the AuthorizationModule and
+     * the CookieModule keep lists of info that is normally shared between all
+     * instances of HTTPConnection. That is usually what is wanted, but
+     * sometimes an application needs to simulate several independent clients
+     * and the sharing must be restricted. The context serves that purpose:
+     * modules only share their info between requests using the same context
+     * (i.e. they keep one list per context). <P>
+     *
+     * Any object may serve as context. Two contexts are considered equal if <code>equals()</code>
+     * returns true. Useful context objects are, for example, threads (when
+     * running multiple clients, one per thread) and sockets (when implementing
+     * a gateway). <P>
+     *
+     * A new HTTPConnection starts out with a default context that is the same
+     * for all instances. This method must be invoked immediately after the
+     * HTTPConnection is created and before any request method is invoked.
+     * Furthermore it may only be called once (i.e. the context is "sticky").
+     *
+     * @param context  the new context; must be non-null
+     * @exception IllegalArgumentException  if <var>context</var> is null
+     * @exception IllegalStateException     if a context has already been set
+     */
+    public void setContext(Object context)
+    {
+        if (context == null)
+        {
+            throw new IllegalArgumentException("Context must be non-null");
+        }
+
+        if (this.Context == null)
+        {
+            this.Context = context;
+            return;
+        }
+
+        throw new IllegalStateException("Context already set");
+    }
+
+
+    /**
+     * Returns the current context.
+     *
+     * @return   the current context, or the default context if <code>setContext()</code>
+     *      hasn't been invoked
+     * @see      #setContext(java.lang.Object)
+     */
+    public Object getContext()
+    {
+        if (Context != null)
+        {
+            return Context;
+        }
+        else
+        {
+            return dflt_context;
+        }
+    }
+
+
+    /**
+     * Reports the default context, i.e. the one used by connections on which
+     * <code>setContext()</code> has not been invoked.
+     *
+     * @return   the default context
+     * @see      #setContext(java.lang.Object)
+     */
+    public static Object getDefaultContext()
+    {
+        return HTTPConnection.dflt_context;
+    }
+
+
+    /**
+     * Registers an authorization entry for the "digest" authorization scheme.
+     * An existing entry for the "digest" scheme and the specified realm is
+     * overwritten. <P>
+     *
+     * This is a convenience method: it invokes the corresponding method in
+     * AuthorizationInfo with this connection's host, port and context.
+     *
+     * @param realm   the realm
+     * @param user    the username
+     * @param passwd  the password
+     * @see           AuthorizationInfo#addDigestAuthorization(java.lang.String,
+     *      int, java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void addDigestAuthorization(String realm, String user, String passwd)
+    {
+        AuthorizationInfo.addDigestAuthorization(
+                this.Host, this.Port, realm, user, passwd, getContext());
+    }
+
+
+    /**
+     * Registers an authorization entry for the "basic" authorization scheme.
+     * An existing entry for the "basic" scheme and the specified realm is
+     * overwritten. <P>
+     *
+     * This is a convenience method: it invokes the corresponding method in
+     * AuthorizationInfo with this connection's host, port and context.
+     *
+     * @param realm   the realm
+     * @param user    the username
+     * @param passwd  the password
+     * @see           AuthorizationInfo#addBasicAuthorization(java.lang.String,
+     *      int, java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void addBasicAuthorization(String realm, String user, String passwd)
+    {
+        AuthorizationInfo.addBasicAuthorization(
+                this.Host, this.Port, realm, user, passwd, getContext());
+    }
+
+
+    /**
+     * Sets the default proxy server to use. The proxy will only be used for new
+     * <var>HTTPConnection</var> s created after this call and will not affect
+     * currrent instances of <var>HTTPConnection</var> . A null or empty string
+     * <var>host</var> parameter disables the proxy. <P>
+     *
+     * In an application or using the Appletviewer an alternative to this method
+     * is to set the following properties (either in the properties file or on
+     * the command line): <var>http.proxyHost</var> and <var>http.proxyPort
+     * </var>. Whether <var>http.proxyHost</var> is set or not determines
+     * whether a proxy server is used. <P>
+     *
+     * If the proxy server requires authorization and you wish to set this
+     * authorization information in the code, then you may use any of the <var>
+     * AuthorizationInfo.addXXXAuthorization()</var> methods to do so. Specify
+     * the same <var>host</var> and <var>port</var> as in this method. If you
+     * have not given any authorization info and the proxy server requires
+     * authorization then you will be prompted for the necessary info via a
+     * popup the first time you do a request.
+     *
+     * @param host  the host on which the proxy server resides.
+     * @param port  the port the proxy server is listening on.
+     * @see         #setCurrentProxy(java.lang.String, int)
+     */
+    public static void setProxyServer(String host, int port)
+    {
+        if (host == null || host.trim().length() == 0)
+        {
+            Default_Proxy_Host = null;
+        }
+        else
+        {
+            Default_Proxy_Host = host.trim().toLowerCase();
+            Default_Proxy_Port = port;
+        }
+    }
+
+
+    /**
+     * Sets the proxy used by this instance, overriding whatever was inherited
+     * from the default proxy setting. A null or empty (after trimming) <var>
+     * host</var> disables the proxy. A <var>port</var> of 0 or less selects
+     * port 80. Afterwards the protocol version and keep-alive state are reset
+     * so that they are negotiated anew. <P>
+     *
+     * Note that if a proxy is set with this method and a request made over
+     * this connection is redirected to a different server, the connection used
+     * for the new server will <em>not</em> pick up this proxy setting but will
+     * use the default proxy settings instead.
+     *
+     * @param host  the host the proxy runs on
+     * @param port  the port the proxy is listening on
+     * @see         #setProxyServer(java.lang.String, int)
+     */
+    public synchronized void setCurrentProxy(String host, int port)
+    {
+        final String cleaned = (host == null) ? "" : host.trim();
+        if (cleaned.length() == 0)
+        {
+            Proxy_Host = null;
+        }
+        else
+        {
+            Proxy_Host = cleaned.toLowerCase();
+            Proxy_Port = (port <= 0) ? 80 : port;
+        }
+
+        // the proxy might be talking a different version, so renegotiate
+        switch (Protocol)
+        {
+            case HTTP:
+            case HTTPS:
+                final boolean legacy = force_1_0;
+                ServerProtocolVersion = legacy ? HTTP_1_0 : HTTP_1_1;
+                ServProtVersKnown = legacy;
+                RequestProtocolVersion = legacy ? "HTTP/1.0" : "HTTP/1.1";
+                break;
+            case HTTP_NG:
+                // server version unknown
+                ServerProtocolVersion = -1;
+                ServProtVersKnown = false;
+                RequestProtocolVersion = "";
+                break;
+            case SHTTP:
+                // server version unknown
+                ServerProtocolVersion = -1;
+                ServProtVersKnown = false;
+                RequestProtocolVersion = "Secure-HTTP/1.3";
+                break;
+            default:
+                throw new Error("HTTPClient Internal Error: invalid protocol " +
+                        Protocol);
+        }
+
+        // forget what was learned about keep-alive support
+        keepAliveUnknown = true;
+        doesKeepAlive = false;
+
+        // drop references tied to the previous connection state
+        input_demux = null;
+        early_stall = null;
+        late_stall = null;
+        prev_resp = null;
+    }
+
+
+    /**
+     * Add <var>host</var> to the list of hosts which should be accessed
+     * directly, not via any proxy set by <code>setProxyServer()</code>. <P>
+     *
+     * The <var>host</var> may be any of:
+     * <UL>
+     *   <LI> a complete host name (e.g. "www.disney.com")
+     *   <LI> a domain name; domain names must begin with a dot (e.g.
+     *   ".disney.com")
+     *   <LI> an IP-address (e.g. "12.34.56.78")
+     *   <LI> an IP-subnet, specified as an IP-address and a netmask separated
+     *   by a "/" (e.g. "34.56.78/255.255.255.192"); a 0 bit in the netmask
+     *   means that that bit won't be used in the comparison (i.e. the addresses
+     *   are AND'ed with the netmask before comparison).
+     * </UL>
+     * <P>
+     *
+     * The host is trimmed and lowercased before it is classified. Entries that
+     * are already present are not added a second time. <P>
+     *
+     * The two properties <var>HTTPClient.nonProxyHosts</var> and <var>
+     * http.nonProxyHosts</var> are used when this class is loaded to initialize
+     * the list of non-proxy hosts. The second property is only read if the
+     * first one is not set; the second property is also used by the JDK's
+     * URLConnection. These properties must contain a "|" separated list of
+     * entries which conform to the above rules for the <var>host</var>
+     * parameter (e.g. "11.22.33.44|.disney.com").
+     *
+     * @param host                a host name, domain name, IP-address or
+     *      IP-subnet; must not be null.
+     * @exception ParseException  if <var>host</var> is empty or blank, or if
+     *      the length of the netmask does not match the length of the
+     *      IP-address
+     */
+    public static void dontProxyFor(String host)
+        throws ParseException
+    {
+        host = host.trim().toLowerCase();
+
+        // an empty entry has no first character to classify; report it as
+        // unparsable instead of failing in charAt(0) below
+        if (host.length() == 0)
+        {
+            throw new ParseException("host must not be empty");
+        }
+
+        // check for domain name
+
+        if (host.charAt(0) == '.')
+        {
+            if (!non_proxy_dom_list.contains(host))
+            {
+                non_proxy_dom_list.addElement(host);
+            }
+            return;
+        }
+
+        // check for host name: anything containing a character other than a
+        // digit, '.' or '/' cannot be an IP-address or subnet
+
+        for (int idx = 0; idx < host.length(); idx++)
+        {
+            if (!Character.isDigit(host.charAt(idx)) &&
+                    host.charAt(idx) != '.' && host.charAt(idx) != '/')
+            {
+                non_proxy_host_list.put(host, "");
+                return;
+            }
+        }
+
+        // must be an IP-address
+
+        byte[] ip_addr;
+        byte[] ip_mask;
+        int slash;
+        if ((slash = host.indexOf('/')) != -1)
+        {
+            // IP subnet
+
+            ip_addr = string2arr(host.substring(0, slash));
+            ip_mask = string2arr(host.substring(slash + 1));
+            if (ip_addr.length != ip_mask.length)
+            {
+                throw new ParseException("length of IP-address (" +
+                        ip_addr.length + ") != length of netmask (" +
+                        ip_mask.length + ")");
+            }
+        }
+        else
+        {
+            // plain address: use an all-ones mask so every bit is compared
+            ip_addr = string2arr(host);
+            ip_mask = new byte[ip_addr.length];
+            for (int idx = 0; idx < ip_mask.length; idx++)
+            {
+                ip_mask[idx] = (byte) 255;
+            }
+        }
+
+        // check if addr or subnet already exists: an entry matches when the
+        // masks are identical and the masked addresses agree
+
+        ip_loop :
+        for (int idx = 0; idx < non_proxy_addr_list.size(); idx++)
+        {
+            byte[] addr = (byte[]) non_proxy_addr_list.elementAt(idx);
+            byte[] mask = (byte[]) non_proxy_mask_list.elementAt(idx);
+            if (addr.length != ip_addr.length)
+            {
+                continue;
+            }
+
+            for (int idx2 = 0; idx2 < addr.length; idx2++)
+            {
+                if ((ip_addr[idx2] & mask[idx2]) != (addr[idx2] & mask[idx2]) ||
+                        (mask[idx2] != ip_mask[idx2]))
+                {
+                    continue ip_loop;
+                }
+            }
+
+            return;
+            // already exists
+        }
+        non_proxy_addr_list.addElement(ip_addr);
+        non_proxy_mask_list.addElement(ip_mask);
+    }
+
+
+    /**
+     * Convenience method to add a number of hosts at once. Any entry that is
+     * null, empty or cannot be parsed is skipped; the remaining entries are
+     * still added.
+     *
+     * @param hosts  The list of hosts to set; null or an empty array is a
+     *      no-op
+     * @see          #dontProxyFor(java.lang.String)
+     * @since        V0.3-2
+     */
+    public static void dontProxyFor(String[] hosts)
+    {
+        if (hosts == null || hosts.length == 0)
+        {
+            return;
+        }
+
+        for (int idx = 0; idx < hosts.length; idx++)
+        {
+            if (hosts[idx] == null)
+            {
+                continue;
+            }
+
+            try
+            {
+                dontProxyFor(hosts[idx]);
+            }
+            catch (ParseException pe)
+            {
+                // ignore it
+            }
+            catch (NumberFormatException nfe)
+            {
+                // ignore it: a numeric component of an IP-address or netmask
+                // was empty or malformed (Integer.parseInt() in string2arr());
+                // one bad entry must not keep the rest from being added
+            }
+            catch (StringIndexOutOfBoundsException sioobe)
+            {
+                // ignore it: the entry was empty after trimming, so there was
+                // no first character to classify it by
+            }
+        }
+    }
+
+
+    /**
+     * Remove <var>host</var> from the list of hosts for which the proxy should
+     * not be used. This modifies the same lists that <code>dontProxyFor()</code>
+     * uses, i.e. this is used to undo a <code>dontProxyFor()</code> setting.
+     * The syntax for <var>host</var> is specified in
+     * <code>dontProxyFor()</code>.
+     *
+     * @param host                a host name, domain name, IP-address or
+     *      IP-subnet.
+     * @return                    true if the remove was successful, false
+     *      otherwise
+     * @exception ParseException  if <var>host</var> is empty, if an IP-address
+     *      or netmask component is not a number, or if the length of the
+     *      netmask does not match the length of the IP-address
+     * @see                       #dontProxyFor(java.lang.String)
+     */
+    public static boolean doProxyFor(String host)
+        throws ParseException
+    {
+        host = host.trim().toLowerCase();
+
+        // An empty entry cannot be classified. Report it through the declared
+        // ParseException; otherwise charAt(0) below would fail with a
+        // StringIndexOutOfBoundsException.
+
+        if (host.length() == 0)
+        {
+            throw new ParseException("empty host");
+        }
+
+        // check for domain name (leading '.')
+
+        if (host.charAt(0) == '.')
+        {
+            return non_proxy_dom_list.removeElement(host);
+        }
+
+        // check for host name: anything containing a character other than a
+        // digit, '.' or '/' is looked up as a host name
+
+        for (int idx = 0; idx < host.length(); idx++)
+        {
+            char ch = host.charAt(idx);
+            if (!Character.isDigit(ch) && ch != '.' && ch != '/')
+            {
+                return (non_proxy_host_list.remove(host) != null);
+            }
+        }
+
+        // must be an IP-address, optionally followed by "/netmask"
+
+        byte[] ip_addr;
+        byte[] ip_mask;
+        int slash = host.indexOf('/');
+        try
+        {
+            if (slash != -1)
+            {
+                // IP subnet
+
+                ip_addr = string2arr(host.substring(0, slash));
+                ip_mask = string2arr(host.substring(slash + 1));
+            }
+            else
+            {
+                // single address: use an all-ones mask of the same length
+
+                ip_addr = string2arr(host);
+                ip_mask = new byte[ip_addr.length];
+                for (int idx = 0; idx < ip_mask.length; idx++)
+                {
+                    ip_mask[idx] = (byte) 255;
+                }
+            }
+        }
+        catch (NumberFormatException nfe)
+        {
+            // string2arr() uses Integer.parseInt(), which rejects empty or
+            // otherwise malformed components (e.g. "1..2" or "1.2.3.4/");
+            // surface that as the checked exception callers already handle
+
+            throw new ParseException("invalid IP-address or netmask: " + host);
+        }
+
+        if (ip_addr.length != ip_mask.length)
+        {
+            throw new ParseException("length of IP-address (" +
+                    ip_addr.length + ") != length of netmask (" +
+                    ip_mask.length + ")");
+        }
+
+        // look for an entry with the same length, the same mask and the same
+        // masked address; remove the first one found
+
+        ip_loop :
+        for (int idx = 0; idx < non_proxy_addr_list.size(); idx++)
+        {
+            byte[] addr = (byte[]) non_proxy_addr_list.elementAt(idx);
+            byte[] mask = (byte[]) non_proxy_mask_list.elementAt(idx);
+            if (addr.length != ip_addr.length)
+            {
+                continue;
+            }
+
+            for (int idx2 = 0; idx2 < addr.length; idx2++)
+            {
+                if ((ip_addr[idx2] & mask[idx2]) != (addr[idx2] & mask[idx2]) ||
+                        (mask[idx2] != ip_mask[idx2]))
+                {
+                    continue ip_loop;
+                }
+            }
+
+            // address and mask live in two parallel lists, same index
+            non_proxy_addr_list.removeElementAt(idx);
+            non_proxy_mask_list.removeElementAt(idx);
+            return true;
+        }
+        return false;
+    }
+
+
+    /**
+     * Converts a dotted-decimal string into its components, one byte per
+     * component (e.g. "12.34.56.78" becomes { 12, 34, 56, 78 }). Each
+     * component is parsed with <code>Integer.parseInt()</code> and then
+     * narrowed to a byte.
+     *
+     * @param ip  IP-address
+     * @return    IP-address in network byte order
+     */
+    private static byte[] string2arr(String ip)
+    {
+        // there is one more component than there are dots
+        int dots = 0;
+        for (int at = ip.indexOf('.'); at != -1; at = ip.indexOf('.', at + 1))
+        {
+            dots++;
+        }
+        byte[] octets = new byte[dots + 1];
+
+        // every component except the last one ends at a dot
+        int start = 0;
+        for (int slot = 0; slot < dots; slot++)
+        {
+            int dot = ip.indexOf('.', start);
+            octets[slot] = (byte) Integer.parseInt(ip.substring(start, dot));
+            start = dot + 1;
+        }
+
+        // the last component runs to the end of the string
+        octets[dots] = (byte) Integer.parseInt(ip.substring(start));
+
+        return octets;
+    }
+
+
+    /**
+     * Sets the SOCKS server to use, on the default SOCKS port 1080. Only
+     * HTTPConnections created after this call use the server; current
+     * instances of HTTPConnection are not affected. A null or empty string
+     * host parameter disables SOCKS. <P>
+     *
+     * The SOCKS version is determined at connection time. This might fail
+     * for a number of reasons, however, in which case the version must be
+     * specified explicitly.
+     *
+     * @param host  the host on which the proxy server resides. The port used is
+     *      the default port 1080.
+     * @see         #setSocksServer(java.lang.String, int, int)
+     */
+    public static void setSocksServer(String host)
+    {
+        final int default_port = 1080;
+
+        setSocksServer(host, default_port);
+    }
+
+
+    /**
+     * Sets the SOCKS server to use. The server will only be used for new
+     * HTTPConnections created after this call and will not affect currrent
+     * instances of HTTPConnection. A null or empty string host parameter
+     * disables SOCKS. <P>
+     *
+     * The code will try to determine the SOCKS version to use at connection
+     * time. This might fail for a number of reasons, however, in which case you
+     * must specify the version explicitly.
+     *
+     * @param host  the host on which the proxy server resides.
+     * @param port  the port the proxy server is listening on.
+     * @see         #setSocksServer(java.lang.String, int, int)
+     */
+    public static void setSocksServer(String host, int port)
+    {
+        if (port <= 0)
+        {
+            port = 1080;
+        }
+
+        if (host == null || host.length() == 0)
+        {
+            Default_Socks_client = null;
+        }
+        else
+        {
+            Default_Socks_client = new SocksClient(host, port);
+        }
+    }
+
+
+    /**
+     * Sets the SOCKS server to use. The server will only be used for new
+     * HTTPConnections created after this call and will not affect currrent
+     * instances of HTTPConnection. A null or empty string host parameter
+     * disables SOCKS. <P>
+     *
+     * In an application or using the Appletviewer an alternative to this method
+     * is to set the following properties (either in the properties file or on
+     * the command line): <var>HTTPClient.socksHost</var> , <var>
+     * HTTPClient.socksPort</var> and <var>HTTPClient.socksVersion</var> .
+     * Whether <var>HTTPClient.socksHost</var> is set or not determines whether
+     * a SOCKS server is used; if <var>HTTPClient.socksPort</var> is not set it
+     * defaults to 1080; if <var>HTTPClient.socksVersion</var> is not set an
+     * attempt will be made to automatically determine the version used by the
+     * server. <P>
+     *
+     * Note: If you have also set a proxy server then a connection will be made
+     * to the SOCKS server, which in turn then makes a connection to the proxy
+     * server (possibly via other SOCKS servers), which in turn makes the final
+     * connection. <P>
+     *
+     * If the proxy server is running SOCKS version 5 and requires
+     * username/password authorization, and you wish to set this authorization
+     * information in the code, then you may use the <var>
+     * AuthorizationInfo.addAuthorization()</var> method to do so. Specify the
+     * same <var>host</var> and <var>port</var> as in this method, give the
+     * <var>scheme</var> "SOCKS5" and the <var>realm</var> "USER/PASS", set the
+     * <var>cookie</var> to null and the <var>params</var> to an array
+     * containing a single <var>NVPair</var> in turn containing the username and
+     * password. Example: <PRE>
+     *     NVPair[] up = { new NVPair(username, password) };
+     *     AuthorizationInfo.addAuthorization(host, port, "SOCKS5", "USER/PASS",
+     *                                        null, up);
+     * </PRE> If you have not given any authorization info and the proxy server
+     * requires authorization then you will be prompted for the necessary info
+     * via a popup the first time you do a request.
+     *
+     * @param host                the host on which the proxy server resides.
+     * @param port                the port the proxy server is listening on.
+     * @param version             the SOCKS version the server is running.
+     *      Currently this must be '4' or '5'.
+     * @exception SocksException  If <var>version</var> is not '4' or '5'.
+     */
+    public static void setSocksServer(String host, int port, int version)
+        throws SocksException
+    {
+        if (port <= 0)
+        {
+            port = 1080;
+        }
+
+        if (host == null || host.length() == 0)
+        {
+            Default_Socks_client = null;
+        }
+        else
+        {
+            Default_Socks_client = new SocksClient(host, port, version);
+        }
+    }
+
+
+    /**
+     * Cuts off everything from the first '#' onwards and trims surrounding
+     * whitespace from what is left. A null <var>file</var> yields "", as does
+     * a name with nothing left after stripping.
+     *
+     * @param file  the name to strip
+     * @return      the stripped name
+     */
+    private final String stripRef(String file)
+    {
+        if (file == null)
+        {
+            return "";
+        }
+
+        int hash = file.indexOf('#');
+        String stripped = (hash == -1) ? file : file.substring(0, hash);
+
+        return stripped.trim();
+    }
+
+
+    // private helper methods
+
+    /**
+     * Builds the request (merging the given headers with the defaults),
+     * creates a fresh set of module instances and hands both to
+     * <code>handleRequest()</code>. The request is on the request list only
+     * for the duration of this call. This may be invoked by subclasses which
+     * add further methods (such as those from DAV and IPP).
+     *
+     * @param method               GET, POST, etc.
+     * @param resource             the resource
+     * @param headers              an array of headers to be used
+     * @param entity               the entity (or null)
+     * @param stream               the output stream (or null) - only one of
+     *      stream and entity may be non-null
+     * @return                     the response.
+     * @exception ModuleException  if an exception is encountered in any module.
+     * @exception IOException      if handling the request throws it
+     */
+    protected final HTTPResponse setupRequest(String method, String resource,
+            NVPair[] headers, byte[] entity,
+            HttpOutputStream stream)
+        throws IOException, ModuleException
+    {
+        NVPair[] all_headers = mergedHeaders(headers);
+        Request req = new Request(this, method, resource, all_headers, entity,
+                stream, allowUI);
+
+        // the finally clause takes the request off the list again, whether
+        // handling succeeds or throws
+        RequestList.addToEnd(req);
+        try
+        {
+            HTTPClientModule[] mod_insts = gen_mod_insts();
+            HTTPResponse resp = new HTTPResponse(mod_insts, Timeout, req,
+                    defaultIncrement);
+
+            handleRequest(req, resp, null, true);
+            return resp;
+        }
+        finally
+        {
+            RequestList.remove(req);
+        }
+    }
+
+
+    /**
+     * Combines the default headers with the headers given for one request. A
+     * request header replaces a header of the same name (compared ignoring
+     * case and surrounding whitespace) that is already in the result;
+     * otherwise it is appended. Null entries and "Content-length" headers in
+     * <var>spec</var> are dropped.
+     *
+     * @param spec  the headers specified in the call to the method
+     * @return      an array consisting of merged headers.
+     */
+    private NVPair[] mergedHeaders(NVPair[] spec)
+    {
+        int spec_len = (spec == null) ? 0 : spec.length;
+        int defs_len;
+        NVPair[] merged;
+
+        // start out with a copy of the default headers, leaving room for
+        // every header from spec
+        synchronized (DefaultHeaders)
+        {
+            defs_len = (DefaultHeaders == null) ? 0 : DefaultHeaders.length;
+            merged = new NVPair[spec_len + defs_len];
+            System.arraycopy(DefaultHeaders, 0, merged, 0, defs_len);
+        }
+
+        // merge in selected headers; 'used' is the number of filled slots
+        int used = defs_len;
+        for (int sidx = 0; sidx < spec_len; sidx++)
+        {
+            NVPair candidate = spec[sidx];
+            if (candidate == null)
+            {
+                continue;
+            }
+
+            String s_name = candidate.getName().trim();
+            if (s_name.equalsIgnoreCase("Content-length"))
+            {
+                continue;
+            }
+
+            // find the slot already holding this name; if there is none the
+            // search ends on the first free slot
+            int slot = 0;
+            while (slot < used &&
+                    !merged[slot].getName().trim().equalsIgnoreCase(s_name))
+            {
+                slot++;
+            }
+
+            merged[slot] = candidate;
+            if (slot == used)
+            {
+                used++;
+            }
+        }
+
+        // trim the unused tail left by dropped or replaced headers
+        return (used < merged.length) ? Util.resizeArray(merged, used) : merged;
+    }
+
+
+    /**
+     * Generate an array of instances of the current modules.
+     *
+     * @return   Description of the Return Value
+     */
+    private HTTPClientModule[] gen_mod_insts()
+    {
+        synchronized (ModuleList)
+        {
+            HTTPClientModule[] mod_insts =
+                    new HTTPClientModule[ModuleList.size()];
+
+            for (int idx = 0; idx < ModuleList.size(); idx++)
+            {
+                Class mod = (Class) ModuleList.elementAt(idx);
+                try
+                {
+                    mod_insts[idx] = (HTTPClientModule) mod.newInstance();
+                }
+                catch (Exception e)
+                {
+                    throw new Error("HTTPClient Internal Error: could not " +
+                            "create instance of " + mod.getName() +
+                            " -\n" + e);
+                }
+            }
+
+            return mod_insts;
+        }
+    }
+
+
+    /**
+     * Handles the Request. First the request handler of each module is
+     * invoked; then, unless a module supplied a response or moved the request
+     * to another connection, the request is sent.
+     *
+     * @param req                  the Request
+     * @param http_resp            the HTTPResponse; supplies the module
+     *      instances and the timeout, and receives the resulting response
+     * @param resp                 the Response handed to the modules' request
+     *      handlers as the initial response (setupRequest() passes null)
+     * @param usemodules           if false then skip module loop
+     * @exception IOException      if any module or sendRequest throws it, or
+     *      if the request is found aborted after sending
+     * @exception ModuleException  if any module throws it
+     */
+    void handleRequest(Request req, HTTPResponse http_resp, Response resp,
+            boolean usemodules)
+        throws IOException, ModuleException
+    {
+        // one-element array so that a module can hand back a response
+        Response[] rsp_arr = {resp};
+        HTTPClientModule[] modules = http_resp.getModules();
+
+        // invoke requestHandler for each module
+
+        if (usemodules)
+        {
+            doModules :
+            for (int idx = 0; idx < modules.length; idx++)
+            {
+                int sts = modules[idx].requestHandler(req, rsp_arr);
+                switch (sts)
+                {
+                    case REQ_CONTINUE:
+                        // continue processing
+                        break;
+                    case REQ_RESTART:
+                        // restart processing with first module; the loop's
+                        // idx++ turns the -1 into 0
+                        idx = -1;
+                        continue doModules;
+                    case REQ_SHORTCIRC:
+                        // stop processing and send
+                        break doModules;
+                    case REQ_RESPONSE:
+                    // go to phase 2 (deliberate fall-through: shares the code
+                    // below and is told apart by the sts check further down)
+                    case REQ_RETURN:
+                        // return response immediately
+                        if (rsp_arr[0] == null)
+                        {
+                            throw new Error("HTTPClient Internal Error: no " +
+                                    "response returned by module " +
+                                    modules[idx].getClass().getName());
+                        }
+                        http_resp.set(req, rsp_arr[0]);
+                        // the request is not going to be sent, so tell its
+                        // output stream (if any) to ignore the data
+                        if (req.getStream() != null)
+                        {
+                            req.getStream().ignoreData(req);
+                        }
+                        if (req.internal_subrequest)
+                        {
+                            return;
+                        }
+                        if (sts == REQ_RESPONSE)
+                        {
+                            http_resp.handleResponse();
+                        }
+                        else
+                        {
+                            http_resp.init(rsp_arr[0]);
+                        }
+                        return;
+                    case REQ_NEWCON_RST:
+                        // new connection: hand over to the request's (new)
+                        // connection and run its modules from the start
+                        if (req.internal_subrequest)
+                        {
+                            return;
+                        }
+                        req.getConnection().
+                                handleRequest(req, http_resp, rsp_arr[0], true);
+                        return;
+                    case REQ_NEWCON_SND:
+                        // new connection, send immediately (module loop is
+                        // skipped on the new connection)
+                        if (req.internal_subrequest)
+                        {
+                            return;
+                        }
+                        req.getConnection().
+                                handleRequest(req, http_resp, rsp_arr[0], false);
+                        return;
+                    default:
+                        // not valid
+                        throw new Error("HTTPClient Internal Error: invalid status" +
+                                " " + sts + " returned by module " +
+                                modules[idx].getClass().getName());
+                }
+            }
+        }
+
+        // internal subrequests are never sent from here
+
+        if (req.internal_subrequest)
+        {
+            return;
+        }
+
+        // Send the request across the wire
+
+        // NOTE(review): a stream length of -1 presumably means the length of
+        // the data is not known in advance - confirm in HttpOutputStream
+        if (req.getStream() != null && req.getStream().getLength() == -1)
+        {
+            if (!ServProtVersKnown || ServerProtocolVersion < HTTP_1_1 ||
+                    no_chunked)
+            {
+                // chunking is not used here: start the stream without a
+                // socket output stream and use the stream itself as the
+                // response for now
+                req.getStream().goAhead(req, null, http_resp.getTimeout());
+                http_resp.set(req, req.getStream());
+            }
+            else
+            {
+                // add Transfer-Encoding header if necessary
+                int idx;
+                NVPair[] hdrs = req.getHeaders();
+                for (idx = 0; idx < hdrs.length; idx++)
+                {
+                    if (hdrs[idx].getName().equalsIgnoreCase("Transfer-Encoding"))
+                    {
+                        break;
+                    }
+                }
+
+                if (idx == hdrs.length)
+                {
+                    // no such header yet: append one
+                    hdrs = Util.resizeArray(hdrs, idx + 1);
+                    hdrs[idx] = new NVPair("Transfer-Encoding", "chunked");
+                    req.setHeaders(hdrs);
+                }
+                else
+                {
+                    // header present: add the "chunked" token if it is missing
+                    // NOTE(review): unlike the branch above, req.setHeaders()
+                    // is not called here; this only takes effect if
+                    // getHeaders() returns the request's own array - confirm
+                    String v = hdrs[idx].getValue();
+                    try
+                    {
+                        if (!Util.hasToken(v, "chunked"))
+                        {
+                            hdrs[idx] = new NVPair("Transfer-Encoding",
+                                    v + ", chunked");
+                        }
+                    }
+                    catch (ParseException pe)
+                    {
+                        // this method only declares IOException and
+                        // ModuleException, so rethrow as IOException
+                        throw new IOException(pe.toString());
+                    }
+                }
+
+                http_resp.set(req, sendRequest(req, http_resp.getTimeout()));
+            }
+        }
+        else
+        {
+            http_resp.set(req, sendRequest(req, http_resp.getTimeout()));
+        }
+
+        // the request may have been aborted while it was being sent
+
+        if (req.aborted)
+        {
+            throw new IOException("Request aborted by user");
+        }
+    }
+
+
+    /**
+     * These mark the response to stall the next request on, if any.
+     * sendRequest() waits for early_stall (via getVersion()) before it
+     * assembles the request headers and then clears it; it waits for
+     * late_stall inside its synchronized section, but only if input_demux is
+     * set or keepAliveUnknown is true, and clears it either way; it waits for
+     * prev_resp before sending a POST or a request flagged dont_pipeline,
+     * provided input_demux is set.
+     */
+    private volatile Response early_stall = null;
+    private volatile Response late_stall = null;
+    private volatile Response prev_resp = null;
+    /**
+     * This marks the socket output stream as still being used: while it is
+     * false, sendRequest() calls wait() before it goes on.
+     */
+    private boolean output_finished = true;
+
+
+    /**
+     * sends the request over the line.
+     *
+     * @param req                  the request
+     * @param con_timeout          the timeout to use when establishing a socket
+     *      connection; an InterruptedIOException is thrown if the procedure
+     *      times out.
+     * @return                     Description of the Return Value
+     * @exception IOException      if thrown by the socket
+     * @exception ModuleException  if any module throws it during the SSL-
+     *      tunneling handshake
+     */
+    Response sendRequest(Request req, int con_timeout)
+        throws IOException, ModuleException
+    {
+        ByteArrayOutputStream hdr_buf = new ByteArrayOutputStream(600);
+        Response resp = null;
+        boolean keep_alive;
+
+        // The very first request is special in that we need its response
+        // before any further requests may be made. This is to set things
+        // like the server version.
+
+        if (early_stall != null)
+        {
+            try
+            {
+                Log.write(Log.CONN, "Conn:  Early-stalling Request: " +
+                        req.getMethod() + " " +
+                        req.getRequestURI());
+
+                synchronized (early_stall)
+                {
+                    // wait till the response is received
+                    try
+                    {
+                        early_stall.getVersion();
+                    }
+                    catch (IOException ioe)
+                    {
+                    }
+                    early_stall = null;
+                }
+            }
+            catch (NullPointerException npe)
+            {
+            }
+        }
+
+        String[] con_hdrs = assembleHeaders(req, hdr_buf);
+
+        // determine if the connection should be kept alive after this
+        // request
+
+        try
+        {
+            if (ServerProtocolVersion >= HTTP_1_1 &&
+                    !Util.hasToken(con_hdrs[0], "close")
+                     ||
+                    ServerProtocolVersion == HTTP_1_0 &&
+                    Util.hasToken(con_hdrs[0], "keep-alive")
+                    )
+            {
+                keep_alive = true;
+            }
+            else
+            {
+                keep_alive = false;
+            }
+        }
+        catch (ParseException pe)
+        {
+            throw new IOException(pe.toString());
+        }
+
+        synchronized (this)
+        {
+            // Sometimes we must stall the pipeline until the previous request
+            // has been answered. However, if we are going to open up a new
+            // connection anyway we don't really need to stall.
+
+            if (late_stall != null)
+            {
+                if (input_demux != null || keepAliveUnknown)
+                {
+                    Log.write(Log.CONN, "Conn:  Stalling Request: " +
+                            req.getMethod() + " " + req.getRequestURI());
+
+                    try
+                    {
+                        // wait till the response is received
+
+                        late_stall.getVersion();
+                        if (keepAliveUnknown)
+                        {
+                            determineKeepAlive(late_stall);
+                        }
+                    }
+                    catch (IOException ioe)
+                    {
+                    }
+                }
+
+                late_stall = null;
+            }
+
+            /*
+             *  POSTs must not be pipelined because of problems if the connection
+             *  is aborted. Since it is generally impossible to know what urls
+             *  POST will influence it is impossible to determine if a sequence
+             *  of requests containing a POST is idempotent.
+             *  Also, for retried requests we don't want to pipeline either.
+             */
+            if ((req.getMethod().equals("POST") || req.dont_pipeline) &&
+                    prev_resp != null && input_demux != null)
+            {
+                Log.write(Log.CONN, "Conn:  Stalling Request: " +
+                        req.getMethod() + " " + req.getRequestURI());
+
+                try
+                {
+                    // wait till the response is received
+                    prev_resp.getVersion();
+                }
+                catch (IOException ioe)
+                {
+                }
+            }
+
+            // If the previous request used an output stream, then wait till
+            // all the data has been written
+
+            if (!output_finished)
+            {
+                try
+                {
+                    wait();
+                }
+                catch (InterruptedException ie)
+                {
+                    throw new IOException(ie.toString());
+                }
+            }
+
+            if (req.aborted)
+            {
+                throw new IOException("Request aborted by user");
+            }
+
+            int try_count = 3;
+            /*
+             *  what a hack! This is to handle the case where the server closes
+             *  the connection but we don't realize it until we try to send
+             *  something. The problem is that we only get IOException, but
+             *  we need a finer specification (i.e. whether it's an EPIPE or
+             *  something else); I don't trust relying on the message part
+             *  of IOException (which on SunOS/Solaris gives 'Broken pipe',
+             *  but what on Windoze/Mac?).
+             */
+            while (try_count-- > 0)
+            {
+                try
+                {
+                    // get a client socket
+
+                    Socket sock;
+                    if (input_demux == null ||
+                            (sock = input_demux.getSocket()) == null)
+                    {
+                        sock = getSocket(con_timeout);
+
+                        if (Protocol == HTTPS)
+                        {
+                            if (Proxy_Host != null)
+                            {
+                                Socket[] sarr = {sock};
+                                resp = enableSSLTunneling(sarr, req, con_timeout);
+                                if (resp != null)
+                                {
+                                    resp.final_resp = true;
+                                    return resp;
+                                }
+                                sock = sarr[0];
+                            }
+
+                            sock.setSoTimeout(con_timeout);
+                            //sock = new SSLSocket(sock);
+                        }
+
+                        input_demux = new StreamDemultiplexor(Protocol, sock, this);
+                        DemuxList.addToEnd(input_demux);
+                        keepAliveReqLeft = keepAliveReqMax;
+                    }
+
+                    if (req.aborted)
+                    {
+                        throw new IOException("Request aborted by user");
+                    }
+
+                    Log.write(Log.CONN, "Conn:  Sending Request: ", hdr_buf);
+
+                    // Send headers
+
+                    OutputStream sock_out = sock.getOutputStream();
+                    if (haveMSLargeWritesBug)
+                    {
+                        sock_out = new MSLargeWritesBugStream(sock_out);
+                    }
+
+                    hdr_buf.writeTo(sock_out);
+
+                    // Wait for "100 Continue" status if necessary
+
+                    try
+                    {
+                        if (ServProtVersKnown &&
+                                ServerProtocolVersion >= HTTP_1_1 &&
+                                Util.hasToken(con_hdrs[1], "100-continue"))
+                        {
+                            resp = new Response(req, (Proxy_Host != null && Protocol != HTTPS), input_demux);
+                            resp.timeout = 60;
+                            if (resp.getContinue() != 100)
+                            {
+                                break;
+                            }
+                        }
+                    }
+                    catch (ParseException pe)
+                    {
+                        throw new IOException(pe.toString());
+                    }
+                    catch (InterruptedIOException iioe)
+                    {
+                    }
+                    finally
+                    {
+                        if (resp != null)
+                        {
+                            resp.timeout = 0;
+                        }
+                    }
+
+                    // POST/PUT data
+
+                    if (req.getData() != null && req.getData().length > 0)
+                    {
+                        if (req.delay_entity > 0)
+                        {
+                            // wait for something on the network; check available()
+                            // roughly every 100 ms
+
+                            long num_units = req.delay_entity / 100;
+                            long one_unit = req.delay_entity / num_units;
+
+                            for (int idx = 0; idx < num_units; idx++)
+                            {
+                                if (input_demux.available(null) != 0)
+                                {
+                                    break;
+                                }
+                                try
+                                {
+                                    Thread.sleep(one_unit);
+                                }
+                                catch (InterruptedException ie)
+                                {
+                                }
+                            }
+
+                            if (input_demux.available(null) == 0)
+                            {
+                                sock_out.write(req.getData());
+                            }
+                            // he's still waiting
+                            else
+                            {
+                                keep_alive = false;
+                            }
+                            // Uh oh!
+                        }
+                        else
+                        {
+                            sock_out.write(req.getData());
+                        }
+                    }
+
+                    if (req.getStream() != null)
+                    {
+                        req.getStream().goAhead(req, sock_out, 0);
+                    }
+                    else
+                    {
+                        sock_out.flush();
+                    }
+
+                    // get a new response.
+                    // Note: this does not do a read on the socket.
+
+                    if (resp == null)
+                    {
+                        resp = new Response(req, (Proxy_Host != null &&
+                                Protocol != HTTPS),
+                                input_demux);
+                    }
+                }
+                catch (IOException ioe)
+                {
+                    Log.write(Log.CONN, "Conn:  ", ioe);
+
+                    closeDemux(ioe, true);
+
+                    if (try_count == 0 || ioe instanceof UnknownHostException ||
+                            ioe instanceof ConnectException ||
+                            ioe instanceof NoRouteToHostException ||
+                            ioe instanceof InterruptedIOException || req.aborted)
+                    {
+                        throw ioe;
+                    }
+
+                    Log.write(Log.CONN, "Conn:  Retrying request");
+                    continue;
+                }
+
+                break;
+            }
+
+            prev_resp = resp;
+
+            // close the stream after this response if necessary
+
+            if ((!keepAliveUnknown && !doesKeepAlive) || !keep_alive ||
+                    (keepAliveReqMax != -1 && keepAliveReqLeft-- == 0))
+            {
+                input_demux.markForClose(resp);
+                input_demux = null;
+            }
+            else
+            {
+                input_demux.restartTimer();
+            }
+
+            if (keepAliveReqMax != -1)
+            {
+                Log.write(Log.CONN, "Conn:  Number of requests left: " +
+                        keepAliveReqLeft);
+            }
+
+            /*
+             *  We don't pipeline the first request, as we need some info
+             *  about the server (such as which http version it complies with)
+             */
+            if (!ServProtVersKnown)
+            {
+                early_stall = resp;
+                resp.markAsFirstResponse(req);
+            }
+
+            /*
+             *  Also don't pipeline until we know if the server supports
+             *  keep-alive's or not.
+             *  Note: strictly speaking, HTTP/1.0 keep-alives don't mean we can
+             *  pipeline requests. I seem to remember some (beta?) version
+             *  of Netscape's Enterprise server which barfed if you tried
+             *  push requests down it's throat w/o waiting for the previous
+             *  response first. However, I've not been able to find such a
+             *  server lately, and so I'm taking the risk and assuming we
+             *  can in fact pipeline requests to HTTP/1.0 servers.
+             */
+            if (keepAliveUnknown ||
+            // We don't pipeline POST's ...
+                    !IdempotentSequence.methodIsIdempotent(req.getMethod()) ||
+                    req.dont_pipeline ||
+            // Retries disable pipelining too
+                    neverPipeline)
+            {
+                // Emergency measure: prevent all pipelining
+                late_stall = resp;
+            }
+
+            /*
+             *  If there is an output stream then just tell the other threads to
+             *  wait; the stream will notify() when it's done. If there isn't any
+             *  stream then wake up a waiting thread (if any).
+             */
+            if (req.getStream() != null)
+            {
+                output_finished = false;
+            }
+            else
+            {
+                output_finished = true;
+                notify();
+            }
+
+            // Looks like were finally done
+
+            Log.write(Log.CONN, "Conn:  Request sent");
+        }
+
+        return resp;
+    }
+
+
+    /**
+     * Gets a socket. Creates a socket to the proxy if set, or else to the
+     * actual destination.
+     *
+     * @param con_timeout      if not 0 then start a new thread to establish
+     *      the connection and join(con_timeout) it. If the join() times out an
+     *      InterruptedIOException is thrown.
+     * @return                 the socket that was opened
+     * @exception IOException  if the connection could not be established
+     */
+    private Socket getSocket(int con_timeout)
+        throws IOException
+    {
+        Socket sock = null;
+
+        String actual_host;
+        int actual_port;
+
+        if (Proxy_Host != null)
+        {
+            actual_host = Proxy_Host;
+            actual_port = Proxy_Port;
+        }
+        else
+        {
+            actual_host = Host;
+            actual_port = Port;
+        }
+
+        Log.write(Log.CONN, "Conn:  Creating Socket: " + actual_host + ":" +
+                actual_port);
+
+        if (con_timeout == 0)
+        {
+            // normal connection establishment (blocking, in this thread)
+
+            if (Socks_client != null)
+            {
+                sock = Socks_client.getSocket(actual_host, actual_port);
+            }
+            else
+            {
+                // try all A records
+                InetAddress[] addr_list = InetAddress.getAllByName(actual_host);
+                for (int idx = 0; idx < addr_list.length; idx++)
+                {
+                    try
+                    {
+                        if (LocalAddr == null)
+                        {
+                            sock = new Socket(addr_list[idx], actual_port);
+                        }
+                        else
+                        {
+                            sock = new Socket(addr_list[idx], actual_port,
+                                    LocalAddr, LocalPort);
+                        }
+                        break;
+                        // success
+                    }
+                    catch (SocketException se)
+                    {
+                        if (idx == addr_list.length - 1)
+                        {
+                            throw se;
+                        }
+                        // rethrown only for the last address; else try the next
+                    }
+                }
+            }
+        }
+        else
+        {
+            EstablishConnection con =
+                    new EstablishConnection(actual_host, actual_port, Socks_client);
+            con.start();
+            try
+            {
+                con.join((long) con_timeout);
+            }
+            catch (InterruptedException ie) // ignored: the checks below use whatever the thread produced
+            {
+            }
+
+            if (con.getException() != null)
+            {
+                throw con.getException();
+            }
+            if ((sock = con.getSocket()) == null)
+            {
+                con.forget(); // have the thread close a socket it opens later; re-check in case it just connected
+                if ((sock = con.getSocket()) == null)
+                {
+                    throw new InterruptedIOException("Connection establishment timed out");
+                }
+            }
+        }
+
+        return sock;
+    }
+
+
+    /**
+     * Enable SSL Tunneling if we're talking to a proxy. See ietf draft
+     * draft-luotonen-ssl-tunneling-03 for more info.
+     *
+     * @param sock                 array whose element 0 is the socket (replaced on retry)
+     * @param req                  the request initiating this connection
+     * @param timeout              the timeout (also used when reopening the socket)
+     * @return                     the proxy's last response if unsuccessful, or
+     *      null if tunnel successfully established
+     * @exception IOException
+     * @exception ModuleException
+     */
+    private Response enableSSLTunneling(Socket[] sock, Request req, int timeout)
+        throws IOException, ModuleException
+    {
+        // copy User-Agent and Proxy-Auth headers from request
+
+        Vector hdrs = new Vector();
+        for (int idx = 0; idx < req.getHeaders().length; idx++)
+        {
+            String name = req.getHeaders()[idx].getName();
+            if (name.equalsIgnoreCase("User-Agent") ||
+                    name.equalsIgnoreCase("Proxy-Authorization"))
+            {
+                hdrs.addElement(req.getHeaders()[idx]);
+            }
+        }
+
+        // create initial CONNECT subrequest
+
+        NVPair[] h = new NVPair[hdrs.size()];
+        hdrs.copyInto(h);
+        Request connect = new Request(this, "CONNECT", Host + ":" + Port, h,
+                null, null, req.allowUI());
+        connect.internal_subrequest = true;
+
+        ByteArrayOutputStream hdr_buf = new ByteArrayOutputStream(600);
+        HTTPResponse r = new HTTPResponse(gen_mod_insts(), timeout, connect, defaultIncrement);
+
+        // send and handle CONNECT request until successful or tired
+
+        Response resp = null;
+
+        while (true)
+        {
+            handleRequest(connect, r, resp, true);
+
+            hdr_buf.reset();
+            assembleHeaders(connect, hdr_buf);
+
+            Log.write(Log.CONN, "Conn:  Sending SSL-Tunneling Subrequest: ",
+                    hdr_buf);
+
+            // send CONNECT
+
+            hdr_buf.writeTo(sock[0].getOutputStream());
+
+            // return if successful
+
+            resp = new Response(connect, sock[0].getInputStream());
+            if (resp.getStatusCode() == 200)
+            {
+                return null;
+            }
+
+            // failed!
+
+            // make life easy: read data and close socket
+
+            try
+            {
+                resp.getData();
+            }
+            catch (IOException ioe) // best effort: the socket is closed below anyway
+            {
+            }
+            try
+            {
+                sock[0].close();
+            }
+            catch (IOException ioe) // best effort: a failed close is ignored
+            {
+            }
+
+            // handle response: retry only if handleResponse() returns true
+
+            r.set(connect, resp);
+            if (!r.handleResponse())
+            {
+                return resp;
+            }
+
+            sock[0] = getSocket(timeout);
+        }
+    }
+
+
+    /**
+     * This writes out the headers on the <var>hdr_buf</var> . It takes special
+     * precautions for the following headers: <DL> <DT>Content-type<DD>This is
+     * only written if the request has an entity. If the request has an entity
+     * and no content-type header was given for the request it defaults to
+     * "application/octet-stream" <DT>Content-length<DD>This header is generated
+     * if the request has an entity and the entity isn't being sent with the
+     * Transfer-Encoding "chunked". <DT>User-Agent <DD>If not present it will be
+     * generated from the <var>version</var> field. Otherwise the given
+     * User-Agent value is written unchanged. <DT>Connection <DD>This header
+     * is always written and always carries the "TE" token (plus "Upgrade"
+     * if an Upgrade header is given). Unless the server is known to
+     * understand HTTP/1.1 or later and no connection header was given, it
+     * also carries the given connection value or "Keep-Alive". <DT>
+     * Proxy-Connection<DD>If a proxy is used for a non-HTTPS request and the
+     * server is not known to understand HTTP/1.1 or later, the connection
+     * value is written here instead. <DT>Keep-Alive <DD>A given Keep-Alive
+     * header is only written if that value has the keep-alive token. <DT>
+     * Expect <DD>If there is no entity and this header contains the
+     * "100-continue" token then this token is removed before writing the
+     * header. <DT>TE <DD>If this header does not exist, it is created; else if
+     * the "trailers" token is not specified this token is added; else the
+     * header is not touched. </DL> Furthermore, it escapes various characters
+     * in request-URI.
+     *
+     * @param req              the Request
+     * @param hdr_buf          the buffer onto which to write the headers
+     * @return                 an array of two strings; the first element is
+     *      the connection value (given or "Keep-Alive", "" if none), the
+     *      second element the value written for the Expect header ("" if none).
+     * @exception IOException  if writing on <var>hdr_buf</var> generates an
+     *      IOException, or if an error occurs during parsing of a header
+     */
+    private String[] assembleHeaders(Request req,
+            ByteArrayOutputStream hdr_buf)
+        throws IOException
+    {
+        DataOutputStream dataout = new DataOutputStream(hdr_buf);
+        String[] con_hdrs = {"", ""};
+        NVPair[] hdrs = req.getHeaders();
+
+        // positions of the special-cased headers in hdrs (-1 = absent): Host
+
+        int ho_idx = -1;
+
+        // Content-type
+
+        int
+                ct_idx = -1;
+
+        // User-Agent
+
+        int
+                ua_idx = -1;
+
+        // Connection
+
+        int
+                co_idx = -1;
+
+        // Proxy-Connection
+
+        int
+                pc_idx = -1;
+
+        // Keep-Alive
+
+        int
+                ka_idx = -1;
+
+        // Expect
+
+        int
+                ex_idx = -1;
+
+        // TE
+
+        int
+                te_idx = -1;
+
+        // Transfer-Encoding
+
+        int
+                tc_idx = -1;
+
+        // Upgrade
+
+        int
+                ug_idx = -1;
+        for (int idx = 0; idx < hdrs.length; idx++)
+        {
+            String name = hdrs[idx].getName().trim().toLowerCase();
+            if (name.equals("host"))
+            {
+                ho_idx = idx;
+            }
+            else if (name.equals("content-type"))
+            {
+                ct_idx = idx;
+            }
+            else if (name.equals("user-agent"))
+            {
+                ua_idx = idx;
+            }
+            else if (name.equals("connection"))
+            {
+                co_idx = idx;
+            }
+            else if (name.equals("proxy-connection"))
+            {
+                pc_idx = idx;
+            }
+            else if (name.equals("keep-alive"))
+            {
+                ka_idx = idx;
+            }
+            else if (name.equals("expect"))
+            {
+                ex_idx = idx;
+            }
+            else if (name.equals("te"))
+            {
+                te_idx = idx;
+            }
+            else if (name.equals("transfer-encoding"))
+            {
+                tc_idx = idx;
+            }
+            else if (name.equals("upgrade"))
+            {
+                ug_idx = idx;
+            }
+        }
+
+        // Generate request line and Host header
+
+        String file = Util.escapeUnsafeChars(req.getRequestURI());
+        if (Proxy_Host != null && Protocol != HTTPS && !file.equals("*"))
+        {
+            dataout.writeBytes(req.getMethod() + " http://" + Host + ":" + Port +
+                    file + " " + RequestProtocolVersion + "\r\n");
+        }
+        else
+        {
+            dataout.writeBytes(req.getMethod() + " " + file + " " +
+                    RequestProtocolVersion + "\r\n");
+        }
+
+        String h_hdr = (ho_idx >= 0) ? hdrs[ho_idx].getValue().trim() : Host;
+        if (Port != URI.defaultPort(getProtocol()))
+        {
+            dataout.writeBytes("Host: " + h_hdr + ":" + Port + "\r\n");
+        }
+        else
+        {
+            // Netscape-Enterprise has some bugs...
+            dataout.writeBytes("Host: " + h_hdr + "\r\n");
+        }
+
+        /*
+         *  What follows is the setup for persistent connections. We default
+         *  to doing persistent connections for both HTTP/1.0 and HTTP/1.1,
+         *  unless we're using a proxy server and HTTP/1.0 in which case we
+         *  must make sure we don't do persistence (because of the problem of
+         *  1.0 proxies blindly passing the Connection header on).
+         *
+         *  Note: there is a "Proxy-Connection" header for use with proxies.
+         *  This however is only understood by Netscape and Netapp caches.
+         *  Furthermore, it suffers from the same problem as the Connection
+         *  header in HTTP/1.0 except that at least two proxies must be
+         *  involved. But I've taken the risk now and decided to send the
+         *  Proxy-Connection header. If I get complaints I'll remove it again.
+         *
+         *  In any case, with this header we can now modify the above to send
+         *  the Proxy-Connection header whenever we wouldn't send the normal
+         *  Connection header.
+         */
+        String co_hdr = null;
+        if (!(ServProtVersKnown && ServerProtocolVersion >= HTTP_1_1 &&
+                co_idx == -1))
+        {
+            if (co_idx == -1)
+            {
+                // no connection header given by user
+                co_hdr = "Keep-Alive";
+                con_hdrs[0] = "Keep-Alive";
+            }
+            else
+            {
+                con_hdrs[0] = hdrs[co_idx].getValue().trim();
+                co_hdr = con_hdrs[0];
+            }
+
+            try
+            {
+                if (ka_idx != -1 &&
+                        Util.hasToken(con_hdrs[0], "keep-alive"))
+                {
+                    dataout.writeBytes("Keep-Alive: " +
+                            hdrs[ka_idx].getValue().trim() + "\r\n");
+                }
+            }
+            catch (ParseException pe)
+            {
+                throw new IOException(pe.toString());
+            }
+        }
+
+        if ((Proxy_Host != null && Protocol != HTTPS) &&
+                !(ServProtVersKnown && ServerProtocolVersion >= HTTP_1_1))
+        {
+            if (co_hdr != null)
+            {
+                dataout.writeBytes("Proxy-Connection: ");
+                dataout.writeBytes(co_hdr);
+                dataout.writeBytes("\r\n");
+                co_hdr = null;
+            }
+        }
+
+        if (co_hdr != null)
+        {
+            try
+            {
+                if (!Util.hasToken(co_hdr, "TE"))
+                {
+                    co_hdr += ", TE";
+                }
+            }
+            catch (ParseException pe)
+            {
+                throw new IOException(pe.toString());
+            }
+        }
+        else
+        {
+            co_hdr = "TE";
+        }
+
+        if (ug_idx != -1)
+        {
+            co_hdr += ", Upgrade";
+        }
+
+        if (co_hdr != null) // always true here: co_hdr holds at least "TE" by now
+        {
+            dataout.writeBytes("Connection: ");
+            dataout.writeBytes(co_hdr);
+            dataout.writeBytes("\r\n");
+        }
+
+        // handle TE header
+
+        if (te_idx != -1)
+        {
+            dataout.writeBytes("TE: ");
+            Vector pte;
+            try
+            {
+                pte = Util.parseHeader(hdrs[te_idx].getValue());
+            }
+            catch (ParseException pe)
+            {
+                throw new IOException(pe.toString());
+            }
+
+            if (!pte.contains(new HttpHeaderElement("trailers")))
+            {
+                dataout.writeBytes("trailers, ");
+            }
+
+            dataout.writeBytes(hdrs[te_idx].getValue().trim() + "\r\n");
+        }
+        else
+        {
+            dataout.writeBytes("TE: trailers\r\n");
+        }
+
+        // User-Agent: the given value as is, or else the version field
+
+        if (ua_idx != -1)
+        {
+            dataout.writeBytes("User-Agent: " + hdrs[ua_idx].getValue().trim() + "\r\n");
+        }
+        else
+        {
+            dataout.writeBytes("User-Agent: " + version + "\r\n");
+        }
+
+        // Write out any headers left (those handled separately are skipped)
+
+        for (int idx = 0; idx < hdrs.length; idx++)
+        {
+            if (idx != ct_idx && idx != ua_idx && idx != co_idx &&
+                    idx != pc_idx && idx != ka_idx && idx != ex_idx &&
+                    idx != te_idx && idx != ho_idx)
+            {
+                dataout.writeBytes(hdrs[idx].getName().trim() + ": " +
+                        hdrs[idx].getValue().trim() + "\r\n");
+            }
+        }
+
+        // Handle Content-type, Content-length and Expect headers
+
+        if (req.getData() != null || req.getStream() != null)
+        {
+            dataout.writeBytes("Content-type: ");
+            if (ct_idx != -1)
+            {
+                dataout.writeBytes(hdrs[ct_idx].getValue().trim());
+            }
+            else
+            {
+                dataout.writeBytes("application/octet-stream");
+            }
+            dataout.writeBytes("\r\n");
+
+            if (req.getData() != null)
+            {
+                dataout.writeBytes("Content-length: " + req.getData().length +
+                        "\r\n");
+            }
+            else if (req.getStream().getLength() != -1 && tc_idx == -1)
+            {
+                dataout.writeBytes("Content-length: " +
+                        req.getStream().getLength() + "\r\n");
+            }
+
+            if (ex_idx != -1)
+            {
+                con_hdrs[1] = hdrs[ex_idx].getValue().trim();
+                dataout.writeBytes("Expect: " + con_hdrs[1] + "\r\n");
+            }
+        }
+        else if (ex_idx != -1)
+        {
+            Vector expect_tokens;
+            try
+            {
+                expect_tokens = Util.parseHeader(hdrs[ex_idx].getValue());
+            }
+            catch (ParseException pe)
+            {
+                throw new IOException(pe.toString());
+            }
+
+            // remove any 100-continue tokens (there is no entity to wait for)
+
+            HttpHeaderElement cont = new HttpHeaderElement("100-continue");
+            while (expect_tokens.removeElement(cont))
+            {
+                ;
+            }
+
+            // write out header if any tokens left
+
+            if (!expect_tokens.isEmpty())
+            {
+                con_hdrs[1] = Util.assembleHeader(expect_tokens);
+                dataout.writeBytes("Expect: " + con_hdrs[1] + "\r\n");
+            }
+        }
+
+        dataout.writeBytes("\r\n");
+        // end of header
+
+        return con_hdrs;
+    }
+
+
+    /**
+     * The very first request is special in that we use it to figure out the
+     * protocol version the server (or proxy) is compliant with.
+     *
+     * @param req              Description of the Parameter
+     * @param resp             Description of the Parameter
+     * @return                 true if all went fine, false if the request needs
+     *      to be resent
+     * @exception IOException  if any exception is thrown by the response
+     */
+    boolean handleFirstRequest(Request req, Response resp)
+        throws IOException
+    {
+        // read response headers to get protocol version used by
+        // the server.
+
+        ServerProtocolVersion = String2ProtVers(resp.getVersion());
+        ServProtVersKnown = true;
+
+        /*
+         *  We need to treat connections through proxies specially, because
+         *  many HTTP/1.0 proxies do not downgrade an HTTP/1.1 response
+         *  version to HTTP/1.0 (i.e. when we are talking to an HTTP/1.1
+         *  server through an HTTP/1.0 proxy we are mislead to thinking we're
+         *  talking to an HTTP/1.1 proxy). We use the absence of the Via
+         *  header to detect whether we're talking to an HTTP/1.0 proxy,
+         *  unless the status code indicates an error from the proxy
+         *  itself. However, this only works when the chain contains
+         *  only HTTP/1.0 proxies; if you have <client - 1.0 proxy - 1.1
+         *  proxy - server> then this will fail too. Unfortunately there
+         *  seems to be no way to reliably detect broken HTTP/1.0
+         *  proxies...
+         */
+        int sts = resp.getStatusCode();
+        if ((Proxy_Host != null && Protocol != HTTPS) &&
+                resp.getHeader("Via") == null &&
+                sts != 407 && sts != 502 && sts != 504)
+        {
+            ServerProtocolVersion = HTTP_1_0;
+        }
+
+        Log.write(Log.CONN, "Conn:  Protocol Version established: " +
+                ProtVers2String(ServerProtocolVersion));
+
+        // some (buggy) servers return an error status if they get a
+        // version they don't comprehend
+
+        if (ServerProtocolVersion == HTTP_1_0 &&
+                (resp.getStatusCode() == 400 || resp.getStatusCode() == 500))
+        {
+            if (input_demux != null)
+            {
+                input_demux.markForClose(resp);
+            }
+            input_demux = null;
+            RequestProtocolVersion = "HTTP/1.0";
+            return false;
+        }
+
+        return true;
+    }
+
+
+    /**
+     * Description of the Method
+     *
+     * @param resp             Description of the Parameter
+     * @exception IOException  Description of the Exception
+     */
+    private void determineKeepAlive(Response resp)
+        throws IOException
+    {
+        // try and determine if this server does keep-alives
+
+        String con;
+
+        try
+        {
+            if (ServerProtocolVersion >= HTTP_1_1 ||
+                    (
+                    (
+                    ((Proxy_Host == null || Protocol == HTTPS) &&
+                    (con = resp.getHeader("Connection")) != null)
+                     ||
+                    ((Proxy_Host != null && Protocol != HTTPS) &&
+                    (con = resp.getHeader("Proxy-Connection")) != null)
+                    ) &&
+                    Util.hasToken(con, "keep-alive")
+                    )
+                    )
+            {
+                doesKeepAlive = true;
+                keepAliveUnknown = false;
+
+                Log.write(Log.CONN, "Conn:  Keep-Alive enabled");
+            }
+            else if (resp.getStatusCode() < 400)
+            {
+                keepAliveUnknown = false;
+            }
+
+            // get maximum number of requests
+
+            if (doesKeepAlive && ServerProtocolVersion == HTTP_1_0 &&
+                    (con = resp.getHeader("Keep-Alive")) != null)
+            {
+                HttpHeaderElement max =
+                        Util.getElement(Util.parseHeader(con), "max");
+                if (max != null && max.getValue() != null)
+                {
+                    keepAliveReqMax = Integer.parseInt(max.getValue());
+                    keepAliveReqLeft = keepAliveReqMax;
+
+                    Log.write(Log.CONN, "Conn:  Max Keep-Alive requests: " +
+                            keepAliveReqMax);
+                }
+            }
+        }
+        catch (ParseException pe)
+        {
+        }
+        catch (NumberFormatException nfe)
+        {
+        }
+        catch (ClassCastException cce)
+        {
+        }
+    }
+
+
+    /**
+     * Marks the request output as finished and wakes up one waiting thread.
+     */
+    synchronized void outputFinished()
+    {
+        this.output_finished = true;
+        this.notify();
+    }
+
+
+    /**
+     * Closes input_demux (if set) and clears the stall and previous responses.
+     *
+     * @param ioe        passed through to the demultiplexor's close()
+     * @param was_reset  passed through to the demultiplexor's close()
+     */
+    synchronized void closeDemux(IOException ioe, boolean was_reset)
+    {
+        if (this.input_demux != null)
+        {
+            this.input_demux.close(ioe, was_reset);
+        }
+
+        this.early_stall = null;
+        this.late_stall = null;
+        this.prev_resp = null;
+    }
+
+
+    /**
+     * Description of the Method
+     *
+     * @param prot_vers  Description of the Parameter
+     * @return           Description of the Return Value
+     */
+    final static String ProtVers2String(int prot_vers)
+    {
+        return "HTTP/" + (prot_vers >>> 16) + "." + (prot_vers & 0xFFFF);
+    }
+
+
+    /**
+     * Parses "HTTP/major.minor" into an int holding major in the upper 16
+     * bits and minor in the lower 16 bits. The first five characters are
+     * skipped without being checked.
+     */
+    final static int String2ProtVers(String prot_vers)
+    {
+        String numbers = prot_vers.substring(5);
+        int sep = numbers.indexOf('.');
+        int major = Integer.parseInt(numbers.substring(0, sep));
+        int minor = Integer.parseInt(numbers.substring(sep + 1));
+        return (major << 16) | minor;
+    }
+
+
+    /** @return   protocol://host[:port], the port being omitted when it is the default */
+    public String toString()
+    {
+        String url = getProtocol() + "://" + getHost();
+        if (getPort() != URI.defaultPort(getProtocol()))
+        {
+            url = url + ":" + getPort();
+        }
+        return url;
+    }
+
+
+    /**
+     * Thread that opens the socket on behalf of getSocket(int), so that the
+     * caller can give up after a timeout. Results are handed over under this
+     * object's monitor: once forget() has been called, a socket that gets
+     * connected afterwards is closed here and never becomes visible.
+     */
+    private class EstablishConnection extends Thread
+    {
+        String actual_host;
+        int actual_port;
+        IOException exception;
+        Socket sock;
+        SocksClient Socks_client;
+        volatile boolean close;
+
+
+        /**
+         * Sets up the thread (as a daemon, if permitted); does not start it.
+         *
+         * @param host   host to connect to
+         * @param port   port to connect to
+         * @param socks  SOCKS client to connect through, or null for direct
+         */
+        EstablishConnection(String host, int port, SocksClient socks)
+        {
+            super("EstablishConnection (" + host + ":" + port + ")");
+            try
+            {
+                setDaemon(true);
+            }
+            catch (SecurityException se)
+            {
+            }
+            // Oh well...
+
+            actual_host = host;
+            actual_port = port;
+            Socks_client = socks;
+
+            exception = null;
+            sock = null;
+            close = false;
+        }
+
+
+        /** Connects, then publishes either the socket or the exception. */
+        public void run()
+        {
+            Socket s = null;
+            IOException failure = null;
+            try
+            {
+                if (Socks_client != null)
+                {
+                    s = Socks_client.getSocket(actual_host, actual_port);
+                }
+                else
+                {
+                    // try all A records
+                    InetAddress[] addr_list = InetAddress.getAllByName(actual_host);
+                    for (int idx = 0; idx < addr_list.length; idx++)
+                    {
+                        try
+                        {
+                            if (LocalAddr == null)
+                            {
+                                s = new Socket(addr_list[idx], actual_port);
+                            }
+                            else
+                            {
+                                s = new Socket(addr_list[idx], actual_port,
+                                        LocalAddr, LocalPort);
+                            }
+                            break;
+                            // success
+                        }
+                        catch (SocketException se)
+                        {
+                            // give up after the last address, or once forgotten
+                            if (idx == addr_list.length - 1 || close)
+                            {
+                                throw se;
+                            }
+                        }
+                    }
+                }
+            }
+            catch (IOException ioe)
+            {
+                failure = ioe;
+            }
+
+            // Publish atomically with respect to forget(): either the caller
+            // sees the socket, or it has given up and the socket is closed.
+            synchronized (this)
+            {
+                exception = failure;
+                if (!close)
+                {
+                    sock = s;
+                    return;
+                }
+            }
+            if (s != null)
+            {
+                try
+                {
+                    s.close();
+                }
+                catch (IOException ioe)
+                {
+                }
+            }
+        }
+
+
+        /** @return   the exception recorded by run(), or null */
+        synchronized IOException getException()
+        {
+            return exception;
+        }
+
+
+        /** @return   the connected socket, or null if there is none (yet) */
+        synchronized Socket getSocket()
+        {
+            return sock;
+        }
+
+
+        /** Tells the thread that its result is no longer wanted. */
+        synchronized void forget()
+        {
+            close = true;
+        }
+    }
+
+
+    /**
+     * M$ has yet another bug in their WinSock: if you try to write too much
+     * data at once it'll hang itself. This filter therefore splits big writes
+     * up into multiple writes of at most CHUNK_SIZE (20000) bytes.
+     *
+     * @author    Administrator
+     * @created   29. Dezember 2001
+     */
+    private class MSLargeWritesBugStream extends FilterOutputStream
+    {
+        private final int CHUNK_SIZE = 20000;
+
+
+        /**
+         * Wraps the given stream.
+         *
+         * @param os  the stream that receives the split-up writes
+         */
+        MSLargeWritesBugStream(OutputStream os)
+        {
+            super(os);
+        }
+
+
+        /**
+         * Writes len bytes of b, starting at off, in pieces of at most CHUNK_SIZE.
+         *
+         * @param b                the data
+         * @param off              index of the first byte to write
+         * @param len              number of bytes to write
+         * @exception IOException  if the wrapped stream throws one
+         */
+        public void write(byte[] b, int off, int len)
+            throws IOException
+        {
+            for (; len > CHUNK_SIZE; len -= CHUNK_SIZE)
+            {
+                out.write(b, off, CHUNK_SIZE);
+                off += CHUNK_SIZE;
+            }
+            // whatever is left (at most CHUNK_SIZE bytes) goes out in one piece
+            out.write(b, off, len);
+        }
+    }
+
+    int defaultIncrement = 1000; // passed to the HTTPResponse constructor (e.g. in enableSSLTunneling)
+
+    public void setDefaultReadIncement(int increment)
+    {
+        defaultIncrement = increment;
+    }
+
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/HTTPClient/HTTPResponse.java b/sandbox/contributions/webcrawler-LARM/src/HTTPClient/HTTPResponse.java
new file mode 100644
index 00000000000..c7db0d84846
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/HTTPClient/HTTPResponse.java
@@ -0,0 +1,1419 @@
+/*
+ *  @(#)HTTPResponse.java				0.3-3 06/05/2001
+ *
+ *  This file is part of the HTTPClient package
+ *  Copyright (C) 1996-2001 Ronald Tschalär
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *  MA 02111-1307, USA
+ *
+ *  For questions, suggestions, bug-reports, enhancement-requests etc.
+ *  I may be contacted at:
+ *
+ *  ronald@innovation.ch
+ *
+ *  The HTTPClient's home page is located at:
+ *
+ *  http://www.innovation.ch/java/HTTPClient/
+ *
+ */
+package HTTPClient;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.io.InputStream;
+import java.io.ByteArrayInputStream;
+import java.net.URL;
+import java.util.Date;
+import java.util.LinkedList;
+import java.util.Enumeration;
+import java.util.Iterator;
+
+/**
+ * Package-private holder that pairs a byte buffer with a length value.
+ * NOTE(review): <code>length</code> presumably counts the valid bytes in
+ * <code>block</code>; it is never assigned in this class - confirm against
+ * the code that fills the buffer.
+ */
+class ByteBlock
+{
+    // the buffer; allocated once by the constructor
+    byte[] block;
+    // not assigned by the constructor, so it starts out as 0
+    int length;
+    // allocates a buffer of the given size in bytes
+    ByteBlock(int size)
+    {
+        block = new byte[size];
+    }
+}
+
+/**
+ * This defines the http-response class returned by the requests. It's basically
+ * a wrapper around the Response class which first lets all the modules handle
+ * the response before finally giving the info to the user.
+ *
+ * @author    Ronald Tschalär
+ * @created   29 December 2001
+ * @version   0.3-3 06/05/2001
+ * @since     0.3
+ */
+
+public class HTTPResponse implements HTTPClientModuleConstants
+{
+    /**
+     * the list of modules
+     */
+    private HTTPClientModule[] modules;
+
+    /**
+     * the timeout for reads
+     */
+    private int timeout;
+
+    /**
+     * the request
+     */
+    private Request request = null;
+
+    /**
+     * the current response
+     */
+    Response response = null;
+
+    /**
+     * the HttpOutputStream to synchronize on
+     */
+    private HttpOutputStream out_stream = null;
+
+    /**
+     * our input stream from the stream demux
+     */
+    private InputStream inp_stream;
+
+    /**
+     * the status code returned.
+     */
+    private int StatusCode;
+
+    /**
+     * the reason line associated with the status code.
+     */
+    private String ReasonLine;
+
+    /**
+     * the HTTP version of the response.
+     */
+    private String Version;
+
+    /**
+     * the original URI used.
+     */
+    private URI OriginalURI = null;
+
+    /**
+     * the final URI of the document.
+     */
+    private URI EffectiveURI = null;
+
+    /**
+     * any headers which were received and do not fit in the above list.
+     */
+    private CIHashtable Headers = null;
+
+    /**
+     * any trailers which were received and do not fit in the above list.
+     */
+    private CIHashtable Trailers = null;
+
+    /**
+     * the ContentLength of the data.
+     */
+    private int ContentLength = -1;
+
+    /**
+     * the data (body) returned.
+     */
+    private byte[] Data = null;
+
+    /**
+     * signals if we have got and parsed the headers yet?
+     */
+    private boolean initialized = false;
+
+    /**
+     * signals if we have got the trailers yet?
+     */
+    private boolean got_trailers = false;
+
+    /**
+     * marks this response as aborted (stop() in HTTPConnection)
+     */
+    private boolean aborted = false;
+
+    /**
+     * should the request be retried by the application?
+     */
+    private boolean retry = false;
+
+    /**
+     * the method used in the request
+     */
+    private String method = null;
+
+
+    // Constructors
+
+    /**
+     * Creates a new HTTPResponse. The original URI is assembled from the
+     * protocol, host and port of the request's connection plus the request-URI,
+     * which is split at the first '?' into path and query. If that URI cannot
+     * be parsed the failure is logged and the original URI stays
+     * <code>null</code>.
+     *
+     * @param modules        the list of modules handling this response
+     * @param timeout        the timeout to be used on stream read()'s
+     * @param orig           the request this response belongs to; supplies the
+     *      connection data, the request-URI and the method
+     * @param readIncrement  initial value of the readIncrement attribute
+     */
+    HTTPResponse(HTTPClientModule[] modules, int timeout, Request orig, int readIncrement)
+    {
+        this.modules = modules;
+        this.timeout = timeout;
+
+        String req_uri = orig.getRequestURI();
+        try
+        {
+            int qp = req_uri.indexOf('?');
+            String path = (qp < 0) ? req_uri : req_uri.substring(0, qp);
+            String query = (qp < 0) ? null : req_uri.substring(qp + 1);
+            this.OriginalURI = new URI(orig.getConnection().getProtocol(),
+                    null,
+                    orig.getConnection().getHost(),
+                    orig.getConnection().getPort(),
+                    path,
+                    query,
+                    null);
+        }
+        catch (ParseException pe)
+        {
+            // Keep going without an original URI (as before), but leave a
+            // trace: code that later dereferences OriginalURI would otherwise
+            // fail with no hint about the cause.
+            Log.write(Log.RESP, "HResp: could not build original URI for (\"" +
+                    orig.getMethod() + " " + req_uri + "\")");
+            Log.write(Log.RESP, "       ", pe);
+        }
+        this.method = orig.getMethod();
+        this.readIncrement = readIncrement;
+    }
+
+
+    /**
+     * the read increment; the initializer of 1000 is always replaced by the
+     * value passed to the constructor. NOTE(review): presumably the step size
+     * used when reading the body - the reader is not visible here, confirm.
+     */
+    int readIncrement = 1000;
+
+
+    /**
+     * Sets the readIncrement attribute of the HTTPResponse object, replacing
+     * the value given to the constructor.
+     *
+     * @param readIncrement  The new readIncrement value
+     */
+    public void setReadIncrement(int readIncrement)
+    {
+        // the parameter shadows the field, hence the explicit "this."
+        this.readIncrement = readIncrement;
+    }
+
+
+    /**
+     * Returns the current readIncrement attribute of this HTTPResponse.
+     *
+     * @return   the value last set by the constructor or setReadIncrement()
+     */
+    public int getReadIncrement()
+    {
+        return readIncrement;
+    }
+
+
+    /**
+     * Attaches a request and its response to this object. The response gets a
+     * back-reference to this object and this object's read timeout; its
+     * <code>final_resp</code> flag becomes this object's aborted state.
+     *
+     * @param req   the request
+     * @param resp  the response
+     */
+    void set(Request req, Response resp)
+    {
+        request = req;
+        response = resp;
+        resp.http_resp = this;
+        resp.timeout = timeout;
+        aborted = resp.final_resp;
+    }
+
+
+    /**
+     * Attaches a request together with the output stream from which the
+     * response is fetched later on (see handleResponse()).
+     *
+     * @param req         the request
+     * @param out_stream  the HttpOutputStream that will supply the response
+     */
+    void set(Request req, HttpOutputStream out_stream)
+    {
+        request = req;
+        // the parameter shadows the field, hence the explicit "this."
+        this.out_stream = out_stream;
+    }
+
+
+    // Methods
+
+    /**
+     * Returns the status code of the response, processing the response first
+     * if that has not been done yet. Status codes are grouped as follows:
+     * <UL>
+     *   <LI> 1xx - Informational (new in HTTP/1.1)
+     *   <LI> 2xx - Success
+     *   <LI> 3xx - Redirection
+     *   <LI> 4xx - Client Error
+     *   <LI> 5xx - Server Error
+     * </UL>
+     *
+     *
+     * @return                     the status code
+     * @exception IOException      if any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public final int getStatusCode()
+        throws IOException, ModuleException
+    {
+        if (initialized)
+        {
+            return StatusCode;
+        }
+        handleResponse();
+        return StatusCode;
+    }
+
+
+    /**
+     * Returns the reason line that accompanies the status code, processing
+     * the response first if that has not been done yet.
+     *
+     * @return                     the reason line
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public final String getReasonLine()
+        throws IOException, ModuleException
+    {
+        final boolean pending = !initialized;
+        if (pending)
+        {
+            handleResponse();
+        }
+        return ReasonLine;
+    }
+
+
+    /**
+     * Returns the HTTP version of the response, processing the response first
+     * if that has not been done yet.
+     *
+     * @return                     the version string
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public final String getVersion()
+        throws IOException, ModuleException
+    {
+        if (initialized)
+        {
+            return Version;
+        }
+        handleResponse();
+        return Version;
+    }
+
+
+    /**
+     * Returns the value of the "Server" header, i.e. the name and type of the
+     * server.
+     *
+     * @return                     the value of the "Server" header
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     * @deprecated                 This method is a remnant of V0.1; use <code>getHeader("Server")</code>
+     *      instead.
+     * @see                        #getHeader(java.lang.String)
+     */
+    public final String getServer()
+        throws IOException, ModuleException
+    {
+        final boolean pending = !initialized;
+        if (pending)
+        {
+            handleResponse();
+        }
+        final String server = getHeader("Server");
+        return server;
+    }
+
+
+    /**
+     * Get the original URI used in the request. This is the URI assembled by
+     * the constructor; the response is not processed by this call.
+     *
+     * @return   the URI used in primary request
+     */
+    public final URI getOriginalURI()
+    {
+        return OriginalURI;
+    }
+
+
+    /**
+     * Returns the final URL of the document. It is set if the original request
+     * was deferred via the "moved" (301, 302, or 303) return status. The
+     * response is processed first if that has not been done yet.
+     *
+     * @return                     the effective URL, or null if no redirection
+     *      occurred
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     * @deprecated                 use getEffectiveURI() instead
+     * @see                        #getEffectiveURI
+     */
+    public final URL getEffectiveURL()
+        throws IOException, ModuleException
+    {
+        if (!initialized)
+        {
+            handleResponse();
+        }
+        return (EffectiveURI == null) ? null : EffectiveURI.toURL();
+    }
+
+
+    /**
+     * Returns the final URI of the document. If the request was redirected via
+     * the "moved" (301, 302, 303, or 307) return status this is the URI used
+     * in the last redirection; otherwise it is the original URI. The response
+     * is processed first if that has not been done yet.
+     *
+     * @return                     the effective URI
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public final URI getEffectiveURI()
+        throws IOException, ModuleException
+    {
+        if (!initialized)
+        {
+            handleResponse();
+        }
+        return (EffectiveURI != null) ? EffectiveURI : OriginalURI;
+    }
+
+
+    /**
+     * Looks up the value of a response header. The response is processed
+     * first if that has not been done yet; the given name is trimmed before
+     * the lookup.
+     *
+     * @param hdr                  the header name.
+     * @return                     the value for the header, or null if
+     *      non-existent.
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public String getHeader(String hdr)
+        throws IOException, ModuleException
+    {
+        if (!initialized)
+        {
+            handleResponse();
+        }
+        final String name = hdr.trim();
+        final Object value = Headers.get(name);
+        return (String) value;
+    }
+
+
+    /**
+     * Looks up the value of a response header and parses it as an int.
+     *
+     * @param hdr                        the header name.
+     * @return                           the value for the header if the header
+     *      exists
+     * @exception NumberFormatException  if the header's value is not a number
+     *      or if the header does not exist.
+     * @exception IOException            if any exception occurs on the socket.
+     * @exception ModuleException        if any module encounters an exception.
+     */
+    public int getHeaderAsInt(String hdr)
+        throws IOException, ModuleException, NumberFormatException
+    {
+        final String text = getHeader(hdr);
+        if (text != null)
+        {
+            return Integer.parseInt(text);
+        }
+        // a missing header is reported like an unparsable one
+        throw new NumberFormatException("null");
+    }
+
+
+    /**
+     * Looks up the value of a response header and parses it as a date. If that
+     * fails the value is parsed as a long giving the number of seconds since
+     * 12:00 AM, Jan 1st, 1970 (negative numbers are treated as 0). If this
+     * fails too, the exception from the date parser is thrown. <br>
+     * Note: When sending dates use Util.httpDate().
+     *
+     * @param hdr                           the header name.
+     * @return                              the value for the header, or null if
+     *      non-existent.
+     * @exception IllegalArgumentException  if the header's value is neither a
+     *      legal date nor a number.
+     * @exception IOException               if any exception occurs on the
+     *      socket.
+     * @exception ModuleException           if any module encounters an
+     *      exception.
+     */
+    public Date getHeaderAsDate(String hdr)
+        throws IOException, IllegalArgumentException, ModuleException
+    {
+        String text = getHeader(hdr);
+        if (text == null)
+        {
+            return null;
+        }
+
+        // asctime() format is missing an explicit GMT specifier
+        boolean names_gmt = text.toUpperCase().indexOf("GMT") != -1;
+        if (!names_gmt && text.indexOf(' ') > 0)
+        {
+            text = text + " GMT";
+        }
+
+        try
+        {
+            return Util.parseHttpDate(text);
+        }
+        catch (IllegalArgumentException not_a_date)
+        {
+            // some servers erroneously send a number, so let's try that
+            long seconds;
+            try
+            {
+                seconds = Long.parseLong(text);
+            }
+            catch (NumberFormatException not_a_number)
+            {
+                // give up: report the failure of the date parser
+                throw not_a_date;
+            }
+            // negative values are clamped to 0 before converting to millis
+            return new Date(Math.max(seconds, 0L) * 1000L);
+        }
+    }
+
+
+    /**
+     * Returns an enumeration of the names of all headers available via
+     * getHeader(). The response is processed first if that has not been done
+     * yet.
+     *
+     * @return                     the keys of the header table
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public Enumeration listHeaders()
+        throws IOException, ModuleException
+    {
+        final boolean pending = !initialized;
+        if (pending)
+        {
+            handleResponse();
+        }
+        return Headers.keys();
+    }
+
+
+    /**
+     * Looks up the value of a trailer. This should not be invoked until all
+     * response data has been read; if the trailers have not been obtained yet
+     * they are fetched first via getTrailers() (according to the original
+     * documentation this calls <code>getData()</code> to force the data to be
+     * read). The given name is trimmed before the lookup.
+     *
+     * @param trailer              the trailer name.
+     * @return                     the value for the trailer, or null if
+     *      non-existent.
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     * @see                        #getData()
+     */
+    public String getTrailer(String trailer)
+        throws IOException, ModuleException
+    {
+        if (!got_trailers)
+        {
+            getTrailers();
+        }
+        final String name = trailer.trim();
+        final Object value = Trailers.get(name);
+        return (String) value;
+    }
+
+
+    /**
+     * Looks up the value of a trailer and parses it as an int.
+     *
+     * @param trailer                    the trailer name.
+     * @return                           the value for the trailer if the
+     *      trailer exists
+     * @exception NumberFormatException  if the trailer's value is not a number
+     *      or if the trailer does not exist.
+     * @exception IOException            if any exception occurs on the socket.
+     * @exception ModuleException        if any module encounters an exception.
+     */
+    public int getTrailerAsInt(String trailer)
+        throws IOException, ModuleException, NumberFormatException
+    {
+        final String text = getTrailer(trailer);
+        if (text != null)
+        {
+            return Integer.parseInt(text);
+        }
+        // a missing trailer is reported like an unparsable one
+        throw new NumberFormatException("null");
+    }
+
+
+    /**
+     * Looks up the value of a trailer and parses it as a date. If that fails
+     * the value is parsed as a long giving the number of seconds since
+     * 12:00 AM, Jan 1st, 1970 (negative numbers are treated as 0). If this
+     * fails too, the IllegalArgumentException from the date parser is thrown.
+     * <br>
+     * Note: When sending dates use Util.httpDate().
+     *
+     * @param trailer                       the trailer name.
+     * @return                              the value for the trailer, or null
+     *      if non-existent.
+     * @exception IllegalArgumentException  if the trailer's value is neither a
+     *      legal date nor a number.
+     * @exception IOException               if any exception occurs on the
+     *      socket.
+     * @exception ModuleException           if any module encounters an
+     *      exception.
+     */
+    public Date getTrailerAsDate(String trailer)
+        throws IOException, IllegalArgumentException, ModuleException
+    {
+        String text = getTrailer(trailer);
+        if (text == null)
+        {
+            return null;
+        }
+
+        // asctime() format is missing an explicit GMT specifier
+        boolean lacks_gmt = text.toUpperCase().indexOf("GMT") == -1;
+        if (lacks_gmt && text.indexOf(' ') > 0)
+        {
+            text = text + " GMT";
+        }
+
+        try
+        {
+            return Util.parseHttpDate(text);
+        }
+        catch (IllegalArgumentException not_a_date)
+        {
+            // some servers erroneously send a number, so let's try that
+            long seconds;
+            try
+            {
+                seconds = Long.parseLong(text);
+            }
+            catch (NumberFormatException not_a_number)
+            {
+                // give up: report the failure of the date parser
+                throw not_a_date;
+            }
+            // negative values are clamped to 0 before converting to millis
+            return new Date(Math.max(seconds, 0L) * 1000L);
+        }
+    }
+
+
+    /**
+     * Returns an enumeration of the names of all trailers available via
+     * getTrailer(). The trailers are fetched first if that has not been done
+     * yet.
+     *
+     * @return                     the keys of the trailer table
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public Enumeration listTrailers()
+        throws IOException, ModuleException
+    {
+        final boolean pending = !got_trailers;
+        if (pending)
+        {
+            getTrailers();
+        }
+        return Trailers.keys();
+    }
+
+
+    /**
+     * Reads all the response data into a byte array. Note that this method
+     * won't return until <em>all</em> the data has been received (so for
+     * instance don't invoke this method if the server is doing a server push).
+     * If <code>getInputStream()</code> had been previously invoked then this
+     * method only returns any unread data remaining on the stream and then
+     * closes it. This is the same as calling <code>getData(-1)</code>. <P>
+     *
+     * Note to the unwary: code like <PRE>
+     *     System.out.println("The data: " + resp.getData())
+     *</PRE> will probably not do what you want - use <PRE>
+     *     System.out.println("The data: " + resp.getText())
+     *</PRE> instead.
+     *
+     * @return                     an array containing the data (body) returned.
+     *      If no data was returned then it's set to a zero-length array.
+     * @exception IOException      If any io exception occurred while reading
+     *      the data
+     * @exception ModuleException  if any module encounters an exception.
+     * @see                        #getInputStream()
+     */
+
+    public byte[] getData() throws IOException, ModuleException
+    {
+        // delegate to the general variant with max = -1
+        return getData(-1);
+    }
+
+    /**
+     * Reads the response data into a byte array like {@link #getData()}, but
+     * hands <var>max</var> on to the reader. The response is processed first
+     * if that has not been done yet. The data is read only while the
+     * <code>Data</code> field is still unset; afterwards the input stream is
+     * closed. <P>
+     *
+     * This method is synchronized, like <code>getText()</code> and <code>
+     * getInputStream()</code>, so that two threads cannot both find the data
+     * unread and then read from the same stream concurrently.
+     *
+     * @param max                  passed unchanged to readResponseData();
+     *      <code>getData()</code> passes -1. NOTE(review): presumably an upper
+     *      bound on the number of bytes read, with -1 meaning "no limit" -
+     *      confirm against readResponseData().
+     * @return                     the data (body) held in the <code>Data
+     *      </code> field
+     * @exception IOException      If any io exception occurred while reading
+     *      the data; an InterruptedIOException is passed on without closing
+     *      the stream
+     * @exception ModuleException  if any module encounters an exception.
+     * @see                        #getData()
+     */
+    public synchronized byte[] getData(int max)
+        throws IOException, ModuleException
+    {
+        if (!initialized)
+        {
+            handleResponse();
+        }
+
+        if (Data == null)
+        {
+            try
+            {
+                readResponseData(inp_stream, max);
+            }
+            catch (InterruptedIOException ie)
+            {
+                // don't intercept: the stream is left open in this case
+                throw ie;
+            }
+            catch (IOException ioe)
+            {
+                Log.write(Log.RESP, "HResp: (\"" + method + " " +
+                        OriginalURI.getPathAndQuery() + "\")");
+                Log.write(Log.RESP, "       ", ioe);
+
+                // best-effort close; the read failure is what gets reported
+                try
+                {
+                    inp_stream.close();
+                }
+                catch (Exception e)
+                {
+                }
+                throw ioe;
+            }
+
+            // normal completion: a failing close is reported to the caller
+            inp_stream.close();
+        }
+
+        return Data;
+    }
+
+
+    /**
+     * Reads all the response data and converts it to a string. The charset is
+     * taken from the "charset" parameter of the Content-Type header, falling
+     * back to ISO-8859-1 when that parameter is absent. Since this uses {@link
+     * #getData() getData()}, the caveats of that method apply here as well.
+     *
+     * @return                     the body as a String. If no data was returned
+     *      then an empty string is returned.
+     * @exception IOException      If any io exception occurred while reading
+     *      the data, or if the content is not text
+     * @exception ModuleException  if any module encounters an exception.
+     * @exception ParseException   if an error occurred trying to parse the
+     *      content-type header field
+     * @see                        #getData()
+     */
+    public synchronized String getText()
+        throws IOException, ModuleException, ParseException
+    {
+        final String ct = getHeader("Content-Type");
+        final boolean is_text = ct != null && ct.toLowerCase().startsWith("text/");
+        if (!is_text)
+        {
+            throw new IOException("Content-Type `" + ct + "' is not a text type");
+        }
+
+        String charset = Util.getParameter("charset", ct);
+        if (charset == null)
+        {
+            charset = "ISO-8859-1";
+        }
+
+        final byte[] body = getData();
+        return new String(body, charset);
+    }
+
+
+    /**
+     * Gets an input stream from which the returned data can be read. Note that
+     * if <code>getData()</code> had been previously invoked it will actually
+     * return a ByteArrayInputStream created from that data.
+     *
+     * @return                     the InputStream.
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     * @see                        #getData()
+     */
+    public synchronized InputStream getInputStream()
+        throws IOException, ModuleException
+    {
+        if (!initialized)
+        {
+            handleResponse();
+        }
+
+        if (Data == null)
+        {
+            return inp_stream;
+        }
+        else
+        {
+            getData();
+            // ensure complete data is read
+            return new ByteArrayInputStream(Data);
+        }
+    }
+
+
+    /**
+     * Tells whether the application should retry the request. If the
+     * application used an <var>HttpOutputStream</var> in the request then
+     * various modules (such as the redirection and authorization modules) are
+     * not able to resend the request themselves; instead, this becomes the
+     * application's responsibility. The application can check this flag, and if
+     * it's set, resend the exact same request. The modules such as the
+     * RedirectionModule or AuthorizationModule will then recognize the resend
+     * and fix up or redirect the request as required (i.e. they defer their
+     * normal action until the resend). <P>
+     *
+     * If the application resends the request then it <strong>must</strong> use
+     * the same <var>HttpOutputStream</var> instance, because the modules use
+     * it to recognize the retried request and to perform the necessary work on
+     * the request before it's sent. <P>
+     *
+     * Here is a skeleton example of usage: <PRE>
+     *     OutputStream out = new HttpOutputStream(1234);
+     *     do
+     *     {
+     *         rsp = con.Post("/cgi-bin/my_cgi", out);
+     *         out.write(...);
+     *         out.close();
+     *     } while (rsp.retryRequest());
+     *
+     *     if (rsp.getStatusCode() >= 300)
+     *         ...
+     * </PRE> <P>
+     *
+     * Note that for this to ever return true, the java system property <var>
+     * HTTPClient.deferStreamed</var> must be set to true at the beginning of
+     * the application (before the HTTPConnection class is loaded). This
+     * prevents unwary applications from causing inadvertent memory leaks. If an
+     * application does set this, then it <em>must</em> resend any request whose
+     * response returns true here in order to prevent memory leaks (a switch to
+     * JDK 1.2 will allow us to use weak references and eliminate this problem).
+     *
+     * @return                     true if the request should be retried.
+     * @exception IOException      If any exception occurs on the socket.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    public boolean retryRequest()
+        throws IOException, ModuleException
+    {
+        if (initialized)
+        {
+            return retry;
+        }
+
+        try
+        {
+            handleResponse();
+        }
+        catch (RetryException re)
+        {
+            // processing was cut short: take the flag from the response itself
+            retry = response.retry;
+        }
+        return retry;
+    }
+
+
+    /**
+     * Produces the status line followed by the effective URI (if there is one)
+     * and all headers with their values, one per line. The response is
+     * processed first if that has not been done yet; if this fails, a short
+     * failure message is returned instead.
+     *
+     * @return   a string containing the headers
+     */
+    public String toString()
+    {
+        if (!initialized)
+        {
+            try
+            {
+                handleResponse();
+            }
+            catch (Exception e)
+            {
+                // interrupted reads are not logged
+                boolean interrupted = e instanceof InterruptedIOException;
+                if (!interrupted)
+                {
+                    Log.write(Log.RESP, "HResp: (\"" + method + " " +
+                            OriginalURI.getPathAndQuery() + "\")");
+                    Log.write(Log.RESP, "       ", e);
+                }
+                return "Failed to read headers: " + e;
+            }
+        }
+
+        final String eol = System.getProperty("line.separator", "\n");
+
+        // status line
+        StringBuffer text = new StringBuffer(Version);
+        text.append(' ').append(StatusCode).append(' ').append(ReasonLine).append(eol);
+
+        if (EffectiveURI != null)
+        {
+            text.append("Effective-URI: ").append(EffectiveURI).append(eol);
+        }
+
+        // one "name: value" line per header
+        for (Enumeration names = Headers.keys(); names.hasMoreElements(); )
+        {
+            String name = (String) names.nextElement();
+            text.append(name).append(": ").append(Headers.get(name)).append(eol);
+        }
+
+        return text.toString();
+    }
+
+
+    // Helper Methods
+
+
+    /**
+     * Gets the modules attribute of the HTTPResponse object. The array handed
+     * to the constructor is returned as is, not a copy.
+     *
+     * @return   The modules value
+     */
+    HTTPClientModule[] getModules()
+    {
+        return modules;
+    }
+
+
+    /**
+     * Processes a Response. This is done by calling the response handlers of
+     * each module in three phases. When all is done, the various fields of
+     * this instance are initialized from the last Response (unless the request
+     * is an internal subrequest). Calling this on an already initialized
+     * instance does nothing.
+     *
+     * @return                     true if a new request was generated. This is
+     *      used for internal subrequests only
+     * @exception IOException      if any handler throws an IOException.
+     * @exception ModuleException  if any module encounters an exception.
+     */
+    synchronized boolean handleResponse()
+        throws IOException, ModuleException
+    {
+        // already processed: nothing to do
+        if (initialized)
+        {
+            return false;
+        }
+
+        /*
+         *  first get the response if necessary: when the request was sent via
+         *  an HttpOutputStream the response is fetched from that stream once,
+         *  after which the reference is dropped
+         */
+        if (out_stream != null)
+        {
+            response = out_stream.getResponse();
+            response.http_resp = this;
+            out_stream = null;
+        }
+
+        /*
+         *  go through modules and handle them. "continue doModules" restarts
+         *  the processing with phase 1, "break doModules" ends it. The labels
+         *  Phase1, Phase2 and Phase3 are not the target of any break or
+         *  continue in this method; they only name the phases. All module
+         *  loops stop early once this response is marked as aborted.
+         */
+        doModules :
+        while (true)
+        {
+
+            Phase1 :
+            for (int idx = 0; idx < modules.length && !aborted; idx++)
+            {
+                try
+                {
+                    modules[idx].responsePhase1Handler(response, request);
+                }
+                catch (RetryException re)
+                {
+                    // a restart request begins again at the first module;
+                    // any other RetryException is passed to the caller
+                    if (re.restart)
+                    {
+                        continue doModules;
+                    }
+                    else
+                    {
+                        throw re;
+                    }
+                }
+            }
+
+            Phase2 :
+            for (int idx = 0; idx < modules.length && !aborted; idx++)
+            {
+                // the status returned by the module decides how to go on
+                int sts = modules[idx].responsePhase2Handler(response, request);
+                switch (sts)
+                {
+                    case RSP_CONTINUE:
+                        // continue processing
+                        break;
+                    case RSP_RESTART:
+                        // restart response processing
+                        // (the assignment has no effect: idx is declared anew
+                        // when the loops are re-entered)
+                        idx = -1;
+                        continue doModules;
+                    case RSP_SHORTCIRC:
+                        // stop processing and return
+                        break doModules;
+                    case RSP_REQUEST:
+                    // go to phase 1
+                    // (falls through: handled exactly like RSP_NEWCON_REQ)
+                    case RSP_NEWCON_REQ:
+                        // process the request using a new con
+                        // the current response body is abandoned first
+                        response.getInputStream().close();
+                        if (handle_trailers)
+                        {
+                            invokeTrailerHandlers(true);
+                        }
+                        // internal subrequests are resent by the caller
+                        if (request.internal_subrequest)
+                        {
+                            return true;
+                        }
+                        request.getConnection().
+                                handleRequest(request, this, response, true);
+                        // stop if this instance got initialized during the
+                        // handleRequest() call
+                        if (initialized)
+                        {
+                            break doModules;
+                        }
+
+                        // (no effect, see RSP_RESTART)
+                        idx = -1;
+                        continue doModules;
+                    case RSP_SEND:
+                    // send the request immediately
+                    // (falls through: handled exactly like RSP_NEWCON_SND)
+                    case RSP_NEWCON_SND:
+                        // send the request using a new con
+                        response.getInputStream().close();
+                        if (handle_trailers)
+                        {
+                            invokeTrailerHandlers(true);
+                        }
+                        // internal subrequests are resent by the caller
+                        if (request.internal_subrequest)
+                        {
+                            return true;
+                        }
+                        request.getConnection().
+                                handleRequest(request, this, response, false);
+                        // (no effect, see RSP_RESTART)
+                        idx = -1;
+                        continue doModules;
+                    default:
+                        // not valid
+                        throw new Error("HTTPClient Internal Error: invalid status" +
+                                " " + sts + " returned by module " +
+                                modules[idx].getClass().getName());
+                }
+            }
+
+            Phase3 :
+            for (int idx = 0; idx < modules.length && !aborted; idx++)
+            {
+                modules[idx].responsePhase3Handler(response, request);
+            }
+
+            // all three phases ran through without a restart
+            break doModules;
+        }
+
+        /*
+         *  force a read on the response in case none of the modules did
+         */
+        response.getStatusCode();
+
+        /*
+         *  all done, so copy data (not for internal subrequests, which
+         *  therefore stay uninitialized)
+         */
+        if (!request.internal_subrequest)
+        {
+            init(response);
+        }
+
+        // run trailer handlers whose invocation was deferred until now
+        if (handle_trailers)
+        {
+            invokeTrailerHandlers(false);
+        }
+
+        return false;
+    }
+
+
+    /**
+     * Takes over the state of the given Response and flags this object as
+     * initialized. Calls made once this object is initialized do nothing.
+     *
+     * @param resp  the Response whose fields are adopted
+     */
+    void init(Response resp)
+    {
+        if (!initialized)
+        {
+            // plain field-by-field assignment; object references are shared
+            StatusCode = resp.StatusCode;
+            ReasonLine = resp.ReasonLine;
+            Version = resp.Version;
+            EffectiveURI = resp.EffectiveURI;
+            ContentLength = resp.ContentLength;
+            Headers = resp.Headers;
+            inp_stream = resp.inp_stream;
+            Data = resp.Data;
+            retry = resp.retry;
+            initialized = true;
+        }
+    }
+
+
+    private boolean handle_trailers = false;
+    private boolean trailers_handled = false;
+
+
+    /**
+     * Runs the trailer handler of every module. The RespInputStream calls
+     * this when it is close()'d. Does nothing if the handlers already ran.
+     *
+     * @param force                run the handlers even if this response has
+     *      not been initialized yet; otherwise the call is only recorded
+     * @exception IOException      if thrown by any module
+     * @exception ModuleException  if thrown by any module
+     */
+    void invokeTrailerHandlers(boolean force)
+        throws IOException, ModuleException
+    {
+        if (trailers_handled)
+        {
+            return;
+        }
+
+        if (!(force || initialized))
+        {
+            // too early: just note that the handlers still have to be run
+            handle_trailers = true;
+            return;
+        }
+        int pos = 0;
+        while (pos < modules.length && !aborted)
+        {
+            modules[pos++].trailerHandler(response, request);
+        }
+        trailers_handled = true;
+    }
+
+
+    /**
+     * Flags this request as aborted; the loops over the modules test this
+     * flag and stop. Called from HTTPConnection.stop().
+     */
+    void markAborted()
+    {
+        this.aborted = true;
+    }
+
+
+    /**
+     * Fetches the trailers from the response, unless that was done before.
+     *
+     * @exception IOException      if thrown while handling the response
+     * @exception ModuleException  if thrown by any module
+     */
+    private synchronized void getTrailers()
+        throws IOException, ModuleException
+    {
+        if (!got_trailers)
+        {
+            if (!initialized)
+            {
+                // not processed yet: run the response handling first
+                handleResponse();
+            }
+
+            // result ignored; presumably this makes the response read them
+            response.getTrailer("Any");
+            this.Trailers = response.Trailers;
+            this.got_trailers = true;
+            invokeTrailerHandlers(false);
+        }
+    }
+
+
+    /**
+     * Reads the response data received. Does not return until Content-Length
+     * bytes or (about) <var>max</var> bytes have been read or EOF is reached.
+     *
+     * @param inp                  the input stream from which to read the data
+     * @param max                  byte limit, -1 for none; may be overshot by
+     *      up to one read when there is no Content-Length header
+     * @exception IOException      if any read on the input stream fails
+     * @exception ModuleException  presumably propagated from getHeader()
+     */
+    private void readResponseData(InputStream inp, int max)
+        throws IOException, ModuleException
+    {
+        boolean readUnlimited = (max == -1);
+
+        if (ContentLength == 0)
+        {
+            return;
+        }
+
+        if (Data == null)
+        {
+            Data = new byte[0];
+        }
+
+        // read response data
+
+        int off = Data.length;
+
+        try
+        {
+            // check Content-length header in case CE-Module removed it
+            if (getHeader("Content-Length") != null)
+            {
+                int rcvd = 0;
+                // every max >= 0 is a limit, including 0 and 1
+                int total = max > -1 ? Math.min(ContentLength, max) : ContentLength;
+                //System.out.println("Reading with max file size: " + total);
+                Data = new byte[total];
+                do
+                {
+                    off += rcvd;
+                    rcvd = inp.read(Data, off, total - off);
+                } while (rcvd != -1 && off + rcvd < total);
+                // if max < ContentLength (&& max > -1): lose the rest
+                /*if(total < ContentLength)
+                {
+                    inp.skip(ContentLength - total);
+                }*/
+                /*
+                 *  Don't do this!
+                 *  If we do, then getData() won't work after a getInputStream()
+                 *  because we'll never get all the expected data. Instead, let
+                 *  the underlying RespInputStream throw the EOF.
+                 *  if (rcvd == -1)	// premature EOF
+                 *  {
+                 *  throw new EOFException("Encountered premature EOF while " +
+                 *  "reading headers: received " + off +
+                 *  " bytes instead of the expected " +
+                 *  ContentLength + " bytes");
+                 *  }
+                 */
+            }
+            else
+            {
+                //System.out.println("Reading with unknown file size");
+                java.util.LinkedList blocks = new java.util.LinkedList();
+                int total = 0;
+                int secondBlockSize = 10*2000;
+                byte[] secondBlock = new byte[secondBlockSize];
+                //System.out.println("new byte[" + secondBlockSize + "]");
+                int offInSecondBlock = 0;
+                int rcvd = 0;
+                do
+                {
+                    int bytesToRead = secondBlockSize - offInSecondBlock;
+                    if(bytesToRead < 1)
+                    {
+                        // System.out.println("adding block to list...");
+                        blocks.addLast(secondBlock);
+                        secondBlock = new byte[secondBlockSize];
+                        //System.out.println("new byte[" + secondBlockSize + "]");
+                        offInSecondBlock = 0;
+                        bytesToRead = secondBlockSize;
+                    }
+                    rcvd = inp.read(secondBlock, offInSecondBlock, bytesToRead);
+                    //System.out.println("read " + rcvd);
+                    // rcvd is usually << secondBlockSize
+                    if(rcvd != -1)
+                    {
+                        offInSecondBlock += rcvd;
+                        total += rcvd;
+                        max -= rcvd;
+                    }
+                } while(rcvd != -1 && (readUnlimited || max > 0));
+
+                // now we have: 1 x the last block as "secondBlock" + 0...n x blocks in the list
+                Data = new byte[total];  // I can't see how to do it without this second buffer
+                //System.out.println("new byte[" + total + "]");
+
+                int offset = 0;
+                while(blocks.size() > 0)
+                {
+                    byte[] block = (byte[]) blocks.removeFirst();
+                    System.arraycopy(block, 0, Data, offset, block.length);
+                    //System.out.println("System.arraycopy(" + block.length + ")");
+                    offset += block.length;
+                }
+                if(offInSecondBlock > 0)
+                {
+                    //System.out.println("System.arraycopy(" + offInSecondBlock + ")");
+                    System.arraycopy(secondBlock, 0, Data, offset, offInSecondBlock);
+                }
+            }
+        }
+        catch (IOException ioe)
+        {
+            Data = Util.resizeArray(Data, off);
+            throw ioe;
+        }
+        finally
+        {
+            try
+            {
+                inp.close();
+            }
+            catch (IOException ioe)
+            {
+            }
+        }
+    }
+
+
+
+    /*
+     * Reads the response data received. Does not return until either
+     * Content-Length bytes have been read or EOF is reached.
+     *
+     * @param inp                  Description of the Parameter
+     * @exception IOException      if any read on the input stream fails
+     * @exception ModuleException  Description of the Exception
+     * @inp                        the input stream from which to read the data
+     *
+    private void readResponseData(InputStream inp)
+        throws IOException, ModuleException
+    {
+        if (ContentLength == 0)
+        {
+            return;
+        }
+
+        if (Data == null)
+        {
+            Data = new byte[0];
+        }
+
+        // read response data
+
+        int off = Data.length;
+
+        LinkedList blocks = new java.util.LinkedList();
+
+        // check Content-length header in case CE-Module removed it
+        if (getHeader("Content-Length") != null)
+        {
+            try
+            {
+                int rcvd = 0;
+                Data = new byte[ContentLength];
+
+                do
+                {
+                    off += rcvd;
+                    rcvd = inp.read(Data, off, ContentLength - off);
+                } while (rcvd != -1 && off + rcvd < ContentLength);
+                /*
+                 *  Don't do this!
+                 *  If we do, then getData() won't work after a getInputStream()
+                 *  because we'll never get all the expected data. Instead, let
+                 *  the underlying RespInputStream throw the EOF.
+                 *  if (rcvd == -1)	// premature EOF
+                 *  {
+                 *  throw new EOFException("Encountered premature EOF while " +
+                 *  "reading headers: received " + off +
+                 *  " bytes instead of the expected " +
+                 *  ContentLength + " bytes");
+                 *  }
+                 *
+            }
+            catch (IOException ioe)
+            {
+                Data = Util.resizeArray(Data, off);
+                throw ioe;
+            }
+            finally
+            {
+                try
+                {
+                    inp.close();
+                }
+                catch (IOException ioe)
+                {
+                }
+            }
+        }
+        else
+        {
+            int total = 0;
+            int rcvd = 0;
+            try
+            {
+                ByteBlock actBlock = new ByteBlock(this.readIncrement);
+                // TODO: Blocks are very small (500-2000 Bytes) -> combine them
+                while ((actBlock.length = inp.read(actBlock.block, 0, this.readIncrement)) != -1)
+                {
+                    total += actBlock.length;
+                    // System.out.println(this.getOriginalURI().toExternalForm() + ": adding block with length " + actBlock.length + " complete: " + total);
+                    blocks.add(actBlock);
+                    actBlock = new ByteBlock(this.readIncrement);
+                    //off += rcvd;
+                    // Data = Util.resizeArray(Data, off + this.readIncrement);
+                }
+            }
+            catch (IOException ioe)
+            {
+                throw ioe;
+            }
+            finally
+            {
+                Iterator it = blocks.iterator();
+                Data = Util.resizeArray(Data, total);
+                 off = 0;
+                while (it.hasNext())
+                {
+                    ByteBlock act = (ByteBlock) it.next();
+                    //System.out.println(this.getOriginalURI().toExternalForm() + ": copied " + act.length + " -> off: " + off + ", left: " + total);
+                    System.arraycopy(act.block, 0, Data, off, act.length);
+                    off += act.length;
+                    total -= act.length;
+                }
+                try
+                {
+                    inp.close();
+                }
+                catch (IOException ioe)
+                {
+                }
+            }
+        }
+    }
+*/
+
+    /**
+     * Returns the timeout stored in this HTTPResponse object.
+     *
+     * @return   the value of the timeout field
+     */
+    int getTimeout()
+    {
+        return this.timeout;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java
new file mode 100644
index 00000000000..b8b4d7e3a36
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java
@@ -0,0 +1,38 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c) <p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.fetcher;
+
+/**
+ * Holds the global constants shared by the classes of this package.
+ */
+public class Constants
+{
+
+    /**
+     * User-Agent string that a fetcher task presents to the server it contacts.
+     */
+    public static final String USER_AGENT = "Mozilla/4.06 [en] (WinNT; I)";
+
+    /**
+     * Identification string of the crawler itself.
+     */
+    public static final String CRAWLER_AGENT = "Fetcher/0.95";
+
+    /**
+     * Size of the temporary buffer that web documents are read into.
+     */
+    public static final int FETCHERTASK_READSIZE = 4 * 1024;
+
+    /**
+     * Upper bound in bytes for a fetched document; nothing beyond it is read.
+     */
+    public static final int FETCHERTASK_MAXFILESIZE = 2 * 1000 * 1000;
+
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java
new file mode 100644
index 00000000000..a724066daff
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java
@@ -0,0 +1,73 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.fetcher;
+
+import java.util.*;
+import java.net.*;
+
+/**
+ * Filter class: looks up the IP address of each URLMessage's host and drops
+ * (returns null for) messages whose host cannot be resolved; all other
+ * messages pass through unchanged. Since URLs cache their IP addresses
+ * themselves, and HTTP 1.1 needs the host names, this class is not used anymore
+ */
+public class DNSResolver implements MessageListener
+{
+
+    HashMap ipCache = new HashMap();
+
+
+    public DNSResolver()
+    {
+    }
+
+    public void notifyAddedToMessageHandler(MessageHandler m)
+    {
+        this.messageHandler = m;
+    }
+
+    MessageHandler messageHandler;
+
+    public Message handleRequest(Message message)
+    {
+        if(message instanceof URLMessage)
+        {
+            URL url = ((URLMessage)message).getUrl();
+            String host = url.getHost();
+            InetAddress ip;
+            /*InetAddress ip = (InetAddress)ipCache.get(host);
+
+            if(ip == null)
+            {
+                */
+
+                try
+                {
+                     ip = InetAddress.getByName(host);
+                    /*
+                    ipCache.put(host, ip);
+                    //System.out.println("DNSResolver: new Cache Entry \"" + host + "\" = \"" + ip.getHostAddress() + "\"");*/
+                }
+                catch(UnknownHostException e)
+                {
+                    ip = null;
+                    return null;
+                    //System.out.println("DNSResolver: unknown host \"" + host + "\"");
+                }
+            /*}
+            else
+            {
+               //System.out.println("DNSResolver: Cache hit: " +  ip.getHostAddress());
+            }*/
+            //((URLMessage)message).setIpAddress(ip);
+        }
+        return message;
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java
new file mode 100644
index 00000000000..e1ca56c2355
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java
@@ -0,0 +1,224 @@
+/*
+ *  LARM - LANLab Retrieval Machine
+ *
+ *  $history: $
+ *
+ */
+
+package de.lanlab.larm.fetcher;
+
+import de.lanlab.larm.threads.ThreadPool;
+import de.lanlab.larm.threads.ThreadPoolObserver;
+import de.lanlab.larm.threads.InterruptableTask;
+import de.lanlab.larm.storage.*;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+
+import de.lanlab.larm.fetcher.FetcherTask;
+
+/**
+ * filter class; the Fetcher is the main class which keeps the ThreadPool that
+ * gets the documents. It should be placed at the very end of the MessageQueue,
+ * so that all filtering can be made beforehand.
+ *
+ * @author    Clemens Marschner
+ *
+ */
+
+public class Fetcher implements MessageListener
+{
+    /**
+     * holds the threads
+     */
+    ThreadPool fetcherPool;
+
+    /**
+     * number of documents handed to the thread pool so far
+     */
+    int docsRead = 0;
+
+    /**
+     * the storage where the docs are saved to
+     */
+    DocumentStorage storage;
+
+    /**
+     * the host manager keeps track of host information
+     */
+    HostManager hostManager;
+
+
+    /**
+     * initializes the fetcher with the given number of threads in the thread
+     * pool and a document storage.
+     *
+     * @param maxThreads   the number of threads in the ThreadPool
+     * @param storage      the storage where all documents are stored
+     * @param hostManager  the host manager
+     */
+    public Fetcher(int maxThreads, DocumentStorage storage, HostManager hostManager)
+    {
+        this.storage = storage;
+        FetcherTask.setStorage(storage);
+        fetcherPool = new ThreadPool(maxThreads, new FetcherThreadFactory(hostManager));
+        fetcherPool.setQueue(new FetcherTaskQueue());
+        docsRead = 0;
+        this.hostManager = hostManager;
+    }
+
+
+    /**
+     * initializes the thread pool that was created in the constructor
+     */
+    public void init()
+    {
+        fetcherPool.init();
+    }
+
+
+    /**
+     * initializes the thread pool and resets the count of documents read
+     *
+     * @param maxThreads  currently ignored; the pool size is set in the constructor
+     */
+    public void init(int maxThreads)
+    {
+        fetcherPool.init();
+        docsRead = 0;
+    }
+
+
+    /**
+     * this function will be called by the message handler each time a URL
+     * passes all filters and gets to the fetcher. From here, it will be
+     * distributed to the FetcherPool, a thread pool which carries out the task,
+     * that is to fetch the document from the web.
+     *
+     * @param message  the message; it is cast to URLMessage without a check
+     * @return         always null, because the fetcher consumes the message
+     */
+    public Message handleRequest(Message message)
+    {
+        URLMessage urlMessage = (URLMessage) message;
+
+        fetcherPool.doTask(new FetcherTask(urlMessage), "");
+        docsRead++;
+
+        // eat the message
+        return null;
+    }
+
+
+    /**
+     * called by the message handler when this object is added to it
+     *
+     * @param handler  the message handler
+     */
+    public void notifyAddedToMessageHandler(MessageHandler handler)
+    {
+        this.messageHandler = handler;
+        FetcherTask.setMessageHandler(handler);
+    }
+
+
+    MessageHandler messageHandler;
+
+
+    /**
+     * the thread pool observer will be called each time a thread changes its
+     * state, i.e. from IDLE to RUNNING, and each time the number of thread
+     * queue entries change.
+     * this just wraps the thread pool method
+     *
+     * @param t  the class that implements the ThreadPoolObserver interface
+     */
+    public void addThreadPoolObserver(ThreadPoolObserver t)
+    {
+        fetcherPool.addThreadPoolObserver(t);
+    }
+
+
+    /**
+     * returns the number of tasks queued. Should return 0 if there are any idle
+     * threads. this method just wraps the ThreadPool method
+     *
+     * @return   The queueSize value
+     */
+    public int getQueueSize()
+    {
+        return fetcherPool.getQueueSize();
+    }
+
+
+    /**
+     * get the total number of threads (idle plus busy).
+     * this method just adds up the two ThreadPool counters
+     *
+     * @return   The workingThreadsCount value
+     */
+    public int getWorkingThreadsCount()
+    {
+        return fetcherPool.getIdleThreadsCount() + fetcherPool.getBusyThreadsCount();
+    }
+
+
+    /**
+     * get the number of threads that are currently idle.
+     * this method just wraps the ThreadPool method
+     *
+     * @return   The idleThreadsCount value
+     */
+    public int getIdleThreadsCount()
+    {
+        return fetcherPool.getIdleThreadsCount();
+    }
+
+
+    /**
+     * get the number of threads that are currently busy.
+     * this method just wraps the ThreadPool method
+     *
+     * @return   The busyThreadsCount value
+     */
+    public int getBusyThreadsCount()
+    {
+        return fetcherPool.getBusyThreadsCount();
+    }
+
+
+    /**
+     * Gets the threadPool attribute of the Fetcher object
+     * beware: the original object is returned
+     *
+     * @TODO remove this / make it private if possible
+     * @return   The threadPool value
+     */
+    public ThreadPool getThreadPool()
+    {
+        return fetcherPool;
+    }
+
+
+    /**
+     * Gets the number of docs counted so far (counted when handed to the pool)
+     *
+     * @return   number of docs read
+     */
+    public int getDocsRead()
+    {
+        return docsRead;
+    }
+
+
+    /**
+     * returns the (original) task queue
+     * @TODO remove this if possible
+     * @return   The taskQueue value
+     */
+    public FetcherTaskQueue getTaskQueue()
+    {
+        return (FetcherTaskQueue) this.fetcherPool.getTaskQueue();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java
new file mode 100644
index 00000000000..43b19768245
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java
@@ -0,0 +1,150 @@
+package de.lanlab.larm.fetcher;
+
+import java.awt.event.ActionListener;
+import java.awt.event.ActionEvent;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.*;
+import java.awt.event.*;
+import de.lanlab.larm.gui.*;
+import de.lanlab.larm.threads.*;
+
+/**
+ * this was used to connect the GUI to the fetcher: it registers the observers and listeners that update the frame
+ * @TODO put this into the GUI package, probably?
+ */
+public class FetcherGUIController implements ActionListener
+{
+	FetcherMain  fetcherMain;
+	FetcherSummaryFrame  fetcherFrame;
+
+
+	public FetcherGUIController(FetcherMain fetcherMainPrg, FetcherSummaryFrame fetcherFrameWin, String defaultStartURL)
+	{
+		this.fetcherMain  = fetcherMainPrg;
+		this.fetcherFrame = fetcherFrameWin;
+
+	    fetcherFrame.setRestrictTo(fetcherMain.urlScopeFilter.getRexString());
+	    fetcherFrame.setStartURL(defaultStartURL);
+
+		fetcherMain.fetcher.addThreadPoolObserver(
+		    new ThreadPoolObserver()
+		    {
+		       public void threadUpdate(int threadNr, String action, String info)
+		       {
+		            String status = threadNr + ": " + action + ": " + info; // NOTE(review): never used below
+		            fetcherFrame.setIdleThreadsCount(fetcherMain.fetcher.getIdleThreadsCount());
+		            fetcherFrame.setBusyThreadsCount(fetcherMain.fetcher.getBusyThreadsCount());
+		            fetcherFrame.setWorkingThreadsCount(fetcherMain.fetcher.getWorkingThreadsCount());
+		       }
+
+		       public void queueUpdate(String info, String action)
+		       {
+		            fetcherFrame.setRequestQueueCount(fetcherMain.fetcher.getQueueSize());
+		       }
+		    }
+		);
+
+		fetcherMain.monitor.addObserver(new Observer()
+		{
+		    public void update(Observable o, Object arg)
+		    {
+		        // the ThreadMonitor has been updated
+		        //fetcherFrame.setStalledThreads(fetcherMain.monitor.getStalledThreadCount(10, 500.0));
+		        //fetcherFrame.setBytesPerSecond(fetcherMain.monitor.getAverageReadCount(5));
+		        // fetcherFrame.setDocsPerSecond(fetcherMain.monitor.getDocsPerSecond(5));
+		        // we take the opportunity to display the current memory figures
+		        fetcherFrame.setFreeMem(Runtime.getRuntime().freeMemory());
+		        fetcherFrame.setTotalMem(Runtime.getRuntime().totalMemory());
+
+		    }
+
+		});
+
+	/*	fetcherMain.reFilter.addObserver(
+		    new Observer()
+		    {
+		        public void update(Observable o, Object arg)
+		        {
+		            fetcherFrame.setRobotsTxtCount(fetcherMain.reFilter.getExcludingHostsCount());
+		        }
+		    }
+		);*/
+
+        fetcherMain.messageHandler.addMessageQueueObserver(new Observer()
+            {
+                public void update(Observable o, Object arg)
+                {
+                    // a message has been added or deleted
+
+                    fetcherFrame.setURLsQueued(fetcherMain.messageHandler.getQueued());
+                }
+
+            }
+        );
+
+		// this observer will be called if a filter has decided to throw a
+		// message away.
+        fetcherMain.messageHandler.addMessageProcessorObserver(new Observer()
+            {
+                public void update(Observable o, Object arg)
+                {
+                    if(arg == fetcherMain.urlScopeFilter)
+                    {
+                        fetcherFrame.setScopeFiltered(fetcherMain.urlScopeFilter.getFiltered());
+                    }
+                    else if(arg == fetcherMain.urlVisitedFilter)
+                    {
+                        fetcherFrame.setVisitedFiltered(fetcherMain.urlVisitedFilter.getFiltered());
+                    }
+                    else if(arg == fetcherMain.reFilter)
+                    {
+                        fetcherFrame.setURLsCaughtCount(fetcherMain.reFilter.getFiltered());
+                    }
+					else // it's the fetcher
+					{
+						fetcherFrame.setDocsRead(fetcherMain.fetcher.getDocsRead());
+					}
+                }
+            }
+        );
+
+		fetcherFrame.addWindowListener(
+			new WindowAdapter()
+		    {
+		        public void windowClosed(WindowEvent e)
+		        {
+		            System.out.println("window Closed");
+		            System.exit(0);
+		        }
+
+
+		    }
+		);
+
+        fetcherFrame.addStartButtonListener((ActionListener)this);
+ 	}
+
+	/**
+     *   called when the start button is pressed: applies the scope expression, starts the monitor and queues the start URL
+     */
+    public void actionPerformed(ActionEvent e)
+    {
+        System.out.println("Füge Start-URL ein");
+        try
+        {
+           // urlVisitedFilter.printAllURLs();
+           // urlVisitedFilter.clearHashtable();
+			fetcherMain.setRexString(fetcherFrame.getRestrictTo());
+			fetcherMain.startMonitor();
+            fetcherMain.putURL(new URL(fetcherFrame.getStartURL()), false);
+        }
+        catch(Exception ex)
+        {
+            System.out.println("actionPerformed: Exception: " + ex.getMessage());
+        }
+    }
+
+}
+
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java
new file mode 100644
index 00000000000..2da43b08f68
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java
@@ -0,0 +1,362 @@
+/*
+ *  LARM - LANLab Retrieval Machine
+ *
+ *  $history: $
+ *
+ */
+package de.lanlab.larm.fetcher;
+
+import de.lanlab.larm.threads.ThreadPoolObserver;
+import de.lanlab.larm.threads.ThreadPool;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.*;
+import de.lanlab.larm.gui.*;
+import de.lanlab.larm.util.*;
+import de.lanlab.larm.storage.*;
+import javax.swing.UIManager;
+import HTTPClient.*;
+import org.apache.oro.text.regex.MalformedPatternException;
+
+
+/**
+ * ENTRY POINT: this class contains the main()-method of the application, does
+ * all the initializing and optionally connects the fetcher with the GUI.
+ *
+ * @author    Clemens Marschner
+ * @created   December 16, 2000
+ */
+public class FetcherMain
+{
+
+    /**
+     * the main message pipeline
+     */
+    protected MessageHandler messageHandler;
+
+    /**
+     * this filter records all incoming URLs and filters everything it already
+     * knows
+     */
+    protected URLVisitedFilter urlVisitedFilter;
+
+    /**
+     * the scope filter filters URLs that fall out of the scope given by the
+     * regular expression
+     */
+    protected URLScopeFilter urlScopeFilter;
+
+    /*
+     * The DNS resolver was supposed to hold the host addresses for all hosts
+     * this is done by URL itself today
+     *
+     * protected DNSResolver dnsResolver;
+     */
+
+    /**
+     * the robot exclusion filter looks if a robots.txt is present on a host
+     * before it is first accessed
+     */
+    protected RobotExclusionFilter reFilter;
+
+    /**
+     * the host manager keeps track of all hosts and is used by the filters.
+     */
+    protected HostManager hostManager;
+
+    /**
+     * this rather flaky filter just filters out some URLs, e.g. different views
+     * of the Apache DirIndex module. Has to be made
+     * configurable in the near future
+     */
+    protected KnownPathsFilter knownPathsFilter;
+
+    /**
+     * this is the main document fetcher. It contains a thread pool that fetches the
+     * documents and stores them
+     */
+    protected Fetcher fetcher;
+
+
+    /**
+     * the thread monitor once was only a monitoring tool, but now has become a
+     * vital part of the system that computes statistics and
+     * flushes the log file buffers
+     */
+
+    protected ThreadMonitor monitor;
+
+    /**
+     * the storage is a central class that puts all fetched documents somewhere.
+     * Several different implementations exist.
+     */
+    protected DocumentStorage storage;
+
+    /**
+     * the URL length filter filters URLs that are too long, i.e. because of errors
+     * in the implementation of dynamic web sites
+     */
+    protected URLLengthFilter urlLengthFilter;
+
+    /**
+     * Creates the message handler, the document storage, the filters, the
+     * host manager, the fetcher and the thread monitor, and registers the
+     * filters and the fetcher as listeners on the message handler.
+     *
+     * The monitor is only created here; it is started separately through
+     * startMonitor().
+     *
+     * @param nrThreads  number of fetcher threads to be created
+     */
+    public FetcherMain(int nrThreads)
+    {
+        // to make things clear, this method is commented a bit better than
+        // the rest of the program...
+
+        // this is the main message queue. handlers are registered with
+        // the queue, and whenever a message is put in it, they are passed to the
+        // filters in a "chain of responsibility" manner. Every listener can decide
+        // to throw the message away
+        messageHandler = new MessageHandler();
+
+        // the storage is the class which saves a WebDocument somewhere, no
+        // matter how it does it, whether it's in a file, in a database or
+        // whatever
+
+
+        // example for the (very slow) SQL Server storage:
+        // this.storage = new SQLServerStorage("sun.jdbc.odbc.JdbcOdbcDriver","jdbc:odbc:search","sa","...",nrThreads);
+
+        // the LogStorage used here does extensive logging. It logs all links and
+        // document information.
+        // it also saves all documents to page files. Probably this single storage
+        // could also be replaced by a pipeline; or even incorporated into the
+        // existing message pipeline
+        SimpleLogger log = new SimpleLogger("store", false);
+        this.storage = new LogStorage(log, true, "logs/pagefile");
+
+        // a third example would be the NullStorage, which converts the documents into
+        // heat, which evaporates above the processor
+        // NullStorage();
+
+        // create the filters and add them to the message queue
+        urlScopeFilter = new URLScopeFilter();
+
+        // the visited filter shares the "store" logger with the storage
+        urlVisitedFilter = new URLVisitedFilter(100000, log);
+
+        // dnsResolver = new DNSResolver();
+        hostManager = new HostManager(1000);
+
+        reFilter = new RobotExclusionFilter(hostManager);
+
+        fetcher = new Fetcher(nrThreads, storage, hostManager);
+
+        knownPathsFilter = new KnownPathsFilter();
+
+        urlLengthFilter = new URLLengthFilter(255);
+
+        // prevent message box popups
+        HTTPConnection.setDefaultAllowUserInteraction(false);
+
+        // prevent GZipped files from being decoded
+        HTTPConnection.removeDefaultModule(HTTPClient.ContentEncodingModule.class);
+
+        // initialize the threads
+        fetcher.init();
+
+        // the thread monitor watches the thread pool.
+        // NOTE: the known-paths filter is not passed to the monitor
+
+        monitor = new ThreadMonitor(urlLengthFilter,
+                urlVisitedFilter,
+                urlScopeFilter,
+                /*dnsResolver,*/
+                reFilter,
+                messageHandler,
+                fetcher.getThreadPool(),
+                hostManager,
+                5000        // wake up every 5 seconds
+                );
+
+
+        // add all filters to the handler; the fetcher is registered last,
+        // after all filters
+        messageHandler.addListener(urlLengthFilter);
+        messageHandler.addListener(urlScopeFilter);
+        messageHandler.addListener(reFilter);
+        messageHandler.addListener(urlVisitedFilter);
+        messageHandler.addListener(knownPathsFilter);
+        messageHandler.addListener(fetcher);
+
+        /* uncomment this to enable HTTPClient logging
+        try
+        {
+            HTTPClient.Log.setLogWriter(new java.io.FileWriter("logs/HttpClient.log"),false);
+            HTTPClient.Log.setLogging(HTTPClient.Log.ALL, true);
+        }
+        catch (Exception e)
+        {
+            e.printStackTrace();
+        }
+        */
+    }
+
+
+    /**
+     * Forwards the given regular expression to the URL scope filter.
+     *
+     * @param restrictTo                      the expression handed to the scope filter
+     * @exception MalformedPatternException  passed on from the scope filter
+     */
+    public void setRexString(String restrictTo) throws MalformedPatternException
+    {
+        this.urlScopeFilter.setRexString(restrictTo);
+    }
+
+
+    /**
+     * Wraps the URL in a URLMessage (second constructor argument null) and
+     * puts it into the message handler. Every exception raised while doing so
+     * is caught here and printed to standard output together with its stack
+     * trace.
+     *
+     * @param url                                 the URL to be queued
+     * @param isFrame                             passed on to the URLMessage
+     * @exception java.net.MalformedURLException  declared in the signature; the
+     *                                            body catches all exceptions itself
+     */
+    public void putURL(URL url, boolean isFrame)
+        throws java.net.MalformedURLException
+    {
+        try
+        {
+            URLMessage startMessage = new URLMessage(url, null, isFrame);
+            messageHandler.putMessage(startMessage);
+        }
+        catch (Exception problem)
+        {
+            System.out.println("Exception: " + problem.getMessage());
+            problem.printStackTrace();
+        }
+    }
+
+
+    /**
+     * Starts the thread monitor that was created in the constructor.
+     */
+    public void startMonitor()
+    {
+        this.monitor.start();
+    }
+
+
+
+    /*
+     * the GUI is not working at this time. It was used in the very beginning, but
+     * synchronous updates turned out to slow down the program a lot, even if the
+     * GUI was turned off. Thus, a lot
+     * of Observer messages were removed later. Nonetheless, it's quite cool to see
+     * it working...
+     *
+     * @param f         Description of Parameter
+     * @param startURL  Description of Parameter
+     */
+
+     /*
+    public void initGui(FetcherMain f, String startURL)
+    {
+        // if we're on a windows platform, make it look a bit more convenient
+        try
+        {
+            UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+        }
+        catch (Exception e)
+        {
+            // dann halt nicht...
+        }
+        System.out.println("Init FetcherFrame");
+
+        FetcherSummaryFrame fetcherFrame;
+        fetcherFrame = new FetcherSummaryFrame();
+        fetcherFrame.setSize(640, 450);
+        fetcherFrame.setVisible(true);
+        FetcherGUIController guiController = new FetcherGUIController(f, fetcherFrame, startURL);
+    }
+        */
+
+
+    /**
+     * The main program. Parses the command line, prints the usage text when
+     * -? is given or when no start URL was supplied, and otherwise creates a
+     * FetcherMain, applies the scope restriction, starts the monitor and
+     * queues the start URL.
+     *
+     * Recognized options: -start &lt;URL&gt;, -restrictto &lt;RegEx&gt;,
+     * -threads &lt;nr&gt;, -gui (currently without effect, the GUI is
+     * disabled) and -?.
+     *
+     * All command line checks are done before the FetcherMain is constructed,
+     * so that a wrong invocation does not set up the fetcher first.
+     *
+     * @param args  The command line arguments
+     */
+    public static void main(String[] args)
+    {
+        int nrThreads = 10;
+
+        String startURL = "";
+        String restrictTo = "http://141.84.120.82/ll/cmarschn/.*";
+        boolean gui = false;
+        boolean showInfo = false;
+        System.out.println("LARM - LANLab Retrieval Machine - Fetcher - V 1.00 - (C) LANLab 2000-02");
+        for (int i = 0; i < args.length; i++)
+        {
+            if (args[i].equals("-start"))
+            {
+                startURL = optionValue(args, ++i);
+                System.out.println("Start-URL set to: " + startURL);
+            }
+            else if (args[i].equals("-restrictto"))
+            {
+                restrictTo = optionValue(args, ++i);
+                System.out.println("Restricting URLs to " + restrictTo);
+            }
+            else if (args[i].equals("-threads"))
+            {
+                String threadArg = optionValue(args, ++i);
+                try
+                {
+                    nrThreads = Integer.parseInt(threadArg);
+                }
+                catch (NumberFormatException e)
+                {
+                    // previously this ended in an uncaught exception
+                    System.out.println("Invalid number of threads: " + threadArg + "; use -? to get syntax");
+                    System.exit(1);
+                }
+                System.out.println("Threads set to " + nrThreads);
+            }
+            else if (args[i].equals("-gui"))
+            {
+                gui = true;
+            }
+            else if (args[i].equals("-?"))
+            {
+                showInfo = true;
+            }
+            else
+            {
+                System.out.println("Unknown option: " + args[i] + "; use -? to get syntax");
+                System.exit(0);
+            }
+        }
+
+        //URL.setURLStreamHandlerFactory(new HttpTimeoutFactory(500));
+        // replaced by HTTPClient
+
+        // decide about the usage text before anything is set up
+        if (showInfo || (startURL.equals("") && gui == false))
+        {
+            System.out.println("Usage: FetcherMain -start <URL> -restrictto <RegEx> [-threads <nr=10>]"); // [-gui]
+            System.exit(0);
+        }
+
+        // the start URL is only needed (and only checked) without the GUI
+        URL start = null;
+        if (!gui)
+        {
+            try
+            {
+                start = new URL(startURL);
+            }
+            catch (MalformedURLException e)
+            {
+                System.out.println("Malformed URL");
+                return;
+            }
+        }
+
+        FetcherMain f = new FetcherMain(nrThreads);
+        try
+        {
+            f.setRexString(restrictTo);
+
+            if (gui)
+            {
+                // f.initGui(f, startURL);
+            }
+            else
+            {
+                f.startMonitor();
+                f.putURL(start, false);
+            }
+        }
+        catch (MalformedPatternException e)
+        {
+            System.out.println("Wrong RegEx syntax. Must be a valid PERL RE");
+        }
+        catch (MalformedURLException e)
+        {
+            System.out.println("Malformed URL");
+        }
+    }
+
+
+    /**
+     * Returns the value that follows an option on the command line. If the
+     * option was the last argument, an error message is printed and the
+     * program is terminated with exit code 1.
+     *
+     * @param args        the command line arguments
+     * @param valueIndex  index at which the option's value is expected
+     * @return            the argument at valueIndex
+     */
+    private static String optionValue(String[] args, int valueIndex)
+    {
+        if (valueIndex >= args.length)
+        {
+            System.out.println("Missing value for option: " + args[valueIndex - 1] + "; use -? to get syntax");
+            System.exit(1);
+        }
+        return args[valueIndex];
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java
new file mode 100644
index 00000000000..9f6edf904e4
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java
@@ -0,0 +1,617 @@
+/*
+ *  LARM - LANLab Retrieval Machine
+ *
+ *  $history: $
+ *
+ */
+package de.lanlab.larm.fetcher;
+
+import java.net.URL;
+import de.lanlab.larm.threads.*;
+import de.lanlab.larm.util.InputStreamObserver;
+import de.lanlab.larm.util.ObservableInputStream;
+import de.lanlab.larm.util.WebDocument;
+import de.lanlab.larm.util.SimpleCharArrayReader;
+import de.lanlab.larm.storage.DocumentStorage;
+import de.lanlab.larm.util.State;
+import de.lanlab.larm.util.SimpleLogger;
+import de.lanlab.larm.net.HttpTimeoutFactory;
+import HTTPClient.*;
+import java.net.*;
+import java.io.*;
+import java.util.*;
+import java.text.*;
+import de.lanlab.larm.parser.Tokenizer;
+import de.lanlab.larm.parser.LinkHandler;
+
+/**
+ * this class gets the documents from the web. It connects to the server given
+ * by the IP address in the URLMessage, gets the document, and forwards it to
+ * the storage. If it's an HTML document, it will be parsed and all links will
+ * be put into the message handler again.
+ *
+ * @author    Clemens Marschner
+ *
+ */
+public class FetcherTask
+         implements InterruptableTask, LinkHandler, Serializable
+{
+
+    protected volatile boolean isInterrupted = false;
+
+    /**
+     * each task has its own number. the class variable counts up if an instance
+     * of a fetcher task is created
+     */
+    static volatile int taskIdentity = 0;
+
+    /**
+     * the number of this object
+     */
+    int taskNr;
+
+    /**
+     * the BASE Href (defaults to contextUrl, may be changed with a <base> tag);
+     * only valid within a doTask call
+     */
+    private volatile URL base;
+
+    /**
+     * the URL of the document
+     * only valid within a doTask call
+     */
+    private volatile URL contextUrl;
+
+    /**
+     * the message handler the URL message comes from; same for all tasks
+     */
+    protected static volatile MessageHandler messageHandler;
+
+    /**
+     * actual number of bytes read
+     * only valid within a doTask call
+     */
+    private volatile long bytesRead = 0;
+
+    /**
+     * the storage this task will put the document to
+     */
+    private static volatile DocumentStorage storage;
+
+    /**
+     * task state IDs. comparisons will be done by their references, so always
+     * use the IDs
+     */
+    public final static String FT_IDLE = "idle";
+    public final static String FT_STARTED = "started";
+    public final static String FT_OPENCONNECTION = "opening connection";
+    public final static String FT_CONNECTING = "connecting";
+    public final static String FT_GETTING = "getting";
+    public final static String FT_READING = "reading";
+    public final static String FT_SCANNING = "scanning";
+    public final static String FT_STORING = "storing";
+    public final static String FT_READY = "ready";
+    public final static String FT_CLOSING = "closing";
+    public final static String FT_EXCEPTION = "exception";
+    public final static String FT_INTERRUPTED = "interrupted";
+
+    private volatile State taskState = new State(FT_IDLE);
+
+    /**
+     * the URLs found will be stored and only added to the message handler in the very
+     * end, to avoid too many synchronizations
+     */
+    private volatile LinkedList foundUrls;
+
+    /**
+     * the URL to be fetched
+     */
+    protected volatile URLMessage actURLMessage;
+
+    /**
+     * the document title, if present
+     */
+    private volatile String title;
+
+    /**
+     * headers for HTTPClient
+     */
+    private static volatile NVPair headers[] = new NVPair[1];
+
+    static
+    {
+        headers[0] = new HTTPClient.NVPair("User-Agent", Constants.CRAWLER_AGENT);
+
+    }
+
+
+    /**
+     * Returns a clone of the task state, as produced by State.cloneState()
+     *
+     * @return   the cloned taskState
+     */
+    public State getTaskState()
+    {
+        final State snapshot = taskState.cloneState();
+        return snapshot;
+    }
+
+
+    /**
+     * Creates a task for the given URL message
+     *
+     * @param urlMessage   the message that is kept as this task's actURLMessage
+     */
+    public FetcherTask(URLMessage urlMessage)
+    {
+        this.actURLMessage = urlMessage;
+    }
+
+
+    /**
+     * Gets the URL message this task was created with
+     *
+     * @return   the actURLMessage
+     */
+    public URLMessage getActURLMessage()
+    {
+        return actURLMessage;
+    }
+
+
+    /**
+     * Sets the document storage. The storage is held in a static field and
+     * is therefore shared by all FetcherTask instances.
+     *
+     * @param storage  The new storage
+     */
+    public static void setStorage(DocumentStorage storage)
+    {
+        FetcherTask.storage = storage;
+    }
+
+
+    /**
+     * Sets the messageHandler. The handler is held in a static field and
+     * is therefore shared by all FetcherTask instances.
+     *
+     * @param messageHandler  The new messageHandler
+     */
+    public static void setMessageHandler(MessageHandler messageHandler)
+    {
+        FetcherTask.messageHandler = messageHandler;
+    }
+
+
+    /**
+     * Describes this task by the URL string of its URL message
+     *
+     * @return   the URL as a string
+     */
+    public String getInfo()
+    {
+        return this.actURLMessage.getURLString();
+    }
+
+
+    /**
+     * Gets the URL of this task's URL message
+     *
+     * @return   the URL
+     */
+    public URL getURL()
+    {
+        return this.actURLMessage.getUrl();
+    }
+
+    SimpleLogger log;
+    SimpleLogger errorLog;
+    //private long startTime;
+
+    /**
+     * this will be called by the fetcher thread and will do all the work:
+     * it gets the document of the current URL message via HTTPClient, scans
+     * text/html documents for links, puts the found links into the message
+     * handler and hands the document to the storage.
+     *
+     * Errors are reported on standard output and in the thread's error log;
+     * they are not propagated to the caller.
+     *
+     * @TODO probably split this up into different processing steps
+     * @param thread  the thread executing this task; is cast to FetcherThread
+     */
+    public void run(ServerThread thread)
+    {
+
+        taskState.setState(FT_STARTED); // state information is always set to make the thread monitor happy
+
+        log = thread.getLog();
+        HostManager hm = ((FetcherThread)thread).getHostManager();
+
+        errorLog = thread.getErrorLog();
+
+        // startTime = System.currentTimeMillis();
+        int threadNr = ((FetcherThread) thread).getThreadNumber();
+
+        log.log("start");
+        base = contextUrl = actURLMessage.getUrl();
+        String urlString = actURLMessage.getURLString();
+        String host = contextUrl.getHost();
+
+        HostInfo hi = hm.getHostInfo(host); // get and create
+
+        if(!hi.isHealthy())
+        {
+            // we make this check as late as possible to get the most current information
+            log.log("Bad Host: " + contextUrl + "; returning");
+            System.out.println("[" + threadNr + "] bad host: " + this.actURLMessage.getUrl());
+
+            taskState.setState(FT_READY, null);
+            return;
+        }
+
+        foundUrls = new java.util.LinkedList();
+
+        HTTPConnection conn = null;
+
+        title = "*untitled*";
+
+        int size = 1;
+
+        bytesRead = 0;
+
+
+        try
+        {
+
+            URL ipURL = contextUrl;
+
+            taskState.setState(FT_OPENCONNECTION, urlString);
+
+            log.log("connecting to " + ipURL.getHost());
+            taskState.setState(FT_CONNECTING, ipURL);
+
+            // honour an explicit port in the URL; getPort() is -1 if none was given
+            int port = ipURL.getPort();
+            if (port == -1)
+            {
+                conn = new HTTPConnection(host);
+            }
+            else
+            {
+                conn = new HTTPConnection(host, port);
+            }
+
+            conn.setDefaultTimeout(75000);
+            // 75 s
+            conn.setDefaultAllowUserInteraction(false);
+
+            taskState.setState(FT_GETTING, ipURL);
+            log.log("getting");
+
+            HTTPResponse response = conn.Get(ipURL.getFile(), "", headers);
+            response.setReadIncrement(2720);
+            int statusCode = response.getStatusCode();
+            byte[] fullBuffer = null;
+            String contentType = "";
+            int contentLength = 0;
+
+            if (statusCode != 404 && statusCode != 403)
+            {
+                // read up to Constants.FETCHERTASK_MAXFILESIZE bytes into a byte array
+                taskState.setState(FT_READING, ipURL);
+                contentType = response.getHeader("Content-Type");
+                String length = response.getHeader("Content-Length");
+                if (length != null)
+                {
+                    try
+                    {
+                        contentLength = Integer.parseInt(length.trim());
+                    }
+                    catch (NumberFormatException e)
+                    {
+                        // a broken header must not abort the fetch; the real
+                        // length is taken from the buffer below
+                        log.log("warning: invalid Content-Length: " + length);
+                    }
+                }
+                log.log("reading");
+
+                fullBuffer = response.getData(Constants.FETCHERTASK_MAXFILESIZE); // max. 2 MB
+                if (fullBuffer != null)
+                {
+                    contentLength = fullBuffer.length;
+                    this.bytesRead += contentLength;
+                }
+            }
+            //conn.stop();    // close connection. todo: Do some caching...
+
+
+            /*
+             *  conn.disconnect();
+             */
+            if (isInterrupted)
+            {
+                System.out.println("FetcherTask: interrupted while reading. File truncated");
+                log.log("interrupted while reading. File truncated");
+            }
+            else
+            {
+                if (fullBuffer != null)
+                {
+                    taskState.setState(FT_SCANNING, ipURL);
+
+                    log.log("read file (" + fullBuffer.length + " bytes). Now scanning.");
+
+                    // the Content-Type header may be missing (null)
+                    if (contentType != null && contentType.startsWith("text/html"))
+                    {
+
+                        // ouch. I haven't found a better solution yet. just slower ones.
+                        char[] fullCharBuffer = new char[contentLength];
+                        Reader decoder = new InputStreamReader(new ByteArrayInputStream(fullBuffer));
+
+                        // a single read() is not guaranteed to deliver everything
+                        int charsRead = 0;
+                        while (charsRead < fullCharBuffer.length)
+                        {
+                            int n = decoder.read(fullCharBuffer, charsRead, fullCharBuffer.length - charsRead);
+                            if (n < 0)
+                            {
+                                break;
+                            }
+                            charsRead += n;
+                        }
+                        if (charsRead < fullCharBuffer.length)
+                        {
+                            // fewer chars than bytes (multi-byte encoding):
+                            // don't pass the unused rest of the buffer to the parser
+                            char[] trimmed = new char[charsRead];
+                            System.arraycopy(fullCharBuffer, 0, trimmed, 0, charsRead);
+                            fullCharBuffer = trimmed;
+                        }
+
+                        Tokenizer tok = new Tokenizer();
+                        tok.setLinkHandler(this);
+                        tok.parse(new SimpleCharArrayReader(fullCharBuffer));
+                    }
+                    else
+                    {
+                        // System.out.println("Discovered unknown content type: " + contentType + " at " + urlString);
+                        errorLog.log("[" + threadNr + "] Discovered unknown content type at " + urlString + ": " + contentType + ". just storing");
+                    }
+                    log.log("scanned");
+                }
+                taskState.setState(FT_STORING, ipURL);
+                messageHandler.putMessages(foundUrls);
+                storage.store(new WebDocument(contextUrl, contentType, fullBuffer, statusCode, actURLMessage.getReferer(), contentLength, title));
+                log.log("stored");
+            }
+        }
+        catch (InterruptedIOException e)
+        {
+            // timeout while reading this file
+            System.out.println("[" + threadNr + "] FetcherTask: Timeout while opening: " + this.actURLMessage.getUrl());
+            errorLog.log("error: Timeout: " + this.actURLMessage.getUrl());
+            hi.badRequest();
+        }
+        catch (FileNotFoundException e)
+        {
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] FetcherTask: File not Found: " + this.actURLMessage.getUrl());
+            errorLog.log("error: File not Found: " + this.actURLMessage.getUrl());
+        }
+        catch(NoRouteToHostException e)
+        {
+            // router is down or firewall prevents to connect
+            hi.setReachable(false);
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
+            // e.printStackTrace();
+            errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
+        }
+        catch(ConnectException e)
+        {
+            // no server is listening at this port
+            hi.setReachable(false);
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
+            // e.printStackTrace();
+            errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
+        }
+        catch (SocketException e)
+        {
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "]: SocketException:" + e.getMessage());
+            errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
+
+        }
+        catch(UnknownHostException e)
+        {
+            // IP Address not to be determined
+            hi.setReachable(false);
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
+            // e.printStackTrace();
+            errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage());
+
+        }
+        catch (IOException e)
+        {
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] " + e.getClass().getName() + ": " + e.getMessage());
+            // e.printStackTrace();
+            errorLog.log("error: IOException: " + e.getClass().getName() + ": " + e.getMessage());
+
+        }
+        catch (OutOfMemoryError ome)
+        {
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] Task " + this.taskNr + " OutOfMemory after " + size + " bytes");
+            errorLog.log("error: OutOfMemory after " + size + " bytes");
+        }
+        catch (Throwable e)
+        {
+            taskState.setState(FT_EXCEPTION);
+            System.out.println("[" + threadNr + "] " + e.getMessage() + " type: " + e.getClass().getName());
+            e.printStackTrace();
+            System.out.println("[" + threadNr + "]: stopping");
+            errorLog.log("error: " + e.getClass().getName() + ": " + e.getMessage() + "; stopping");
+
+        }
+        finally
+        {
+
+            if (isInterrupted)
+            {
+                System.out.println("Task was interrupted");
+                log.log("interrupted");
+                taskState.setState(FT_INTERRUPTED);
+            }
+        }
+        if (isInterrupted)
+        {
+            System.out.println("Task: closed everything");
+        }
+
+        taskState.setState(FT_CLOSING);
+        // conn is still null if something failed before the connection was created
+        if (conn != null)
+        {
+            conn.stop();
+        }
+
+        taskState.setState(FT_READY);
+        foundUrls = null;
+    }
+
+
+    /**
+     * the interrupt method. not in use since the change to HTTPClient.
+     * It prints a message and sets the isInterrupted flag, which run()
+     * checks after reading; the connection itself is not touched here.
+     * @TODO decide if we need this anymore
+     */
+    public void interrupt()
+    {
+        System.out.println("FetcherTask: interrupted!");
+        this.isInterrupted = true;
+        // former implementation, kept for reference: it closed the URL
+        // connection and the input stream
+        /*
+         *  try
+         *  {
+         *  if (conn != null)
+         *  {
+         *  ((HttpURLConnection) conn).disconnect();
+         *  System.out.println("FetcherTask: disconnected URL Connection");
+         *  conn = null;
+         *  }
+         *  if (in != null)
+         *  {
+         *  in.close();
+         *  / possibly hangs at close() .> KeepAliveStream.close() -> MeteredStream.skip()
+         *  System.out.println("FetcherTask: Closed Input Stream");
+         *  in = null;
+         *  }
+         *  }
+         *  catch (IOException e)
+         *  {
+         *  System.out.println("IOException while interrupting: ");
+         *  e.printStackTrace();
+         *  }
+         *  System.out.println("FetcherTask: Set all IOs to null");
+         */
+    }
+
+
+    /**
+     * Link callback of the parser: strips the fragment part ("#...") from the
+     * link, turns the rest into a URL (relative links are resolved against
+     * the current base) and collects it as a URLMessage in foundUrls.
+     * Links consisting only of a fragment are ignored. Exceptions are written
+     * to the log as warnings.
+     *
+     * Don't create too many objects here, this will be called
+     * millions of times
+     *
+     * @param link     the link as found in the document
+     * @param isFrame  passed on to the URLMessage
+     */
+    public void handleLink(String link, boolean isFrame)
+    {
+        try
+        {
+            // cut out Ref part
+            final int hashPos = link.indexOf("#");
+            if (hashPos == 0)
+            {
+                return;
+            }
+            if (hashPos > 0)
+            {
+                link = link.substring(0, hashPos);
+            }
+
+            // distinguish between absolute and relative URLs
+            final URL target = link.startsWith("http:")
+                     ? new URL(link)
+                     : new URL(base, link);
+
+            final URLMessage found = new URLMessage(target, contextUrl, isFrame);
+
+            // result is not used; the call is kept because the original made it
+            String foundString = found.getURLString();
+
+            // the messages are put into the message handler in the very end
+            foundUrls.add(found);
+        }
+        catch (Exception e)
+        {
+            // covers MalformedURLException as well; both were logged the same way
+            log.log("warning: " + e.getClass().getName() + ": " + e.getMessage());
+        }
+
+    }
+
+
+    /**
+     * called when a BASE tag was found. Replaces the current base URL; if the
+     * attribute is not a valid URL, a warning is logged and the base is left
+     * unchanged.
+     *
+     * @param base  the HREF attribute
+     */
+    public void handleBase(String base)
+    {
+        try
+        {
+            final URL parsedBase = new URL(base);
+            this.base = parsedBase;
+        }
+        catch (MalformedURLException malformed)
+        {
+            log.log("warning: " + malformed.getClass().getName() + ": " + malformed.getMessage() + " while converting '" + base + "' to URL in document " + contextUrl);
+        }
+    }
+
+
+    /**
+     * called when a TITLE tag was found; remembers the title in this task
+     *
+     * @param title  the string between &lt;title&gt; and &lt;/title&gt;
+     */
+    public void handleTitle(String title)
+    {
+        this.title = title;
+    }
+
+
+
+    /*
+     *  public void notifyOpened(ObservableInputStream in, long timeElapsed)
+     *  {
+     *  }
+     *  public void notifyClosed(ObservableInputStream in, long timeElapsed)
+     *  {
+     *  }
+     *  public void notifyRead(ObservableInputStream in, long timeElapsed, int nrRead, int totalRead)
+     *  {
+     *  if(totalRead / ((double)timeElapsed) < 0.3) // weniger als 300 bytes/s
+     *  {
+     *  System.out.println("Task " + this.taskNr + " stalled at pos " + totalRead + " with " + totalRead / (timeElapsed / 1000.0) + " bytes/s");
+     *  }
+     *  }
+     *  public void notifyFinished(ObservableInputStream in, long timeElapsed, int totalRead)
+     *  {
+     *  /System.out.println("Task " + this.taskNr + " finished (" + totalRead + " bytes in " + timeElapsed + " ms with " + totalRead / (timeElapsed / 1000.0) + " bytes/s)");
+     *  }
+     */
+    /**
+     * Gets the value of the bytesRead counter
+     *
+     * @return   the number of bytes read
+     */
+    public long getBytesRead()
+    {
+        return bytesRead;
+    }
+
+
+    /**
+     * do nothing if a warning occurs within the html parser
+     *
+     * @param message                  Description of the Parameter
+     * @param systemID                 Description of the Parameter
+     * @param line                     Description of the Parameter
+     * @param column                   Description of the Parameter
+     * @exception java.lang.Exception  Description of the Exception
+     */
+    public void warning(String message, String systemID, int line, int column)
+        throws java.lang.Exception { }
+
+
+    /**
+     * do nothing if a fatal error occurs...
+     *
+     * @param message        Description of the Parameter
+     * @param systemID       Description of the Parameter
+     * @param line           Description of the Parameter
+     * @param column         Description of the Parameter
+     * @exception Exception  Description of the Exception
+     */
+    public void fatal(String message, String systemID, int line, int column)
+        throws Exception
+    {
+        System.out.println("fatal error: " + message);
+        log.log("fatal error: " + message);
+    }
+
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java
new file mode 100644
index 00000000000..f2c9083708b
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java
@@ -0,0 +1,198 @@
+package de.lanlab.larm.fetcher;
+
+import de.lanlab.larm.threads.*;
+import de.lanlab.larm.util.*;
+import java.util.*;
+import java.net.URL;
+
+/**
+ * this special kind of task queue reorders the incoming tasks so that every subsequent
+ * task is for a different host.
+ * This is done by a "HashedCircularLinkedList" which allows random adding while
+ * a different thread iterates through the collection circularly.
+ *
+ * @author    Clemens Marschner
+ * @created   23. November 2001
+ */
+public class FetcherTaskQueue extends TaskQueue
+{
+    /**
+     * this is a hash that contains an entry for each server, which by itself is a
+     * CachingQueue that stores all tasks for this server
+     * @TODO probably link this to the host info structure
+     */
+    HashedCircularLinkedList servers = new HashedCircularLinkedList(100, 0.75f);
+    int size = 0;   // number of tasks currently held over all per-host queues
+
+
+    /**
+     * Constructor for the FetcherTaskQueue object. Does nothing
+     */
+    public FetcherTaskQueue() { }
+
+
+    /**
+     * true if no task is queued
+     *
+     * @return   true if the task counter is 0
+     */
+    public boolean isEmpty()
+    {
+        return (size == 0);
+    }
+
+
+    /**
+     * clear the queue: drops all hosts and resets the task counter. not synchronized.
+     */
+    public void clear()
+    {
+        servers.clear();
+        size = 0;   // keep isEmpty() and size() in step with the emptied host list
+    }
+
+    /**
+     * puts task into Queue.
+     * Warning: not synchronized
+     *
+     * @param t  the task to be added. must be a FetcherTask
+     */
+    public void insert(Object t)
+    {
+        // assert (t != null && t.getURL() != null)
+
+        URLMessage um = ((FetcherTask)t).getActURLMessage();
+        URL act = um.getUrl();
+        String host = act.getHost();
+        Queue q;
+        q = ((Queue) servers.get(host));
+        if (q == null)
+        {
+            // add a new host to the queue
+            //String host2 = host.replace(':', '_').replace('/', '_').replace('\\', '_');
+            // make it file system ready
+            q = new CachingQueue(host, 100);
+            servers.put(host, q);
+        }
+        // assert((q != null) && (q instanceof Queue));
+        q.insert(t);
+        size++;
+    }
+
+
+    /**
+     * the size of the queue. make sure that insert() and size() calls are synchronized
+     * if the exact number matters.
+     *
+     * @return   the number of tasks currently queued
+     */
+    public int size()
+    {
+        return size;
+    }
+
+    /**
+     * the number of different hosts queued at the moment
+     */
+    public int getNumHosts()
+    {
+        return servers.size();
+    }
+
+    /**
+     * get the next task. warning: not synchronized
+     *
+     * @return   the next task, or null if no host is queued
+     */
+    public Object remove()
+    {
+        FetcherTask t = null;
+        if (servers.size() > 0)
+        {
+            Queue q = (Queue) servers.next();
+            // assert(q != null && q.size() > 0)
+            t = (FetcherTask)q.remove();
+            if (q.size() == 0)
+            {
+                // that was the host's last task: take the host out of the rotation
+                servers.removeCurrent();
+            }
+            size--;
+        }
+        return t;
+    }
+
+
+    /**
+     * tests
+     *
+     * @param args  not used
+     */
+    public static void main(String args[])
+    {
+        FetcherTaskQueue q = new FetcherTaskQueue();
+        System.out.println("Test 1. put in 4 yahoos and 3 lmus. pull out LMU/Yahoo/LMU/Yahoo/LMU/Yahoo/Yahoo");
+        try
+        {
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false)));
+        }
+        catch (Throwable t)
+        {
+            t.printStackTrace();
+        }
+
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+        System.out.println(((FetcherTask) q.remove()).getInfo());
+
+        System.out.println("Test 2. new Queue");
+        q = new FetcherTaskQueue();
+        System.out.println("size [0]:");
+        System.out.println(q.size());
+        try
+        {
+            System.out.println("put 3 lmus.");
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false)));
+            System.out.print("pull out 1st element [lmu/1]: ");
+            System.out.println(((FetcherTask) q.remove()).getInfo());
+            System.out.println("size now [2]: " + q.size());
+            System.out.print("pull out 2nd element [lmu/2]: ");
+            System.out.println(((FetcherTask) q.remove()).getInfo());
+            System.out.println("size now [1]: " + q.size());
+            System.out.println("put in 3 yahoos");
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false)));
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false)));
+            System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
+            System.out.println("Size now [3]: " + q.size());
+            System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
+            System.out.println("Size now [2]: " + q.size());
+            System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
+            System.out.println("Size now [1]: " + q.size());
+            System.out.println("put in another Yahoo");
+            q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false)));
+            System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
+            System.out.println("Size now [1]: " + q.size());
+            System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo());
+            System.out.println("Size now [0]: " + q.size());
+        }
+        catch (Throwable t)
+        {
+            t.printStackTrace();
+        }
+
+    }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java
new file mode 100644
index 00000000000..54930fa9fc3
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java
@@ -0,0 +1,91 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.fetcher;
+
+import de.lanlab.larm.threads.ServerThread;
+import de.lanlab.larm.util.State;
+
+/**
+ * a server thread for the thread pool that records the number
+ * of bytes read and the number of tasks run
+ * mainly for statistical purposes and to keep most of the information a task needs
+ * static
+ */
+public class FetcherThread extends ServerThread
+{
+
+    /** sum of getBytesRead() of the tasks accounted for in taskReady() */
+    long totalBytesRead = 0;
+    /** incremented once per taskReady() call */
+    long totalTasksRun  = 0;
+
+    HostManager hostManager;
+
+    /** buffer of Constants.FETCHERTASK_READSIZE bytes, handed out by getDocumentBuffer() */
+    byte[] documentBuffer = new byte[Constants.FETCHERTASK_READSIZE];
+
+    public HostManager getHostManager()
+    {
+        return hostManager;
+    }
+
+    public FetcherThread(int threadNumber, ThreadGroup threadGroup, HostManager hostManager)
+    {
+        super(threadNumber,"FetcherThread " + threadNumber, threadGroup);
+        this.hostManager = hostManager;
+    }
+
+    /** name of the state reported by getTaskState() while no task is set; constant */
+    public static final String STATE_IDLE = "Idle";
+
+    State idleState = new State(STATE_IDLE); // only set if task is finished
+
+    /** adds the task's byte count to the totals, calls super.taskReady(), then sets idleState to STATE_IDLE */
+    protected void taskReady()
+    {
+        totalBytesRead += ((FetcherTask)task).getBytesRead();
+        totalTasksRun++;
+        super.taskReady();
+        idleState.setState(STATE_IDLE);
+    }
+
+    /** @return the accumulated total plus the bytes read so far by the current task, if there is one */
+    public long getTotalBytesRead()
+    {
+        // read the field once: it could turn null between a check and a later re-read
+        Object current = task;
+        if(current != null)
+        {
+            return totalBytesRead + ((FetcherTask)current).getBytesRead();
+        }
+        return totalBytesRead;
+    }
+
+    public long getTotalTasksRun()
+    {
+        return totalTasksRun;
+    }
+
+    public byte[] getDocumentBuffer()
+    {
+        return documentBuffer;
+    }
+
+    /** @return the state of the current task, or a clone of the idle state if no task is set */
+    public State getTaskState()
+    {
+        Object current = task;   // single read: the null check and the cast use the same reference
+        if(current != null)
+        {
+            return ((FetcherTask)current).getTaskState();
+        }
+        return idleState.cloneState();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java
new file mode 100644
index 00000000000..99035c24ee0
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java
@@ -0,0 +1,38 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.fetcher;
+import de.lanlab.larm.threads.*;
+
+/**
+ * Thread factory that produces FetcherThread instances. It is handed to the
+ * ThreadPool, which creates its threads on its own through this factory.
+ */
+public class FetcherThreadFactory extends ThreadFactory
+{
+
+    /** host manager passed on to every thread this factory creates */
+    HostManager hostManager;
+
+    /** thread group that all created threads are placed in */
+    ThreadGroup threadGroup = new ThreadGroup("FetcherThreads");
+
+    public FetcherThreadFactory(HostManager hostManager)
+    {
+        this.hostManager = hostManager;
+    }
+
+    /** builds the fetcher thread with number <code>count</code> and sets its priority to 4 */
+    public ServerThread createServerThread(int count)
+    {
+        FetcherThread worker = new FetcherThread(count, threadGroup, hostManager);
+        worker.setPriority(4);
+        return worker;
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java
new file mode 100644
index 00000000000..0a3be1c0e7e
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java
@@ -0,0 +1,29 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.fetcher;
+
+
+/**
+ * Common abstract superclass of the filter classes; holds the filtered-items counter.
+ */
+public abstract class Filter
+{
+    /**
+     * how many items have been filtered so far. the inheriting
+     * classes increment this field directly
+     */
+    protected int filtered = 0;
+
+    /** @return the current value of the filtered counter */
+    public int getFiltered()
+    {
+        return this.filtered;
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java
new file mode 100644
index 00000000000..ad6d5b3ed32
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java
@@ -0,0 +1,56 @@
+package de.lanlab.larm.fetcher;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @version   1.0
+ */
+
+import java.io.*;
+import java.util.zip.*;
+import java.net.*;
+
+/**
+ * Small manual experiment: gzips one sample URL string and prints the
+ * original and the compressed length.
+ *
+ * @author    Administrator
+ * @created   28. Januar 2002
+ */
+public class GZipTest
+{
+
+    /**
+     * Empty constructor; the class only offers a static main method.
+     */
+    public GZipTest() { }
+
+    /**
+     * Compresses a hard-coded URL (written as ISO-8859-1) into a byte array
+     * and prints both lengths. Any exception is printed as a stack trace.
+     *
+     * @param args  not used
+     */
+    public static void main(String[] args)
+    {
+        final String url = "http://speechdat.phonetik.uni-muenchen.de/speechdt//speechDB/FIXED1SL/BLOCK00/SES0006/A10006O5.aif";
+        try
+        {
+            ByteArrayOutputStream sink = new ByteArrayOutputStream(url.length());
+            GZIPOutputStream gzip = new GZIPOutputStream(sink);
+            OutputStreamWriter writer = new OutputStreamWriter(gzip, "ISO-8859-1");
+
+            writer.write(url);
+            writer.close();
+            gzip.finish();
+            int zippedLength = sink.toByteArray().length;
+            System.out.println("URL: " + url + " \n Length: " + url.length() + "\n zipped: " + zippedLength);
+        }
+        catch (Exception e)
+        {
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java
new file mode 100644
index 00000000000..ff48f26f31f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java
@@ -0,0 +1,121 @@
+package de.lanlab.larm.fetcher;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author Clemens Marschner
+ * @version   1.0
+ */
+
+import java.util.HashMap;
+import java.net.*;
+import de.lanlab.larm.util.CachingQueue;
+import de.lanlab.larm.util.Queue;
+
+/**
+ * contains information about a host. If a host doesn't respond too often, it's
+ * excluded from the crawl.
+ * This class is used by the HostManager
+ *
+ * @author    Clemens Marschner
+ * @created   16. Februar 2002
+ */
+public class HostInfo
+{
+    static final String[] emptyKeepOutDirectories = new String[0];
+
+    int id;
+    int healthyCount = 5;   // five strikes, and you're out
+    boolean isReachable = true;
+    boolean robotTxtChecked = false;
+    String[] disallows;    // robot exclusion
+    boolean isLoadingRobotsTxt = false;
+    Queue queuedRequests = null; // robot exclusion
+    String hostName;
+
+    public HostInfo(String hostName, int id)
+    {
+        this.id = id;
+        this.disallows = HostInfo.emptyKeepOutDirectories;
+        this.hostName = hostName;
+    }
+
+    /**
+     * is this host reachable and responding?
+     */
+    public boolean isHealthy()
+    {
+        return (healthyCount > 0) && isReachable;
+    }
+
+    /**
+     * signals that the host returned with a bad request of whatever type
+     */
+    public void badRequest()
+    {
+        healthyCount--;
+    }
+
+    public void setReachable(boolean reachable)
+    {
+        isReachable = reachable;
+    }
+
+    public boolean isReachable()
+    {
+        return isReachable;
+    }
+
+    public boolean isRobotTxtChecked()
+    {
+        return robotTxtChecked;
+    }
+
+    /**
+     * must be synchronized externally
+     */
+    public boolean isLoadingRobotsTxt()
+    {
+        return this.isLoadingRobotsTxt;
+    }
+
+    /** sets the loading flag; switching it on also assigns a new CachingQueue to queuedRequests */
+    public void setLoadingRobotsTxt(boolean isLoading)
+    {
+        this.isLoadingRobotsTxt = isLoading;
+        if(isLoading)
+        {
+            this.queuedRequests = new CachingQueue("HostInfo_" + id + "_QueuedRequests", 100);
+        }
+    }
+
+    /**
+     * stores the robots.txt result; a null disallows array is replaced by the shared empty array
+     */
+    public void setRobotsChecked(boolean isChecked, String[] disallows)
+    {
+        this.robotTxtChecked = isChecked;
+        this.disallows = (disallows != null) ? disallows : emptyKeepOutDirectories;
+    }
+
+    /**
+     * tests a path against the disallow entries (assumed to be a pretty short list)
+     * @param path  the path to test
+     * @return      false if the path starts with one of the disallow entries, true otherwise
+     */
+    public synchronized boolean isAllowed(String path)
+    {
+        // iterate over one snapshot of the array: setRobotsChecked() is not synchronized
+        // and could install a different (shorter) array while this loop is running
+        final String[] rules = disallows;
+        for(int i=0; i<rules.length; i++)
+        {
+            if(path.startsWith(rules[i]))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java
new file mode 100644
index 00000000000..dc892d8f4fa
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java
@@ -0,0 +1,86 @@
+package de.lanlab.larm.fetcher;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @version   1.0
+ */
+
+import java.util.HashMap;
+
+/**
+ * Keeps one HostInfo record per host name and assigns every new host a running id.
+ * One HostManager is handed to all threads created by FetcherThreadFactory, so
+ * the map is only touched inside synchronized methods.
+ *
+ * @author    Administrator
+ * @created   16. Februar 2002
+ */
+public class HostManager
+{
+    /** host name (String) -> HostInfo */
+    HashMap hosts;
+    /** next id to hand out; static, i.e. shared by all HostManager instances */
+    static int hostCount = 0;
+
+
+    /**
+     * Constructor for the HostManager object
+     *
+     * @param initialCapacity  initial capacity of the underlying HashMap
+     */
+    public HostManager(int initialCapacity)
+    {
+        hosts = new HashMap(initialCapacity);
+    }
+
+
+    /**
+     * Returns the record for a host, creating and registering it first if the
+     * host is not known yet.
+     *
+     * @param hostName  name of the host, used as map key
+     * @return          the existing or newly created HostInfo
+     */
+    public synchronized HostInfo put(String hostName)
+    {
+        HostInfo hi = (HostInfo)hosts.get(hostName);
+        if (hi == null)
+        {
+            int hostID;
+            // hostCount is static, so the class lock (not "this") has to guard it
+            synchronized (HostManager.class)
+            {
+                hostID = hostCount++;
+            }
+            hi = new HostInfo(hostName,hostID);
+            hosts.put(hostName, hi);
+        }
+        return hi;
+    }
+
+
+    /**
+     * Gets the HostInfo for a host name; unknown hosts are registered on the fly.
+     *
+     * @param hostName  name of the host
+     * @return          the HostInfo record for that host
+     */
+    public synchronized HostInfo getHostInfo(String hostName)
+    {
+        HostInfo hi = (HostInfo)hosts.get(hostName);
+        if(hi == null)
+        {
+            return put(hostName);
+        }
+        return hi;
+    }
+
+    /** @return the number of entries in the host map */
+    public synchronized int getSize()
+    {
+       return hosts.size();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java
new file mode 100644
index 00000000000..3984bafe990
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java
@@ -0,0 +1,111 @@
+package de.lanlab.larm.fetcher;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @created   17. Februar 2002
+ * @version   1.0
+ */
+import java.net.*;
+
+/**
+ * this can be considered a hack: messages whose URL matches one of the hard-coded lists are dropped
+ * @TODO implement this as a fast way to filter out different URL endings or beginnings
+ */
+public class KnownPathsFilter extends Filter implements MessageListener
+{
+
+    MessageHandler messageHandler;
+    /** a message is filtered if the file part of its URL starts with one of these */
+    String[] pathsToFilter =
+    {
+        "/robots.txt"
+    };
+    /** a message is filtered if the host of its URL equals one of these */
+    String[] hostFilter =
+    {
+        "www.nm.informatik.uni-muenchen.de",
+        "cgi.cip.informatik.uni-muenchen.de"
+    };
+    /** a message is filtered if the file part of its URL ends with one of these */
+    String[] filesToFilter =
+    {
+            // exclude Apache directory files
+            "/?D=D",
+            "/?S=D",
+            "/?M=D",
+            "/?N=D",
+            "/?D=A",
+            "/?S=A",
+            "/?M=A",
+            "/?N=A",
+    };
+
+    int pathLength;
+    int fileLength;
+    int hostLength;
+
+
+    /**
+     * Constructor for the KnownPathsFilter object; caches the lengths of the three lists
+     */
+    public KnownPathsFilter()
+    {
+        pathLength = pathsToFilter.length;
+        fileLength = filesToFilter.length;
+        hostLength = hostFilter.length;
+    }
+
+
+    /**
+     * checks the URL of the message against the path, file and host lists (in that order)
+     *
+     * @param message  the message to check; it is cast to URLMessage
+     * @return         the unchanged message, or null (and filtered is incremented) on a match
+     */
+    public Message handleRequest(Message message)
+    {
+        URL url = ((URLMessage)message).getUrl();
+        String file = url.getFile();
+        String host = url.getHost();
+        int i;
+        for (i = 0; i < pathLength; i++)
+        {
+            if (file.startsWith(pathsToFilter[i]))
+            {
+                filtered++;
+                return null;
+            }
+        }
+        for (i = 0; i < fileLength; i++)
+        {
+            if (file.endsWith(filesToFilter[i]))
+            {
+                filtered++;
+                return null;
+            }
+        }
+        for (i = 0; i<hostLength; i++)
+        {
+            if(hostFilter[i].equals(host))
+            {
+                filtered++;
+                return null;
+            }
+        }
+        return message;
+    }
+
+
+    /**
+     * will be called as soon as the Listener is added to the Message Queue
+     *
+     * @param handler  the Message Handler; stored in the messageHandler field
+     */
+    public void notifyAddedToMessageHandler(MessageHandler handler)
+    {
+        this.messageHandler = handler;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java
new file mode 100644
index 00000000000..fccf11877b3
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java
@@ -0,0 +1,11 @@
+package de.lanlab.larm.fetcher;
+
+import java.io.*;
+
+/**
+ * Marker interface: it declares no methods.
+ * Implementing classes represent a simple message.
+ */
+public interface Message
+{
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java
new file mode 100644
index 00000000000..5f507e700d6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java
@@ -0,0 +1,248 @@
+package de.lanlab.larm.fetcher;
+
+import java.util.*;
+import de.lanlab.larm.util.SimpleObservable;
+import de.lanlab.larm.util.CachingQueue;
+import de.lanlab.larm.util.UnderflowException;
+
+/**
+ *  this is a message handler that runs in its own thread.
+ *  Messages can be put via <code>putMessage</code> or <code>putMessages</code>
+ *  (use the latter whenever possible).<br>
+ *  The messages are passed to the filters in the order in which the filters were
+ *  added to the handler.<br>
+ *  They can consume the message by returning null. Otherwise, they return a Message
+ *  object, usually the one they got.<br>
+ *  The filters will run synchronously within the message handler thread<br>
+ *  This implements a chain of responsibility-style message handling
+ */
+public class MessageHandler implements Runnable
+{
+    /**
+     * the queue where messages are put in.
+     * Holds max. 2 x 5000 = 10.000 messages in RAM
+     */
+    private CachingQueue messageQueue = new CachingQueue("fetcherURLMessageQueue", 5000);
+
+    /**
+     * list of registered MessageListeners, in registration order
+     */
+    private LinkedList listeners = new LinkedList();
+
+    /**
+     * loop condition of the handler thread; never set to false within this class
+     */
+    private boolean running = true;
+
+    /**
+     * the message handler thread
+     */
+    private Thread t;
+
+    /**
+     * flag for thread communication: raised when messages are queued, cleared when the queue runs empty
+     */
+    boolean messagesWaiting = false;
+
+    /**
+     * true when a message is processed by the filters
+     */
+    boolean workingOnMessage = false;
+
+    /** lock and wait/notify object shared by the putMessage(s) callers and the handler thread */
+    Object queueMonitor = new Object();
+
+    SimpleObservable messageQueueObservable = new SimpleObservable();
+    SimpleObservable messageProcessorObservable = new SimpleObservable();
+
+    public boolean isWorkingOnMessage()
+    {
+        return workingOnMessage;
+    }
+
+    /**
+     *  creates and starts the message handler thread
+     */
+    MessageHandler()
+    {
+        t = new Thread(this,"MessageHandler Thread");
+        t.setPriority(5);   // higher priority to prevent starving when a lot of fetcher threads are used
+        t.start();
+    }
+
+    /**
+     *   join messageHandler-Thread
+     */
+    public void finalize()
+    {
+        if(t != null)
+        {
+            try
+            {
+                t.join();
+                t = null;
+            }
+            catch(InterruptedException e) {}
+        }
+    }
+
+    /**
+     *   registers a filter to the message handler
+     *   @param m  the listener; it is told about this handler, then appended to the listener list
+     */
+    public void addListener(MessageListener m)
+    {
+        m.notifyAddedToMessageHandler(this);
+        listeners.addLast(m);
+    }
+
+    /**
+     *  registers a MessageQueueObserver
+     *  It will be notified whenever a message is put into the Queue  (Parameter is Int(1)) or
+     *  removed (Parameter is Int(-1))
+     *  @param o  the Observer
+     */
+    public void addMessageQueueObserver(Observer o)
+    {
+        messageQueueObservable.addObserver(o);
+    }
+
+    /**
+     *  adds a message processor observer
+     *  It will be notified when a message is consumed. In this case the parameter
+     *  is the filter that consumed the message
+     *  @param o  the Observer
+     */
+    public void addMessageProcessorObserver(Observer o)
+    {
+        messageProcessorObservable.addObserver(o);
+    }
+
+    /**
+     *  puts a single message into the queue and wakes up the handler thread
+     */
+    public void putMessage(Message msg)
+    {
+        // insert and flag update happen under one lock, so the flag can never claim
+        // "messages waiting" for a message the handler thread has already removed
+        synchronized(queueMonitor)
+        {
+            messageQueue.insert(msg);
+            messagesWaiting = true;
+            queueMonitor.notify();
+        }
+        messageQueueObservable.setChanged();
+        messageQueueObservable.notifyObservers(new Integer(1));
+    }
+
+    /**
+     *  add a collection of events to the message queue
+     */
+    public void putMessages(Collection msgs)
+    {
+        synchronized(queueMonitor)
+        {
+            for(Iterator i = msgs.iterator(); i.hasNext();)
+            {
+                messageQueue.insert((Message)i.next());
+            }
+            // an empty collection must not raise the flag: nothing was queued
+            if(messageQueue.size() > 0)
+            {
+                messagesWaiting = true;
+                queueMonitor.notify();
+            }
+        }
+        // NOTE(review): observers receive Integer(1) once per call, not once per message - kept as is
+        messageQueueObservable.setChanged();
+        messageQueueObservable.notifyObservers(new Integer(1));
+    }
+
+    /**
+     *  the main messageHandler-Thread.
+     */
+    public void run()
+    {
+        while(running)
+        {
+            synchronized(queueMonitor)
+            {
+                // sleep only while nothing is queued; because the flag is checked under the
+                // lock, a notify() sent while this thread was busy is not lost
+                workingOnMessage=false;
+                while(running && !messagesWaiting)
+                {
+                    try
+                    {
+                        queueMonitor.wait();
+                    }
+                    catch(InterruptedException e)
+                    {
+                        System.out.println("MessageHandler: Caught InterruptedException");
+                    }
+                }
+                workingOnMessage=true;
+            }
+            Message m;
+            try
+            {
+                while(messagesWaiting)
+                {
+                    synchronized(this.queueMonitor)
+                    {
+                        m = (Message)messageQueue.remove();
+                        if(messageQueue.size() == 0)
+                        {
+                            messagesWaiting = false;
+                        }
+                    }
+                    messageQueueObservable.setChanged();
+                    messageQueueObservable.notifyObservers(new Integer(-1));      // Message processed
+
+                    // distribute: listeners get the message in registration order and may replace it
+                    Iterator i = listeners.iterator();
+                    while(i.hasNext())
+                    {
+                        try
+                        {
+                            MessageListener listener = (MessageListener)i.next();
+                            m = (Message)listener.handleRequest(m);
+                            if (m == null)
+                            {
+                                messageProcessorObservable.setChanged();
+                                messageProcessorObservable.notifyObservers(listener);
+                                break;     // this listener consumed the message
+                            }
+                        }
+                        catch(ClassCastException e)
+                        {
+                          System.out.println("MessageHandler:run: ClassCastException(2): " + e.getMessage());
+                        }
+                    }
+                }
+            }
+            catch (ClassCastException e)
+            {
+                System.out.println("MessageHandler:run: ClassCastException: " + e.getMessage());
+            }
+            catch (UnderflowException e)
+            {
+                // the flag was raised although the queue was empty: recompute it from the queue
+                synchronized(queueMonitor)
+                {
+                    messagesWaiting = (messageQueue.size() > 0);
+                }
+            }
+            catch (Exception e)
+            {
+                System.out.println("MessageHandler: " + e.getClass() + " " + e.getMessage());
+                e.printStackTrace();
+            }
+        }
+    }
+
+    public int getQueued()
+    {
+        return messageQueue.size();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java
new file mode 100644
index 00000000000..f39681cbdbf
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java
@@ -0,0 +1,36 @@
+/*
+ *  LARM - LANLab Retrieval Machine
+ *
+ *  $history: $
+ *
+ *
+ */
+package de.lanlab.larm.fetcher;
+
+/**
+ * A Message Listener works on messages in a message queue. Usually it returns
+ * the message it was given, but it can also change the message or create a new
+ * object. If it returns null, the message handler stops passing that message on.
+ *
+ * @author    Administrator
+ * @created   24. November 2001
+ */
+public interface MessageListener
+{
+    /**
+     * processes one message
+     *
+     * @param message  the message to be handled
+     * @return         Message  usually the original message
+     *                 null: the message was consumed
+     */
+    public Message handleRequest(Message message);
+
+
+    /**
+     * will be called as soon as the Listener is added to the Message Queue
+     *
+     * @param handler  the Message Handler the listener was added to
+     */
+    public void notifyAddedToMessageHandler(MessageHandler handler);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java
new file mode 100644
index 00000000000..35158d4f53d
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java
@@ -0,0 +1,429 @@
+/**
+ * Title: LARM Lanlab Retrieval Machine<p>
+ *
+ * Description: <p>
+ *
+ * Copyright: Copyright (c)<p>
+ *
+ * Company: <p>
+ *
+ *
+ *
+ * @author    Clemens Marschner
+ * @version   1.0
+ */
+package de.lanlab.larm.fetcher;
+
+import de.lanlab.larm.util.SimpleObservable;
+import de.lanlab.larm.util.State;
+import java.util.*;
+import java.net.*;
+import java.io.*;
+import org.apache.oro.text.perl.Perl5Util;
+import de.lanlab.larm.util.*;
+import de.lanlab.larm.threads.*;
+import HTTPClient.*;
+
+/**
+ * Factory for the server threads of the RobotExclusionFilter's own thread pool.
+ * It is handed to the ThreadPool, which creates its threads through it.
+ *
+ * @author    Administrator
+ * @created   17. Februar 2002
+ */
+class REFThreadFactory extends ThreadFactory
+{
+
+    ThreadGroup threadGroup = new ThreadGroup("RobotExclusionFilter");
+
+
+    /**
+     * Creates a ServerThread named "REF-" + count in this factory's thread group.
+     *
+     * @param count  number passed to the ServerThread; also used in its name
+     * @return       the new thread, with its priority set to 4
+     */
+    public ServerThread createServerThread(int count)
+    {
+        ServerThread newThread = new ServerThread(count, "REF-" + count, threadGroup);
+        newThread.setPriority(4);
+        return newThread;
+    }
+}
+
+/**
+ * the RE filter obeys the robot exclusion standard. If a new host name is supposed
+ * to be accessed, it first loads a "/robots.txt" on the given server and records the
+ * disallows stated in that file.
+ * The REFilter has a thread pool on its own to prevent the message handler from being
+ * clogged up if the server doesn't respond. Incoming messages are queued while the
+ * robots.txt is loaded.
+ * The information is stored in HostInfo records of the host manager class
+ *
+ * @author    Clemens Marschner
+ * @created   17. Februar 2002
+ */
+public class RobotExclusionFilter extends Filter implements MessageListener
+{
+
+
+    protected HostManager hostManager;
+
+    protected SimpleLogger log;
+
+
+    /**
+     * Creates the filter, its logger and its thread pool (a ThreadPool built with 2 and a REFThreadFactory).
+     *
+     * @param hm  host manager whose HostInfo records are looked up for each URL's host
+     */
+    public RobotExclusionFilter(HostManager hm)
+    {
+        log = new SimpleLogger("RobotExclusionFilter");
+        hostManager = hm;
+        rePool = new ThreadPool(2, new REFThreadFactory());
+        rePool.init();
+        log.setFlushAtOnce(true);
+        log.log("refilter: initialized");
+    }
+
+
+    /**
+     * called by the message handler
+     */
+    public void notifyAddedToMessageHandler(MessageHandler handler)
+    {
+        this.messageHandler = handler;
+    }
+
+
+    MessageHandler messageHandler = null;
+    ThreadPool rePool;
+
+
+    /**
+     * method that handles each URL request<p>
+     *
+     * This method will get the robots.txt file the first time a server is
+     * requested. See the description above.
+     *
+     * @param message
+     *      the (URL)Message
+     * @return
+     *      the original message or NULL if this host had a disallow on that URL
+     * @see <a href="http://info.webcrawler.com/mak/projects/robots/norobots.html">robot exclusion standard</a>
+     */
+
+    public Message handleRequest(Message message)
+    {
+        try
+        {
+            URLMessage urlMsg = ((URLMessage) message);
+            URL url = urlMsg.getUrl();
+            HostInfo h = hostManager.getHostInfo(url.getHost());
+            boolean startLoading = false;
+            synchronized (h)
+            {
+                // raise the flag atomically and BEFORE the task is started; otherwise
+                // a fast task could reset it first and the host would stay "loading"
+                if (!h.isRobotTxtChecked() && !h.isLoadingRobotsTxt())
+                {
+                    h.setLoadingRobotsTxt(true);
+                    startLoading = true;
+                }
+            }
+            if (startLoading)
+            {
+                try
+                {
+                    log.logThreadSafe("handleRequest: starting to get robots.txt");
+                    rePool.doTask(new RobotExclusionTask(h), new Integer(h.id));
+                }
+                catch (Exception e)
+                {
+                    h.setLoadingRobotsTxt(false); // task never started: let a later URL retry
+                    throw e;
+                }
+            }
+            synchronized (h)
+            {
+                // isLoading...() and queuedRequest.insert() must be atomic
+                if (h.isLoadingRobotsTxt())
+                {
+                    h.queuedRequests.insert(message);
+                    log.logThreadSafe("handleRequest: queued file " + url);
+                    return null;
+                }
+            }
+            String path = url.getPath();
+            path = (path == null || path.equals("")) ? "/" : path;
+            if (h.isAllowed(path))
+            {
+                return message;
+            }
+            log.logThreadSafe("handleRequest: file " + urlMsg.getURLString() + " filtered");
+            this.filtered++;
+        }
+        catch (Exception e)
+        {
+            e.printStackTrace();
+        }
+        return null;
+    }
+
+
+    private static volatile NVPair headers[] = new NVPair[1];
+
+    static
+    {
+        headers[0] = new HTTPClient.NVPair("User-Agent", Constants.CRAWLER_AGENT);
+
+    }
+
+
+    /**
+     * the task that actually loads and parses the robots.txt files
+     *
+     * @author    Clemens Marschner
+     * @created   17. Februar 2002
+     */
+    class RobotExclusionTask implements InterruptableTask
+    {
+        HostInfo hostInfo;
+
+
+
+        /**
+         * Constructor for the RobotExclusionTask object
+         *
+         * @param hostInfo  Description of the Parameter
+         */
+        public RobotExclusionTask(HostInfo hostInfo)
+        {
+            this.hostInfo = hostInfo;
+        }
+
+
+        /**
+         * dummy
+         *
+         * @return   The info value
+         */
+        public String getInfo()
+        {
+            return "";
+        }
+
+
+        /**
+         * not used
+         */
+        public void interrupt() { }
+
+
+        /**
+         * gets a robots.txt file and adds the information to the hostInfo
+         * structure
+         *
+         * @param thread  the server thread (passed by the thread pool)
+         */
+        public void run(ServerThread thread)
+        {
+            // every log line of this task starts with "task <thread name>"
+            final String taskName = "task " + Thread.currentThread().getName();
+
+            log.logThreadSafe(taskName + ": starting to load " + hostInfo.hostName);
+            // stays null unless a robots.txt was fetched and parsed
+            String[] disallows = null;
+            boolean errorOccured = false;
+            try
+            {
+                log.logThreadSafe(taskName + ": getting connection");
+                HTTPConnection conn = new HTTPConnection(hostInfo.hostName);
+                // timeout value handed to the connection: 30000
+                conn.setTimeout(30000);
+
+                HTTPResponse res = conn.Get("/robots.txt", (String) null, headers);
+                log.logThreadSafe(taskName + ": got connection.");
+                if (res.getStatusCode() == 200)
+                {
+                    log.logThreadSafe(taskName + ": reading");
+                    // max. 40 kb
+                    byte[] file = res.getData(40000);
+                    log.logThreadSafe(taskName + ": reading done. parsing");
+                    disallows = parse(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(file))));
+                    log.logThreadSafe(taskName + ": parsing done. found " + disallows.length + " disallows");
+                    log.logThreadSafe(taskName + ": setting disallows");
+                }
+                else
+                {
+                    // any other status is handled like a failed download
+                    errorOccured = true;
+                }
+            }
+            catch (java.net.UnknownHostException e)
+            {
+                errorOccured = markUnreachable(taskName, "unknown host");
+            }
+            catch (java.net.NoRouteToHostException e)
+            {
+                errorOccured = markUnreachable(taskName, "no route to");
+            }
+            catch (java.net.ConnectException e)
+            {
+                errorOccured = markUnreachable(taskName, "connect exception");
+            }
+            catch (java.io.InterruptedIOException e)
+            {
+                // time out. fatal in this case
+                errorOccured = markUnreachable(taskName, "time out");
+            }
+
+            catch (Throwable e)
+            {
+                // anything else is logged, and the task still finishes below
+                errorOccured = true;
+                log.log(taskName + ": unknown exception: " + e.getClass().getName() + ": " + e.getMessage() + ". continuing");
+                log.log(e);
+            }
+            finally
+            {
+                // flag updates and the hand-back of queued URLs are done while
+                // holding the lock on hostInfo
+                synchronized (hostInfo)
+                {
+                    if (errorOccured)
+                    {
+                        // crawl everything
+                        hostInfo.setRobotsChecked(true, null);
+                        hostInfo.setLoadingRobotsTxt(false);
+                        log.logThreadSafe(taskName + ": error occured");
+                    }
+                    else
+                    {
+                        hostInfo.setRobotsChecked(true, disallows);
+                        log.logThreadSafe(taskName + ": done");
+                    }
+                    log.logThreadSafe(taskName + ": now put " + hostInfo.queuedRequests.size() + " queueud requests back");
+                    hostInfo.isLoadingRobotsTxt = false;
+                    putBackURLs();
+                }
+            }
+        }
+
+
+        /**
+         * marks the host as unreachable and logs the reason
+         *
+         * @param taskName  log prefix ("task " + thread name)
+         * @param reason    short description put into the log line
+         * @return          always true, so callers can assign it to their error flag
+         */
+        private boolean markUnreachable(String taskName, String reason)
+        {
+            hostInfo.setReachable(false);
+            log.logThreadSafe(taskName + ": " + reason + ". setting to unreachable");
+            return true;
+        }
+
+
+        /**
+         * hands all queued requests of this host back to the message handler
+         */
+        private void putBackURLs()
+        {
+            while (hostInfo.queuedRequests.size() > 0)
+            {
+                messageHandler.putMessage((Message) hostInfo.queuedRequests.remove());
+            }
+            log.logThreadSafe("task " + Thread.currentThread().getName() + ": finished");
+            hostInfo.queuedRequests = null; // NOTE(review): handleRequest() still dereferences this while the loading flag is set - confirm the flag cannot be raised again
+        }
+
+
+        /**
+         * parses a robots.txt file. It was taken from the PERL implementation.
+         * Since this is only rarely called, it's not optimized for speed.
+         * Disallows of a record naming this crawler take precedence over those
+         * of the "*" record; empty Disallow values restrict nothing.
+         *
+         * @param r                the robots.txt file
+         * @return                 the disallows that apply to this crawler
+         * @exception IOException  any IOException
+         */
+        public String[] parse(BufferedReader r)
+            throws IOException
+        {
+            Perl5Util p = new Perl5Util();
+            String line;
+            boolean isMe = false;          // a record naming this crawler was found
+            boolean isAnon = false;        // currently inside a "*" record
+            boolean seenDisallow = false;  // current record already had a Disallow
+            ArrayList meDisallowed = new ArrayList();
+            ArrayList anonDisallowed = new ArrayList();
+            String ua = null;
+
+            while ((line = r.readLine()) != null)
+            {
+                if (p.match("/^\\s*#/", line))
+                {
+                    // comment-only line: not a record boundary
+                    continue;
+                }
+                // strip a trailing comment (the pattern must not demand a blank)
+                line = p.substitute("s/\\s*\\#.*//", line);
+                if (p.match("/^\\s*$/", line))
+                {
+                    if (isMe)
+                    {
+                        break;
+                    }
+                }
+                else if (p.match("/^User-Agent:\\s*(.*)/i", line))
+                {
+                    ua = p.group(1);
+                    ua = p.substitute("s/\\s+$//", ua);
+                    if (seenDisallow)
+                    {
+                        // a User-Agent after Disallow lines starts a new record
+                        if (isMe)
+                        {
+                            break;
+                        }
+                        seenDisallow = false;
+                        isAnon = false;
+                    }
+                    if (isMe)
+                    {
+                        // this record already names us; nothing to do
+                    }
+                    else if (ua.equals("*"))
+                    {
+                        isAnon = true;
+                    }
+                    else if (ua.length() > 0 && Constants.CRAWLER_AGENT.startsWith(ua))
+                    {
+                        isMe = true;
+                    }
+                }
+                else if (p.match("/^Disallow:\\s*(.*)/i", line))
+                {
+                    // a Disallow before any User-Agent counts as a "*" record
+                    isAnon = isAnon || (ua == null);
+                    seenDisallow = true;
+                    String disallow = p.group(1);
+                    disallow = (disallow == null) ? "" : disallow.trim();
+                    // an empty value means "nothing disallowed": record nothing
+                    if (disallow.length() > 0 && (isMe || isAnon))
+                    {
+                        (isMe ? meDisallowed : anonDisallowed).add(disallow);
+                    }
+                }
+            }
+            ArrayList disallowed = isMe ? meDisallowed : anonDisallowed;
+            return (String[]) disallowed.toArray(new String[disallowed.size()]);
+        }
+
+    }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java
new file mode 100644
index 00000000000..140924ab81a
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java
@@ -0,0 +1,545 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.fetcher;
+
+
+import de.lanlab.larm.threads.*;
+import java.util.*;
+import java.text.*;
+import java.io.*;
+import de.lanlab.larm.util.State;
+import de.lanlab.larm.util.SimpleLoggerManager;
+
+/**
+ * this monitor takes a sample of every thread every x milliseconds,
+ * and logs a lot of information. In the near past it has evolved into the multi
+ * purpose monitoring and maintenance facility.
+ * At the moment it prints status information
+ * to log files and to the console
+ * @TODO this can be done better. Probably with an agent where different services
+ * can be registered to be called every X seconds
+ */
+public class ThreadMonitor extends Observable implements Runnable
+{
+    /**
+     * a reference to the thread pool that's gonna be observed
+     */
+    private ThreadPool threadPool;
+
+
+    class Sample
+    {
+        long bytesRead;
+        long  docsRead;
+        long time;
+        public Sample(long bytesRead, long docsRead, long time)
+        {
+            this.bytesRead = bytesRead;
+            this.docsRead = docsRead;
+            this.time = time;
+        }
+    }
+
+    ArrayList bytesReadPerPeriod;
+
+    /**
+     * time between two samples, in ms
+     */
+    int sampleDelta;
+
+    /**
+     * the thread where this monitor runs in. Will run with high priority
+     */
+    Thread thread;
+
+
+    URLVisitedFilter urlVisitedFilter;
+    URLScopeFilter   urlScopeFilter;
+//    DNSResolver dnsResolver;
+    RobotExclusionFilter reFilter;
+    MessageHandler messageHandler;
+    URLLengthFilter urlLengthFilter;
+    HostManager hostManager;
+
+    public final static double KBYTE = 1024;
+    public final static double MBYTE = 1024 * KBYTE;
+    public final static double ONEGBYTE = 1024 * MBYTE;
+
+
+    String formatBytes(long lbytes)
+    {
+        double bytes = (double)lbytes;
+        if(bytes >= ONEGBYTE)
+        {
+            return fractionFormat.format((bytes/ONEGBYTE)) + " GB";
+        }
+        else if(bytes >= MBYTE)
+        {
+            return fractionFormat.format(bytes/MBYTE) + " MB";
+        }
+        else if(bytes >= KBYTE)
+        {
+            return fractionFormat.format(bytes/KBYTE) + " KB";
+        }
+        else
+        {
+            return fractionFormat.format(bytes) + " Bytes";
+        }
+
+    }
+
+    /**
+     * a logfile where status information is posted
+     * FIXME: put that in a seperate class (double code in FetcherTask)
+     */
+    PrintWriter logWriter;
+    private SimpleDateFormat formatter
+     = new SimpleDateFormat ("HH:mm:ss:SSSS"); // HH = hour 0-23; "hh" is a 12-hour clock and is ambiguous without an am/pm marker
+    private DecimalFormat fractionFormat = new DecimalFormat("0.00");
+
+    long startTime = System.currentTimeMillis();
+
+    private void log(String text)
+    {
+        try
+        {
+            logWriter.println(formatter.format(new Date()) + ";" + (System.currentTimeMillis()-startTime) + ";"  + text);
+            logWriter.flush();
+        }
+        catch(Exception e)
+        {
+            System.out.println("Couldn't write to logfile");
+        }
+    }
+
+    /**
+     * construct the monitor gets a reference to all monitored filters
+     * @param threadPool  the pool to be observed
+     * @param sampleDelta time in ms between samples
+     */
+    public ThreadMonitor(URLLengthFilter urlLengthFilter,
+                         URLVisitedFilter urlVisitedFilter,
+                         URLScopeFilter urlScopeFilter,
+                         /*DNSResolver dnsResolver,*/
+                         RobotExclusionFilter reFilter,
+                         MessageHandler messageHandler,
+                         ThreadPool threadPool,
+                         HostManager hostManager,
+                         int sampleDelta)
+    {
+        this.urlLengthFilter = urlLengthFilter;
+        this.urlVisitedFilter = urlVisitedFilter;
+        this.urlScopeFilter   = urlScopeFilter;
+       /* this.dnsResolver = dnsResolver;*/
+        this.hostManager = hostManager;
+        this.reFilter = reFilter;
+        this.messageHandler = messageHandler;
+
+        this.threadPool = threadPool;
+        bytesReadPerPeriod = new ArrayList();
+        this.sampleDelta = sampleDelta;
+        this.thread = new Thread(this, "ThreadMonitor");
+        this.thread.setPriority(7);
+
+        try
+        {
+            File logDir = new File("logs");
+            logDir.mkdir();
+            logWriter = new PrintWriter(new BufferedWriter(new FileWriter("logs/ThreadMonitor.log")));
+        }
+        catch(IOException e)
+        {
+            System.out.println("Couldn't create logfile (ThreadMonitor)");
+        }
+
+    }
+
+    /**
+     * Runnable's run method; to be invoked via start(). Every sampleDelta ms the
+     * monitor samples all pool threads, prints and logs the statistics, and calls
+     * System.exit(0) after more than 3 consecutive samples without any activity.
+     */
+    public void run()
+    {
+        int nothingReadCount = 0;
+        long lastPeriodBytesRead = -1;
+        long monitorRunCount = 0;
+        long startTime = System.currentTimeMillis(); // shadows the field of the same name
+        log("time;overallBytesRead;overallTasksRun;urlsQueued;urlsWaiting;isWorkingOnMessage;urlsScopeFiltered;urlsVisitedFiltered;urlsREFiltered;memUsed;memFree;totalMem;nrHosts;visitedSize;visitedStringSize;urlLengthFiltered");
+        while(true)
+        {
+            try
+            {
+                try
+                {
+                    thread.sleep(sampleDelta); // Thread.sleep is static: it suspends the calling thread
+                }
+                catch(InterruptedException e)
+                {
+                    return;
+                }
+
+                Iterator threadIterator = threadPool.getThreadIterator();
+                int i=0;
+                StringBuffer bytesReadString = new StringBuffer(200);
+                StringBuffer rawBytesReadString = new StringBuffer(200);
+                StringBuffer tasksRunString = new StringBuffer(200);
+                long overallBytesRead = 0;
+                long overallTasksRun  = 0;
+                long now = System.currentTimeMillis();
+                boolean finished = false;
+                //System.out.print("\f");
+                /*while(!finished)
+                {
+                    boolean restart = false;*/
+                boolean allThreadsIdle = true;
+                StringBuffer sb = new StringBuffer(500);
+
+                while(threadIterator.hasNext())
+                {
+                    FetcherThread thread = (FetcherThread)threadIterator.next();
+                    long totalBytesRead = thread.getTotalBytesRead();
+                    overallBytesRead += totalBytesRead;
+                    bytesReadString.append(formatBytes(totalBytesRead)).append( "; ");
+                    rawBytesReadString.append(totalBytesRead).append("; ");
+                    long tasksRun = thread.getTotalTasksRun();
+                    overallTasksRun += tasksRun;
+                    tasksRunString.append(tasksRun).append("; ");
+
+                    // check task status
+                    State state = thread.getTaskState();
+                    //StringBuffer sb = new StringBuffer(200);
+                    sb.setLength(0);
+                    System.out.println(sb + "[" + thread.getThreadNumber() + "] " + state.getState() + " for " +
+                                       (now - state.getStateSince() ) + " ms " +
+                                       (state.getInfo() != null ? "(" + state.getInfo() +")" : "")
+                                       );
+                    if(!(state.getState().equals(FetcherThread.STATE_IDLE)))
+                    {
+                        //if(allThreadsIdle) System.out.println("(not all threads are idle, '"+state.getState()+"' != '"+FetcherThread.STATE_IDLE+"')");
+                        allThreadsIdle = false;
+                    }
+                    if (((state.equals(FetcherTask.FT_CONNECTING)) || (state.equals(FetcherTask.FT_GETTING)) || (state.equals(FetcherTask.FT_READING)) || (state.equals(FetcherTask.FT_CLOSING)))
+                        && ((now - state.getStateSince()) > 160000)) // NOTE(review): compares the State object itself, not state.getState() as above - confirm State.equals() supports this
+                    {
+                        System.out.println("****Restarting Thread " + thread.getThreadNumber());
+                        threadPool.restartThread(thread.getThreadNumber());
+                        break;  // Iterator is invalid
+                    }
+
+                }
+                /*if(restart)
+                {
+                    continue;
+                }
+                finished = true;
+                }*/
+                /*
+                if(overallBytesRead == lastPeriodBytesRead)
+                {
+                    *
+                    disabled kickout feature - cm
+
+                    nothingReadCount ++;
+                   System.out.println("Anomaly: nothing read during the last period(s). " + (20-nothingReadCount+1) + " periods to exit");
+                    if(nothingReadCount > 20)  // nothing happens anymore
+                    {
+                        log("Ending");
+                        System.out.println("End at " + new Date().toString());
+                        // print some information
+                        System.exit(0);
+                    }
+
+
+                }
+                else
+                {
+                    nothingReadCount = 0;
+                }*/
+
+                lastPeriodBytesRead = overallBytesRead;
+
+                //State reState = new State("hhh"); //reFilter.getState();
+                sb.setLength(0);
+                //System.out.println(sb + "Robot-Excl.Filter State: " + reState.getState() + " since " + (now-reState.getStateSince()) + " ms " + (reState.getInfo() != null ? " at " + reState.getInfo() : ""));
+
+                addSample(new Sample(overallBytesRead, overallTasksRun, System.currentTimeMillis()));
+                int nrHosts = ((FetcherTaskQueue)threadPool.getTaskQueue()).getNumHosts();
+                int visitedSize       = urlVisitedFilter.size();
+                int visitedStringSize = urlVisitedFilter.getStringSize();
+
+                double bytesPerSecond = getAverageBytesRead();
+                double docsPerSecond = getAverageDocsRead();
+                sb.setLength(0);
+                System.out.println(sb + "\nBytes total:          " + formatBytes(overallBytesRead) + "  (" + formatBytes((long)(((double)overallBytesRead)*1000/(System.currentTimeMillis()-startTime))) + " per second since start)" +
+                                   "\nBytes per Second:     " + formatBytes((int)bytesPerSecond) + " (50 secs)" +
+                                   "\nDocs per Second:      " + docsPerSecond +
+                                   "\nBytes per Thread:     " + bytesReadString);
+                double docsPerSecondTotal = ((double)overallTasksRun)*1000/(System.currentTimeMillis()-startTime);
+                sb.setLength(0);
+                System.out.println(sb + "Docs read total:      " + overallTasksRun + "    Docs/s: " + fractionFormat.format(docsPerSecondTotal) +
+                                   "\nDocs p.thread:        " + tasksRunString);
+
+                long memUsed = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+                long memFree = Runtime.getRuntime().freeMemory();
+                long totalMem = Runtime.getRuntime().totalMemory();
+                sb.setLength(0);
+                System.out.println(sb + "Mem used:             " + formatBytes(memUsed) +  ", free: " + formatBytes(memFree) + "     total VM: " + totalMem);
+                int urlsQueued = messageHandler.getQueued();
+                int urlsWaiting = threadPool.getQueueSize();
+                boolean isWorkingOnMessage = messageHandler.isWorkingOnMessage();
+                int urlsScopeFiltered = urlScopeFilter.getFiltered();
+                int urlsVisitedFiltered = urlVisitedFilter.getFiltered();
+                int urlsREFiltered = reFilter.getFiltered();
+                int urlLengthFiltered = urlLengthFilter.getFiltered();
+                sb.setLength(0);
+                System.out.println(sb + "URLs queued:          " + urlsQueued + "     waiting: " + urlsWaiting);
+                sb.setLength(0);
+                System.out.println(sb + "Message is being processed: " + isWorkingOnMessage);
+                sb.setLength(0);
+                System.out.println(sb + "URLs Filtered: length: " + urlLengthFiltered + "      scope: " + urlsScopeFiltered + "     visited: " + urlsVisitedFiltered + "      robot.txt: " + urlsREFiltered);
+                sb.setLength(0);
+                System.out.println(sb + "Visited size: " + visitedSize + "; String Size in VisitedFilter: " + visitedStringSize + "; Number of Hosts: " + nrHosts + "; hosts in Host Manager: " + hostManager.getSize() + "\n");
+                sb.setLength(0);
+                log(sb + "" + now + ";" + overallBytesRead + ";" + overallTasksRun + ";" + urlsQueued + ";" + urlsWaiting + ";" + isWorkingOnMessage + ";" + urlsScopeFiltered + ";" + urlsVisitedFiltered + ";" + urlsREFiltered + ";" + memUsed + ";" + memFree + ";" + totalMem + ";" + nrHosts + ";" + visitedSize + ";" + visitedStringSize + ";" + rawBytesReadString + ";" + urlLengthFiltered);
+
+
+                if(!isWorkingOnMessage && (urlsQueued == 0) && (urlsWaiting == 0) && allThreadsIdle)
+                {
+                    nothingReadCount++;
+                    if(nothingReadCount > 3)
+                    {
+                        SimpleLoggerManager.getInstance().flush();
+                        System.exit(0);
+                    }
+
+                }
+                else
+                {
+                    nothingReadCount = 0;
+                }
+
+                this.setChanged();
+                this.notifyObservers();
+
+                // Request Garbage Collection
+                monitorRunCount++;
+
+                if(monitorRunCount % 6 == 0)
+                {
+                    System.runFinalization();
+                }
+
+                if(monitorRunCount % 2 == 0)
+                {
+                    System.gc();
+                    SimpleLoggerManager.getInstance().flush();
+                }
+
+            }
+            catch(Exception e)
+            {
+                System.out.println("Monitor: Exception: " + e.getClass().getName());
+                e.printStackTrace();
+            }
+        }
+    }
+
+    /**
+     * start the thread
+     */
+    public void start()
+    {
+        this.clear();
+        thread.start();
+    }
+
+    /**
+     * interrupt the monitor thread
+     */
+    public void interrupt()
+    {
+        thread.interrupt();
+    }
+
+
+    public synchronized void clear()
+    {
+        //sampleTimeStamps.clear();
+        /*for(int i=0; i < timeSamples.length; i++)
+        {
+            timeSamples[i].clear();
+        }
+        */
+    }
+
+/*    public synchronized double getAverageReadCount(int maxPeriods)
+    {
+        int lastPeriod = bytesReadPerPeriod.size()-1;
+        int periods = Math.min(lastPeriod, maxPeriods);
+        if(periods < 2)
+        {
+            return 0.0;
+        }
+
+
+        long bytesLastPeriod =   ((Sample)bytesReadPerPeriod.get(lastPeriod)).bytesRead;
+        long bytesBeforePeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod - periods)).bytesRead;
+        long bytesRead = bytesLastPeriod - bytesBeforePeriod;
+
+        long endTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue();
+        long startTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1 - periods)).longValue();
+        long duration = endTime - startTime;
+        System.out.println("bytes read: " + bytesRead + " duration in s: " + duration/1000.0 + " = " + ((double)bytesRead) / (duration/1000.0) + " per second");
+
+        return ((double)bytesRead) / (duration/1000.0);
+    }
+*/
+
+    /*public synchronized double getDocsPerSecond(int maxPeriods)
+    {
+        int lastPeriod = bytesReadPerPeriod.size()-1;
+        int periods = Math.min(lastPeriod, maxPeriods);
+        if(periods < 2)
+        {
+            return 0.0;
+        }
+
+
+        long docsLastPeriod =   ((Sample)bytesReadPerPeriod.get(lastPeriod)).docsRead;
+        long docsBeforePeriod = ((Sample)bytesReadPerPeriod.get(lastPeriod - periods)).docsRead;
+        long docsRead = docsLastPeriod - docsBeforePeriod;
+
+        long endTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue();
+        long startTime = ((Long)sampleTimeStamps.get(sampleTimeStamps.size() - periods)).longValue();
+        long duration = endTime - startTime;
+        System.out.println("docs read: " + docsRead + " duration in s: " + duration/1000.0 + " = " + ((double)docsRead) / (duration/1000.0) + " per second");
+
+        return ((double)docsRead) / (duration/1000.0);
+    }*/
+
+    /**
+     * retrieves the number of threads whose byteCount is below the threshold
+     * @param maxPeriods the number of periods to look back
+     * @param threshold  the number of bytes per second that acts as the threshold for a stalled thread
+     */
+    /*public synchronized int getStalledThreadCount(int maxPeriods, double threshold)
+    {
+        int periods = Math.min(sampleTimeStamps.size(), maxPeriods);
+        int stalledThreads = 0;
+        int j=0, i=0;
+        if(periods > 1)
+        {
+            for(j=0; j<timeSamples.length; j++)
+            {
+                long threadByteCount = 0;
+                ArrayList actArrayList = timeSamples[j];
+                double bytesPerSecond = 0;
+                try
+                {
+                    for(i=0; i<periods; i++)
+                    {
+
+                        Sample actSample = (Sample)(actArrayList.get(i));
+                        threadByteCount += actSample.bytesRead;
+                    }
+                }
+                catch(Exception e)
+                {
+                    System.out.println("getAverageReadCount: " + e.getClass().getName() + ": " + e.getMessage() + "(" + i + ";" + j + ")");
+                    e.printStackTrace();
+                }
+
+                bytesPerSecond = ((double)threadByteCount) /
+                       ((double)((Long)sampleTimeStamps.get(sampleTimeStamps.size()-1)).longValue()
+                      - ((Long)sampleTimeStamps.get(sampleTimeStamps.size()-periods)).longValue()) * 1000.0;
+                if(bytesPerSecond < threshold)
+                {
+                    stalledThreads++;
+                }
+            }
+        }
+
+        return stalledThreads;
+    }
+*/
+
+    int samples=0; // number of samples added so far; also the write counter of the 10-slot ring buffer
+
+    public void addSample(Sample s)
+    {
+        if(samples < 10)
+        {
+            bytesReadPerPeriod.add(s);
+        }
+        else
+        {
+            bytesReadPerPeriod.set(samples % 10, s); // buffer is full: overwrite the oldest slot
+        }
+        samples++; // must always advance, or slot 0 would be the only one ever reused
+    }
+
+    public double getAverageBytesRead()
+    {
+        Iterator i = bytesReadPerPeriod.iterator();
+        Sample oldest = null;
+        Sample newest = null;
+        while(i.hasNext())
+        {
+
+            Sample s = (Sample)i.next();
+            if(oldest == null)
+            {
+                oldest = newest = s;
+            }
+            else
+            {
+                if(s.time < oldest.time)
+                {
+                    oldest = s;
+                }
+                else if(s.time > newest.time)
+                {
+                    newest = s;
+                }
+            }
+        }
+        return ((newest.bytesRead - oldest.bytesRead)/((newest.time - oldest.time)/1000.0));
+    }
+    public double getAverageDocsRead()
+    {
+        Iterator i = bytesReadPerPeriod.iterator();
+        Sample oldest = null;
+        Sample newest = null;
+        while(i.hasNext())
+        {
+
+            Sample s = (Sample)i.next();
+            if(oldest == null)
+            {
+                oldest = newest = s;
+            }
+            else
+            {
+                if(s.time < oldest.time)
+                {
+                    oldest = s;
+                }
+                else if(s.time > newest.time)
+                {
+                    newest = s;
+                }
+            }
+        }
+        return ((newest.docsRead - oldest.docsRead)/((newest.time - oldest.time)/1000.0));
+    }
+}
+
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java
new file mode 100644
index 00000000000..61f49c448f4
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java
@@ -0,0 +1,69 @@
+package de.lanlab.larm.fetcher;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @created   28. Januar 2002
+ * @version   1.0
+ */
+
+/**
+ * Title: LARM Lanlab Retrieval Machine
+ *
+ * Drops URLs whose file part (path + query) is longer than a configured number
+ * of characters. Used to prevent endless loops where a page links to its own
+ * URL plus some extension.
+ *
+ * @author Clemens Marschner
+ * @created   28 January 2002
+ */
+
+public class URLLengthFilter extends Filter implements MessageListener
+{
+    /**
+     * called by the message handler; stores the handler
+     *
+     * @param handler  the handler
+     */
+    public void notifyAddedToMessageHandler(MessageHandler handler)
+    {
+        this.messageHandler = handler;
+    }
+
+    MessageHandler messageHandler;
+    int maxLength;
+
+    /**
+     * Constructor for the URLLengthFilter object
+     *
+     * @param maxLength  max length of the file part of the URL (path + query)
+     */
+    public URLLengthFilter(int maxLength)
+    {
+        this.maxLength = maxLength;
+    }
+
+    /**
+     * Consumes URL messages whose file part is longer than maxLength. Messages
+     * of another type, or without a URL, are passed on unchanged.
+     *
+     * @param message  the message to check
+     * @return         the original message, or null if the URL was too long
+     */
+    public Message handleRequest(Message message)
+    {
+        if (!(message instanceof URLMessage) || ((URLMessage) message).getUrl() == null)
+        {
+            return message; // nothing to measure
+        }
+        String file = ((URLMessage) message).getUrl().getFile();
+        if (file != null && file.length() > maxLength) // path + query
+        {
+            filtered++;
+            return null;
+        }
+        return message;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java
new file mode 100644
index 00000000000..24973f93929
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java
@@ -0,0 +1,87 @@
+package de.lanlab.larm.fetcher;
+
+import java.net.*;
+import java.io.*;
+import de.lanlab.larm.util.URLUtils;
+
+/**
+ * represents a URL which is passed around in the messageHandler
+ */
+public class URLMessage implements Message, Serializable
+{
+    /** the URL, and its string form as produced by URLUtils.toExternalFormNoRef */
+    protected URL url;
+    protected String urlString;
+
+    /** the referring URL (null for start URLs, see getInfo) and its string form */
+    protected URL referer;
+    protected String refererString;
+    boolean isFrame;
+
+    /**
+     * url and referer may be null; in that case the matching string is null, too
+     */
+    public URLMessage(URL url, URL referer, boolean isFrame)
+    {
+        this.url = url;
+        this.urlString = url != null ? URLUtils.toExternalFormNoRef(url) : null;
+        this.referer = referer;
+        this.refererString = referer != null ? URLUtils.toExternalFormNoRef(referer) : null;
+        this.isFrame = isFrame;
+    }
+
+    public URL getUrl()
+    {
+        return this.url;
+    }
+
+    public URL getReferer()
+    {
+        return this.referer;
+    }
+
+    public String toString()
+    {
+        return urlString;
+    }
+
+    public String getURLString()
+    {
+        return urlString;
+    }
+
+    public String getRefererString()
+    {
+        return refererString;
+    }
+
+    /** hashes the cached string: java.net.URL.hashCode() may block on host name resolution */
+    public int hashCode()
+    {
+        return urlString != null ? urlString.hashCode() : 0;
+    }
+
+    /** writes url, referer and isFrame; the string forms are rebuilt by readObject */
+    private void writeObject(java.io.ObjectOutputStream out) throws IOException
+    {
+        out.writeObject(url);
+        out.writeObject(referer);
+        out.writeBoolean(isFrame);
+    }
+
+    /** reads the fields in the order written; null safe, strings built as in the constructor */
+    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException
+    {
+        url = (URL)in.readObject();
+        referer = (URL)in.readObject();
+        urlString = url != null ? URLUtils.toExternalFormNoRef(url) : null;
+        refererString = referer != null ? URLUtils.toExternalFormNoRef(referer) : null;
+        isFrame = in.readBoolean();
+    }
+
+    /** @return referer string (or "&lt;start&gt;" without referer), URL string and 1/0 frame flag, tab separated */
+    public String getInfo()
+    {
+        return (referer != null ? refererString : "<start>") + "\t" + urlString + "\t" + (isFrame ? "1" : "0");
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java
new file mode 100644
index 00000000000..66d66fd5a94
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java
@@ -0,0 +1,75 @@
+package de.lanlab.larm.fetcher;
+
+import org.apache.oro.text.regex.Perl5Matcher;
+import org.apache.oro.text.regex.Perl5Compiler;
+import org.apache.oro.text.regex.Pattern;
+
+/**
+ * Filter class; checks an incoming message against a regular expression.
+ * If the URL does not match this expression, the message is discarded.
+ *
+ * @author Clemens Marschner
+ */
+class URLScopeFilter extends Filter implements MessageListener
+{
+    public void notifyAddedToMessageHandler(MessageHandler handler)
+    {
+        this.messageHandler = handler;
+    }
+    MessageHandler messageHandler;
+
+    /**
+     * the regular expression which describes a valid URL; null until setRexString is called
+     */
+    private Pattern pattern;
+    private Perl5Matcher matcher;
+    private Perl5Compiler compiler;
+
+    public URLScopeFilter()
+    {
+        matcher = new Perl5Matcher();
+        compiler = new Perl5Compiler();
+    }
+
+    /**
+     * @return the expression string of the compiled pattern, or null if none was set yet
+     */
+    public String getRexString()
+    {
+        return pattern != null ? pattern.getPattern() : null;
+    }
+
+    /**
+     * set the regular expression; it is compiled case insensitive and in single line mode
+     *
+     * @param rexString the expression
+     */
+    public void setRexString(String rexString) throws org.apache.oro.text.regex.MalformedPatternException
+    {
+        this.pattern = compiler.compile(rexString, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK);
+    }
+
+    /**
+     * this method will be called by the message handler. Tests the URL
+     * and throws it out if it's not in the scope. Messages that are no URL
+     * messages pass; so does every message as long as no expression has been set.
+     *
+     * @param message  the message to test
+     * @return         the message, or null if its URL is out of scope
+     */
+    public Message handleRequest(Message message)
+    {
+        if (pattern == null || !(message instanceof URLMessage))
+        {
+            return message;
+        }
+        String urlString = ((URLMessage) message).toString();
+        // the char[] variant of matches() is used, as before; a message without URL string is out of scope
+        if (urlString == null || !matcher.matches(urlString.toCharArray(), pattern))
+        {
+            filtered++;
+            return null;
+        }
+        return message;
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java
new file mode 100644
index 00000000000..0c9ba7cb75b
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java
@@ -0,0 +1,114 @@
+package de.lanlab.larm.fetcher;
+
+import java.net.URL;
+import java.util.*;
+
+import de.lanlab.larm.util.SimpleLogger;
+
+/**
+ * contains a HashSet of all URLs already passed. Adds each URL to that set, or
+ * consumes it if it is already present
+ *
+ * @todo find ways to reduce memory consumption here. the approach is somewhat naive
+ *
+ * @author    Clemens Marschner
+ * @created   3 January 2002
+ */
+class URLVisitedFilter extends Filter implements MessageListener
+{
+
+    /**
+     * called when the filter is added to a message handler; stores the handler
+     *
+     * @param handler  the message handler
+     */
+    public void notifyAddedToMessageHandler(MessageHandler handler)
+    {
+        this.messageHandler = handler;
+    }
+
+
+    MessageHandler messageHandler;
+
+    SimpleLogger log;
+
+    HashSet urlHash;
+
+    static Boolean dummy = new Boolean(true);
+
+
+    /**
+     * Constructor for the URLVisitedFilter object
+     *
+     * @param initialHashCapacity  initial capacity of the set of URL strings
+     * @param log                  receives every consumed URL; may be null
+     */
+    public URLVisitedFilter(int initialHashCapacity, SimpleLogger log)
+    {
+        urlHash = new HashSet(initialHashCapacity);
+        this.log = log;
+        //urlVector = new Vector(initialHashCapacity);
+    }
+
+
+    /**
+     * clears everything
+     */
+    public void clearHashtable()
+    {
+        urlHash.clear();
+        stringSize = 0; // the character count belongs to the set just emptied
+    }
+
+
+
+    /**
+     * Passes a URL message on the first time its URL string is seen and consumes
+     * (and logs, if a logger was given) every repetition. Other messages pass.
+     *
+     * @param message  the message to check
+     * @return         the message, or null if its URL was already seen
+     */
+    public Message handleRequest(Message message)
+    {
+        if (!(message instanceof URLMessage))
+        {
+            return message;
+        }
+        URLMessage urlMessage = (URLMessage) message;
+        String urlString = urlMessage.getURLString();
+        // add() returns false if the string was already contained: one lookup instead of two
+        if (!urlHash.add(urlString))
+        {
+            filtered++;
+            if (log != null)
+            {
+                log.logThreadSafe(urlMessage.getInfo());
+            }
+            return null;
+        }
+        if (urlString != null)
+        {
+            stringSize += urlString.length(); // see getStringSize()
+        }
+        return message;
+    }
+
+    private int stringSize = 0;
+
+    /**
+     * just a method to get a rough number of characters contained in the set;
+     * with that you see that the total memory is mostly used by this class
+     */
+    public int getStringSize()
+    {
+        return stringSize;
+    }
+
+    /** @return number of entries in the set of URL strings */
+    public int size()
+    {
+        return urlHash.size();
+    }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java
new file mode 100644
index 00000000000..444523ff6b2
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java
@@ -0,0 +1,875 @@
+package de.lanlab.larm.graph;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @version   1.0
+ */
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * A node of the link graph: a named page with its incoming and outgoing transitions
+ *
+ * @author    Administrator
+ * @created   30 January 2002
+ */
+class Node implements Comparable
+{
+    LinkedList incoming;
+    // 16 + 4 per entry
+    //HashSet incomingNodes; // 16 + 16 per entry, 11 x 16 default size = 192
+    LinkedList outgoing;
+    // 16 + 4 per entry
+    //Object o;
+    //HashSet outgoingNodes; // 16 + 16 per entry, 11 x 16 default size = 192
+
+    //LinkedList shortestIncoming;
+    int id;
+    // 4
+    float distance;
+    // 8
+    String name;
+    // 4 + String object
+    String title;
+    // 4 + String object
+    float nodeRank[] = new float[2];
+    // 16
+    // 470 bytes + 2 string objects
+    /**
+     * selects what compareTo() sorts by: 0 or 1 = nodeRank[sortType], else incoming link count
+     */
+    public static int sortType = 0;
+
+
+    /**
+     * Orders nodes descending by nodeRank[sortType] if sortType is below 2, and
+     * descending by the number of incoming transitions otherwise
+     * @param n  the Node to compare this node with
+     * @return   a negative number, zero or a positive number, see Comparable
+     */
+    public int compareTo(Object n)
+    {
+        if (sortType < 2)
+        {
+            double diff = ((Node) n).nodeRank[sortType] - nodeRank[sortType];
+            return diff < 0 ? -1 : diff > 0 ? 1 : 0;
+        }
+        else
+        {
+            return (((Node) n).incoming.size() - incoming.size());
+        }
+    }
+
+
+    /**
+     * Creates a node without links, with distance Float.MAX_VALUE and both ranks set to 1
+     *
+     * @param id     numeric id of the node
+     * @param name   name of the node
+     * @param title  title of the node, may be null
+     */
+    public Node(int id, String name, String title)
+    {
+        this.id = id;
+        this.name = name;
+        this.title = title;
+        this.incoming = new LinkedList();
+        this.outgoing = new LinkedList();
+        //this.incomingNodes = new HashSet();
+        //this.outgoingNodes = new HashSet();
+        this.distance = Float.MAX_VALUE;
+        this.nodeRank[0] = this.nodeRank[1] = 1;
+    }
+
+
+    /**
+     * Appends a transition to the incoming list unless one from the same node is present
+     *
+     * @param incomingT  transition that ends at this node
+     * @return           true if it was added, false if it was a duplicate
+     */
+    public boolean addIncoming(Transition incomingT)
+    {
+        // attn: the linear scan doesn't scale well, but saves the memory of a lookup set.
+        // The list holds Transitions, so it has to be searched by source node id;
+        // contains(Integer) could never match.
+        int fromId = incomingT.getFrom().id;
+        for (Iterator it = incoming.iterator(); it.hasNext(); )
+        {
+            if (((Transition) it.next()).getFrom().id == fromId)
+            {
+                return false;
+            }
+        }
+        incoming.addLast(incomingT);
+        return true;
+    }
+
+
+    /**
+     * Appends a transition to the outgoing list unless one to the same node is present
+     *
+     * @param outgoingT  transition that starts at this node
+     * @return           true if it was added, false if it was a duplicate
+     */
+    public boolean addOutgoing(Transition outgoingT)
+    {
+        // the list holds Transitions, so it is searched by target node id
+        int toId = outgoingT.getTo().id;
+        for (Iterator it = outgoing.iterator(); it.hasNext(); )
+        {
+            if (((Transition) it.next()).getTo().id == toId)
+            {
+                return false;
+            }
+        }
+        outgoing.addLast(outgoingT);
+        return true;
+    }
+
+
+    /**
+     * Gets the incoming attribute of the Node object
+     *
+     * @return   the live list of incoming transitions
+     */
+    public LinkedList getIncoming()
+    {
+        return incoming;
+    }
+
+
+    /**
+     * Gets the outgoing attribute of the Node object
+     *
+     * @return   the live list of outgoing transitions
+     */
+    public LinkedList getOutgoing()
+    {
+        return outgoing;
+    }
+
+
+    /**
+     * Sets the distance attribute of the Node object
+     *
+     * @param distance  The new distance value
+     */
+    public void setDistance(float distance)
+    {
+        this.distance = distance;
+    }
+
+
+    /**
+     * Gets the distance attribute of the Node object
+     *
+     * @return   The distance value; Float.MAX_VALUE until another one is set
+     */
+    public float getDistance()
+    {
+        return distance;
+    }
+
+
+    /**
+     * Gets the name attribute of the Node object
+     *
+     * @return   The name value
+     */
+    public String getName()
+    {
+        return name;
+    }
+
+
+    /**
+     * Sets the title attribute of the Node object
+     *
+     * @param title  The new title value
+     */
+    public void setTitle(String title)
+    {
+        this.title = title;
+    }
+
+
+    /**
+     * Gets the title attribute of the Node object
+     *
+     * @return   The title value, may be null
+     */
+    public String getTitle()
+    {
+        return title;
+    }
+
+
+    /**
+     * Gets the nodeRank attribute of the Node object
+     *
+     * @param idx  index of the rank slot, 0 or 1
+     * @return     The nodeRank value
+     */
+    public float getNodeRank(int idx)
+    {
+        return nodeRank[idx];
+    }
+
+
+    /**
+     * Sets the nodeRank attribute of the Node object
+     *
+     * @param nodeRank  The new nodeRank value
+     * @param idx       index of the rank slot, 0 or 1
+     */
+    public void setNodeRank(float nodeRank, int idx)
+    {
+        this.nodeRank[idx] = nodeRank;
+    }
+
+}
+
+/**
+ * A directed link between two nodes, with a distance, two rank slots and a frame flag
+ *
+ * @author    Administrator
+ * @created   30 January 2002
+ */
+class Transition
+{
+
+
+    Node from;
+    Node to;
+    float distance;
+    float linkRank[] = new float[2];
+    boolean isFrame;
+
+
+    /**
+     * Creates the link and registers it with both nodes, unless from already links to to
+     *
+     * @param from     node the link starts at
+     * @param to       node the link points to
+     * @param isFrame  frame flag of the link
+     */
+    public Transition(Node from, Node to, boolean isFrame)
+    {
+        // a link from -> to that is already known is not registered a second time;
+        // the object created here then stays unconnected (from and to remain null)
+        for (Iterator i = from.getOutgoing().iterator(); i.hasNext(); )
+        {
+            if (((Transition) i.next()).getTo() == to)
+            {
+                return; // already contained
+            }
+        }
+        this.from = from;
+        this.to = to;
+        from.addOutgoing(this);
+        to.addIncoming(this);
+        // "not reached yet": the same marker Node uses for its distance
+        this.distance = Float.MAX_VALUE;
+        this.isFrame = isFrame;
+        this.linkRank[0] = this.linkRank[1] = 1;
+    }
+
+
+    /**
+     * Gets the to attribute of the Transition object
+     *
+     * @return   the node the link points to
+     */
+    public Node getTo()
+    {
+        return to;
+    }
+
+
+    /**
+     * Gets the from attribute of the Transition object
+     *
+     * @return   the node the link starts at
+     */
+    public Node getFrom()
+    {
+        return from;
+    }
+
+
+    /**
+     * Gets the distance attribute of the Transition object
+     *
+     * @return   The distance value
+     */
+    public float getDistance()
+    {
+        return distance;
+    }
+
+
+    /**
+     * Sets the distance attribute of the Transition object
+     *
+     * @param distance  The new distance value
+     */
+    public void setDistance(float distance)
+    {
+        this.distance = distance;
+    }
+
+
+    /**
+     * Gets the frame attribute of the Transition object
+     *
+     * @return   The frame value
+     */
+    public boolean isFrame()
+    {
+        return isFrame;
+    }
+
+
+    /**
+     * Gets the linkRank attribute of the Transition object
+     *
+     * @param idx  index of the rank slot, 0 or 1
+     * @return     The linkRank value
+     */
+    public float getLinkRank(int idx)
+    {
+        return linkRank[idx];
+    }
+
+
+    /**
+     * Sets the linkRank attribute of the Transition object
+     *
+     * @param linkRank  The new linkRank value
+     * @param idx       index of the rank slot, 0 or 1
+     */
+    public void setLinkRank(float linkRank, int idx)
+    {
+        this.linkRank[idx] = linkRank;
+    }
+}
+
+/**
+ * Reads a link log into a graph of Node and Transition objects and prints shortest routes in it
+ *
+ * @author    Administrator
+ * @created   30. Januar 2002
+ */
+public class DistanceCount
+{
+
+
+    HashMap nodes = new HashMap(100000);
+    LinkedList nodesToDo = new LinkedList();
+    static int id = 0;
+
+
+    /**
+     * Looks a node up by name, creating and registering it on first use. A non-null
+     * title replaces the title of a node that already exists.
+     *
+     * @param name   key of the node in the nodes map
+     * @param title  title to store, or null to leave an existing title alone
+     * @return       the existing or the newly created node
+     */
+    Node getOrCreateNode(String name, String title)
+    {
+        Node found = (Node) nodes.get(name);
+        if (found == null)
+        {
+            // first sighting: the node takes the next id from the static counter
+            found = new Node(id++, name, title);
+            nodes.put(name, found);
+            return found;
+        }
+        // known node: only the title may be refreshed
+        if (title != null)
+        {
+            found.setTitle(title);
+        }
+        return found;
+    }
+
+
+    /**
+     * Reads a link log and builds the graph from it. Every line consists of tab
+     * separated fields: source URL, target URL and a frame flag (1 = frame link).
+     * If more than three further fields follow, the fourth of them is used as
+     * title of the target after its first and last character (presumably quotes)
+     * are removed. URLs are lower-cased and lose one trailing slash. Malformed
+     * lines are reported on System.out and skipped.
+     *
+     * @param filename         name of the log file
+     * @exception IOException  if the file cannot be opened or read
+     */
+    public DistanceCount(String filename)
+        throws IOException
+    {
+        System.out.println("reading file...");
+        long t1 = System.currentTimeMillis();
+        BufferedReader b = new BufferedReader(new FileReader(filename));
+        int lines = 0;
+        try
+        {
+            String line;
+            while ((line = b.readLine()) != null)
+            {
+                lines++;
+                String title = null;
+                try
+                {
+                    //StringTokenizer st = new StringTokenizer(line, " ");
+                    StringTokenizer st = new StringTokenizer(line, "\t");
+                    // one trailing slash is dropped, the rest is lower-cased
+                    String from = st.nextToken();
+                    if (from.endsWith("/"))
+                    {
+                        from = from.substring(0, from.length() - 1);
+                    }
+                    from = from.toLowerCase();
+                    String to = st.nextToken();
+                    if (to.endsWith("/"))
+                    {
+                        to = to.substring(0, to.length() - 1);
+                    }
+                    to = to.toLowerCase();
+                    boolean isFrame = (Integer.parseInt(st.nextToken()) == 1);
+                    // countTokens() = fields left after the three read above
+                    if (st.countTokens() > 3)
+                    {
+                        title = "<untitled>";
+                        //StringBuffer sb = new StringBuffer();
+                        st.nextToken();
+                        // result
+                        st.nextToken();
+                        // Mime Type
+                        st.nextToken();
+                        // Size
+                        /*
+                         *  while(st.hasMoreTokens())
+                         *  {
+                         *  sb.append(st.nextToken()).append(" ");
+                         *  }
+                         */
+                        title = st.nextToken();
+                        if (title.length() > 2)
+                        {
+                            // drop first and last character, then cut at an embedded quote
+                            title = title.substring(1, title.length() - 1);
+                            int indexOfPara = title.indexOf("\"");
+                            if (indexOfPara > -1)
+                            {
+                                title = title.substring(0, indexOfPara);
+                            }
+                        }
+                    }
+                    Node fromNode = getOrCreateNode(from, null);
+                    Node toNode = getOrCreateNode(to, title);
+                    // the Transition constructor links the new object into both nodes
+                    new Transition(fromNode, toNode, isFrame);
+                    if (lines % 10000 == 0)
+                    {
+                        System.out.println("" + lines + " Lines; " + nodes.size() + " nodes");
+                    }
+                }
+                catch (NoSuchElementException e)
+                {
+                    System.out.println("Malformed line " + lines + ": field number doesn't match");
+                }
+                catch (NumberFormatException e)
+                {
+                    System.out.println("Malformed line " + lines + ": NumberFormat wrong");
+                }
+            }
+        }
+        finally
+        {
+            // release the file handle, also when reading fails half way
+            b.close();
+        }
+
+        System.out.println("finished; b" + lines + " Lines; " + nodes.size() + " nodes");
+        long t2 = System.currentTimeMillis();
+        System.out.println("" + (t2 - t1) + " ms");
+    }
+
+
+    /**
+     * Computes the shortest distance from firstNode to every node reachable from it
+     * and stores it in the nodes and transitions. A link costs 1, a frame link 0.25.
+     *
+     * @param firstNode  the node the search starts at; it gets distance 0
+     */
+    public void calculateShortestDistance(Node firstNode)
+    {
+        clearDistances();
+        firstNode.setDistance(0);
+        nodesToDo.addLast(firstNode);
+        int calculations = 0;
+        while (!nodesToDo.isEmpty())
+        {
+            if (calculations % 100000 == 0)
+            {
+                System.out.println("Calculations: " + calculations + "; nodes to go: " + nodesToDo.size() + " total Mem: " + Runtime.getRuntime().totalMemory() + "; free mem: " + Runtime.getRuntime().freeMemory());
+            }
+            calculations++;
+
+            Node act = (Node) nodesToDo.removeFirst();
+            LinkedList outTrans = act.getOutgoing();
+            float distance = act.getDistance();
+            Iterator i = outTrans.iterator();
+            // nodes are taken first in, first out; a node is queued again when its distance improves
+
+            while (i.hasNext())
+            {
+                Transition t = (Transition) i.next();
+                float transDistance = t.getDistance();
+                float newDistance = distance + (t.isFrame() ? 0.25f : 1f);
+                if (transDistance > newDistance)
+                {
+                    t.setDistance(newDistance);
+                    Node to = t.getTo();
+                    // only a strictly shorter route may replace what the target already has:
+                    // comparing against the distance of the current node instead would let a
+                    // longer route (a plain link) overwrite a shorter one (via frame links)
+                    if (to.distance > newDistance)
+                    {
+                        to.setDistance(newDistance);
+                        nodesToDo.addLast(to);
+                    }
+                }
+            }
+            /*
+             *  if(looksGood)
+             *  {
+             *  System.out.println("Node " + act.id + " looks good");
+             *  }
+             */
+        }
+        System.out.println("Calculations: " + calculations );
+
+    }
+
+
+    /** Marks every node and every transition as not reached (Float.MAX_VALUE). */
+    public void clearDistances()
+    {
+        System.out.println("Clearing distance data...");
+        int nr = 0;
+        for (Iterator it = nodes.values().iterator(); it.hasNext(); nr++)
+        {
+            Node n = (Node) it.next();
+            n.setDistance(Float.MAX_VALUE);
+            // transitions keep distances too; stale ones would block the next search
+            for (Iterator out = n.getOutgoing().iterator(); out.hasNext(); ) { ((Transition) out.next()).setDistance(Float.MAX_VALUE); }
+        }
+        System.out.println("cleared " + nr + " nodes. done");
+    }
+    /**
+     * Prints the shortest route(s) found between two nodes given by name
+     *
+     * @param nodeFrom  name of the node the search starts at
+     * @param nodeTo    name of the node whose route is printed
+     */
+    public void printDistance(String nodeFrom, String nodeTo)
+    {
+        // both names have to be known before anything is calculated
+        Node start = (Node) nodes.get(nodeFrom);
+        if (start == null)
+        {
+            System.out.println("FROM node not found");
+            return;
+        }
+        Node destination = (Node) nodes.get(nodeTo);
+        if (destination == null)
+        {
+            System.out.println("TO node not found");
+            return;
+        }
+
+        // old distances are reset inside calculateShortestDistance()
+        System.out.println("calculating...");
+        calculateShortestDistance(start);
+
+        //t1 = System.currentTimeMillis();
+        //System.out.println("" + (t1-t2) + " ms");
+
+        // the sorting, page rank and statistics code announced by this message is
+        // disabled; it is kept below for reference
+        System.out.println("\nSorting...");
+
+        /*
+         *  Collection nodeCollection = nodes.values();
+         *  Object[] nodeArray = nodeCollection.toArray();
+         *  Arrays.sort(nodeArray);
+         *  t2 = System.currentTimeMillis();
+         *  System.out.println("" + (t2-t1) + " ms");
+         *  int from = 0;
+         *  int to = 1;
+         */
+        /*
+         *  /calculate page Rank
+         *  for(int i = 0; i< 1; i++)
+         *  {
+         *  from = i%2;
+         *  to = (i+1) % 2;
+         *  for(int j = 0; j<nodeArray.length; j++)
+         *  {
+         *  Node act = (Node)nodeArray[j];
+         *  LinkedList inc = act.getIncoming();
+         *  float pageRank = 0;
+         *  Iterator it = inc.iterator();
+         *  while(it.hasNext())
+         *  {
+         *  Transition t = (Transition)it.next();
+         *  pageRank += t.getLinkRank(from);
+         *  }
+         *  act.setNodeRank(pageRank, to);
+         *  LinkedList out = act.getOutgoing();
+         *  int size = out.size();
+         *  if(size > 0)
+         *  {
+         *  float linkRank = pageRank / size;
+         *  it = out.iterator();
+         *  while(it.hasNext())
+         *  {
+         *  Transition t = (Transition)it.next();
+         *  t.setLinkRank(linkRank, to);
+         *  }
+         *  }
+         *  }
+         *  }
+         */
+        /*
+         *  System.out.println("\nLink Count:");
+         *  for(int i=0; i<10; i++)
+         *  {
+         *  Node n = ((Node)nodeArray[i]);
+         *  System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
+         *  }
+         *  for(int i=nodeArray.length/2; i<nodeArray.length/2+10; i++)
+         *  {
+         *  Node n = ((Node)nodeArray[i]);
+         *  System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
+         *  }
+         *  for(int i=nodeArray.length-10; i<nodeArray.length; i++)
+         *  {
+         *  Node n = ((Node)nodeArray[i]);
+         *  System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
+         *  }
+         *  Node.sortType = to;
+         *  Arrays.sort(nodeArray);
+         *  System.out.println("\nPageRank Count:");
+         *  for(int i=0; i<10; i++)
+         *  {
+         *  Node n = ((Node)nodeArray[i]);
+         *  System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
+         *  }
+         *  for(int i=nodeArray.length/2; i<nodeArray.length/2+10; i++)
+         *  {
+         *  Node n = ((Node)nodeArray[i]);
+         *  System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
+         *  }
+         *  for(int i=nodeArray.length-10; i<nodeArray.length; i++)
+         *  {
+         *  Node n = ((Node)nodeArray[i]);
+         *  System.out.println("Node " + n.name + ": " + n.getIncoming().size() + "; pageRank: " + n.getNodeRank(to));
+         *  }
+         *  System.out.println("\nStats...");
+         *  float distanceAccumulated=0;
+         *  float distanceMax = 0;
+         *  int notCounted = 0;
+         *  for(int j = 0; j<nodeArray.length; j++)
+         *  {
+         *  Node n = (Node)nodeArray[j];
+         *  if(n.distance != Integer.MAX_VALUE)
+         *  {
+         *  distanceAccumulated += n.distance;
+         *  distanceMax = Math.max(distanceMax, n.distance);
+         *  }
+         *  else
+         *  {
+         *  notCounted++;
+         *  }
+         *  }
+         *  System.out.println("Mean Distance:          " + ((double)distanceAccumulated)/nodeArray.length);
+         *  System.out.println("Max Distance:           " + (distanceMax));
+         *  System.out.println("Not reachable nodes(?): " + notCounted);
+         *  System.out.println("Referer Median:         " + ((Node)(nodeArray[Math.round(nodeArray.length/2)])).incoming.size());
+         *  System.out.println("\nSamples:");
+         */
+
+        printShortestRoute(destination, 0,0);
+
+    }
+
+
+    /** Prints the shortest route between two randomly chosen nodes; reports an empty graph. */
+    public void printRandomRoute()
+    {
+        Object[] nodeArray = nodes.values().toArray();
+        if (nodeArray.length == 0)
+        {
+            System.out.println("no nodes loaded");
+            return;
+        }
+        Random r = new java.util.Random(System.currentTimeMillis());
+        Node from = (Node) nodeArray[r.nextInt(nodeArray.length)];
+        Node to = (Node) nodeArray[r.nextInt(nodeArray.length)];
+        System.out.println("Calculating distance...");
+        calculateShortestDistance(from);
+        System.out.println("printing...");
+        printShortestRoute(to, 0,0);
+    }
+
+
+    /**
+     * Prints a node, then recursively (one column deeper) each predecessor whose
+     * incoming transition has a distance not above the distance of the node.
+     * @param n          node to print
+     * @param indent     number of leading spaces; capped at the length of the pad string
+     * @param linkCount  running sum of incoming link counts along the printed route
+     */
+    public void printShortestRoute(Node n, int indent, int linkCount)
+    {
+        String pad = "                                                            ";
+        String spaces = pad.substring(0, Math.min(indent, pad.length())); // deep routes must not overrun the pad
+        if (n.getIncoming().isEmpty())
+        {
+            System.out.println(spaces + "<start>");
+        }
+        else
+        {
+            System.out.print(spaces + "+- " + n.name + "    (" + (n.getTitle() != null ? n.getTitle().substring(0,Math.min(n.getTitle().length(),25)) : "") + "\")     D:" + n.distance + "; L:" + n.getIncoming().size() + "; C:" + linkCount);
+            Iterator it = n.getIncoming().iterator();
+            float dist = n.distance;
+            if (dist > 10000000)
+            {
+                System.out.println(spaces + "\n--no link--");
+                return;
+            }
+            while (it.hasNext())
+            {
+                Transition t = (Transition) it.next();
+                if (t.distance <= dist)
+                {
+                    if (t.isFrame())
+                    {
+                        System.out.println(" **F** ->");
+                    }
+                    else
+                    {
+                        System.out.println(" -> ");
+                    }
+                    printShortestRoute(t.getFrom(), indent + 1, linkCount + n.getIncoming().size());
+                }
+            }
+        }
+    }
+
+
+    /**
+     * Interactive console for the crawl graph: loads the store.log named by args[0] and then
+     * answers the commands typed on standard input until "q" is entered or the input ends.<br>
+     * Note: this is experimental stuff. get into the source code to see how it works
+     * @param args  args[0] must point to the store.log file
+     */
+    public static void main(String[] args)
+    {
+        // Syntax: DistanceCount <store.log>
+        try
+        {
+            DistanceCount dc = new DistanceCount(args[0]);
+            boolean running = true;
+            BufferedReader in = new BufferedReader(new InputStreamReader(System.in),400);
+            while (running)
+            {
+                System.out.print("\n\nCommand (? for help) > ");
+                String input = in.readLine();
+                if (input == null)
+                {
+                    break; // end of input: readLine() returned null, the tokenizer would throw on it
+                }
+                StringTokenizer st = new StringTokenizer(input," ");
+                String command;
+                boolean printHelp = false;
+
+                if (!st.hasMoreTokens())
+                {
+                    printHelp = true;
+                    command = "?";
+                }
+                else
+                {
+                    command = st.nextToken();
+                }
+
+                try
+                {
+                    if ("?".equals(command))
+                    {
+                        printHelp = true;
+                    }
+                    else if ("d".equals(command))
+                    {
+                        String from = st.nextToken();
+                        String to = st.nextToken();
+                        dc.printDistance(from ,to);
+                    }
+                    else if ("q".equals(command))
+                    {
+                        running = false;
+                    }
+                    else if ("r".equals(command))
+                    {
+                        dc.printRandomRoute();
+                    }
+                    else
+                    {
+                        System.out.println("unknown command '" + command + "'");
+                    }
+                }
+                catch (java.util.NoSuchElementException e)
+                {
+                    System.out.println("Syntax error");
+                    e.printStackTrace();
+                    printHelp = true;
+                }
+                catch(Exception e)
+                {
+                    e.printStackTrace();
+                }
+
+                if (printHelp)
+                {
+                    System.out.println("\nSyntax\n" +
+                            "?   print this help message\n" +
+                            "d <page1> <page2>   print shortest route from page1 to page2\n" +
+                            "r                   print random walk\n" +
+                            "q                   quit");
+
+                }
+            }
+
+        }
+        catch (IOException e)
+        {
+            e.printStackTrace();
+        }
+        catch (ArrayIndexOutOfBoundsException e)
+        {
+            System.out.println("Syntax: java ... store.log");
+        }
+
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java
new file mode 100644
index 00000000000..e2a1137faaa
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java
@@ -0,0 +1,154 @@
+package de.lanlab.larm.gui;
+
+/*
+	A basic extension of the java.awt.Dialog class
+ */
+
+import java.awt.*;
+
+public class AboutDialog extends Dialog {
+	// Dialog with two labels and an OK button; OK or the window's close request disposes it.
+	public AboutDialog(Frame parent, boolean modal)
+	{
+		super(parent, modal);
+
+		// This code is automatically generated by Visual Cafe when you add
+		// components to the visual environment. It instantiates and initializes
+		// the components. To modify the code, only use code syntax that matches
+		// what Visual Cafe can generate, or Visual Cafe may be unable to back
+		// parse your Java file into its visual environment.
+        
+		//{{INIT_CONTROLS
+		setLayout(null);
+		setSize(249,150);
+		setVisible(false);
+		label1.setText("LARM - LANLab Retrieval Machine");
+		add(label1);
+		label1.setBounds(12,12,228,24);
+		okButton.setLabel("OK");
+		add(okButton);
+		okButton.setBounds(95,85,66,27);
+		label2.setText("(C) 2000 Clemens Marschner");
+		add(label2);
+		label2.setBounds(12,36,228,24);
+		setTitle("AWT-Anwendung - Info");
+		//}}
+        
+		//{{REGISTER_LISTENERS
+		SymWindow aSymWindow = new SymWindow();
+		this.addWindowListener(aSymWindow);
+		SymAction lSymAction = new SymAction();
+		okButton.addActionListener(lSymAction);
+		//}}
+
+	}
+    
+	public AboutDialog(Frame parent, String title, boolean modal)
+	{
+		this(parent, modal);
+		setTitle(title);
+	}
+
+	public void addNotify()
+	{
+		// Record the size of the window prior to calling parents addNotify.
+                Dimension d = getSize();
+
+		super.addNotify();
+
+		// Only do this once.
+		if (fComponentsAdjusted)
+			return;
+
+		// Adjust components according to the insets
+		Insets insets = getInsets();
+		setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height);
+		Component components[] = getComponents();
+		for (int i = 0; i < components.length; i++)
+		{
+			Point p = components[i].getLocation();
+			p.translate(insets.left, insets.top);
+			components[i].setLocation(p);
+		}
+
+		// Used for addNotify check.
+		fComponentsAdjusted = true;
+	}
+
+	public void setVisible(boolean b)
+	{
+		// Center over the parent before showing; without a parent there is nothing to center on.
+		if (b && getParent() != null)
+		{
+			Rectangle bounds = getParent().getBounds();
+			Rectangle abounds = getBounds();
+			setLocation(bounds.x + (bounds.width - abounds.width)/ 2,
+				 bounds.y + (bounds.height - abounds.height)/2);
+		}
+
+		super.setVisible(b);
+	}
+
+	//{{DECLARE_CONTROLS
+	java.awt.Label label1 = new java.awt.Label();
+	java.awt.Button okButton = new java.awt.Button();
+	java.awt.Label label2 = new java.awt.Label();
+	//}}
+    
+    // Used for addNotify check.
+	boolean fComponentsAdjusted = false;
+    
+	class SymAction implements java.awt.event.ActionListener
+	{
+		public void actionPerformed(java.awt.event.ActionEvent event)
+		{
+			Object object = event.getSource();
+			if (object == okButton)
+				okButton_ActionPerformed(event);
+		}
+	}
+
+	void okButton_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// Generated hook: the work is done by the interaction method.
+		okButton_ActionPerformed_Interaction1(event);
+	}
+
+	/** Disposes the dialog in response to the OK button. */
+	void okButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
+	{
+		try {
+			this.dispose();
+		} catch (Exception e) {
+			e.printStackTrace(); // report instead of silently hiding a failed dispose
+		}
+	}
+
+	/** Forwards window-closing events whose source is this dialog to AboutDialog_WindowClosing. */
+	class SymWindow extends java.awt.event.WindowAdapter
+	{
+		public void windowClosing(java.awt.event.WindowEvent event)
+		{
+			Object object = event.getSource();
+			if (object == AboutDialog.this)
+				AboutDialog_WindowClosing(event);
+		}
+	}
+
+	void AboutDialog_WindowClosing(java.awt.event.WindowEvent event)
+	{
+		// Generated hook: the work is done by the interaction method.
+		AboutDialog_WindowClosing_Interaction1(event);
+	}
+
+	/** Disposes the dialog in response to a window-closing event. */
+	void AboutDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
+	{
+		try {
+			this.dispose();
+		} catch (Exception e) {
+			e.printStackTrace(); // report instead of silently hiding a failed dispose
+		}
+	}
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java
new file mode 100644
index 00000000000..a3d8dd242ee
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java
@@ -0,0 +1,485 @@
+package de.lanlab.larm.gui;
+
+/*
+	This simple extension of the java.awt.Frame class
+	contains all the elements necessary to act as the
+	main window of an application.
+ */
+
+import java.awt.*;
+import java.awt.event.ActionListener;
+//import com.sun.java.swing.*;
+
+public class FetcherFrame extends Frame
+{
+	public FetcherFrame()
+	{
+		// This code is automatically generated by Visual Cafe when you add
+		// components to the visual environment. It instantiates and initializes
+		// the components. To modify the code, only use code syntax that matches
+		// what Visual Cafe can generate, or Visual Cafe may be unable to back
+		// parse your Java file into its visual environment.
+		
+		//{{INIT_CONTROLS
+		setLayout(new BorderLayout(0,0));
+		setSize(800,600);
+		setVisible(false);
+		openFileDialog1.setMode(FileDialog.LOAD);
+		openFileDialog1.setTitle("Öffnen");
+		//$$ openFileDialog1.move(24,312);
+		mainPanelWithBorders.setLayout(new BorderLayout(0,0));
+		add("Center", mainPanelWithBorders);
+		mainPanelWithBorders.setBounds(0,0,800,600);
+		northBorder.setLayout(null);
+		mainPanelWithBorders.add("North", northBorder);
+		northBorder.setBackground(java.awt.Color.lightGray);
+		northBorder.setBounds(0,0,800,3);
+		southBorder.setLayout(null);
+		mainPanelWithBorders.add("South", southBorder);
+		southBorder.setBackground(java.awt.Color.lightGray);
+		southBorder.setBounds(0,597,800,3);
+		westBorder.setLayout(null);
+		mainPanelWithBorders.add("West", westBorder);
+		westBorder.setBackground(java.awt.Color.lightGray);
+		westBorder.setBounds(0,3,3,594);
+		eastBorder.setLayout(null);
+		mainPanelWithBorders.add("East", eastBorder);
+		eastBorder.setBackground(java.awt.Color.lightGray);
+		eastBorder.setBounds(797,3,3,594);
+		mainPanel.setLayout(new BorderLayout(0,3));
+		mainPanelWithBorders.add("Center", mainPanel);
+		mainPanel.setBackground(java.awt.Color.lightGray);
+		mainPanel.setBounds(3,3,794,594);
+		upperPanel.setLayout(new GridLayout(1,2,0,0));
+		mainPanel.add("North", upperPanel);
+		upperPanel.setBounds(0,0,794,150);
+		preferencesPanel.setLayout(null);
+		upperPanel.add(preferencesPanel);
+		preferencesPanel.setBounds(0,0,397,150);
+		startURLlabel.setText("Start-URL");
+		preferencesPanel.add(startURLlabel);
+		startURLlabel.setBounds(12,0,121,24);
+		startURL.setText("uni-muenchen.de");
+		preferencesPanel.add(startURL);
+		startURL.setBounds(132,0,133,24);
+		startButton.setLabel("Start");
+		preferencesPanel.add(startButton);
+		startButton.setFont(new Font("Dialog", Font.BOLD, 12));
+		startButton.setBounds(288,36,99,24);
+		restrictToLabel.setText("Restrict host to");
+		preferencesPanel.add(restrictToLabel);
+		restrictToLabel.setBounds(12,36,121,28);
+		preferencesPanel.add(restrictTo);
+		restrictTo.setBounds(133,36,133,24);
+		logPanel.setLayout(new BorderLayout(0,0));
+		upperPanel.add(logPanel);
+		logPanel.setBounds(397,0,397,150);
+		logPanel.add("Center", logList);
+		logList.setBackground(java.awt.Color.white);
+		logList.setBounds(0,0,397,150);
+		lowerPanel.setLayout(new GridLayout(1,3,3,3));
+		mainPanel.add("Center", lowerPanel);
+		lowerPanel.setBounds(0,153,794,441);
+		urlQueuePanel.setLayout(new BorderLayout(0,0));
+		lowerPanel.add(urlQueuePanel);
+		urlQueuePanel.setBounds(0,0,196,441);
+		urlQueueLabel.setText("URLQueue");
+		urlQueuePanel.add("North", urlQueueLabel);
+		urlQueueLabel.setBounds(0,0,196,23);
+		urlQueuePanel.add("Center", urlQueueList);
+		urlQueueList.setBackground(java.awt.Color.white);
+		urlQueueList.setBounds(0,23,196,418);
+		urlThreadPanel.setLayout(new BorderLayout(0,0));
+		lowerPanel.add(urlThreadPanel);
+		urlThreadPanel.setBounds(199,0,196,441);
+		urlThreadLabel.setText("URLThreads");
+		urlThreadPanel.add("North", urlThreadLabel);
+		urlThreadLabel.setBounds(0,0,196,23);
+		urlThreadPanel.add("Center", urlThreadList);
+		urlThreadList.setBackground(java.awt.Color.white);
+		urlThreadList.setBounds(0,23,196,418);
+		docQueuePanel.setLayout(new BorderLayout(0,0));
+		lowerPanel.add(docQueuePanel);
+		docQueuePanel.setBounds(398,0,196,441);
+		docQueueLabel.setText("DocQueue");
+		docQueuePanel.add("North", docQueueLabel);
+		docQueueLabel.setBounds(0,0,196,23);
+		docQueuePanel.add("Center", docQueueList);
+		docQueueList.setBackground(java.awt.Color.white);
+		docQueueList.setBounds(0,23,196,418);
+		docThreadPanel.setLayout(new BorderLayout(0,0));
+		lowerPanel.add(docThreadPanel);
+		docThreadPanel.setBounds(597,0,196,441);
+		docThreadLabel.setText("DocThreads");
+		docThreadPanel.add("North", docThreadLabel);
+		docThreadLabel.setBounds(0,0,196,23);
+		docThreadPanel.add("Center", docThreadList);
+		docThreadList.setBackground(java.awt.Color.white);
+		docThreadList.setBounds(0,23,196,418);
+		setTitle("LARM - Fetcher");
+		//}}
+		
+		//{{INIT_MENUS
+		menu1.setLabel("Datei");
+		menu1.add(newMenuItem);
+		newMenuItem.setEnabled(false);
+		newMenuItem.setLabel("Neu");
+		newMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_N,false));
+		menu1.add(openMenuItem);
+		openMenuItem.setLabel("Öffnen...");
+		openMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_O,false));
+		menu1.add(saveMenuItem);
+		saveMenuItem.setEnabled(false);
+		saveMenuItem.setLabel("Speichern");
+		saveMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_S,false));
+		menu1.add(saveAsMenuItem);
+		saveAsMenuItem.setEnabled(false);
+		saveAsMenuItem.setLabel("Speichern unter...");
+		menu1.add(separatorMenuItem);
+		separatorMenuItem.setLabel("-");
+		menu1.add(exitMenuItem);
+		exitMenuItem.setLabel("Beenden");
+		mainMenuBar.add(menu1);
+		menu2.setLabel("Bearbeiten");
+		menu2.add(cutMenuItem);
+		cutMenuItem.setEnabled(false);
+		cutMenuItem.setLabel("Ausschneiden");
+		cutMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_X,false));
+		menu2.add(copyMenuItem);
+		copyMenuItem.setEnabled(false);
+		copyMenuItem.setLabel("Kopieren");
+		copyMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_C,false));
+		menu2.add(pasteMenuItem);
+		pasteMenuItem.setEnabled(false);
+		pasteMenuItem.setLabel("Einfügen");
+		pasteMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_V,false));
+		mainMenuBar.add(menu2);
+		menu3.setLabel("Hilfe");
+		menu3.add(aboutMenuItem);
+		aboutMenuItem.setLabel("Info...");
+		mainMenuBar.add(menu3);
+		//$$ mainMenuBar.move(0,312);
+		setMenuBar(mainMenuBar);
+		//}}
+		
+		//{{REGISTER_LISTENERS
+		SymWindow aSymWindow = new SymWindow();
+		this.addWindowListener(aSymWindow);
+		SymAction lSymAction = new SymAction();
+		openMenuItem.addActionListener(lSymAction);
+		exitMenuItem.addActionListener(lSymAction);
+		aboutMenuItem.addActionListener(lSymAction);
+		startButton.addActionListener(lSymAction);
+		//}}
+	}
+	
+	public FetcherFrame(String title)
+	{
+		this();
+		setTitle(title);
+	}
+	
+    /**
+     * Shows or hides the component depending on the boolean flag b.
+     * @param b  if true, show the component; otherwise, hide the component.
+     * @see java.awt.Component#isVisible
+     */
+    public void setVisible(boolean b)
+	{
+		if(b)
+		{
+			setLocation(50, 50);
+		}	
+		super.setVisible(b);
+	}
+	
+	static public void main(String args[])
+	{
+		try
+		{
+			//Create a new instance of our application's frame, and make it visible.
+    		(new FetcherFrame()).setVisible(true);
+		}
+		catch (Throwable t)
+		{
+			System.err.println(t);
+			t.printStackTrace();
+			//Ensure the application exits with an error condition.
+			System.exit(1);
+		}
+	}
+	
+	public void addNotify()
+	{
+		// Record the size of the window prior to calling parents addNotify.
+		Dimension d = getSize();
+		
+		super.addNotify();
+	
+		if (fComponentsAdjusted)
+			return;
+		// Adjust components according to the insets; query them once instead of per component
+		Insets insets = getInsets();
+		setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height);
+		Component components[] = getComponents();
+		for (int i = 0; i < components.length; i++)
+		{
+			Point p = components[i].getLocation();
+			p.translate(insets.left, insets.top);
+			components[i].setLocation(p);
+		}
+		fComponentsAdjusted = true;
+	}
+	
+	// Used for addNotify check.
+	boolean fComponentsAdjusted = false;
+	
+	//{{DECLARE_CONTROLS
+	java.awt.FileDialog openFileDialog1 = new java.awt.FileDialog(this);
+	java.awt.Panel mainPanelWithBorders = new java.awt.Panel();
+	java.awt.Panel northBorder = new java.awt.Panel();
+	java.awt.Panel southBorder = new java.awt.Panel();
+	java.awt.Panel westBorder = new java.awt.Panel();
+	java.awt.Panel eastBorder = new java.awt.Panel();
+	java.awt.Panel mainPanel = new java.awt.Panel();
+	java.awt.Panel upperPanel = new java.awt.Panel();
+	java.awt.Panel preferencesPanel = new java.awt.Panel();
+	java.awt.Label startURLlabel = new java.awt.Label();
+	java.awt.TextField startURL = new java.awt.TextField(30);
+	java.awt.Button startButton = new java.awt.Button();
+	java.awt.Label restrictToLabel = new java.awt.Label();
+	java.awt.TextField restrictTo = new java.awt.TextField();
+	java.awt.Panel logPanel = new java.awt.Panel();
+	java.awt.List logList = new java.awt.List(8);
+	java.awt.Panel lowerPanel = new java.awt.Panel();
+	java.awt.Panel urlQueuePanel = new java.awt.Panel();
+	java.awt.Label urlQueueLabel = new java.awt.Label();
+	java.awt.List urlQueueList = new java.awt.List(5);
+	java.awt.Panel urlThreadPanel = new java.awt.Panel();
+	java.awt.Label urlThreadLabel = new java.awt.Label();
+	java.awt.List urlThreadList = new java.awt.List(4);
+	java.awt.Panel docQueuePanel = new java.awt.Panel();
+	java.awt.Label docQueueLabel = new java.awt.Label();
+	java.awt.List docQueueList = new java.awt.List(4);
+	java.awt.Panel docThreadPanel = new java.awt.Panel();
+	java.awt.Label docThreadLabel = new java.awt.Label();
+	java.awt.List docThreadList = new java.awt.List(4);
+	//}}
+	
+	//{{DECLARE_MENUS
+	java.awt.MenuBar mainMenuBar = new java.awt.MenuBar();
+	java.awt.Menu menu1 = new java.awt.Menu();
+	java.awt.MenuItem newMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem openMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem saveMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem saveAsMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem separatorMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem exitMenuItem = new java.awt.MenuItem();
+	java.awt.Menu menu2 = new java.awt.Menu();
+	java.awt.MenuItem cutMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem copyMenuItem = new java.awt.MenuItem();
+	java.awt.MenuItem pasteMenuItem = new java.awt.MenuItem();
+	java.awt.Menu menu3 = new java.awt.Menu();
+	java.awt.MenuItem aboutMenuItem = new java.awt.MenuItem();
+	//}}
+	
+	class SymWindow extends java.awt.event.WindowAdapter
+	{
+		public void windowClosing(java.awt.event.WindowEvent event)
+		{
+			Object object = event.getSource();
+			if (object == FetcherFrame.this)
+				FetcherFrame_WindowClosing(event);
+		}
+	}
+	
+	void FetcherFrame_WindowClosing(java.awt.event.WindowEvent event)
+	{
+		// Generated hook: the work is done by the interaction method.
+		FetcherFrame_WindowClosing_Interaction1(event);
+	}
+
+	/** Shows a QuitDialog when the frame receives a window-closing event. */
+	void FetcherFrame_WindowClosing_Interaction1(java.awt.event.WindowEvent event)
+	{
+		try {
+			// QuitDialog Create and show as modal
+			(new QuitDialog(this, true)).setVisible(true);
+		} catch (Exception e) {
+			e.printStackTrace(); // report instead of silently ignoring a failed dialog
+		}
+	}
+
+	/** Dispatches action events of the menu items and the start button to their handler methods. */
+	class SymAction implements java.awt.event.ActionListener
+	{
+		public void actionPerformed(java.awt.event.ActionEvent event)
+		{
+			Object object = event.getSource();
+			if (object == openMenuItem)
+				openMenuItem_ActionPerformed(event);
+			else if (object == aboutMenuItem)
+				aboutMenuItem_ActionPerformed(event);
+			else if (object == exitMenuItem)
+				exitMenuItem_ActionPerformed(event);
+			else if (object == startButton)
+				startButton_ActionPerformed(event);
+		}
+	}
+	
+	void openMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// Generated hook: the work is done by the interaction method.
+		openMenuItem_ActionPerformed_Interaction1(event);
+	}
+
+	/** Re-creates the file dialog with its previous mode, title, directory and file, then shows it. */
+	void openMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
+	{
+		try {
+			// OpenFileDialog Create and show as modal
+		    int		defMode         = openFileDialog1.getMode();
+		    String	defTitle        = openFileDialog1.getTitle();
+		    String defDirectory     = openFileDialog1.getDirectory();
+		    String defFile          = openFileDialog1.getFile();
+
+		    openFileDialog1 = new java.awt.FileDialog(this, defTitle, defMode);
+		    openFileDialog1.setDirectory(defDirectory);
+		    openFileDialog1.setFile(defFile);
+		    openFileDialog1.setVisible(true);
+		} catch (Exception e) {
+			e.printStackTrace(); // report instead of silently ignoring a failed dialog
+		}
+	}
+
+	/** Handler for the about menu item. */
+	void aboutMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// Generated hook: the work is done by the interaction method.
+		aboutMenuItem_ActionPerformed_Interaction1(event);
+	}
+
+	/** Shows a modal AboutDialog owned by this frame. */
+	void aboutMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
+	{
+		try {
+			// AboutDialog Create and show as modal
+			(new AboutDialog(this, true)).setVisible(true);
+		} catch (Exception e) {
+			e.printStackTrace(); // report instead of silently ignoring a failed dialog
+		}
+	}
+	
+	/** Handler for the exit menu item. */
+	void exitMenuItem_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// Generated hook: the work is done by the interaction method.
+		exitMenuItem_ActionPerformed_Interaction1(event);
+	}
+
+	/** Shows a QuitDialog when the exit menu item is chosen. */
+	void exitMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event)
+	{
+		try {
+			// QuitDialog Create and show as modal
+		    (new QuitDialog(this, true)).setVisible(true);
+		} catch (Exception e) {
+			e.printStackTrace(); // report instead of silently ignoring a failed dialog
+		}
+	}
+
+	/** Start button hook called by SymAction; has no behaviour of its own. */
+	public void startButton_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// Left empty here; further listeners can be attached
+		// through addStartButtonListener().
+	}
+	 
+	public void addUrlQueueItem(String item)
+	{
+		urlQueueList.add(item);
+	}
+	
+	public void removeUrlQueueItem(String item)
+	{
+		urlQueueList.remove(item);
+	}
+	public void addDocQueueItem(String item)
+	{
+		docQueueList.add(item);
+	}
+	
+	public void removeDocQueueItem(String item)
+	{
+		docQueueList.remove(item);
+	}
+	
+	/** Appends item to the URL thread list and returns the list's item count afterwards. */
+	public synchronized int addUrlThreadItem(String item)
+	{
+		urlThreadList.add(item);
+		return urlThreadList.getItemCount();
+	}
+
+	/** Adds item at index pos of the URL thread list and returns the list's item count afterwards. */
+	public synchronized int addUrlThreadItem(String item, int pos)
+	{
+		urlThreadList.add(item,pos);
+		return urlThreadList.getItemCount();
+	}
+	
+	public void replaceUrlThreadItem(String item, int index)
+	{
+		urlThreadList.replaceItem(item,index);
+	}
+	
+	/** Appends item to the doc thread list and returns the list's item count afterwards. */
+	public synchronized int addDocThreadItem(String item)
+	{
+		docThreadList.add(item);
+		return docThreadList.getItemCount();
+	}
+
+	public void replaceDocThreadItem(String item, int index)
+	{
+		docThreadList.replaceItem(item,index);
+	}
+	
+	
+	/** Appends entry to the log list and makes the new last row visible. */
+	public void addLogEntry(String entry)
+	{
+		logList.add(entry);
+		logList.makeVisible(logList.getItemCount()-1);
+	}
+	
+	public void clearLog()
+	{
+		logList.removeAll();
+	}
+	
+	/** Registers an additional action listener on the start button. */
+	public void addStartButtonListener(ActionListener a)
+	{
+		startButton.addActionListener(a);
+	}
+	
+	public String getRestrictTo()
+	{
+	   return restrictTo.getText();
+	}
+	public void setRestrictTo(String restrictTo)
+	{
+	   this.restrictTo.setText(restrictTo);
+	}
+	public String getStartURL()
+	{
+	   return startURL.getText();
+	}
+    public void setStartURL(String startURL)
+    {
+        this.startURL.setText(startURL);
+    }	
+
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java
new file mode 100644
index 00000000000..405f9db7839
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java
@@ -0,0 +1,332 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c) <p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.gui;
+
+import javax.swing.*;
+import java.awt.*;
+import java.awt.event.*;
+
+
+public class FetcherSummaryFrame extends JFrame
+{
+    JPanel lowerPanel = new JPanel();
+    JPanel progressPanel = new JPanel();
+    JPanel middlePanel = new JPanel();
+    JPanel rightPanel = new JPanel();
+    BorderLayout borderLayout1 = new BorderLayout();
+    JPanel propertyPanel = new JPanel();
+    JLabel hostLabel = new JLabel();
+    JLabel urlRestrictionFrame = new JLabel();
+    JTextField startURL = new JTextField();
+    JTextField restrictTo = new JTextField();
+    JButton startButton = new JButton();
+    GridLayout gridLayout1 = new GridLayout();
+    JProgressBar urlQueuedProgress = new JProgressBar(0,100);
+    JLabel urlQueuedLabel = new JLabel();
+    JLabel scopeFilteredLabel = new JLabel();
+    JProgressBar scopeFilteredProgress = new JProgressBar(0,100);
+    JLabel visitedFilteredLabel = new JLabel();
+    JProgressBar visitedFilteredProgress = new JProgressBar(0,100);
+    JLabel workingThreadsLabel = new JLabel();
+    JProgressBar workingThreadsProgress = new JProgressBar(0,100);
+    JLabel idleThreadsLabel = new JLabel();
+    JProgressBar idleThreadsProgress = new JProgressBar(0,100);
+    JLabel busyThreadsLabel = new JLabel();
+    JProgressBar busyThreadsProgress = new JProgressBar(0,100);
+    JLabel requestQueueLabel = new JLabel();
+    JProgressBar requestQueueProgress = new JProgressBar();
+    JLabel stalledThreadsLabel = new JLabel();
+    JProgressBar stalledThreadsProgress = new JProgressBar();
+    JLabel dnsLabel = new JLabel();
+    JProgressBar dnsProgress = new JProgressBar(0,100);
+    JLabel freeMemLabel = new JLabel();
+    JLabel freeMemText = new JLabel();
+    JLabel totalMemLabel = new JLabel();
+    JLabel totalMemText = new JLabel();
+    JLabel bpsLabel = new JLabel();
+    JLabel bpsText = new JLabel();
+    JLabel docsLabel = new JLabel();
+    JLabel docsText = new JLabel();
+	JLabel docsReadLabel = new JLabel();
+	JLabel docsReadText  = new JLabel();
+    JProgressBar urlsCaughtProgress = new JProgressBar(0,100);
+    JLabel urlsCaughtText = new JLabel();
+    JLabel robotsTxtsText = new JLabel();
+    JProgressBar robotsTxtsProgress = new JProgressBar(0,100);
+
+    /** Builds the UI via jbInit(), titles and sizes the window, and resets every counter bar. */
+    public FetcherSummaryFrame()
+    {
+        try
+        {
+            jbInit();
+            setTitle("LARM - LANLab Retrieval Machine");
+            setSize(new Dimension(640,350));
+
+            // every counter bar paints its value as text and starts out showing "0"
+            JProgressBar[] counterBars =
+            {
+                urlQueuedProgress,
+                scopeFilteredProgress,
+                visitedFilteredProgress,
+                workingThreadsProgress,
+                idleThreadsProgress,
+                busyThreadsProgress,
+                stalledThreadsProgress,
+                requestQueueProgress,
+                dnsProgress,
+                urlsCaughtProgress,
+                robotsTxtsProgress
+            };
+            for (int i = 0; i < counterBars.length; i++)
+            {
+                counterBars[i].setStringPainted(true);
+                counterBars[i].setString("0");
+            }
+        }
+        catch(Exception e)
+        {
+            e.printStackTrace();
+        }
+    }
+
+    private void jbInit() throws Exception // configures every widget first, then assembles the panels
+    {
+        this.getContentPane().setLayout(borderLayout1);
+        propertyPanel.setMinimumSize(new Dimension(10, 70));
+        propertyPanel.setPreferredSize(new Dimension(10, 80));
+        propertyPanel.setLayout(null); // no layout manager: its children are positioned via setBounds
+        hostLabel.setText("Startseite");
+        hostLabel.setBounds(new Rectangle(18, 15, 76, 17));
+        urlRestrictionFrame.setText("URL-Restriction (regul. Ausdruck)");
+        urlRestrictionFrame.setBounds(new Rectangle(18, 37, 208, 17));
+        startURL.setBounds(new Rectangle(224, 14, 281, 21));
+        restrictTo.setBounds(new Rectangle(224, 38, 281, 21));
+        startButton.setActionCommand("start");
+        startButton.setText("Start");
+        startButton.setBounds(new Rectangle(528, 14, 79, 47));
+        lowerPanel.setLayout(gridLayout1); // gridLayout1 is a default-constructed GridLayout
+        urlQueuedLabel.setToolTipText("");
+        urlQueuedLabel.setText("URLs queued");
+        scopeFilteredLabel.setToolTipText("");
+        scopeFilteredLabel.setText("Scope-gefiltert");
+        visitedFilteredLabel.setText("Visited gefiltert");
+        workingThreadsLabel.setText("Number of Working Threads");
+        idleThreadsLabel.setText("Idle Threads");
+        busyThreadsLabel.setText("Busy Threads");
+        requestQueueLabel.setText("requests queued");
+        stalledThreadsLabel.setText("stalled Threads");
+        stalledThreadsProgress.setPreferredSize(new Dimension(190, 25)); // all progress bars share this preferred size
+        requestQueueProgress.setPreferredSize(new Dimension(190, 25));
+        busyThreadsProgress.setPreferredSize(new Dimension(190, 25));
+        idleThreadsProgress.setPreferredSize(new Dimension(190, 25));
+        workingThreadsProgress.setPreferredSize(new Dimension(190, 25));
+        urlQueuedProgress.setPreferredSize(new Dimension(190, 25));
+        scopeFilteredProgress.setPreferredSize(new Dimension(190, 25));
+        visitedFilteredProgress.setPreferredSize(new Dimension(190, 25));
+        dnsLabel.setText("DNS Hosts cached");
+        dnsProgress.setPreferredSize(new Dimension(190, 25));
+        freeMemLabel.setText("Free Mem");
+        freeMemLabel.setPreferredSize(new Dimension(60, 17));
+        freeMemText.setText("0");
+        freeMemText.setPreferredSize(new Dimension(120, 17));
+        freeMemText.setMinimumSize(new Dimension(100, 17));
+        totalMemLabel.setText("total Mem");
+        totalMemLabel.setPreferredSize(new Dimension(60, 17));
+        totalMemText.setText("0");
+        totalMemText.setPreferredSize(new Dimension(120, 17));
+        totalMemText.setMinimumSize(new Dimension(100, 17));
+        bpsLabel.setPreferredSize(new Dimension(60, 17));
+        bpsLabel.setText("Bytes/s");
+        bpsText.setMinimumSize(new Dimension(100, 17));
+        bpsText.setPreferredSize(new Dimension(120, 17));
+        bpsText.setText("0");
+        docsLabel.setText("Docs/s");
+        docsLabel.setPreferredSize(new Dimension(60, 17));
+        docsText.setText("0");
+        docsText.setPreferredSize(new Dimension(120, 17));
+        docsText.setMinimumSize(new Dimension(100, 17));
+        docsReadLabel.setText("Docs read");
+        docsReadLabel.setPreferredSize(new Dimension(60, 17));
+        docsReadText.setText("0");
+        docsReadText.setPreferredSize(new Dimension(120, 17));
+        docsReadText.setMinimumSize(new Dimension(100, 17));
+        urlsCaughtProgress.setPreferredSize(new Dimension(190, 25));
+        urlsCaughtText.setText("URLs caught by Robots.txt");
+        robotsTxtsText.setText("Robots.txts found");
+        robotsTxtsProgress.setPreferredSize(new Dimension(190, 25));
+        this.getContentPane().add(lowerPanel, BorderLayout.CENTER); // from here on the component tree is assembled
+        lowerPanel.add(progressPanel, null); // first of the three panels placed in lowerPanel
+        progressPanel.add(urlQueuedLabel, null);
+        progressPanel.add(urlQueuedProgress, null);
+        progressPanel.add(scopeFilteredLabel, null);
+        progressPanel.add(scopeFilteredProgress, null);
+        progressPanel.add(visitedFilteredLabel, null);
+        progressPanel.add(visitedFilteredProgress, null);
+        progressPanel.add(dnsLabel, null);
+        progressPanel.add(dnsProgress, null);
+        progressPanel.add(robotsTxtsText, null);
+        progressPanel.add(robotsTxtsProgress, null);
+        progressPanel.add(urlsCaughtText, null);
+        progressPanel.add(urlsCaughtProgress, null);
+        lowerPanel.add(middlePanel, null); // second panel: thread and request-queue counters
+        middlePanel.add(workingThreadsLabel, null);
+        middlePanel.add(workingThreadsProgress, null);
+        middlePanel.add(idleThreadsLabel, null);
+        middlePanel.add(idleThreadsProgress, null);
+        middlePanel.add(busyThreadsLabel, null);
+        middlePanel.add(busyThreadsProgress, null);
+        middlePanel.add(requestQueueLabel, null);
+        middlePanel.add(requestQueueProgress, null);
+        middlePanel.add(stalledThreadsLabel, null);
+        middlePanel.add(stalledThreadsProgress, null);
+        lowerPanel.add(rightPanel, null); // third panel: plain text label/value pairs
+        rightPanel.add(docsLabel, null);
+        rightPanel.add(docsText, null);
+		rightPanel.add(docsReadLabel, null);
+		rightPanel.add(docsReadText, null);
+        rightPanel.add(bpsLabel, null);
+        rightPanel.add(bpsText, null);
+        rightPanel.add(totalMemLabel, null);
+        rightPanel.add(totalMemText, null);
+        rightPanel.add(freeMemLabel, null);
+        rightPanel.add(freeMemText, null);
+        this.getContentPane().add(propertyPanel, BorderLayout.NORTH); // input fields and start button go on top
+        propertyPanel.add(urlRestrictionFrame, null);
+        propertyPanel.add(restrictTo, null);
+        propertyPanel.add(hostLabel, null);
+        propertyPanel.add(startButton, null);
+        propertyPanel.add(startURL, null);
+    }
+
+    public void setCounterProgressBar(JProgressBar p, int value) // shows value on p, rescaling p's maximum
+    {
+        int oldMax = p.getMaximum();
+        if (value > oldMax)
+        {   // double until the value fits; a single doubling let setValue() clamp large jumps
+            int newMax = Math.max(oldMax, 1);   // a maximum of 0 could never grow by doubling
+            while (value > newMax && newMax <= Integer.MAX_VALUE / 2) { newMax *= 2; } // guard stops int overflow
+            p.setMaximum(newMax);
+        }
+        else if (value < oldMax / 2 && p.getValue() >= oldMax / 2) // just fell below half scale: halve it
+        {
+            p.setMaximum(oldMax / 2);
+        }
+        p.setValue(value);
+        p.setString("" + value); // the bar's text always carries the exact number
+    }
+
+    public void setURLsQueued(int queued) // shows the count on urlQueuedProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.urlQueuedProgress, queued);
+    }
+
+    public void setScopeFiltered(int filtered) // shows the count on scopeFilteredProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.scopeFilteredProgress, filtered);
+    }
+
+    public void setVisitedFiltered(int filtered) // shows the count on visitedFilteredProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.visitedFilteredProgress, filtered);
+    }
+
+    public void setWorkingThreadsCount(int threads) // shows the count on workingThreadsProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.workingThreadsProgress, threads);
+    }
+
+    public void setIdleThreadsCount(int threads) // shows the count on idleThreadsProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.idleThreadsProgress, threads);
+    }
+
+    public void setBusyThreadsCount(int threads) // shows the count on busyThreadsProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.busyThreadsProgress, threads);
+    }
+
+    public void setRequestQueueCount(int requests) // shows the count on requestQueueProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.requestQueueProgress, requests);
+    }
+
+    public void setDNSCount(int count) // shows the count on dnsProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(this.dnsProgress, count);
+    }
+
+    public void setURLsCaughtCount(int count) // count for the bar labelled "URLs caught by Robots.txt"
+    {
+        setCounterProgressBar(this.urlsCaughtProgress, count); // was urlQueuedProgress, which overwrote the queue bar
+    }
+
+   	public void addStartButtonListener(ActionListener a) // registers a on startButton
+	{
+		startButton.addActionListener(a);
+	}
+
+
+
+	public String getRestrictTo() // returns the current text of the restrictTo component
+	{
+	   return restrictTo.getText();
+	}
+	public void setRestrictTo(String restrictTo) // replaces the text of the restrictTo component
+	{
+	   this.restrictTo.setText(restrictTo); // "this." is needed: the parameter shadows the field
+	}
+	public String getStartURL() // returns the current text of the startURL component
+	{
+	   return startURL.getText();
+	}
+    public void setStartURL(String startURL) // replaces the text of the startURL component
+    {
+        this.startURL.setText(startURL); // "this." is needed: the parameter shadows the field
+    }
+
+    public void setStalledThreads(int stalled) // sets the bar value directly: no rescaling, no setString
+    {
+        stalledThreadsProgress.setValue(stalled); // NOTE(review): sibling setters use setCounterProgressBar - confirm this is intended
+    }
+
+    public void setBytesPerSecond(double bps) // writes bps, unformatted, into bpsText
+    {
+        bpsText.setText("" + bps);
+    }
+
+
+    public void setDocsPerSecond(double docs) // writes docs, unformatted, into docsText (shown next to "Docs/s")
+    {
+        docsText.setText("" + docs); // was bpsText, which overwrote the Bytes/s field
+    }
+
+    public void setFreeMem(long freeMem) // writes freeMem, unformatted, into freeMemText
+    {
+        freeMemText.setText("" + freeMem);
+    }
+
+    public void setTotalMem(long totalMem) // writes totalMem, unformatted, into totalMemText
+    {
+        totalMemText.setText("" + totalMem);
+    }
+
+    public void setRobotsTxtCount(int robotsTxtCount) // shows the count on robotsTxtsProgress via setCounterProgressBar
+    {
+        setCounterProgressBar(robotsTxtsProgress, robotsTxtCount);
+    }
+
+	public void setDocsRead(int docs) // writes docs into docsReadText (shown next to "Docs read")
+	{
+		docsReadText.setText("" + docs); // was bpsText, which overwrote the Bytes/s field
+	}
+
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java
new file mode 100644
index 00000000000..d06b91642f9
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java
@@ -0,0 +1,184 @@
+package de.lanlab.larm.gui;
+/*
+	A basic extension of the java.awt.Dialog class
+ */
+
+import java.awt.*;
+import java.awt.event.*;
+
+public class QuitDialog extends Dialog // yes/no dialog; "yes" disposes the invoking frame and exits the JVM
+{
+	public QuitDialog(Frame parent, boolean modal) // builds the controls and wires the listeners
+	{
+		super(parent, modal);
+
+        // Keep a reference to the invoking frame; the "yes" handler hides and disposes it
+        frame = parent;
+        
+		// This code is automatically generated by Visual Cafe when you add
+		// components to the visual environment. It instantiates and initializes
+		// the components. To modify the code, only use code syntax that matches
+		// what Visual Cafe can generate, or Visual Cafe may be unable to back
+		// parse your Java file into its visual environment.
+		//{{INIT_CONTROLS
+		setLayout(null);
+		setSize(337,135);
+		setVisible(false);
+		yesButton.setLabel(" Ja ");
+		add(yesButton);
+		yesButton.setFont(new Font("Dialog", Font.BOLD, 12));
+		yesButton.setBounds(72,80,79,22);
+		noButton.setLabel("  Nein  ");
+		add(noButton);
+		noButton.setFont(new Font("Dialog", Font.BOLD, 12));
+		noButton.setBounds(185,80,79,22);
+		label1.setText("Möchten Sie LARM beenden?");
+		label1.setAlignment(java.awt.Label.CENTER);
+		add(label1);
+		label1.setBounds(68,33,220,23);
+		setTitle("LARM - Beenden");
+		//}}
+
+		//{{REGISTER_LISTENERS
+		SymWindow aSymWindow = new SymWindow();
+		this.addWindowListener(aSymWindow);
+		SymAction lSymAction = new SymAction();
+		noButton.addActionListener(lSymAction);
+		yesButton.addActionListener(lSymAction);
+		//}}
+	}
+
+	public void addNotify() // one-time layout correction for the window insets
+	{
+	    // Record the size of the window prior to calling the parent's addNotify.
+	    Dimension d = getSize();
+	    
+		super.addNotify();
+
+		if (fComponentsAdjusted) // the adjustment below must run only once
+			return;
+
+		// Grow the window by its insets and shift every child by the left/top inset
+		setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height);
+		Component components[] = getComponents();
+		for (int i = 0; i < components.length; i++)
+		{
+			Point p = components[i].getLocation();
+			p.translate(getInsets().left, getInsets().top);
+			components[i].setLocation(p);
+		}
+		fComponentsAdjusted = true;
+	}
+
+	public QuitDialog(Frame parent, String title, boolean modal) // same as above, then overrides the title
+	{
+		this(parent, modal);
+		setTitle(title);
+	}
+
+    /**
+     * Shows or hides the dialog. Before showing, it is centered over its parent and the toolkit beeps.
+     * @param b  if true, show the component; otherwise, hide the component.
+     * @see java.awt.Component#isVisible
+     */
+    public void setVisible(boolean b)
+	{
+		if(b)
+		{
+			Rectangle bounds = getParent().getBounds();
+			Rectangle abounds = getBounds();
+	
+			setLocation(bounds.x + (bounds.width - abounds.width)/ 2,
+				 bounds.y + (bounds.height - abounds.height)/2);
+			Toolkit.getDefaultToolkit().beep();
+		}
+		super.setVisible(b);
+	}
+
+    // Set once addNotify() has applied the inset adjustment.
+	boolean fComponentsAdjusted = false;
+	// Invoking frame (assigned in the constructor)
+	Frame frame = null;
+
+	//{{DECLARE_CONTROLS
+	java.awt.Button yesButton = new java.awt.Button();
+	java.awt.Button noButton = new java.awt.Button();
+	java.awt.Label label1 = new java.awt.Label();
+	//}}
+
+	class SymAction implements java.awt.event.ActionListener // routes button events by their source
+	{
+		public void actionPerformed(java.awt.event.ActionEvent event)
+		{
+			Object object = event.getSource();
+			if (object == yesButton)
+				yesButton_ActionPerformed(event);
+			else if (object == noButton)
+				noButton_ActionPerformed(event);
+		}
+	}
+
+	void yesButton_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// generated stub: only forwards to the interaction method below
+			 
+		yesButton_ActionPerformed_Interaction1(event);
+	}
+
+
+	void yesButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) // "yes": shut the application down
+	{
+		try {
+	        frame.setVisible(false);    // Hide the invoking frame
+	        frame.dispose();            // Free system resources
+	        this.dispose();                  // Free system resources
+		    System.exit(0);             // close the application
+		} catch (Exception e) { // any exception is silently ignored
+		}
+	}
+
+
+	void noButton_ActionPerformed(java.awt.event.ActionEvent event)
+	{
+		// generated stub: only forwards to the interaction method below
+			 
+		noButton_ActionPerformed_Interaction1(event);
+	}
+
+
+	void noButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) // "no": just dispose the dialog
+	{
+		try {
+			this.dispose();
+		} catch (Exception e) { // any exception is silently ignored
+		}
+	}
+
+
+	class SymWindow extends java.awt.event.WindowAdapter // handles the window-close request of this dialog
+	{
+		public void windowClosing(java.awt.event.WindowEvent event)
+		{
+			Object object = event.getSource();
+			if (object == QuitDialog.this)
+				QuitDialog_WindowClosing(event);
+		}
+	}
+
+	void QuitDialog_WindowClosing(java.awt.event.WindowEvent event)
+	{
+		// generated stub: only forwards to the interaction method below
+			 
+		QuitDialog_WindowClosing_Interaction1(event);
+	}
+
+
+	void QuitDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event) // closing the window disposes the dialog, like "no"
+	{
+		try {
+			this.dispose();
+		} catch (Exception e) { // any exception is silently ignored
+		}
+	}
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java
new file mode 100644
index 00000000000..b2dd21fc353
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java
@@ -0,0 +1,136 @@
+package de.lanlab.larm.net;
+
+// whatever package you want
+import sun.net.www.http.HttpClient;
+import sun.net.www.MessageHeader;
+import sun.net.ProgressEntry;
+
+import java.net.*;
+import java.io.*;
+
+
+/**
+ *  HttpClient subclass that exposes its socket so that a read timeout can be set on it.
+ *
+ *@author     cmarschn
+ *@created    2. Mai 2001
+ */
+public class HttpClientTimeout extends HttpClient {
+	private int timeout = -1;	// last value accepted by setTimeout(); -1 = never set
+
+
+	/**
+	 *  Creates a client for the URL by delegating to the HttpClient constructor.
+	 *
+	 *@param  url              URL to connect to
+	 *@param  proxy            proxy host handed to the superclass (may be null)
+	 *@param  proxyPort        proxy port handed to the superclass
+	 *@exception  IOException  if the superclass constructor fails
+	 */
+	public HttpClientTimeout(URL url, String proxy, int proxyPort) throws IOException {
+		super(url, proxy, proxyPort);
+	}
+
+
+	/**
+	 *  Creates a client for the URL with no proxy (null host, port -1).
+	 *
+	 *@param  url              URL to connect to
+	 *@exception  IOException  if the superclass constructor fails
+	 */
+	public HttpClientTimeout(URL url) throws IOException {
+		super(url, null, -1);
+	}
+
+
+	/**
+	 *  Applies a read timeout to the socket and remembers it for parseHTTP().
+	 *
+	 *@param  i                    timeout handed to Socket.setSoTimeout
+	 *@exception  SocketException  if the socket rejects the timeout
+	 */
+	public void setTimeout(int i) throws SocketException {
+		serverSocket.setSoTimeout(i);
+		this.timeout = i;	// was "-1", which left the re-apply branch in parseHTTP() dead
+	}
+
+
+	/**
+	 *  Gets the socket this client talks over.
+	 *
+	 *@return    the inherited serverSocket
+	 */
+	public Socket getSocket() {
+		return serverSocket;
+	}
+
+
+	/**
+	 *  Re-applies the remembered timeout to the socket, then lets the
+	 *  superclass parse the response.
+	 *@param  header                   passed through to the superclass
+	 *@param  entry                    passed through to the superclass
+	 *@return                          the result of super.parseHTTP
+	 *@exception  java.io.IOException  if the timeout cannot be set or parsing fails
+	 */
+	public boolean parseHTTP(MessageHeader header, ProgressEntry entry) throws java.io.IOException {
+		if (this.timeout != -1) {
+			try {
+				serverSocket.setSoTimeout(this.timeout);
+			}
+			catch (SocketException e) {
+				throw new java.io.IOException("unable to set socket timeout!");
+			}
+		}
+		return super.parseHTTP(header, entry);
+	}
+
+
+	/**
+	 *  Closes the underlying socket.
+	 *
+	 *@exception  IOException  if closing the socket fails
+	 */
+	public void close() throws IOException {
+		serverSocket.close();
+	}
+
+
+	/*
+	 * public void SetTimeout(int i) throws SocketException {
+	 * serverSocket.setSoTimeout(i);
+	 * }
+	 */
+	/*
+	 * This class has no public constructor for HTTP.  This method is used to
+	 * get an HttpClient to the specified URL.  If there's currently an
+	 * active HttpClient to that server/port, you'll get that one.
+	 *
+	 * no longer synchronized -- it slows things down too much
+	 * synchronize at a higher level
+	 */
+	/**
+	 *  Returns the client that kac holds for the URL, or a new one if there is none.
+	 *
+	 *@param  url              URL to connect to
+	 *@return                  the client from kac with its url updated, or a new client
+	 *@exception  IOException  if a new client cannot be created
+	 */
+	public static HttpClientTimeout getNew(URL url) throws IOException {
+		/*
+		 * see if one's already around
+		 */
+		HttpClientTimeout ret = (HttpClientTimeout) kac.get(url);
+		if (ret == null) {
+			ret = new HttpClientTimeout(url);
+			// CTOR called openServer()
+		}
+		else {
+			ret.url = url;
+		}
+		// don't know if we're keeping alive until we parse the headers
+		// for now, keepingAlive is false
+		return ret;
+	}
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java
new file mode 100644
index 00000000000..aff661cb6c1
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java
@@ -0,0 +1,50 @@
+package de.lanlab.larm.net;
+
+import java.net.*;
+
+/**
+ *  URLStreamHandlerFactory that hands out HttpTimeoutHandler instances
+ *  which all carry one fixed timeout value.
+ *@author     cmarschn
+ *@created    2. Mai 2001
+ */
+public class HttpTimeoutFactory implements URLStreamHandlerFactory {
+	int fiTimeoutVal;
+	static HttpTimeoutFactory instance;
+
+
+	/**
+	 *  Creates a factory whose handlers all receive the given timeout value.
+	 *
+	 *@param  iT  timeout value passed on to every HttpTimeoutHandler
+	 */
+	public HttpTimeoutFactory(int iT) {
+		this.fiTimeoutVal = iT;
+	}
+
+
+	/**
+	 *  Builds a new HttpTimeoutHandler; the protocol name is not inspected.
+	 *  NOTE(review): a handler is returned for every protocol, not only "http" -
+	 *  confirm that this is intended before installing the factory globally.
+	 *
+	 *@param  str  protocol name (ignored)
+	 *@return      a fresh HttpTimeoutHandler using this factory's timeout
+	 */
+	public URLStreamHandler createURLStreamHandler(String str) {
+		URLStreamHandler handler = new HttpTimeoutHandler(fiTimeoutVal);
+		return handler;
+	}
+
+	/**
+	 * Returns the shared factory, creating it on the first call only; on
+	 * later calls the iT argument has no effect.
+	 */
+	public static HttpTimeoutFactory getInstance(int iT)
+	{
+		if (instance != null) { return instance; }
+		instance = new HttpTimeoutFactory(iT);
+		return instance;
+	}
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java
new file mode 100644
index 00000000000..b551e4fa6c2
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java
@@ -0,0 +1,80 @@
+package de.lanlab.larm.net;
+
+import java.net.*;
+import java.io.IOException;
+
+/**
+ *  Description of the Class
+ *
+ *@author     cmarschn
+ *@created    2. Mai 2001
+ */
+public class HttpTimeoutHandler extends sun.net.www.protocol.http.Handler {
+	int timeoutVal;
+	HttpURLConnectionTimeout fHUCT;
+
+
+	/**
+	 *  Constructor for the HttpTimeoutHandler object
+	 *
+	 *@param  iT  Description of Parameter
+	 */
+	public HttpTimeoutHandler(int iT) {
+		timeoutVal = iT;
+	}
+
+
+	/**
+	 *  Gets the Socket attribute of the HttpTimeoutHandler object
+	 *
+	 *@return    The Socket value
+	 */
+	public Socket getSocket() {
+		return fHUCT.getSocket();
+	}
+
+
+	/**
+	 *  Description of the Method
+	 *
+	 *@exception  Exception  Description of Exception
+	 */
+	public void close() throws Exception {
+		fHUCT.close();
+	}
+
+
+	/**
+	 *  Description of the Method
+	 *
+	 *@param  u                Description of Parameter
+	 *@return                  Description of the Returned Value
+	 *@exception  IOException  Description of Exception
+	 */
+	protected java.net.URLConnection openConnection(URL u) throws IOException {
+		return fHUCT = new HttpURLConnectionTimeout(u, this, timeoutVal);
+	}
+
+
+	/**
+	 *  Gets the Proxy attribute of the HttpTimeoutHandler object
+	 *
+	 *@return    The Proxy value
+	 */
+	String getProxy() {
+		return proxy;
+		// breaking encapsulation
+	}
+
+
+	/**
+	 *  Gets the ProxyPort attribute of the HttpTimeoutHandler object
+	 *
+	 *@return    The ProxyPort value
+	 */
+	int getProxyPort() {
+		return proxyPort;
+		// breaking encapsulation
+	}
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java
new file mode 100644
index 00000000000..16b07ace098
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java
@@ -0,0 +1,226 @@
+package de.lanlab.larm.net;
+
+import java.net.*;
+import java.io.*;
+import sun.net.www.http.HttpClient;
+
+/**
+ *  HttpURLConnection that talks through an HttpClientTimeout so a socket timeout can be applied.
+ *
+ *@author     cmarschn
+ *@created    2. Mai 2001
+ */
+public class HttpURLConnectionTimeout extends sun.net.www.protocol.http.HttpURLConnection {
+    int fiTimeoutVal;               // timeout applied to the client in connect() / getNewClient()
+    HttpTimeoutHandler fHandler;    // supplies proxy host and port; null after the (URL, host, port) constructor
+    HttpClientTimeout fClient;      // client set by connect(); used by getSocket() and close()
+
+
+    /**
+     *  Creates a connection that will use the given handler and timeout.
+     *
+     *@param  u                URL to connect to
+     *@param  handler          handler that created this connection
+     *@param  iTimeout         timeout applied to the client when connecting
+     *@exception  IOException  if the superclass constructor fails
+     */
+    public HttpURLConnectionTimeout(URL u, HttpTimeoutHandler handler, int iTimeout) throws IOException {
+        super(u, handler);
+        fHandler = handler;
+        fiTimeoutVal = iTimeout;
+    }
+
+
+    /**
+     *  Creates a connection by delegating to the superclass; no handler or timeout is stored.
+     *
+     *@param  u                URL to connect to
+     *@param  host             passed to the superclass constructor
+     *@param  port             passed to the superclass constructor
+     *@exception  IOException  if the superclass constructor fails
+     */
+    public HttpURLConnectionTimeout(URL u, String host, int port) throws IOException {
+        super(u, host, port);
+    }
+
+
+    /**
+     *  Obtains an HttpClientTimeout, applies the timeout and marks the connection as connected.
+     *
+     *@exception  IOException  if the client cannot be created or the timeout cannot be set
+     */
+    public void connect() throws IOException {
+        if (connected) {
+            return;
+        }
+        if ("http".equals(url.getProtocol())
+        /*
+         * && !failedOnce <- PRIVATE
+         */
+                ) {
+            // for safety's sake, as reported by KLGroup
+            synchronized (url) {
+                http = HttpClientTimeout.getNew(url);
+            }
+        }
+        else {
+            // make sure to construct new connection if first
+            // attempt failed
+            http = new HttpClientTimeout(url, fHandler.getProxy(), fHandler.getProxyPort());
+        }
+
+        // Both branches produce an HttpClientTimeout. Remember it and apply the
+        // timeout in either case; previously the else branch left fClient null
+        // (so getSocket()/close() failed) and never set the timeout.
+        fClient = (HttpClientTimeout) http;
+        fClient.setTimeout(fiTimeoutVal);
+        ps = (PrintStream) http.getOutputStream();
+
+        // this was missing from the original version
+        connected = true;
+    }
+
+
+    /**
+     *  Create a new HttpClient object, bypassing the cache of HTTP client
+     *  objects/connections.
+     *
+     *@param  url              the URL being accessed
+     *@return                  a new HttpClientTimeout (timeout applied if possible)
+     *@exception  IOException  if the client cannot be created
+     */
+    protected HttpClient getNewClient(URL url)
+             throws IOException {
+        HttpClientTimeout client = new HttpClientTimeout(url, (String) null, -1);
+        try {
+            client.setTimeout(fiTimeoutVal);
+        }
+        catch (Exception e) {
+            System.out.println("Unable to set timeout value"); // best effort: the client is still returned
+        }
+        return (HttpClient) client;
+    }
+
+
+    /**
+     *  Gets the socket of the client set by connect().
+     *
+     *@return    the client's socket
+     */
+    Socket getSocket() {
+        return fClient.getSocket();
+    }
+
+
+    /**
+     *  Closes the client set by connect().
+     *
+     *@exception  Exception  whatever the client's close() throws
+     */
+    void close() throws Exception {
+        fClient.close();
+    }
+
+
+    /**
+     *  opens a stream allowing redirects only to the same host.
+     *
+     *@param  c                connection to open; replaced by the redirect target while following
+     *@return                  input stream of the final (non-redirecting) connection
+     *@exception  IOException  if a stream cannot be opened
+     */
+    public static InputStream openConnectionCheckRedirects(URLConnection c)
+             throws IOException {
+        boolean redir;
+        int redirects = 0;
+        InputStream in = null;
+
+        do {
+            if (c instanceof HttpURLConnectionTimeout) {
+                ((HttpURLConnectionTimeout) c).setInstanceFollowRedirects(false);
+            }
+
+            // We want to open the input stream before
+            // getting headers, because getHeaderField()
+            // et al swallow IOExceptions.
+            in = c.getInputStream();
+            redir = false;
+
+            if (c instanceof HttpURLConnectionTimeout) {
+                HttpURLConnectionTimeout http = (HttpURLConnectionTimeout) c;
+                int stat = http.getResponseCode();
+                if (stat >= 300 && stat <= 305 &&
+                        stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
+                    URL base = http.getURL();
+                    String loc = http.getHeaderField("Location");
+                    URL target = null;
+                    if (loc != null) {
+                        target = new URL(base, loc);
+                    }
+                    http.disconnect();
+                    // reject a missing target, a protocol/port/host change, or a sixth redirect
+                    if (target == null
+                             || !base.getProtocol().equals(target.getProtocol())
+                             || base.getPort() != target.getPort()
+                             || !HostsEquals(base, target)
+                             || redirects >= 5) {
+                        throw new SecurityException("illegal URL redirect");
+                    }
+                    redir = true;
+                    c = target.openConnection();
+                    redirects++;
+                }
+            }
+        } while (redir);
+        return in;
+    }
+
+
+    // Same as java.net.URL.hostsEqual
+    /**
+     *  Tells whether two URLs name the same host: by name ignoring case,
+     *  otherwise by comparing the resolved addresses.
+     *
+     *@param  u1  first URL
+     *@param  u2  second URL
+     *@return     true if the hosts match; false also when resolving fails
+     */
+    static boolean HostsEquals(URL u1, URL u2) {
+        final String h1 = u1.getHost();
+        final String h2 = u2.getHost();
+
+        if (h1 == null) {
+            return h2 == null;
+        }
+        else if (h2 == null) {
+            return false;
+        }
+        else if (h1.equalsIgnoreCase(h2)) {
+            return true;
+        }
+        // Have to resolve addresses before comparing, otherwise
+        // names like tachyon and tachyon.eng would compare different
+        final boolean result[] = {false};
+
+        java.security.AccessController.doPrivileged(
+            new java.security.PrivilegedAction() {
+                /**
+                 *  Resolves both host names and stores the comparison in result[0].
+                 *
+                 *@return    always null
+                 */
+                public Object run() {
+                    try {
+                        InetAddress a1 = InetAddress.getByName(h1);
+                        InetAddress a2 = InetAddress.getByName(h2);
+                        result[0] = a1.equals(a2);
+                    }
+                    catch (UnknownHostException e) {
+                    }
+                    catch (SecurityException e) {
+                    }
+                    return null;
+                }
+            });
+        return result[0];
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java
new file mode 100644
index 00000000000..5f96063da54
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java
@@ -0,0 +1,17 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.parser;
+
+public interface LinkHandler // callbacks for link information found while parsing
+{
+    public void handleLink(String value, boolean isFrame); // presumably a link target; isFrame likely marks frame sources - TODO confirm
+    public void handleBase(String value);  // presumably the document's base URL - TODO confirm
+    public void handleTitle(String value); // presumably the document's title text - TODO confirm
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java
new file mode 100644
index 00000000000..9ccda662ed6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java
@@ -0,0 +1,1340 @@
+/*
+ *  $Id$
+ *
+ *  Copyright 2000 LANLab
+ *
+ */
+package de.lanlab.larm.parser;
+
+import hplb.org.xml.sax.*;
+import hplb.xml.*;
+import hplb.xml.util.*;
+
+import java.util.Dictionary;
+import java.util.Hashtable;
+import java.io.*;
+import hplb.misc.ByteArray;
+import java.net.URL;
+
+/**
+ * This parser is based on HEX, the HTML enabled XML parser, written by
+ * Anders Kristensen, HP Labs Bristol.
+ * It was stripped down and specialized to handle links in HTML pages. I removed
+ * some bugs. And it's FAST, about 10 x faster than the original HEX parser.
+ * Being some sort of SAX parser it calls the callback functions of the LinkHandler
+ * when links are found.
+ * @todo add handling of anchor texts
+ *
+ * @author    Clemens Marschner
+ */
+public class Tokenizer implements hplb.org.xml.sax.Parser
+{
+    /**
+     * Does nothing: the body is empty and the handler is ignored.
+     *
+     * @param e  ignored
+     */
+    public void setEntityHandler(hplb.org.xml.sax.EntityHandler e) { }
+
+
+    /**
+     * Does nothing: the body is empty and the handler is ignored.
+     *
+     * @param e  ignored
+     */
+    public void setErrorHandler(hplb.org.xml.sax.ErrorHandler e) { }
+
+
+    /**
+     * Does nothing: the body is empty and the handler is ignored.
+     *
+     * @param e  ignored
+     */
+    public void setDocumentHandler(hplb.org.xml.sax.DocumentHandler e) { }
+
+
+    /**
+     * The value of boolean attributes is this string.
+     */
+    public final static String BOOLATTR = Atom.getAtom("BOOLATTR");
+
+    // FSM states:
+    final static int ST_START = 1;
+    final static int ST_TAG_LT = 3;
+    final static int ST_TAG_NAME = 4;
+    final static int ST_TAG_WS = 5;
+    final static int ST_EMPTY_TAG_SLASH = 6;
+    final static int ST_NAME = 7;
+    final static int ST_NAME_WS = 8;
+    final static int ST_EQ = 9;
+    final static int ST_VALUE = 10;
+    final static int ST_VALUE_QUOTED = 11;
+    final static int ST_PCDATA = 21;
+    final static int ST_COMMENT = 22;
+
+    LinkHandler linkHandler;
+
+    String sysID = "what's this?";
+
+    /**
+     * Description of the Field
+     */
+    protected Hashtable noCaseElms;
+    /**
+     * Maps lower-cased element names to the spelling registered through ignoreCase().
+     */
+    public boolean rcgnzWS = true;
+    // are white space characters recognized as PCDATA
+    // even when preceding tags?
+    /**
+     * Description of the Field
+     */
+    public boolean rcgnzEntities = true;
+    /**
+     * Description of the Field
+     */
+    public boolean rcgnzCDATA = true;
+    /**
+     * Description of the Field
+     */
+    public boolean rcgnzComments = true;
+    //
+    /**
+     * Description of the Field
+     */
+    public boolean atomize = false;
+    // make element and attr names atoms
+
+    private final static int ATTR_HREF = 1;
+    private final static int ATTR_SRC = 2;
+
+    private final static int LINKTYPE_NONE = 0;
+    private final static int LINKTYPE_LINK = 1;
+    private final static int LINKTYPE_BASE = 2;
+    private final static int LINKTYPE_FRAME = 3;
+
+
+    private byte linkTagType;
+    private boolean linkAttrFound;
+    private int linkAttrType;
+    private String linkValue;
+    private boolean keepPCData;
+    private boolean isInTitleTag;
+    private boolean isInAnchorTag;
+
+    CharBuffer buf = new CharBuffer();
+    boolean isStartTag = true;
+    /**
+     * Signals whether a non-empty element has any children. If not we must
+     * generate an artificial empty-string child [characters(buf, 0, 0)].
+     */
+    boolean noChildren;
+    CharBuffer tagname = new CharBuffer();
+    CharBuffer attrName = new CharBuffer();
+    CharBuffer attrValue = new CharBuffer(1000);
+    CharBuffer pcData = new CharBuffer(8000);
+
+    Reader in;
+
+    /**
+     * Description of the Field
+     */
+    public final EntityManager entMngr = new EntityManager(this);
+    /**
+     * Description of the Field
+     */
+    protected int state = ST_START;
+    /**
+     * Description of the Field
+     */
+    protected int qchar;
+
+
+    // <'> or <"> when parsing quoted attr values
+
+
+    /**
+     * Constructor for the Tokenizer object
+     */
+    public Tokenizer() { }
+
+
+    /**
+     * Sets the LinkHandler; parse(Reader) refuses to run while it is null.
+     *
+     * @param handler  The new linkHandler value
+     */
+    public void setLinkHandler(LinkHandler handler)
+    {
+        linkHandler = handler;
+    }
+
+
+    /**
+     * Opens the document named by sysID, parses it, and closes the stream afterwards.
+     * @param publicID       not used
+     * @param sysID          URL of the document; also stored in the sysID field
+     * @exception Exception  if the URL cannot be opened, or whatever parse(InputStream) throws
+     */
+    public void parse(String publicID, String sysID)
+        throws Exception
+    {
+        this.sysID = sysID;
+        InputStream stream = new URL(sysID).openStream();
+        try { parse(stream); } finally { stream.close(); }   // opened here, so released here (was leaked)
+    }
+
+
+    /**
+     * Wraps the stream in a buffered reader (no charset is specified) and parses it.
+     * @param in             stream to read; this method itself does not close it
+     * @exception Exception  whatever parse(Reader) throws
+     */
+    public void parse(InputStream in)
+        throws Exception
+    {
+        Reader buffered = new BufferedReader(new InputStreamReader(in));
+        parse(buffered);
+    }
+
+
+    /**
+     * Parses the characters from the reader: stores it, resets the state and runs tokenize().
+     *
+     * @param in             reader to take the input from
+     * @exception Exception  IllegalStateException if no LinkHandler is set, or whatever tokenize() throws
+     */
+    public void parse(Reader in)
+        throws Exception
+    {
+        if (linkHandler == null)
+        {
+            throw new IllegalStateException("parse called without LinkHandler being set");
+        }
+
+        this.in = in;
+        toStart();
+        tokenize();
+    }
+
+
+    /**
+     * Registers elementName in noCaseElms under its lower-cased form; the table is created on first use.
+     * @param elementName  spelling to store; must not be null
+     */
+    public void ignoreCase(String elementName)
+    {
+        Hashtable names = noCaseElms;
+        if (names == null)
+        {
+            noCaseElms = names = new Hashtable();
+        }
+        names.put(elementName.toLowerCase(), elementName);
+    }
+
+
+    /**
+     * Sets the rcgnzWS flag.
+     *
+     * @param b  new value; when false, tokenize() skips white space read in the ST_START state
+     */
+    public void rcgnzWS(boolean b)
+    {
+        rcgnzWS = b;
+    }
+
+
+    // invoked after doing any Handler callback - resets state
+    /**
+     * Returns to ST_START and clears all buffers and link-tracking fields; keepPCData is not reset.
+     */
+    protected void toStart()
+    {
+        state = ST_START;
+        buf.reset();
+        tagname.reset();
+        attrName.reset();
+        attrValue.reset();
+        pcData.reset();
+        //attrs.clear();
+        isStartTag = true;
+        // until proven wrong
+
+        linkTagType = LINKTYPE_NONE; // same link-tracking reset that tokenize() does when it reads '<'
+        linkAttrFound = false;
+        linkAttrType = 0;
+        linkValue = "";
+        //keepPCData= false;
+    }
+
+
+    /**
+     * Runs the tokenizer state machine over the current input until
+     * <code>read()</code> reports the end of the stream.
+     *
+     * Each character is dispatched on the current <code>state</code>. Tag
+     * names and attribute names are collected lower-cased; completed
+     * attributes are passed to <code>gotAttr()</code>, completed tag names to
+     * <code>gotTagName()</code> and completed tags to <code>gotTag()</code>.
+     * Character data is only collected in <code>pcData</code> while
+     * <code>keepPCData</code> is set. Malformed markup never raises an error
+     * here; the machine falls back to the PCDATA state and carries on.
+     *
+     * @exception Exception  passed on from reading the input or from the
+     *      got...() callbacks
+     */
+    public void tokenize()
+        throws Exception
+    {
+        int c;
+
+
+        while ((c = read()) != -1)
+        {
+            switch (state)
+            {
+                // outside of any markup, nothing collected yet
+                case ST_START:
+                    switch (c)
+                    {
+                        case '<':
+                            // a tag starts: clear the link bookkeeping of the previous tag
+                            state = ST_TAG_LT;
+                            linkTagType = LINKTYPE_NONE;
+                            linkAttrFound = false;
+                            linkAttrType = 0;
+                            linkValue = "";
+
+                            isStartTag = true;
+                            keepPCData= false;
+
+                            // until proven wrong
+                            tagname.reset();
+                            break;
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            // leading white space is dropped unless it is to be recognized
+                            if (!rcgnzWS)
+                            {
+                                break;
+                            }
+                        // else fall through
+                        default:
+                            state = ST_PCDATA;
+                            if(keepPCData)
+                            {
+                                pcData.write(c);
+                            }
+
+                    }
+                    break;
+                // inside character data
+                case ST_PCDATA:
+                    if (c == '<')
+                    {
+                        if(keepPCData)
+                        {
+                            // gotPCDATA() ends in toStart(); state is set again below
+                            gotPCDATA(true);
+                            keepPCData = false;
+                        }
+                        linkTagType = LINKTYPE_NONE;
+                        linkAttrFound = false;
+                        linkAttrType = 0;
+                        linkValue = "";
+                        state = ST_TAG_LT;
+                        // NOTE(review): unlike the ST_START case, isStartTag and tagname are
+                        // not reset here when keepPCData is false - confirm this is intended
+                    }
+                    else
+                    {
+                        if(keepPCData)
+                        {
+                            pcData.write(c);
+                        }
+                    }
+                    break;
+                // directly after '<'
+                case ST_TAG_LT:
+                    switch (c)
+                    {
+                        case '/':
+                            // end tag
+                            isStartTag = false;
+                            state = ST_TAG_NAME;
+                            break;
+                        case '!':
+                            // comment, CDATA section or other declaration: look one char ahead
+                            // NOTE(review): read() may return -1 here; the value is then
+                            // written to pcData as is
+                            c = read();
+                            if ((c == '-' && !rcgnzComments) || (c == '[' && !rcgnzCDATA))
+                            {
+                                state = ST_PCDATA;
+                                pcData.reset();
+                                pcData.write(c);
+                                break;
+                            }
+                            if (c == '-')
+                            {
+                                state = ST_COMMENT;
+                            }
+                            else if (c == '[')
+                            {
+                                parseCDATA();
+                            }
+                            else
+                            {
+                                // FIXME: shouldn't be delivered as PCDATA
+                                //warning("Bad markup " + buf);
+                                state = ST_PCDATA;
+                                pcData.reset();
+                                pcData.write(c);
+                            }
+                            break;
+                        case '?':
+                            parsePI();
+                            break;
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            state = ST_TAG_WS;
+                            break;
+                        default:
+                            // first character of the tag name
+                            tagname.write(Character.toLowerCase((char) c));
+                            // ## changed
+                            state = ST_TAG_NAME;
+                    }
+                    break;
+                // collecting the tag name
+                case ST_TAG_NAME:
+                    switch (c)
+                    {
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            state = ST_TAG_WS;
+                            gotTagName();
+                            // ## changed
+                            break;
+                        case '/':
+                            state = ST_EMPTY_TAG_SLASH;
+                            gotTagName();
+                            // ## changed
+                            break;
+                        case '>':
+                            // tag without attributes is complete
+                            gotTagName();
+                            // ## changed
+                            gotTag();
+                            break;
+                        default:
+                            tagname.write(Character.toLowerCase((char) c));
+                        // ## changed
+                    }
+                    break;
+                // white space inside a tag, between name/attributes
+                case ST_TAG_WS:
+                    switch (c)
+                    {
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            break;
+                        case '/':
+                            state = ST_EMPTY_TAG_SLASH;
+                            break;
+                        case '>':
+                            gotTag();
+                            break;
+                        case '?':
+                        // NOTE: if !inXMLDecl we fall through to default case
+                        default:
+                            if (!isStartTag)
+                            {
+                                // bit of a hack this...
+                                //errHandler.warning("Malformed tag: "+buf, sysID, _line, _column);
+                                //err_continue("Malformed tag: "+buf);
+                                toStart();
+                                // ## changed
+                                if (c == '<')
+                                {
+                                    gotPCDATA(true);
+                                    keepPCData = false;
+                                    state = ST_TAG_LT;
+                                }
+                                else
+                                {
+                                    // we get here e.g. if there's an end tag with attributes
+                                    state = ST_PCDATA;
+                                    pcData.reset();
+                                }
+                            }
+                            else
+                            {
+                                // FIXME: this accepts way too many first chars for attr name
+                                attrName.write(Character.toLowerCase((char) c));
+                                state = ST_NAME;
+                            }
+                    }
+                    break;
+                // a '/' was seen inside a tag; only '>' may follow
+                case ST_EMPTY_TAG_SLASH:
+                    if (c == '>')
+                    {
+                        //tagtype = TAG_EMPTY;
+                        gotTag();
+                        break;
+                    }
+                    else
+                    {
+                        // ERROR !? - can't throw Exception here - we go to next tag...
+                        state = ST_PCDATA;
+                        pcData.reset();
+                    }
+                    break;
+                // collecting an attribute name
+                case ST_NAME:
+                    switch (c)
+                    {
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            if (attrName.size() > 0)
+                            {
+                                state = ST_NAME_WS;
+                            }
+                            break;
+                        case '>':
+                            // attribute without a value ends the tag
+                            if (attrName.size() > 0)
+                            {
+                                gotAttr();
+                            }
+                            gotTag();
+                            break;
+                        case '=':
+                            state = ST_EQ;
+                            break;
+                        default:
+                            if (isCtlOrTspecial(c))
+                            {
+                                // not a legal name character: give up on this tag
+                                state = ST_PCDATA;
+                                pcData.reset();
+                            }
+                            else
+                            {
+                                attrName.write(Character.toLowerCase((char) c));
+                            }
+                    }
+                    break;
+                case ST_NAME_WS:
+                    // white-space between name and '='
+                    switch (c)
+                    {
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            break;
+                        case '=':
+                            state = ST_EQ;
+                            break;
+                        case '>':
+                            gotAttr();
+                            gotTag();
+                            break;
+                        default:
+                            if (isNameChar(c))
+                            {
+                                // previous attribute had no value; c starts the next name
+                                gotAttr();
+                                attrName.write(Character.toLowerCase((char) c));
+                                state = ST_TAG_WS;
+                            }
+                            else
+                            {
+                                state = ST_PCDATA;
+                                pcData.reset();
+                            }
+                    }
+                    break;
+                case ST_EQ:
+                    // white-space between '=' and value
+                    switch (c)
+                    {
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            break;
+                        case '"':
+                            // remember the quote character that has to close the value
+                            qchar = '"';
+                            state = ST_VALUE_QUOTED;
+                            break;
+                        case '\'':
+                            qchar = '\'';
+                            state = ST_VALUE_QUOTED;
+                            break;
+                        default:
+                            if (isCtlOrTspecial(c))
+                            {
+                                state = ST_PCDATA;
+                                pcData.reset();
+                            }
+                            else
+                            {
+                                // first character of an unquoted value
+                                attrValue.write(c);
+                                state = ST_VALUE;
+                            }
+                    }
+                    break;
+                // collecting an unquoted attribute value
+                case ST_VALUE:
+                    switch (c)
+                    {
+                        case ' ':
+                        case '\t':
+                        case '\r':
+                        case '\n':
+                            gotAttr();
+                            state = ST_TAG_WS;
+                            break;
+                        case '>':
+                            gotAttr();
+                            gotTag();
+                            break;
+                        /*
+                         *  case '/':     // FIXME: HTML knows things like <a href=a/b.html> !!
+                         *  gotAttr();
+                         *  state = ST_EMPTY_TAG_SLASH;
+                         *  break;
+                         */
+                        default:
+                            if (isValueBreaker(c))
+                            {
+                                state = ST_PCDATA;
+                                pcData.reset();
+                            }
+                            else
+                            {
+                                attrValue.write(c);
+                            }
+                    }
+                    break;
+                // collecting a quoted attribute value; everything up to qchar belongs to it
+                case ST_VALUE_QUOTED:
+                    if (c == qchar)
+                    {
+                        gotAttr();
+                        state = ST_TAG_WS;
+                    }
+                    else
+                    {
+                        attrValue.write(c);
+                    }
+                    break;
+                case ST_COMMENT:
+                    // we've seen "...<!-" by now
+                    try
+                    {
+                        if (c != '-')
+                        {
+                            //warning("Bad comment");
+                            state = ST_PCDATA;
+                            pcData.reset();
+                            break;
+                        }
+                        // we're within comment - read till we see "--"
+                        while (true)
+                        {
+                            while (read_ex() != '-')
+                            {
+                                ;
+                            }
+                            if (read_ex() == '-')
+                            {
+                                break;
+                            }
+                        }
+                        // seen "--" - gotComment() reads past next '>'
+                        gotComment();
+                        //while (read_ex() != '>') ;
+                        //state = ST_PCDATA;
+                    }
+                    catch (EmptyInputStream ex)
+                    {
+                        // input ended inside the comment
+                        gotPCDATA(false);
+                        keepPCData = false;
+                        break;
+                    }
+            }
+        }
+
+        // input stream ended - return rest, if any, as PCDATA
+        if (buf.size() > 0)
+        {
+            gotPCDATA(false);
+            keepPCData = false;
+            buf.reset();
+        }
+    }
+
+
+    // counts lines and columns - used in error reporting
+    // a line can be a single \r or \n or it can be \r\n - we handle them all
+    int cc;
+
+    // last char read
+
+
+    /**
+     * Reads the next character from the input and records it in
+     * <code>buf</code>. The end-of-stream marker is not recorded.
+     *
+     * @return                 the character read, or -1 at the end of the
+     *      input
+     * @exception IOException  if the underlying reader fails
+     */
+    public final int read()
+        throws IOException
+    {
+        final int ch = in.read();
+        if (ch == -1)
+        {
+            return ch;
+        }
+
+        buf.write(ch);
+        return ch;
+    }
+
+
+    /**
+     * Like <code>read()</code>, but signals the end of the input with an
+     * exception instead of a return value.
+     *
+     * @return                      the character read; never -1
+     * @exception IOException       if the underlying reader fails
+     * @exception EmptyInputStream  if the input is exhausted
+     */
+    public final int read_ex()
+        throws IOException, EmptyInputStream
+    {
+        final int ch = read();
+        if (ch != -1)
+        {
+            return ch;
+        }
+        throw new EmptyInputStream();
+    }
+
+
+    // HTML allows <em>boolean</em> attributes - attributes without a
+    // value, or rather an implicit value which is the same as the name.
+    /**
+     * Called when an attribute (name plus optional value) is complete. As
+     * long as no link attribute has been found in the current tag, the
+     * attribute name is compared with "href" and "src"; on a match the
+     * (optionally entity-decoded) value is remembered as the link value.
+     * The attribute buffers are emptied in any case.
+     *
+     * NOTE(review): the name is not compared against linkAttrType, so "src"
+     * is accepted for tags that expect "href" and vice versa - confirm this
+     * is intended.
+     *
+     * @exception Exception  passed on from entity decoding
+     */
+    protected final void gotAttr()
+        throws Exception
+    {
+        if (!linkAttrFound)
+        {
+            final char[] name = attrName.getCharArray();
+            final int len = attrName.getLength();
+
+            // compare character-wise to avoid building a String per attribute
+            final boolean isHref = len == 4
+                     && name[0] == 'h' && name[1] == 'r' && name[2] == 'e' && name[3] == 'f';
+            final boolean isSrc = len == 3
+                     && name[0] == 's' && name[1] == 'r' && name[2] == 'c';
+
+            if (isHref || isSrc)
+            {
+                if (rcgnzEntities)
+                {
+                    linkValue = entMngr.entityDecode(attrValue).toString();
+                }
+                else
+                {
+                    linkValue = attrValue.toString();
+                }
+                linkAttrFound = true;
+            }
+            else
+            {
+                linkValue = "";
+            }
+        }
+
+        // ready for the next attribute
+        attrName.reset();
+        attrValue.reset();
+    }
+
+
+    /**
+     * Description of the Method
+     */
+    protected void gotTagName()
+    {
+        char[] tag = tagname.getCharArray();
+        int tagLength = tagname.getLength();
+        switch (tagLength)
+        {
+            case 1:
+                // A
+                if (tag[0] == 'a')
+                {
+                    linkTagType = LINKTYPE_LINK;
+                    linkAttrType = ATTR_HREF;
+
+                }
+                break;
+            // [case 3: // IMG]
+            case 4:
+                // BASE, AREA [, LINK]
+                if(isStartTag)
+                {
+                    if (tag[0] == 'b' && tag[1] == 'a' && tag[2] == 's' && tag[3] == 'e')
+                    {
+                        linkTagType = LINKTYPE_BASE;
+                        linkAttrType = ATTR_HREF;
+                    }
+                    else if (tag[0] == 'a' && tag[1] == 'r' && tag[2] == 'e' && tag[3] == 'a')
+                    {
+                        linkTagType = LINKTYPE_LINK;
+                        linkAttrType = ATTR_HREF;
+                    }
+                }
+                break;
+            case 5:
+                // FRAME
+                if(isStartTag)
+                {
+                    if (tag[0] == 'f' && tag[1] == 'r' && tag[2] == 'a' && tag[3] == 'm' && tag[4] == 'e')
+                    {
+                        linkTagType = LINKTYPE_FRAME;
+                        linkAttrType = ATTR_SRC;
+                    }
+                    else if (tag[0] == 't' && tag[1] == 'i' && tag[2] == 't' && tag[3] == 'l' && tag[4] == 'e')
+                    {
+                        isInTitleTag = true;
+                        keepPCData = true;
+                    }
+                }
+            default:
+        }
+    }
+
+
+    /**
+     * Called when a tag is complete. If the tag is a start tag in which a
+     * link attribute was found, the link handler is told about it according
+     * to the tag type: plain link, frame link or base. Afterwards the
+     * tokenizer is reset to its start state.
+     *
+     * @exception Exception  declared for callers; not thrown by this method
+     *      itself as far as visible here
+     */
+    protected void gotTag()
+        throws Exception
+    {
+        if (linkAttrFound && isStartTag)
+        {
+            if (linkTagType == LINKTYPE_LINK)
+            {
+                // second argument false: not a frame
+                linkHandler.handleLink(linkValue, false);
+            }
+            else if (linkTagType == LINKTYPE_FRAME)
+            {
+                linkHandler.handleLink(linkValue, true);
+            }
+            else if (linkTagType == LINKTYPE_BASE)
+            {
+                linkHandler.handleBase(linkValue);
+            }
+        }
+        toStart();
+    }
+
+
+    /**
+     * Converts the first <code>attrs.n</code> keys of the attribute map to
+     * lower case in place and, if <code>atomize</code> is set, replaces each
+     * by its atom.
+     *
+     * A fixed locale is used for the conversion so that the result does not
+     * depend on the default locale of the JVM (under e.g. a Turkish locale
+     * 'I' would otherwise become a dotless i).
+     *
+     * @param attrs  attribute map whose keys are rewritten
+     */
+    public final void keysToLowerCase(SAXAttributeMap attrs)
+    {
+        for (int i = 0; i < attrs.n; i++)
+        {
+            attrs.keys[i] = attrs.keys[i].toLowerCase(java.util.Locale.ENGLISH);
+            if (atomize)
+            {
+                attrs.keys[i] = Atom.getAtom(attrs.keys[i]);
+            }
+        }
+    }
+
+
+    // toomuch true iff we read a '<' of the next token
+    /**
+     * Description of the Method
+     *
+     * @param toomuch        Description of the Parameter
+     * @exception Exception  Description of the Exception
+     */
+    protected void gotPCDATA(boolean toomuch)
+        throws Exception
+    {
+        if(isInTitleTag)
+        {
+            linkHandler.handleTitle(pcData.toString());
+            isInTitleTag = false;
+        }
+
+        // ignore it
+        toStart();
+    }
+
+
+    /*
+     *  noChildren = false;
+     *  if (toomuch) {
+     *  buf.setLength(buf.size() - 1);
+     *  }
+     *  CharBuffer buf1 = rcgnzEntities ? entMngr.entityDecode(buf) : buf;
+     *  docHandler.characters(buf1.getCharArray(), 0, buf1.size());
+     *  /handler.gotText(getBuffer());
+     *  toStart();
+     *  if (toomuch) {
+     *  buf.write('<');
+     *  column--;
+     *  }
+     *  }
+     */
+    // XXX: should pass the comment on as docHandler.ignorable() ??
+    /**
+     * Finishes a comment: consumes the input up to and including the next
+     * '>' and then resets the tokenizer to its start state.
+     *
+     * @exception IOException       if reading fails
+     * @exception EmptyInputStream  if the input ends before a '>' is seen;
+     *      the tokenizer is not reset in that case
+     */
+    protected void gotComment()
+        throws IOException, EmptyInputStream
+    {
+        int ch;
+        do
+        {
+            ch = read_ex();
+        } while (ch != '>');
+
+        toStart();
+    }
+
+
+    // Processing Instruction
+    /**
+     * Called after "&lt;?" has been read. Processing instructions are not
+     * evaluated: the tokenizer is simply reset to its start state, without
+     * consuming any further input here.
+     *
+     * @exception Exception  declared for callers; the active code does not
+     *      throw
+     */
+    protected void parsePI()
+        throws Exception
+    {
+        // ignore this
+
+        // the block below is the disabled original implementation, kept for reference
+        /*
+         *  int i;
+         *  String target;
+         *  noChildren = false;
+         *  inXMLDecl = false;
+         *  i = buf.size();
+         *  try {
+         *  while (!isWS(read_ex())) ;
+         *  target = buf.toString();
+         *  target = target.substring(i, target.length() - 1);
+         *  if ("XML".equals(target)) {
+         *  inXMLDecl = true;
+         *  state = ST_TAG_WS;
+         *  return;
+         *  }
+         *  while (isWS(read_ex())) ;
+         *  i = buf.size() - 1;
+         *  while (true) {
+         *  while (read_ex() != '?') ;
+         *  if (read_ex() == '>') {
+         *  String s = buf.toString();
+         *  docHandler.processingInstruction(
+         *  Atom.getAtom(target), s.substring(i, s.length()-2));
+         *  /handler.gotPI(Atom.getAtom(target),
+         *  /              s.substring(i, s.length()-2));
+         *  break;
+         *  }
+         *  }
+         *  } catch (EmptyInputStream ex) {
+         *  gotPCDATA(false);
+         *  errHandler.warning("EOF while parsing PI", sysID, _line, _column);
+         *  /err_continue("EOF while parsing PI");
+         *  }
+         */
+        toStart();
+    }
+
+
+    // CDATA section
+    // XXX: should contents be amalgamated with surrounding PCDATA?
+    /**
+     * Called after "&lt;![" has been read. If the input continues with
+     * "CDATA[", everything up to and including the terminating "]]&gt;" is
+     * skipped; the content is not delivered to any handler. Otherwise a
+     * warning is issued. The tokenizer is reset to its start state at the
+     * end in every case, including a premature end of the input.
+     *
+     * @exception Exception  if reading fails or a warning handler throws
+     */
+    protected void parseCDATA()
+        throws Exception
+    {
+        // we've seen "<![" by now
+        try
+        {
+            if (read_ex() == 'C' && read_ex() == 'D' && read_ex() == 'A' &&
+                    read_ex() == 'T' && read_ex() == 'A' && read_ex() == '[')
+            {
+                // Look for the terminator "]]>" by counting the ']' characters
+                // directly in front of the current position. Reading three
+                // characters at a time per attempt would miss the terminator
+                // whenever the content itself ends in ']' (e.g. "]]]>").
+                int brackets = 0;
+                while (true)
+                {
+                    int c = read_ex();
+                    if (c == ']')
+                    {
+                        // two are enough; do not count further
+                        if (brackets < 2)
+                        {
+                            brackets++;
+                        }
+                    }
+                    else if (c == '>' && brackets == 2)
+                    {
+                        break;
+                    }
+                    else
+                    {
+                        brackets = 0;
+                    }
+                }
+                // docHandler.characters(buf.getCharArray(), i1, buf.size()-3-i1);
+            }
+            else
+            {
+                warning("Bad CDATA markup");
+                // note: toStart() below puts the state back to the start state
+                state = ST_PCDATA;
+                pcData.reset();
+            }
+        }
+        catch (EmptyInputStream ex)
+        {
+            warning("EOF while parsing CDATA section");
+            //gotPCDATA(false);
+        }
+        toStart();
+    }
+
+
+    /**
+     * Tells whether a character is white space in the sense of this
+     * tokenizer: blank, tab, carriage return or line feed.
+     *
+     * @param c  character code to test
+     * @return   true if c is one of the four white space characters
+     */
+    public boolean isWS(int c)
+    {
+        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+    }
+
+
+    /**
+     * Tells whether a character ends an unquoted attribute value: any ASCII
+     * control character (0-31 and 127), a blank or '&gt;'.
+     *
+     * @param c  character code to test
+     * @return   true if c cannot be part of an unquoted attribute value
+     */
+    public final static boolean isValueBreaker(int c)
+    {
+        // control characters (0-31 and 127)
+        if ((c >= 0 && c <= 31) || c == 127)
+        {
+            return true;
+        }
+
+        return c == '>' || c == ' ';
+    }
+
+
+    /**
+     * Returns true if c is either an ascii control character or a tspecial
+     * according to the HTTP specification.
+     *
+     * @param c  Description of the Parameter
+     * @return   The ctlOrTspecial value
+     */
+    //   private static final boolean[] isCtlOrTSpecial = new boolean[]
+//     {
+//        /* 0 */     true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 14 */    true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 28 */    true , true , true , true , true , false, true , false, false, false, false, false, true , true ,
+//        /* 42 */    false, false, true , false, false, true , false, false, false, false, false, false, false, false,
+//        /* 56 */    false, false, /*FIX: / no control char: true*/ false, true , true , true , true , true , true , false, false, false, false, false,
+//        /* 70 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 84 */    false, false, false, false, false, false, false, true , true , true , false, false, false, false,
+//        /* 98 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 112 */   false, false, false, false, false, false, false, false, false, false, false, true , false, true ,
+//        /* 126 */   false, true , false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 140 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 154 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 168 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 182 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 196 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 210 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 224 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 238 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 252 */   false, false, false, false
+//    };
+
+    public final static boolean isCtlOrTspecial(int c)
+    {
+        // control characters (0-31 and 127); this covers tab as well
+        if ((c >= 0 && c <= 31) || c == 127)
+        {
+            return true;
+        }
+
+        // tspecials; '/' is not in the list, so it is accepted in names
+        // and unquoted values
+        switch (c)
+        {
+            case '(':
+            case ')':
+            case '<':
+            case '>':
+            case '@':
+            case ',':
+            case ';':
+            case ':':
+            case '\\':
+            case '"':
+            case '[':
+            case ']':
+            case '?':
+            case '=':
+            case '{':
+            case '}':
+            case ' ':
+                return true;
+            default:
+                return false;
+        }
+    }
+
+
+    /*
+     *  public static void main(String[])
+     *  {
+     *  System.out.println("private static final boolean[] isCtlOrTSpecial = \n{");  // bzw. isNameChar
+     *  for(int i=0; i<256; i++)
+     *  {
+     *  if(i>0)
+     *  System.out.print(", ");
+     *  if(i % 14 == 0)
+     *  {
+     *  System.out.print("\n/* " + i + " *" + "/   ");
+     *  }
+     *  if(Tokenizer.isCtlOrTspecial(i))  // bzw. isNameChar(i)
+     *  {
+     *  System.out.print("true ");
+     *  }
+     *  else
+     *  {
+     *  System.out.print("false");
+     *  }
+     *  }
+     *  System.out.print("};\n\n");
+     *  }
+     */
+//    public static final boolean isCtlOrTspecial(int c)
+//    {
+//        return (c < 256 ? isCtlOrTSpecial[c] : false);
+//    }
+//
+//    private static final boolean[] isNameChar =
+//    {
+//        /* 0 */     false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 14 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 28 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 42 */    false, false, false, true , true , false, true , true , true , true , true , true , true , true ,
+//        /* 56 */    true , true , false, false, false, false, false, false, false, true , true , true , true , true ,
+//        /* 70 */    true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 84 */    true , true , true , true , true , true , true , false, false, false, false, true , false, true ,
+//        /* 98 */    true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 112 */   true , true , true , true , true , true , true , true , true , true , true , false, false, false,
+//        /* 126 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 140 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 154 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 168 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 182 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 196 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 210 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 224 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 238 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 252 */   false, false, false, false
+//    };
+//    public static final boolean isNameChar(int c)
+//    {
+//        return (c < 256 ? isNameChar[c] : false);
+//    }
+//
+    /*
+     *  / I don't think this is a very standard definition of what can
+     *  / go into tag and attribute names.
+     */
+    /**
+     * Gets the nameChar attribute of the Tokenizer class
+     *
+     * @param c  Description of the Parameter
+     * @return   The nameChar value
+     */
+    public final static boolean isNameChar(int c)
+    {
+        return ('a' <= c && c <= 'z') ||
+                ('A' <= c && c <= 'Z') ||
+                ('0' <= c && c <= '9') ||
+                c == '.' || c == '-' || c == '_';
+    }
+
+
+
+    /**
+     * Warning hook; currently a no-op because the error handler call is commented out.
+     *
+     * @param s              the warning message (ignored at present)
+     * @exception Exception  declared in the signature, never thrown by this body
+     */
+    protected final void warning(String s)
+        throws Exception
+    {
+        //errHandler.warning(s, sysID, _line, _column);
+    }
+
+
+    /**
+     * Fatal error hook; currently a no-op because the error handler call is commented out.
+     *
+     * @param s              the error message (ignored at present)
+     * @exception Exception  declared in the signature, never thrown by this body
+     */
+    protected final void fatal(String s)
+        throws Exception
+    {
+        //errHandler.fatal(s, sysID, _line, _column);
+    }
+
+
+
+    /**
+     * Small manual test driver: parses one HTML file and prints every link,
+     * title and base URL the tokenizer reports, numbered by one shared counter.
+     *
+     * @param argv  optional; argv[0] is the file to parse. Without arguments the
+     *              historical default "C:\witest.htm" is used.
+     */
+    public static void main(String[] argv)
+    {
+        String fileName = argv.length > 0 ? argv[0] : "C:\\witest.htm";
+        Tokenizer tok = new Tokenizer();
+        tok.setLinkHandler(
+            new LinkHandler()
+            {
+                int nr = 0;   // shared by all three callbacks
+                public void handleLink(String link, boolean isFrame)
+                {
+                    System.out.println("found link " + (++nr) + ": " + link);
+                }
+                public void handleTitle(String title)
+                {
+                    System.out.println("found title " + (++nr) + ": " + title);
+                }
+                public void handleBase(String link)
+                {
+                    System.out.println("found base " + (++nr) + ": " + link);
+                }
+            });
+        FileReader in = null;
+        try
+        {
+            in = new FileReader(fileName);
+            tok.parse(in);
+        }
+        catch (Exception e)
+        {
+            System.out.println("Caught Exception: " + e.getClass().getName());
+            e.printStackTrace();
+        }
+        finally
+        {
+            // the reader used to be leaked; close it whatever parse() did
+            try { if (in != null) { in.close(); } } catch (Exception ignored) { }
+        }
+    }
+}
+
+/**
+ * Checked exception without message or cause; presumably signals empty tokenizer input (inferred from the name - TODO confirm)
+ *
+ * @author    Administrator
+ * @created   29 December 2001
+ */
+class EmptyInputStream extends Exception
+{
+
+
+    /**
+     * Creates the exception; no detail message is set
+     */
+    EmptyInputStream() { }
+
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java
new file mode 100644
index 00000000000..26417c05449
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java
@@ -0,0 +1,37 @@
+
+/**
+ * Title: LARM Lanlab Retrieval Machine<p>
+ *
+ * Description: <p>
+ *
+ * Copyright: Copyright (c)<p>
+ *
+ * Company: <p>
+ *
+ *
+ *
+ * @author
+ * @version   1.0
+ */
+package de.lanlab.larm.storage;
+import de.lanlab.larm.util.*;
+
+/**
+ * Contract for storage back ends that receive the documents provided by a fetcher task
+ * @author    Clemens Marschner
+ */
+public interface DocumentStorage
+{
+    /**
+     * called once when the storage is supposed to be initialized
+     */
+    public void open();
+
+
+    /**
+     * called to store a web document
+     *
+     * @param doc  the document to store
+     */
+    public void store(WebDocument doc);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java
new file mode 100644
index 00000000000..2b6507195c3
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java
@@ -0,0 +1,165 @@
+package de.lanlab.larm.storage;
+
+import de.lanlab.larm.util.WebDocument;
+import de.lanlab.larm.util.SimpleLogger;
+import java.io.*;
+
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @created   11. Januar 2002
+ * @version   1.0
+ */
+
+
+
+/**
+ * this class saves the documents into page files of 50 MB and keeps a record of all
+ * the positions into a Logger. the log file contains URL, page file number, and
+ * index within the page file.
+ *
+ */
+
+public class LogStorage implements DocumentStorage
+{
+
+    SimpleLogger log;
+
+    File pageFile;
+    FileOutputStream out;
+    int pageFileCount;
+    String filePrefix;
+    int offset;
+    boolean isValid = false;
+    /**
+     * Description of the Field
+     */
+    public final static int MAXLENGTH = 50000000;
+    boolean logContents = false;
+    String fileName;
+
+
+    /**
+     * Constructor for the LogStorage object
+     *
+     * @param log          the logger where index information is saved to
+     * @param logContents  whether all docs are to be stored in page files or not
+     * @param filePrefix   the file name where the page file number is appended
+     */
+    public LogStorage(SimpleLogger log, boolean logContents, String filePrefix)
+    {
+        this.log = log;
+        pageFileCount = 0;
+        this.filePrefix = filePrefix;
+        this.logContents = logContents;
+        if (logContents)
+        {
+            openPageFile();
+        }
+    }
+
+
+    /**
+     * Description of the Method
+     */
+    public void open() { }
+
+
+    /**
+     * Description of the Method
+     */
+    public void openPageFile()
+    {
+        int id = ++pageFileCount;
+        fileName = filePrefix + "_" + id + ".pfl";
+        try
+        {
+            this.offset = 0;
+            out = new FileOutputStream(fileName);
+            isValid = true;
+        }
+        catch (IOException io)
+        {
+            log.logThreadSafe("**ERROR: IOException while opening pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
+            isValid = false;
+        }
+    }
+
+
+    /**
+     * Gets the outputStream attribute of the LogStorage object
+     *
+     * @return   The outputStream value
+     */
+    public OutputStream getOutputStream()
+    {
+        if (offset > MAXLENGTH)
+        {
+            try
+            {
+                out.close();
+            }
+            catch (IOException io)
+            {
+                log.logThreadSafe("**ERROR: IOException while closing pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
+            }
+            openPageFile();
+        }
+        return out;
+    }
+
+
+    /**
+     * Description of the Method
+     *
+     * @param bytes  Description of the Parameter
+     * @return       Description of the Return Value
+     */
+    public synchronized int writeToPageFile(byte[] bytes)
+    {
+        try
+        {
+            OutputStream out = getOutputStream();
+            int oldOffset = this.offset;
+            out.write(bytes);
+            this.offset += bytes.length;
+            return oldOffset;
+        }
+        catch (IOException io)
+        {
+            log.logThreadSafe("**ERROR: IOException while writing " + bytes.length + " bytes to pageFile " + fileName + ": " + io.getClass().getName() + "; " + io.getMessage());
+        }
+        return -1;
+    }
+
+
+    /**
+     * Sets the logger attribute of the LogStorage object
+     *
+     * @param log  The new logger value
+     */
+    public void setLogger(SimpleLogger log)
+    {
+        this.log = log;
+    }
+
+
+    /**
+     * stores the document if storing is enabled
+     *
+     * @param doc  Description of the Parameter
+     */
+    public void store(WebDocument doc)
+    {
+        String docInfo = doc.getInfo();
+        if (logContents && isValid && doc.getDocumentBytes() != null)
+        {
+            int offset = writeToPageFile(doc.getDocumentBytes());
+            docInfo = docInfo + "\t" + pageFileCount + "\t" + offset;
+        }
+        log.logThreadSafe(docInfo);
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java
new file mode 100644
index 00000000000..57037ce3d0f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java
@@ -0,0 +1,26 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.storage;
+import de.lanlab.larm.util.*;
+
+/**
+ * DocumentStorage that discards everything: open() and store() have empty bodies
+ */
+public class NullStorage implements DocumentStorage
+{
+
+    public NullStorage()
+    {
+    }
+
+    public void open() {}                     // nothing to initialize
+    public void store(WebDocument doc) {}     // the document is dropped
+
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java
new file mode 100644
index 00000000000..522a8760d24
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java
@@ -0,0 +1,176 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.storage;
+import java.sql.*;
+import de.lanlab.larm.util.*;
+import java.util.*;
+
+/**
+ * Stores documents in an SQL table, one row per document. At this time only for
+ * MS SQL Server (and probably Sybase): open() re-creates the table "Document", and
+ * store() fills its columns DO_URL, DO_MimeType and DO_Data2.<br>
+ * notes: experimental; slow
+ */
+public class SQLServerStorage implements DocumentStorage
+{
+
+    // connection pool: a pooled connection is in exactly one of these two vectors
+    private Vector freeCons;
+    private Vector busyCons;
+
+    // pool of prepared INSERT statements, one per pooled connection
+    private Vector freeStatements;
+    private Vector busyStatements;
+
+    /**
+     * Loads the JDBC driver and fills the pools with nrConnections connections,
+     * each with a prepared INSERT statement. An SQLException is reported and leaves
+     * the pools partly filled; any other exception ends the JVM via System.exit(0).
+     */
+    public SQLServerStorage(String driver, String connectionString, String account, String password, int nrConnections)
+    {
+        try
+        {
+            Class.forName(driver);
+            freeCons = new Vector(nrConnections);
+            busyCons = new Vector(nrConnections);
+            freeStatements = new Vector(nrConnections);
+            busyStatements = new Vector(nrConnections);
+
+            Connection sqlConn;
+            PreparedStatement statement;
+            for(int i=0; i<nrConnections; i++)
+            {
+                sqlConn = DriverManager.getConnection(connectionString, account, password);
+                statement = sqlConn.prepareStatement("INSERT INTO Document (DO_URL, DO_MimeType, DO_Data2) VALUES (?,?,?)");
+                freeCons.add(sqlConn);
+                freeStatements.add(statement);
+            }
+        }
+        catch(SQLException e)
+        {
+            System.out.println("SQLException: " + e.getMessage());
+            System.err.println("       SQLState:     " + e.getSQLState());
+            System.err.println("       VendorError:  " + e.getErrorCode());
+            return;
+        }
+        catch(Exception e)
+        {
+            System.out.println("SQLServerStorage: " + e.getClass().getName() + ": " + e.getMessage());
+            e.printStackTrace();
+            System.exit(0);
+        }
+    }
+
+    /**
+     * Takes a connection out of the free pool and marks it as busy.
+     * @return  a pooled connection, or null if no connection is free
+     */
+    public Connection getConnection()
+    {
+        synchronized(this)
+        {
+            if(freeCons.isEmpty()) { return null; }   // firstElement() would throw NoSuchElementException
+            Connection actual = (Connection)freeCons.remove(0);
+            busyCons.add(actual);
+            return actual;
+        }
+    }
+
+    /** Moves a connection obtained from getConnection() back into the free pool. */
+    public synchronized void releaseConnection(Connection con)
+    {
+        busyCons.remove(con);
+        freeCons.add(con);
+    }
+
+    /**
+     * Takes a prepared INSERT statement out of the free pool and marks it as busy.
+     * @return  a pooled statement, or null if no statement is free
+     */
+    public PreparedStatement getStatement()
+    {
+        synchronized(this)
+        {
+            if(freeStatements.isEmpty()) { return null; }   // firstElement() would throw NoSuchElementException
+            PreparedStatement actual = (PreparedStatement)freeStatements.remove(0);
+            busyStatements.add(actual);
+            return actual;
+        }
+    }
+
+    /** Moves a statement obtained from getStatement() back into the free pool. */
+    public synchronized void releaseStatement(PreparedStatement statement)
+    {
+        busyStatements.remove(statement);
+        freeStatements.add(statement);
+    }
+
+    /** Deletes all previous data by dropping and re-creating the Document table (faster than deleting rows). */
+    public void open()
+    {
+        Connection conn = getConnection();
+        if(conn == null)
+        {
+            System.err.println("SQLServerStorage: no free connection, Document table not re-created");
+            return;
+        }
+        Statement delDoc = null;
+        try
+        {
+            delDoc = conn.createStatement();
+            delDoc.executeUpdate("if exists (select * from sysobjects where id = object_id(N'[dbo].[Document]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)drop table [dbo].[Document]");
+            delDoc.executeUpdate("CREATE TABLE [dbo].[Document] ([DO_ID] [int] IDENTITY (1, 1) NOT NULL ,	[DA_CrawlPass] [int] NULL ,	[DO_URL] [varchar] (255) NULL ,	[DO_ContentType] [varchar] (50) NULL ,	[DO_Data] [text] NULL ,	[DO_Hashcode] [int] NULL ,	[DO_ContentLength] [int] NULL ,	[DO_ContentEncoding] [varchar] (20) NULL ,	[DO_Data2] [image] NULL, [DO_MimeType] [varchar] (255) NULL) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]");
+        }
+        catch(SQLException e)
+        {
+            System.out.println("SQLException: " + e.getMessage());
+            System.err.println("       SQLState:     " + e.getSQLState());
+            System.err.println("       VendorError:  " + e.getErrorCode());
+        }
+        finally
+        {
+            // the statement used to be leaked; close it before the connection goes back to the pool
+            try { if(delDoc != null) { delDoc.close(); } } catch(SQLException ignored) { }
+            releaseConnection(conn);
+        }
+    }
+
+    /**
+     * Inserts the document's URL, MIME type and raw bytes as one row of the Document table.
+     * @param document  the document to insert
+     */
+    public void store(WebDocument document)
+    {
+        PreparedStatement addDoc = getStatement();
+        if(addDoc == null)
+        {
+            System.err.println("SQLServerStorage: no free statement, document not stored: " + document.getURLString());
+            return;
+        }
+        try
+        {
+            addDoc.setString(1, document.getURLString());
+            addDoc.setString(2, document.getMimeType());
+            addDoc.setBytes(3,  document.getDocumentBytes());
+            addDoc.execute();
+        }
+        catch(SQLException e)
+        {
+            System.out.println("SQLException: " + e.getMessage());
+            System.err.println("       SQLState:     " + e.getSQLState());
+            System.err.println("       VendorError:  " + e.getErrorCode());
+        }
+        finally
+        {
+            releaseStatement(addDoc);
+        }
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java
new file mode 100644
index 00000000000..e31c53363b2
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java
@@ -0,0 +1,9 @@
+
+package de.lanlab.larm.threads;
+
+public interface InterruptableTask
+{
+	public void run(ServerThread thread);	// executes the task; ServerThread.run() passes itself as thread
+	public void interrupt();	// asks the task to stop; called by ServerThread.interruptTask() and, for queued tasks, by ThreadPool.interrupt()
+	public String getInfo();	// text describing the task; ThreadPool uses it as the info of its status messages
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java
new file mode 100644
index 00000000000..6f5f08df99c
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java
@@ -0,0 +1,173 @@
+package de.lanlab.larm.threads;
+
+import java.util.Vector;
+import java.util.Iterator;
+import java.io.*;
+import java.util.*;
+import de.lanlab.larm.util.*;
+
+/**
+ * This thread class acts like a server. It runs idle within a thread pool
+ * until runTask() is called; the given task is then executed asynchronously
+ * and the registered TaskReadyListeners are told when it has finished.
+ */
+public class ServerThread extends Thread
+{
+    /**
+     * the task that is to be executed. null in idle-mode. volatile because it
+     * is written by the callers of runTask() and read by this thread
+     */
+    protected volatile InterruptableTask task = null;
+
+    /** true from runTask() until the task has finished; read by other threads via isBusy() */
+    private volatile boolean busy = false;
+
+    /** the TaskReadyListeners that are told about every finished task */
+    private ArrayList listeners = new ArrayList();
+    /** set by interrupt(), usually from another thread; ends the loop in run() */
+    private volatile boolean isInterrupted = false;
+    /** the number passed to the constructor; part of the log names */
+    private int threadNumber;
+
+    /** logger named "thread" + threadNumber */
+    SimpleLogger log;
+    /** logger named "thread" + threadNumber + "_errors" */
+    SimpleLogger errorLog;
+
+    /** creates a named server thread within the given thread group */
+    public ServerThread(int threadNumber, String name, ThreadGroup threadGroup)
+    {
+        super(threadGroup, name);
+        init(threadNumber);
+    }
+
+    /** creates a named server thread */
+    public ServerThread(int threadNumber, String name)
+    {
+        super(name);
+        init(threadNumber);
+    }
+
+    /** stores the thread number, calls mkdir() for the "logs" directory and creates both loggers */
+    void init(int threadNumber)
+    {
+        this.threadNumber = threadNumber;
+        File logDir = new File("logs");
+        logDir.mkdir();
+        log = new SimpleLogger("thread" + threadNumber);
+        errorLog = new SimpleLogger("thread" + threadNumber + "_errors");
+    }
+
+    /** @param threadNumber  an arbitrary number for this thread; this is the constructor used by ThreadFactory */
+    public ServerThread(int threadNumber)
+    {
+        init(threadNumber);
+    }
+
+    /**
+     * the run method runs asynchronously. It waits until runTask() hands over a
+     * task, executes it, reports it through taskReady() and waits again, until
+     * the thread is interrupted. A RuntimeException thrown by a task is reported
+     * and logged instead of killing the thread, so that taskReady() still runs.
+     */
+    public void run()
+    {
+        try
+        {
+            while(!isInterrupted)
+            {
+                InterruptableTask current;
+                synchronized(this)
+                {
+                    while(task == null)
+                    {
+                        wait();
+                    }
+                    current = task;
+                }
+                try
+                {
+                    current.run(this);
+                }
+                catch(RuntimeException e)
+                {
+                    e.printStackTrace();
+                    errorLog.log("** task aborted: " + e.getClass().getName() + ": " + e.getMessage());
+                }
+                taskReady();
+            }
+        }
+        catch(InterruptedException e)
+        {
+            System.out.println("ServerThread " + threadNumber + " interrupted");
+            log.log("** Thread Interrupted **");
+        }
+    }
+
+    /**
+     * this is the main method that will invoke a task to run.
+     * @param t  the task; it is picked up by the waiting run() loop
+     */
+    public synchronized void runTask(InterruptableTask t)
+    {
+        busy = true;
+        task = t;
+        notify();
+    }
+
+    /**
+     * it should be possible to interrupt a task with this function. therefore, the task
+     * has to check its interrupted()-state. Does nothing while the thread is idle.
+     */
+    public void interruptTask()
+    {
+        InterruptableTask current = task;   // read once: taskReady() may reset the field concurrently
+        if(current != null)
+        {
+            current.interrupt();
+        }
+    }
+
+    /** the server thread can either be in idle or busy mode */
+    public boolean isBusy() { return busy; }
+
+    /** registers a listener that is called whenever a task has finished */
+    public void addTaskReadyListener(TaskReadyListener l) { listeners.add(l); }
+
+    /** unregisters a listener */
+    public void removeTaskReadyListener(TaskReadyListener l) { listeners.remove(l); }
+
+    /** ends the run() loop: sets the flag first, then interrupts the thread so that a pending wait() is left */
+    public void interrupt()
+    {
+        isInterrupted = true;
+        super.interrupt();
+    }
+
+    /** @return the number this thread was created with */
+    public int getThreadNumber() { return this.threadNumber; }
+
+    /** @return the task currently assigned, or null in idle-mode */
+    public InterruptableTask getTask() { return task; }
+
+    /**
+     * this method will be called when the task ends. It resets task and busy
+     * first and then notifies all of its observers about the changed state
+     */
+    protected void taskReady()
+    {
+        task = null;
+        busy = false;
+        Iterator Ie = listeners.iterator();
+        while(Ie.hasNext())
+        {
+            ((TaskReadyListener)Ie.next()).taskReady(this);
+        }
+    }
+
+    /** @return the logger of this thread */
+    public SimpleLogger getLog() { return log; }
+
+    /** @return the error logger of this thread */
+    public SimpleLogger getErrorLog() { return errorLog; }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java
new file mode 100644
index 00000000000..568c4a01a37
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java
@@ -0,0 +1,80 @@
+package de.lanlab.larm.threads;
+import de.lanlab.larm.util.Queue;
+import java.util.Collection;
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+
+import java.util.LinkedList;
+import java.util.Iterator;
+
+public class TaskQueue implements Queue
+{
+    /** backing list: insert() adds at the head, remove() takes from the tail */
+    LinkedList queue = new LinkedList();
+
+    /**
+     * creates an empty queue
+     */
+    public TaskQueue()
+    {
+    }
+
+    /**
+     * bulk insertion is not supported by this queue
+     * @param c  ignored
+     * @throws UnsupportedOperationException always
+     */
+    public void insertMultiple(Collection c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * push a task to the start of the queue
+     * @param i the task
+     */
+    public void insert(Object i)
+    {
+        queue.addFirst(i);
+    }
+
+    /**
+     * takes the element at the end of the queue, i.e. the one that was
+     * inserted first, and removes it from the queue
+     * @return the task, or null if the queue is empty
+     */
+    public Object remove()
+    {
+        if (queue.isEmpty())
+        {
+            return null;
+        }
+        InterruptableTask oldest = (InterruptableTask)queue.removeLast();
+        return oldest;
+    }
+
+    /**
+     * @return an iterator over the queued tasks, most recently inserted first
+     */
+    public Iterator iterator() { return queue.iterator(); }
+
+    /** removes all tasks from the queue */
+    public void clear()
+    {
+        queue.clear();
+    }
+
+    /** @return true if no task is queued */
+    public boolean isEmpty() { return queue.isEmpty(); }
+
+    /** @return the number of queued tasks */
+    public int size() { return queue.size(); }
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java
new file mode 100644
index 00000000000..75196422b78
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java
@@ -0,0 +1,9 @@
+package de.lanlab.larm.threads;
+
+import de.lanlab.larm.util.Observer;
+
+public interface TaskReadyListener extends Observer
+{
+	public void taskReady(ServerThread s);	// called by ServerThread.taskReady() after a task has finished; s is the reporting thread
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java
new file mode 100644
index 00000000000..bf22d939c33
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java
@@ -0,0 +1,20 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c)<p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.threads;
+
+public class ThreadFactory
+{
+    // static int count = 0;
+    // Creates a new ServerThread whose thread number is count; the thread is not started here.
+    public  ServerThread createServerThread(int count)
+    {
+        return new ServerThread(count);
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java
new file mode 100644
index 00000000000..84c1ef57fa7
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java
@@ -0,0 +1,380 @@
+
+package de.lanlab.larm.threads;
+
+//import java.util.Vector;
+import java.util.*;
+
+/**
+ *  if you have many tasks to accomplish, you can do this with one of the
+ *  following strategies:
+ *  <ul>
+ *    <li> do it one after another (single threaded). this may often be
+ *    inefficient because most programs often wait for external resources
+ *    <li> assign a new thread for each task (thread on demand). This will clog
+ *    up the system if many tasks have to be accomplished at the same time
+ *    <li> hold a fixed number of threads, and queue the requests if there are
+ *    more tasks than threads (ThreadPool).
+ *  </ul>
+ *  This thread pool is based on an article in Java-Magazin 06/2000.
+ *  synchronizations were removed unless necessary
+ *
+ *
+ */
+public class ThreadPool implements ThreadingStrategy, TaskReadyListener {
+    private int maxThreads = MAX_THREADS;
+    /**
+     *  references to all threads are stored here
+     */
+    private HashMap allThreads = new HashMap();
+    /**
+     *  this vector takes all idle threads
+     */
+    private Vector idleThreads = new Vector();
+    /**
+     *  this vector takes all threads that are in operation (busy)
+     */
+    private Vector busyThreads = new Vector();
+
+    /**
+     *  if there are no idleThreads, tasks will go here
+     */
+    private TaskQueue queue = new TaskQueue();
+
+    /**
+     *  thread pool observers will be notified of status changes
+     */
+    private Vector threadPoolObservers = new Vector();
+
+    private boolean isStopped = false;
+
+    /**
+     *  default maximum number of threads, if not given by the user
+     */
+    public final static int MAX_THREADS = 5;
+
+    /**
+     *  thread was created
+     */
+    public final static String THREAD_CREATE = "T_CREATE";
+    /**
+     *  a task is about to be started on a thread
+     */
+    public final static String THREAD_START = "T_START";
+    /**
+     *  thread is running
+     */
+    public final static String THREAD_RUNNING = "T_RUNNING";
+    /**
+     *  thread was stopped
+     */
+    public final static String THREAD_STOP = "T_STOP";
+    /**
+     *  thread was destroyed
+     */
+    public final static String THREAD_END = "T_END";
+    /**
+     *  thread is idle
+     */
+    public final static String THREAD_IDLE = "T_IDLE";
+
+    /**
+     *  a task was added to the queue, because all threads were busy
+     */
+    public final static String THREADQUEUE_ADD = "TQ_ADD";
+
+    /**
+     *  a task was removed from the queue, because a thread had finished and was
+     *  ready
+     */
+    public final static String THREADQUEUE_REMOVE = "TQ_REMOVE";
+
+    /**
+     *  this factory will create the server threads
+     */
+    ThreadFactory factory;
+
+
+    /**
+     *  configures the pool for MAX_THREADS threads and the default factory;
+     *  the threads themselves are only created by init()
+     */
+    public ThreadPool() {
+        this(MAX_THREADS, new ThreadFactory());
+    }
+
+
+    /**
+     *  configures the pool with the default factory; init() creates the threads
+     *
+     *@param  max  the maximum number of threads
+     */
+    public ThreadPool(int max) {
+        this(max, new ThreadFactory());
+    }
+
+
+    /**
+     *  stores the pool size and the factory; no thread is created before init()
+     *
+     *@param  max      maximum number of threads
+     *@param  factory  the thread factory with which the threads will be created
+     */
+    public ThreadPool(int max, ThreadFactory factory) {
+        maxThreads = max;
+        this.factory = factory;
+    }
+
+
+    /**
+     *  creates and starts the maxThreads server threads. It must be called by hand
+     */
+    public void init() {
+        for (int i = 0; i < maxThreads; i++) {
+            createThread(i);
+        }
+    }
+
+
+    /**
+     *  Creates server thread number i through the factory, registers it as idle
+     *  and starts it. THREAD_CREATE is sent before the start, THREAD_IDLE after it.
+     *@param  i  the number of the new thread, also its key in allThreads
+     */
+    public void createThread(int i) {
+        ServerThread s = factory.createServerThread(i);
+        idleThreads.add(s);
+        allThreads.put(new Integer(i), s);
+        s.addTaskReadyListener(this);
+        sendMessage(i, THREAD_CREATE, "");
+        s.start();
+        sendMessage(i, THREAD_IDLE, "");
+    }
+
+
+    // FIXME: buggy with respect to synchronization
+    /**
+     *  Replaces thread i: removes it from all bookkeeping, interrupts its task and
+     *  the thread itself, then creates a fresh thread with the same number.
+     *@param  i  the number of the thread to restart
+     */
+    public void restartThread(int i) {
+        sendMessage(i, THREAD_STOP, "");
+        ServerThread t = (ServerThread) allThreads.get(new Integer(i));
+        idleThreads.remove(t);
+        busyThreads.remove(t);
+        allThreads.remove(new Integer(i));
+        t.interruptTask();
+        t.interrupt();
+        //t.join();
+        // deprecated, I know, but the only way to overcome SUN's bugs
+        t = null;
+        createThread(i);
+    }
+
+
+    /**
+     *  Runs the task on an idle thread if there is one; otherwise the task is
+     *  put into the queue until a thread reports ready.
+     *@param  t    the task to execute
+     *@param  key  not used by this implementation
+     */
+    public synchronized void doTask(InterruptableTask t, Object key) {
+        if (idleThreads.isEmpty()) {
+            // every thread is busy: park the task
+            queue.insert(t);
+            sendMessage(-1, THREADQUEUE_ADD, t.getInfo());
+            return;
+        }
+        ServerThread worker = (ServerThread) idleThreads.firstElement();
+        idleThreads.remove(worker);
+        busyThreads.add(worker);
+        sendMessage(worker.getThreadNumber(), THREAD_START, t.getInfo());
+        worker.runTask(t);
+        sendMessage(worker.getThreadNumber(), THREAD_RUNNING, t.getInfo());
+    }
+
+
+    /**
+     *  Interrupts every queued task and empties the queue, then interrupts the
+     *  task of every busy thread. Tasks have to honour interrupt() themselves.
+     */
+    public void interrupt() {
+        for (Iterator queued = queue.iterator(); queued.hasNext(); ) {
+            InterruptableTask pending = (InterruptableTask) queued.next();
+            pending.interrupt();
+            sendMessage(-1, THREADQUEUE_REMOVE, pending.getInfo());
+            // hoping that everything works out...
+        }
+        queue.clear();
+        // running tasks are only asked to stop; their threads stay alive
+        for (Iterator busy = busyThreads.iterator(); busy.hasNext(); ) {
+            ServerThread worker = (ServerThread) busy.next();
+            worker.interruptTask();
+        }
+    }
+
+
+    /**
+     *  interrupts all tasks and ends the idle threads; busy threads are ended by taskReady()
+     */
+    public void stop() {
+        isStopped = true;
+        interrupt();
+        // interruptTask() does nothing for an idle thread (it has no task), so interrupt the thread itself
+        for (Iterator threads = idleThreads.iterator(); threads.hasNext(); ) {
+            ((ServerThread) threads.next()).interrupt();
+        }
+        idleThreads.clear();
+    }
+
+
+    /**
+     *  Called by a ServerThread when its task has finished. Depending on the pool
+     *  state the thread is ended, given the next queued task, or parked as idle.
+     *@param  s  the calling thread
+     */
+    public synchronized void taskReady(ServerThread s) {
+        if (isStopped) {
+            s.interrupt();
+            // ServerThread.taskReady() clears the task before notifying us, so getTask() may be null
+            InterruptableTask last = s.getTask();
+            sendMessage(s.getThreadNumber(), THREAD_STOP, last != null ? last.getInfo() : "");
+            busyThreads.remove(s);
+        } else if (!queue.isEmpty()) {
+            InterruptableTask t = (InterruptableTask) queue.remove();
+            sendMessage(-1, THREADQUEUE_REMOVE, t.getInfo());
+            sendMessage(s.getThreadNumber(), THREAD_START, "");
+            s.runTask(t);
+            sendMessage(s.getThreadNumber(), THREAD_RUNNING, t.getInfo());
+        } else {
+            sendMessage(s.getThreadNumber(), THREAD_IDLE, "");
+            idleThreads.add(s);
+            busyThreads.remove(s);
+        }
+        // wake waitForFinish(), which waits on the idleThreads monitor
+        synchronized (idleThreads) {
+            idleThreads.notify();
+        }
+    }
+
+
+    /**
+     *  Blocks the caller until the busy list is empty. The method waits on the
+     *  idleThreads monitor, which is notified by taskReady(). An
+     *  InterruptedException is reported on System.out and waiting continues.
+     */
+    public void waitForFinish() {
+        synchronized (idleThreads) {
+            for (;;) {
+                if (busyThreads.size() == 0) {
+                    break;
+                }
+                try {
+                    idleThreads.wait();
+                } catch (InterruptedException e) {
+                    System.out.println("Interrupted: " + e.getMessage());
+                }
+            }
+        }
+    }
+
+
+    /**
+     *  Registers an observer. Registered observers receive the messages
+     *  distributed by sendMessage().
+     *
+     *@param  o  the observer to be added
+     */
+    public void addThreadPoolObserver(ThreadPoolObserver o) {
+        this.threadPoolObservers.add(o);
+    }
+
+
+    /**
+     *  Distributes an event to all registered observers. A thread number of
+     *  -1 marks a queue event, which is delivered through queueUpdate(); any
+     *  other number is delivered through threadUpdate().
+     *
+     *@param  threadNr  number of the thread concerned, or -1 for a queue event
+     *@param  action    the kind of event
+     *@param  info      additional information that is passed on to the observers
+     */
+    protected void sendMessage(int threadNr, String action, String info) {
+        final boolean queueEvent = (threadNr == -1);
+        for (Iterator it = threadPoolObservers.iterator(); it.hasNext(); ) {
+            ThreadPoolObserver observer = (ThreadPoolObserver) it.next();
+            if (queueEvent) {
+                observer.queueUpdate(info, action);
+            } else {
+                observer.threadUpdate(threadNr, action, info);
+            }
+        }
+    }
+
+
+    /**
+     *  Returns the number of tasks that are waiting in the queue.
+     *
+     *@return    the size reported by the task queue
+     */
+    public synchronized int getQueueSize() {
+        return queue.size();
+    }
+
+
+    /**
+     *  Returns the number of threads in the idle list.
+     *
+     *@return    the size of the idle list
+     */
+    public synchronized int getIdleThreadsCount() {
+        return idleThreads.size();
+    }
+
+
+    /**
+     *  Returns the number of threads in the busy list.
+     *
+     *@return    the size of the busy list
+     */
+    public synchronized int getBusyThreadsCount() {
+        return busyThreads.size();
+    }
+
+
+    /**
+     *  Returns the number of threads that are either idle or busy.
+     *
+     *@return    the sum of the sizes of the idle list and the busy list
+     */
+    public synchronized int getThreadCount() {
+        final int idle = idleThreads.size();
+        final int busy = busyThreads.size();
+        return idle + busy;
+    }
+
+
+    /**
+     *  Returns an iterator over the values stored in allThreads.
+     *
+     *@return    iterator over allThreads.values()
+     */
+    public Iterator getThreadIterator() {
+        return this.allThreads.values().iterator();
+    }
+
+
+    /**
+     *  Replaces the task queue used by this pool. The previous queue is simply
+     *  dropped; nothing is copied over from it.
+     *
+     *@param  queue  the queue to use from now on
+     */
+    public void setQueue(TaskQueue queue) {
+        this.queue = queue;
+    }
+
+    /**
+     *  Returns the task queue currently used by this pool.
+     *
+     *@return    the task queue
+     */
+    public TaskQueue getTaskQueue() {
+        return this.queue;
+    }
+
+}
+
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java
new file mode 100644
index 00000000000..47e11156265
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java
@@ -0,0 +1,12 @@
+package de.lanlab.larm.threads;
+
+import de.lanlab.larm.util.Observer;
+
+/**
+ * An observer that is notified about events of the thread pool: changes of
+ * the task queue and state changes of single threads.
+ */
+public interface ThreadPoolObserver extends Observer
+{
+	/**
+	 * Reports an event that concerns the task queue.
+	 *
+	 * @param info    additional information about the event
+	 * @param action  the kind of event
+	 */
+	void queueUpdate(String info, String action);
+
+	/**
+	 * Reports an event that concerns a single thread.
+	 *
+	 * @param threadNr  number of the thread concerned
+	 * @param action    the kind of event
+	 * @param info      additional information about the event
+	 */
+	void threadUpdate(int threadNr, String action, String info);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java
new file mode 100644
index 00000000000..ab78ae89dcb
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java
@@ -0,0 +1,8 @@
+package de.lanlab.larm.threads;
+
+/**
+ * Strategy for executing InterruptableTasks.
+ */
+public interface ThreadingStrategy
+{
+	/**
+	 * Hands a task over for execution.
+	 *
+	 * @param t    the task to execute
+	 * @param key  NOTE(review): meaning is not visible from this interface;
+	 *             confirm against the implementations
+	 */
+	void doTask(InterruptableTask t, Object key);
+
+	/** Interrupts the tasks handled by this strategy. */
+	void interrupt();
+
+	/** Stops the strategy. */
+	void stop();
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java
new file mode 100644
index 00000000000..2cb43ba8831
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java
@@ -0,0 +1,721 @@
+
+/**
+ * Title: LARM Lanlab Retrieval Machine<p>
+ *
+ * Description: <p>
+ *
+ * Copyright: Copyright (c)<p>
+ *
+ * Company: <p>
+ *
+ *
+ *
+ * @author
+ * @version   1.0
+ */
+package de.lanlab.larm.util;
+import java.io.*;
+import java.util.*;
+
+
+/**
+ * Unchecked wrapper around an exception that occurred while a queue block was
+ * written to or read from disk. Message and stack trace are taken from the
+ * wrapped exception.
+ */
+class StoreException extends RuntimeException
+{
+    /** the exception that caused this StoreException */
+    Exception origException;
+
+
+    /**
+     * Wraps the given exception.
+     *
+     * @param e  the original exception
+     */
+    public StoreException(Exception e)
+    {
+        this.origException = e;
+    }
+
+
+    /**
+     * @return   the message of the wrapped exception
+     */
+    public String getMessage()
+    {
+        final Exception cause = this.origException;
+        return cause.getMessage();
+    }
+
+
+    /**
+     * Prints a header line with the original message to System.err, followed
+     * by the stack trace of the wrapped exception.
+     */
+    public void printStackTrace()
+    {
+        final String reason = origException.getMessage();
+        System.err.println("StoreException occured with reason: " + reason);
+        origException.printStackTrace();
+    }
+}
+
+/**
+ * Internal class that represents one block within a queue. A block keeps its
+ * elements either in memory or, after store() was called, in the file named
+ * by getFileName(). The class is not synchronized.
+ *
+ * @author    Clemens Marschner
+ * @created   3. Januar 2002
+ */
+class QueueBlock
+{
+
+
+    /**
+     * the elements of the block; set to null while the block is on disk. The
+     * elements must be Serializable
+     */
+    LinkedList elements;
+
+    /**
+     * number of elements in the block. Copy of elements.size()
+     */
+    int size;
+
+    /**
+     * maximum block size
+     */
+    int maxSize;
+
+    /**
+     * if set, elements is null and the block was written to file
+     */
+    boolean onDisk;
+
+    /**
+     * block name
+     */
+    String name;
+
+
+    /**
+     * initializes the block
+     *
+     * @param name     the block name (must be unique, otherwise blocks
+     *      collide on file level)
+     * @param maxSize  maximum block size. Overflows and underflows are
+     *      reported through exceptions
+     */
+    public QueueBlock(String name, int maxSize)
+    {
+        this.name = name;
+        this.onDisk = false;
+        this.elements = new LinkedList();
+        this.maxSize = maxSize;
+    }
+
+
+    /**
+     * serializes the block and stores it on disk. The in-memory elements are
+     * released only after the file was written and closed successfully.
+     *
+     * @exception StoreException  if the file could not be written
+     */
+    public void store()
+        throws StoreException
+    {
+        OutputStream file = null;
+        try
+        {
+            file = new FileOutputStream(getFileName());
+            ObjectOutputStream o = new ObjectOutputStream(file);
+            o.writeObject(elements);
+            o.close();
+            // closing the object stream also closed the underlying file
+            file = null;
+            elements = null;
+            onDisk = true;
+            //System.out.println("CachingQueue.store: Block stored");
+        }
+        catch (IOException e)
+        {
+            System.err.println("CachingQueue.store: IOException");
+            throw new StoreException(e);
+        }
+        finally
+        {
+            // only reached with an open file if something failed above
+            if (file != null)
+            {
+                try
+                {
+                    file.close();
+                }
+                catch (IOException ignored)
+                {
+                    // the original failure is the one that gets reported
+                }
+            }
+        }
+    }
+
+
+    /**
+     * @return   the filename of the block
+     */
+    String getFileName()
+    {
+        // package protected!
+
+        return "cachingqueue/" + name + ".cqb";
+    }
+
+
+    /**
+     * loads the block from disk and deletes the file afterwards
+     *
+     * @exception StoreException  if the file could not be read or
+     *      deserialized
+     */
+    public void load()
+        throws StoreException
+    {
+        InputStream file = null;
+        try
+        {
+            file = new FileInputStream(getFileName());
+            ObjectInputStream i = new ObjectInputStream(file);
+            LinkedList loaded = (LinkedList) i.readObject();
+            i.close();
+            // closing the object stream also closed the underlying file
+            file = null;
+            elements = loaded;
+            onDisk = false;
+            size = elements.size();
+            if (!(new File(getFileName()).delete()))
+            {
+                System.err.println("CachingQueue.load: file could not be deleted");
+            }
+            //System.out.println("CachingQueue.load: Block loaded");
+        }
+        catch (Exception e)
+        {
+            System.err.println("CachingQueue.load: Exception " + e.getClass().getName() + " occured");
+            throw new StoreException(e);
+        }
+        finally
+        {
+            // only reached with an open file if something failed above
+            if (file != null)
+            {
+                try
+                {
+                    file.close();
+                }
+                catch (IOException ignored)
+                {
+                    // the original failure is the one that gets reported
+                }
+            }
+        }
+    }
+
+
+    /**
+     * inserts an object at the start of the block. A block that is on disk
+     * is loaded first. Must be synchronized by the calling class to be
+     * thread safe
+     *
+     * @param o                   the object to be inserted
+     * @exception StoreException  if the block had to be loaded and loading
+     *      failed
+     */
+    public void insert(Object o)
+        throws StoreException
+    {
+        if (onDisk)
+        {
+            load();
+        }
+        if (size >= maxSize)
+        {
+            throw new OverflowException();
+        }
+        elements.addFirst(o);
+        size++;
+    }
+
+
+    /**
+     * returns the last element of the block and removes it. A block that is
+     * on disk is loaded first. Must be synchronized by the calling class to
+     * be thread safe
+     *
+     * @return                        the element removed
+     * @exception UnderflowException  if the block is empty
+     * @exception StoreException      if the block had to be loaded and
+     *      loading failed
+     */
+    public Object remove()
+        throws UnderflowException, StoreException
+    {
+        if (onDisk)
+        {
+            load();
+        }
+        if (size <= 0)
+        {
+            throw new UnderflowException();
+        }
+        size--;
+        return elements.removeLast();
+    }
+
+
+    /**
+     * @return   the number of elements in the block
+     */
+    public int size()
+    {
+        return size;
+    }
+
+
+    /**
+     * Deletes the block file if the block is still on disk, e.g. because
+     * the queue was not empty when it was dropped or because an exception
+     * occurred.
+     */
+    public void finalize()
+    {
+        if (onDisk)
+        {
+            // remove the temporary file; a failure is ignored on purpose
+            // because the file may already be gone after an exception
+            new File(getFileName()).delete();
+        }
+    }
+}
+
+
+/**
+ * this class holds a queue whose data is kept on disk whenever possible.
+ * It's a single ended queue, meaning data can only be added at the front and
+ * taken from the back. the queue itself is divided into blocks. Only the first
+ * and last blocks are kept in main memory, the rest is stored on disk. Only a
+ * LinkedList entry is kept in memory then.
+ * Blocks are swapped if an overflow (in case of insertions) or underflow (in case
+ * of removals) occur.<br>
+ *
+ * <pre>
+ *         +---+---+---+---+-+
+ *  put -> | M | S | S | S |M| -> remove
+ *         +---+---+---+---+-+
+ * </pre>
+ * the maximum number of entries can be specified with the blockSize parameter. Thus,
+ * the queue actually holds a maximum number of 2 x blockSize objects in main memory,
+ * plus a few bytes for each block.<br>
+ * The objects contained in the blocks are stored with the standard Java
+ * serialization mechanism.
+ * The files are named "cachingqueue/Queuename_BlockNumber.cqb".
+ * insert() and remove() are synchronized; size() is not.
+ * @author    Clemens Marschner
+ * @created   3. Januar 2002
+ */
+
+public class CachingQueue implements Queue
+{
+
+
+    /**
+     * the Blocks
+     */
+    LinkedList queueBlocks;
+
+    /**
+     * fast access to the first block
+     */
+    QueueBlock first = null;
+
+    /**
+     * fast access to the last block
+     */
+    QueueBlock last = null;
+
+    /**
+     * maximum block size
+     */
+    int blockSize;
+
+    /**
+     * "primary key" identity count for each block
+     */
+    int blockCount = 0;
+
+    /**
+     * active blocks
+     */
+    int numBlocks = 0;
+
+    /**
+     * queue name
+     */
+    String name;
+
+    /**
+     * total number of objects
+     */
+    int size;
+
+
+    /**
+     * creates an empty queue and makes sure the "cachingqueue" directory
+     * exists
+     *
+     * @param name the name of the queue, used in files names
+     * @param blockSize maximum number of objects stored in one block
+     */
+    public CachingQueue(String name, int blockSize)
+    {
+        queueBlocks = new LinkedList();
+        this.name = name;
+        this.blockSize = blockSize;
+        File cq = new File("cachingqueue");
+        cq.mkdir();
+    }
+
+
+    /**
+     * inserts an object to the front of the queue
+     *
+     * @param o                   the object to be inserted. must implement Serializable
+     * @exception StoreException  encapsulates Exceptions that occur when writing to hard disk
+     */
+    public synchronized void insert(Object o)
+        throws StoreException
+    {
+        if (last == null && first == null)
+        {
+            // empty queue: one block serves as first and last block
+            first = last = newBlock();
+            queueBlocks.addFirst(first);
+            numBlocks++;
+        }
+        if (last == null && first != null)
+        {
+            // assert((last==null && first==null) || (last!= null && first!=null));
+            System.err.println("Error in CachingQueue: last!=first==null");
+        }
+
+        if (first.size() >= blockSize)
+        {
+            // first block is full: create a new one. The old first block is
+            // written to disk unless it is also the last block, which has to
+            // stay in memory for removals
+            QueueBlock newBlock = newBlock();
+            numBlocks++;
+            if (last != first)
+            {
+                first.store();
+            }
+            queueBlocks.addFirst(newBlock);
+            first = newBlock;
+        }
+        first.insert(o);
+        size++;
+    }
+
+
+    /**
+     * removes the last object from the queue and returns it
+     *
+     * @return                     the object returned
+     *
+     * @exception StoreException   if a block could not be loaded from disk
+     * @exception UnderflowException if the queue was empty
+     */
+    public synchronized Object remove()
+        throws StoreException, UnderflowException
+    {
+        if (last == null)
+        {
+            throw new UnderflowException();
+        }
+        if (last.size() <= 0)
+        {
+            // the last block is used up: drop it and continue with its
+            // predecessor
+            queueBlocks.removeLast();
+            numBlocks--;
+            if (numBlocks == 1)
+            {
+                last = first;
+            }
+            else if (numBlocks == 0)
+            {
+                first = last = null;
+                throw new UnderflowException();
+            }
+            else if (numBlocks < 0)
+            {
+                // assert(numBlocks >= 0)
+                System.err.println("CachingQueue.remove: numBlocks<0!");
+                throw new UnderflowException();
+            }
+            else
+            {
+                last = (QueueBlock) queueBlocks.getLast();
+            }
+        }
+        // count the removal only after it succeeded, so that size stays
+        // correct if the block cannot be loaded (mirrors insert())
+        Object removed = last.remove();
+        --size;
+        return removed;
+    }
+
+
+    /**
+     * not supported
+     *
+     * @param c  ignored
+     * @exception UnsupportedOperationException  always
+     */
+    public void insertMultiple(java.util.Collection c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+
+    /**
+     * creates a new block named "queue name_running number"
+     *
+     * @return   the new, empty block
+     */
+    private QueueBlock newBlock()
+    {
+        return new QueueBlock(name + "_" + blockCount++, blockSize);
+    }
+
+
+    /**
+     * total number of objects contained in the queue
+     *
+     * @return   the number of objects
+     */
+    public int size()
+    {
+        return size;
+    }
+
+
+    /**
+     * runs the tests of CachingQueueTester and prints their results
+     *
+     * @param args  The command line arguments (not used)
+     */
+    public static void main(String[] args)
+    {
+        System.out.println("Test1: " + CachingQueueTester.testUnderflow());
+        System.out.println("Test2: " + CachingQueueTester.testInsert());
+        System.out.println("Test3: " + CachingQueueTester.testBufReadWrite());
+        System.out.println("Test4: " + CachingQueueTester.testBufReadWrite2());
+        System.out.println("Test5: " + CachingQueueTester.testUnderflow2());
+        System.out.println("Test6: " + CachingQueueTester.testBufReadWrite3());
+        System.out.println("Test7: " + CachingQueueTester.testExceptions());
+    }
+}
+
+/**
+ * Exception thrown by the checks in CachingQueueTester when a condition does
+ * not hold. TODO: move the test classes out of this file and run them with
+ * JUnit
+ *
+ * @author    Administrator
+ * @created   3. Januar 2002
+ */
+class AssertionFailedException extends RuntimeException
+{
+}
+
+/**
+ * Test class. Contains some tests for the functionality of the CachingQueue.
+ * Every test returns true on success; a failed intermediate check throws an
+ * AssertionFailedException.
+ *
+ * @author    Administrator
+ * @created   3. Januar 2002
+ */
+class CachingQueueTester
+{
+
+
+    /**
+     * removing from a new, empty queue must raise an UnderflowException
+     *
+     * @return   true if the UnderflowException was thrown
+     */
+    public static boolean testUnderflow()
+    {
+        CachingQueue cq = new CachingQueue("testQueue1", 10);
+        try
+        {
+            cq.remove();
+        }
+        catch (UnderflowException e)
+        {
+            return true;
+        }
+        catch (Exception e)
+        {
+            e.printStackTrace();
+        }
+        return false;
+    }
+
+
+    /**
+     * inserts one element and removes it again
+     *
+     * @return   true if the element removed is the one inserted
+     */
+    public static boolean testInsert()
+    {
+        CachingQueue cq = new CachingQueue("testQueue2", 10);
+        String test = "Test1";
+        assertTrue(cq.size() == 0);
+        cq.insert(test);
+        assertTrue(cq.size() == 1);
+        return (cq.remove() == test);
+    }
+
+
+    /**
+     * inserts more elements than fit into one block (block size 2) and
+     * removes them again
+     *
+     * @return   true if the third element removed is the third one inserted
+     */
+    public static boolean testBufReadWrite()
+    {
+        CachingQueue cq = new CachingQueue("testQueue3", 2);
+        String test1 = "Test1";
+        String test2 = "Test2";
+        String test3 = "Test3";
+        cq.insert(test1);
+        cq.insert(test2);
+        cq.insert(test3);
+        assertTrue(cq.size() == 3);
+        cq.remove();
+        cq.remove();
+        assertTrue(cq.size() == 1);
+        return (cq.remove() == test3);
+    }
+
+
+    /**
+     * inserts five elements with a block size of 2, so that a block has to be
+     * written to disk, and checks that they come back in insertion order
+     *
+     * @return   true if the last element removed equals the last one inserted
+     */
+    public static boolean testBufReadWrite2()
+    {
+        CachingQueue cq = new CachingQueue("testQueue4", 2);
+        String test1 = "Test1";
+        String test2 = "Test2";
+        String test3 = "Test3";
+        String test4 = "Test4";
+        String test5 = "Test5";
+        cq.insert(test1);
+        cq.insert(test2);
+        cq.insert(test3);
+        cq.insert(test4);
+        cq.insert(test5);
+        assertTrue(cq.size() == 5);
+        String t = (String) cq.remove();
+        assertTrue(t.equals(test1));
+        t = (String) cq.remove();
+        assertTrue(t.equals(test2));
+        t = (String) cq.remove();
+        assertTrue(t.equals(test3));
+        t = (String) cq.remove();
+        assertTrue(t.equals(test4));
+        t = (String) cq.remove();
+        assertTrue(cq.size() == 0);
+        return (t.equals(test5));
+    }
+
+
+    /**
+     * Checks a condition. The method is not called "assert" because that is
+     * a reserved word since Java 1.4 and can no longer be used as a method
+     * name.
+     *
+     * @param expr  the condition that has to be true
+     * @exception AssertionFailedException  if expr is false
+     */
+    public static void assertTrue(boolean expr)
+    {
+        if (!expr)
+        {
+            throw new AssertionFailedException();
+        }
+    }
+
+
+    /**
+     * fills the queue across several blocks, empties it completely and
+     * expects an UnderflowException on the next removal
+     *
+     * @return   true if the UnderflowException was thrown
+     */
+    public static boolean testUnderflow2()
+    {
+        CachingQueue cq = new CachingQueue("testQueue5", 2);
+        String test1 = "Test1";
+        String test2 = "Test2";
+        String test3 = "Test3";
+        String test4 = "Test4";
+        String test5 = "Test5";
+        cq.insert(test1);
+        cq.insert(test2);
+        cq.insert(test3);
+        cq.insert(test4);
+        cq.insert(test5);
+        assertTrue(cq.remove().equals(test1));
+        assertTrue(cq.remove().equals(test2));
+        assertTrue(cq.remove().equals(test3));
+        assertTrue(cq.remove().equals(test4));
+        assertTrue(cq.remove().equals(test5));
+        try
+        {
+            cq.remove();
+        }
+        catch (UnderflowException e)
+        {
+            return true;
+        }
+        return false;
+    }
+
+
+    /**
+     * same as testBufReadWrite2(), but with a block size of 1
+     *
+     * @return   true if the last element removed equals the last one inserted
+     */
+    public static boolean testBufReadWrite3()
+    {
+        CachingQueue cq = new CachingQueue("testQueue4", 1);
+        String test1 = "Test1";
+        String test2 = "Test2";
+        String test3 = "Test3";
+        String test4 = "Test4";
+        String test5 = "Test5";
+        cq.insert(test1);
+        cq.insert(test2);
+        cq.insert(test3);
+        cq.insert(test4);
+        cq.insert(test5);
+        String t = (String) cq.remove();
+        assertTrue(t.equals(test1));
+        t = (String) cq.remove();
+        assertTrue(t.equals(test2));
+        t = (String) cq.remove();
+        assertTrue(t.equals(test3));
+        t = (String) cq.remove();
+        assertTrue(t.equals(test4));
+        t = (String) cq.remove();
+        return (t.equals(test5));
+    }
+
+
+    /**
+     * deletes block files behind the queue's back and expects a
+     * StoreException when the queue tries to load them
+     *
+     * @return   true if the StoreException was thrown
+     */
+    public static boolean testExceptions()
+    {
+        System.gc();
+        CachingQueue cq = new CachingQueue("testQueue5", 1);
+        String test1 = "Test1";
+        String test2 = "Test2";
+        String test3 = "Test3";
+        String test4 = "Test4";
+        String test5 = "Test5";
+        cq.insert(test1);
+        cq.insert(test2);
+        cq.insert(test3);
+        cq.insert(test4);
+        cq.insert(test5);
+        try
+        {
+            // the block files live in the "cachingqueue" directory (see
+            // QueueBlock.getFileName()), so that is where they must be deleted
+            if (!(new File("cachingqueue/testQueue5_1.cqb").delete()))
+            {
+                System.err.println("CachingQueueTester.textExceptions: Store 1 nicht vorhanden. Filename geändert?");
+            }
+            if (!(new File("cachingqueue/testQueue5_2.cqb").delete()))
+            {
+                System.err.println("CachingQueueTester.textExceptions: Store 2 nicht vorhanden. Filename geändert?");
+            }
+            String t = (String) cq.remove();
+            assertTrue(t.equals(test1));
+            t = (String) cq.remove();
+            assertTrue(t.equals(test2));
+            t = (String) cq.remove();
+            assertTrue(t.equals(test3));
+            t = (String) cq.remove();
+            assertTrue(t.equals(test4));
+            t = (String) cq.remove();
+            assertTrue(t.equals(test5));
+        }
+        catch (StoreException e)
+        {
+            return true;
+        }
+        finally
+        {
+            cq = null;
+            System.gc();
+            // the finalizers of the blocks should get called now
+        }
+        return false;
+    }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java
new file mode 100644
index 00000000000..231c17d3f9f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java
@@ -0,0 +1,273 @@
+package de.lanlab.larm.util;
+
+import java.lang.reflect.*;
+import java.io.*;
+import java.util.*;
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+
+/**
+ *  Uses the reflection API to print a skeleton of a class that extends or
+ *  implements a given class or interface: one stub per overridable method,
+ *  together with the import statements the stubs need.
+ *  for debugging only
+ */
+public class ClassInfo
+{
+
+    public ClassInfo()
+    {
+    }
+
+    /**
+     * Prints the skeleton to System.out.
+     * Usage: java ClassInfo PackageName.BaseClassName PackageName.DerivedClassName
+     *
+     * @param args  args[0] is the fully qualified name of the class or
+     *      interface to derive from, args[1] the name of the class to generate
+     */
+    public static void main(String[] args)
+    {
+        if(args.length < 2)
+        {
+            System.err.println("Usage: java ClassInfo PackageName.BaseClassName PackageName.DerivedClassName");
+            return;
+        }
+
+        String name = args[0];
+        String derivedName = args[1];
+
+        try
+        {
+            Class cls = Class.forName(name);
+            name = cls.getName();
+            String clss = getClassName(name);
+
+            StringWriter importsWriter = new StringWriter();
+            PrintWriter imports = new PrintWriter(importsWriter);
+            StringWriter outWriter = new StringWriter();
+            PrintWriter out = new PrintWriter(outWriter);
+
+            // sorted and free of duplicates
+            TreeSet importClasses = new TreeSet();
+            addImport(importClasses, name);
+
+            // the package of the derived class is printed in the package
+            // statement below, so only its simple name belongs here
+            out.println("/**\n * (class description here)\n */\npublic class " + getClassName(derivedName) + " " + (cls.isInterface() ? "implements " : "extends ") + clss + "\n{");
+
+            Method[] m = cls.getMethods();
+            for(int i= 0; i< m.length; i++)
+            {
+                Method thism = m[i];
+                int mods = thism.getModifiers();
+                // skip what cannot or should not be overridden
+                if(Modifier.isPrivate(mods) || Modifier.isFinal(mods)
+                   || "java.lang.Object".equals(thism.getDeclaringClass().getName()))
+                {
+                    continue;
+                }
+
+                String returnTypeName = thism.getReturnType().getName();
+                Class[] parameters = thism.getParameterTypes();
+
+                out.println("    /**");
+                out.println("     * (method description here)");
+                out.println("     * defined in " + thism.getDeclaringClass().getName());
+                for(int j = 0; j < parameters.length; j ++)
+                {
+                    addImport(importClasses, parameters[j].getName());
+                    out.println("     * @param p" + j + " (parameter description here)");
+                }
+                if(!"void".equals(returnTypeName))
+                {
+                    addImport(importClasses, returnTypeName);
+                    out.println("     * @return (return value description here)");
+                }
+                out.println("     */");
+
+                // the stub has a body, so it must be neither abstract nor native
+                int stubMods = mods & ~(Modifier.ABSTRACT | Modifier.NATIVE);
+                out.print("    " + getModifierString(stubMods) + getClassName(returnTypeName) + " ");
+                out.print(thism.getName() + "(");
+                for(int j = 0; j < parameters.length; j ++)
+                {
+                    if(j>0)
+                    {
+                        out.print(", ");
+                    }
+                    out.print(getClassName(parameters[j].getName()) + " p" + j);
+                }
+                out.print(")");
+
+                Class[] exceptions = thism.getExceptionTypes();
+                if (exceptions.length > 0)
+                {
+                   out.print(" throws ");
+                }
+                for(int k = 0; k < exceptions.length; k++)
+                {
+                   if(k > 0)
+                   {
+                       out.print(", ");
+                   }
+                   String exCompleteName = exceptions[k].getName();
+                   addImport(importClasses, exCompleteName);
+                   out.print(getClassName(exCompleteName));
+                }
+                out.print("\n" +
+                          "    {\n" +
+                          "        /**@todo: Implement this " + thism.getName() + "() method */\n" +
+                          "        throw new UnsupportedOperationException(\"Method " + thism.getName() + "() not yet implemented.\");\n" +
+                          "    }\n\n");
+            }
+            out.println("}");
+
+            Iterator importIterator = importClasses.iterator();
+            while(importIterator.hasNext())
+            {
+                String importName = (String)importIterator.next();
+                // only java.lang itself is imported implicitly, its
+                // sub-packages (e.g. java.lang.reflect) are not
+                if(!"java.lang".equals(getPackageName(importName)))
+                {
+                    imports.println("import " + importName + ";");
+                }
+            }
+
+            out.flush();
+            imports.flush();
+
+            if(getPackageName(derivedName).length() > 0)
+            {
+                System.out.println("package " + getPackageName(derivedName) + ";\n");
+            }
+            System.out.println( "/**\n" +
+                                " * Title:        \n" +
+                                " * Description:\n" +
+                                " * Copyright:    Copyright (c)\n" +
+                                " * Company:\n" +
+                                " * @author\n" +
+                                " * @version 1.0\n" +
+                                " */\n");
+            System.out.println(importsWriter.getBuffer());
+            System.out.print(outWriter.getBuffer());
+        }
+        catch(Throwable t)
+        {
+            t.printStackTrace();
+        }
+    }
+
+    /**
+     * Adds the import for a class to the given set unless the class has no
+     * package (primitive types, arrays of primitives, default package).
+     */
+    private static void addImport(Set imports, String className)
+    {
+        if(getPackageName(className).length() > 0)
+        {
+            imports.add(getImportStatement(className));
+        }
+    }
+
+    /**
+     * Returns the package part of a class name as delivered by
+     * Class.getName(). For arrays ("[Ljava.lang.String;", "[[I") the package
+     * of the element type is returned.
+     *
+     * @param className  the class name
+     * @return           the package name, or "" if there is none
+     */
+    public static String getPackageName(String className)
+    {
+        if(className.charAt(0) == '[')
+        {
+            switch(className.charAt(1))
+            {
+                case '[':
+                     // array of arrays: look at the component type
+                     return getPackageName(className.substring(1));
+                case 'L':
+                     return getPackageName(className.substring(2,className.length()-1));
+                default:
+                    return "";
+            }
+        }
+        int lastDot = className.lastIndexOf(".");
+        return (lastDot != -1) ? className.substring(0, lastDot) : "";
+    }
+
+    /**
+     * Returns the name of a class without its package, as it is written in
+     * source code. Array names as delivered by Class.getName()
+     * ("[Ljava.lang.String;", "[I", "[[D") are turned into "String[]",
+     * "int[]", "double[][]".
+     *
+     * @param className  the class name
+     * @return           the simple name
+     */
+    public static String getClassName(String className)
+    {
+        if(className.charAt(0) == '[')
+        {
+            switch(className.charAt(1))
+            {
+                case '[':
+                     // array of arrays: one more pair of brackets
+                     return getClassName(className.substring(1)) + "[]";
+                case 'L':
+                     return getClassName(className.substring(2,className.length()-1)) + "[]";
+                case 'Z':
+                     return "boolean[]";
+                case 'B':
+                     return "byte[]";
+                case 'C':
+                     return "char[]";
+                case 'S':
+                     return "short[]";
+                case 'I':
+                     return "int[]";
+                case 'J':
+                     return "long[]";
+                case 'F':
+                     return "float[]";
+                case 'D':
+                     return "double[]";
+            }
+        }
+        int lastDot = className.lastIndexOf(".");
+        return (lastDot > -1) ? className.substring(lastDot + 1) : className;
+    }
+
+    /**
+     * Returns what has to be written after "import" for the given class. For
+     * arrays this is the element type.
+     *
+     * @param className  the class name as delivered by Class.getName()
+     * @return           package and simple name, joined by a dot
+     */
+    static String getImportStatement(String className)
+    {
+        String pack = getPackageName(className);
+        String clss = getClassName(className);
+        // cut at the first pair of brackets so that multi-dimensional arrays
+        // lose all of them
+        int brackets = clss.indexOf("[]");
+        if(brackets > -1)
+        {
+            clss = clss.substring(0, brackets);
+        }
+        return pack + "." + clss;
+    }
+
+    /**
+     * Turns a set of modifier flags into source text.
+     *
+     * @param modifiers  flags as defined in java.lang.reflect.Modifier
+     * @return           the keyword of every flag set, each followed by one
+     *      space, in alphabetical order of the keywords
+     */
+    public static String getModifierString(int modifiers)
+    {
+        final int[] flags = {
+            Modifier.ABSTRACT, Modifier.FINAL, Modifier.INTERFACE,
+            Modifier.NATIVE, Modifier.PRIVATE, Modifier.PROTECTED,
+            Modifier.PUBLIC, Modifier.STATIC, Modifier.STRICT,
+            Modifier.SYNCHRONIZED, Modifier.TRANSIENT, Modifier.VOLATILE
+        };
+        final String[] keywords = {
+            "abstract ", "final ", "interface ",
+            "native ", "private ", "protected ",
+            "public ", "static ", "strictfp ",
+            "synchronized ", "transient ", "volatile "
+        };
+
+        StringBuffer mods = new StringBuffer();
+        for(int i = 0; i < flags.length; i++)
+        {
+            if((modifiers & flags[i]) != 0)
+            {
+                mods.append(keywords[i]);
+            }
+        }
+        return mods.toString();
+    }
+
+
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java
new file mode 100644
index 00000000000..6b0d16fb6d1
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java
@@ -0,0 +1,319 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title:
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+
+import java.util.*;
+
+/**
+ * simple hashed linked list. It allows for inserting and removing elements like
+ * in a hash table (in fact, it uses a HashMap), while still being able to easily
+ * traverse the collection like a list. In addition, the iterator is circular. It
+ * always returns a next element as long as there are elements in the list. In
+ * contrast to the iterator of Sun's collection classes, this class can cope with
+ * inserts and removals while traversing the list.<p>
+ * Elements are always added to the end of the list, that is, always at the same place<br>
+ * All operations should work in near constant time as the list grows. Only the
+ * trade-off costs of a hash (memory versus speed) have to be considered.
+ * The List doesn't accept null elements
+ * @todo put the traversal function into an Iterator
+ * @todo implement the class as a derivate from a Hash
+ */
+public class HashedCircularLinkedList
+{
+
+    /**
+     * Entry class.
+     */
+    private static class Entry
+    {
+        Object key;
+        Object element;
+        Entry next;
+        Entry previous;
+
+        Entry(Object element, Entry next, Entry previous, Object key)
+        {
+            this.element = element;
+            this.next = next;
+            this.previous = previous;
+            this.key = key;
+        }
+    }
+
+    /**
+     * the list. contains objects
+     */
+    private transient Entry header = new Entry(null, null, null, null);
+
+    /**
+     * the hash. maps keys to entries, which by themselves map to objects
+     */
+    HashMap keys;
+
+    private transient int size = 0;
+
+    /** the current entry in the traversal */
+    Entry current = null;
+
+    /**
+     * Constructs an empty list.
+     */
+    public HashedCircularLinkedList(int initialCapacity, float loadFactor)
+    {
+        header.next = header.previous = header;
+        keys = new HashMap(initialCapacity, loadFactor);
+    }
+
+    /**
+     * Returns the number of elements in this list.
+     *
+     * @return the number of elements in this list.
+     */
+    public int size()
+    {
+        return size;
+    }
+
+    /**
+     * Removes the entry stored under the given key. If the list does not
+     * contain the key, it is unchanged. If the removed entry is the current
+     * position of the traversal, the position is moved back to the previous
+     * entry (or reset if the list becomes empty), so that the following call
+     * to next() returns the successor of the removed entry.
+     *
+     * @param o key of the entry to be removed from this list, if present.
+     * @return <tt>true</tt> if the list contained an entry for the key.
+     */
+    public boolean removeByKey(Object o)
+    {
+        // assert(o != null)
+        Entry e = (Entry)keys.get(o);
+        if(e != null)
+        {
+            if(e == current)
+            {
+                if(size > 1)
+                {
+                    current = previousEntry(current);
+                }
+                else
+                {
+                    current = null;
+                }
+            }
+            this.removeEntryFromList(e);
+            keys.remove(o);
+            size--;
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Removes all of the elements from this list.
+     */
+    public void clear()
+    {
+        // list
+        header.next = header.previous = header;
+
+        // hash
+        keys.clear();
+
+        size = 0;
+        current = null;
+    }
+
+    private Entry addEntryBefore(Object key, Object o, Entry e)
+    {
+        Entry newEntry = new Entry(o, e, e.previous, key);
+        newEntry.previous.next = newEntry;
+        newEntry.next.previous = newEntry;
+        return newEntry;
+    }
+
+    private void removeEntryFromList(Entry e)
+    {
+        if(e != null)
+        {
+            if (e == header)
+            {
+                throw new NoSuchElementException();
+            }
+
+            e.previous.next = e.next;
+            e.next.previous = e.previous;
+        }
+    }
+
+    /**
+     * Appends a new entry to the end of the list and registers it under key.
+     * Nothing is changed if key is null or if the key is already present.
+     * @param key the key under which the value can be looked up and removed
+     * @param value the element to be stored
+     * @return true if the entry was added, false if it was rejected
+     */
+    public boolean put(Object key, Object value)
+    {
+        if(key != null && !keys.containsKey(key))
+        {
+            Entry e = addEntryBefore(key, value, header);  // add it as the last element
+            keys.put(key, e);                    // link key to entry
+            size++;
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    public boolean hasNext()
+    {
+        return (size > 0);
+    }
+
+    private Entry nextEntry(Entry e)
+    {
+        // a null entry stands for "before the first element"
+        if(size > 1)
+        {
+            if(e == null)
+            {
+                e = header;
+            }
+            Entry next = e.next;
+            if(next == header)
+            {
+                next = next.next;
+            }
+            return next;
+        }
+        else if(size == 1)
+        {
+            return header.next;
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+
+    private Entry previousEntry(Entry e)
+    {
+        // a null entry stands for "behind the last element"
+        if(size > 1)
+        {
+            if(e == null)
+            {
+                e = header;
+            }
+            Entry previous = e.previous;
+            if(previous == header)
+            {
+                previous = previous.previous;
+            }
+            return previous;
+        }
+        else if(size == 1)
+        {
+            return header.previous;
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    public Object next()
+    {
+        current = nextEntry(current);
+        if(current != null)
+        {
+            return current.element;
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    /**
+     * Removes the element last returned by next(); no-op if there is none.
+     */
+    public void removeCurrent()
+    {
+        if(current != null)
+        {
+            removeByKey(current.key);   // also fixes size and position
+        }
+    }
+
+
+    public Object get(Object key)
+    {
+        Entry e = ((Entry)keys.get(key));
+        if(e != null)
+        {
+            return e.element;
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    /**
+     * testing
+     */
+    public static void main(String[] args)
+    {
+        HashedCircularLinkedList h = new HashedCircularLinkedList(20, 0.75f);
+        h.put("1", "a");
+        h.put("2", "b");
+        h.put("3", "c");
+        String t;
+        System.out.println("size [3]: " + h.size());
+        t = (String)h.next();
+        System.out.println("2nd element via get [b]: " + h.get("2"));
+
+        System.out.println("next element [a]: " + t);
+        t = (String)h.next();
+        System.out.println("next element [b]: " + t);
+        t = (String)h.next();
+        System.out.println("next element [c]: " + t);
+        t = (String)h.next();
+        System.out.println("1st element after circular traversal [a]: " + t);
+        h.removeByKey("1");
+        System.out.println("1st element after remove [null]: " + h.get("1"));
+        System.out.println("size after removal [2]: " + h.size());
+        t = (String)h.next();
+        System.out.println("next element [b]: " + t);
+        t = (String)h.next();
+        System.out.println("next element [c]: " + t);
+        t = (String)h.next();
+        System.out.println("next element [b]: " + t);
+        h.removeCurrent();
+        t = (String)h.next();
+        System.out.println("next element after 1 removal [c]: " + t);
+        t = (String)h.next();
+        System.out.println("next element: [c]: " + t);
+        h.removeByKey("3");
+        System.out.println("size after 3 removals [0]: " + h.size());
+        t = (String)h.next();
+        System.out.println("next element [null]: " + t);
+    }
+}
+
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java
new file mode 100644
index 00000000000..c16940ffac5
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java
@@ -0,0 +1,18 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c) <p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.util;
+
+public interface InputStreamObserver
+{
+    public void notifyOpened(ObservableInputStream in, long timeElapsed); // sent from the ObservableInputStream constructor
+    public void notifyClosed(ObservableInputStream in, long timeElapsed); // sent after the stream's close(); timeElapsed is in ms since construction
+    public void notifyRead(ObservableInputStream in, long timeElapsed, int nrRead, int totalRead); // sent when totalRead passes a reporting step
+    public void notifyFinished(ObservableInputStream in, long timeElapsed, int totalRead); // sent when a read returns no more data
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java
new file mode 100644
index 00000000000..2564b661c14
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java
@@ -0,0 +1,19 @@
+/*
+ *
+ *
+ *
+ */
+package de.lanlab.larm.util;
+
+import java.io.*;
+
+public class Logger
+{
+   private FileOutputStream out;
+
+   public Logger(String fileName)
+   {
+      // NOTE(review): empty stub - fileName is ignored and 'out' is never assigned
+  }
+
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java
new file mode 100644
index 00000000000..d261d2bd75d
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java
@@ -0,0 +1,101 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c) <p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.util;
+
+import java.io.*;
+
+public class ObservableInputStream extends FilterInputStream
+{
+    private boolean reporting = true;   // observer is only notified about reads while this is set
+    private long startTime;             // System.currentTimeMillis() at construction
+    private int totalRead = 0;          // number of bytes delivered so far
+    private int step = 1;               // distance between two notifyRead() calls, in bytes
+    private int nextStep = 0;           // value of totalRead that triggers the next notifyRead()
+
+    InputStreamObserver observer;
+
+    public ObservableInputStream(InputStream in, InputStreamObserver iso, int reportingStep)
+    {
+        super(in);
+        startTime = System.currentTimeMillis();
+        observer = iso;
+        observer.notifyOpened(this, System.currentTimeMillis() - startTime);
+        nextStep = step = reportingStep;
+    }
+
+    public void close() throws IOException
+    {
+        super.close();
+        observer.notifyClosed(this, System.currentTimeMillis() - startTime);
+    }
+
+    public void setReporting(boolean reporting)
+    {
+        this.reporting = reporting;
+    }
+
+    public boolean isReporting()
+    {
+        return reporting;
+    }
+
+    public void setReportingStep(int step)
+    {
+        this.step = step;
+    }
+
+    public int read() throws IOException
+    {
+        int readByte = super.read();
+        if(reporting)
+        {
+            notifyObserver(readByte >= 0 ? 1 : -1);   // one byte read, or end of stream
+        }
+        return readByte;
+    }
+
+    /**
+     * Delegates to read(b, 0, b.length), which does the reporting. Calling
+     * super.read(b) here would dispatch back into that overload as well and
+     * count (and report) every block twice.
+     */
+    public int read(byte[] b) throws IOException
+    {
+        return read(b, 0, b.length);
+    }
+
+    private void notifyObserver(int nrRead)
+    {
+        if(nrRead > 0)
+        {
+            totalRead += nrRead;
+            if(totalRead > nextStep)
+            {
+                nextStep += step;
+                observer.notifyRead(this, System.currentTimeMillis() - startTime, nrRead, totalRead);
+            }
+        }
+        else if(nrRead < 0)   // 0 only means "nothing read this time", not end of stream
+        {
+            observer.notifyFinished(this, System.currentTimeMillis() - startTime, totalRead);
+        }
+    }
+
+    public int read(byte[] b, int offs, int size) throws IOException
+    {
+        int nrRead = super.read(b, offs, size);
+        if(reporting)
+        {
+            notifyObserver(nrRead);
+        }
+        return nrRead;
+    }
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java
new file mode 100644
index 00000000000..a81095094da
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java
@@ -0,0 +1,9 @@
+package de.lanlab.larm.util;
+
+
+/**
+ *  Empty marker interface without any methods; currently not used.
+ */
+public interface Observer
+{
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java
new file mode 100644
index 00000000000..a1f427e667a
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java
@@ -0,0 +1,15 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title:        LARM
+ * Description:  Unchecked exception type; carries no message or cause of its own.
+ * Copyright:    Copyright (c) 2001
+ * Company:      LMU-IP
+ * @author Clemens Marschner
+ * @version 1.0
+ */
+
+
+public class OverflowException extends RuntimeException
+{
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java
new file mode 100644
index 00000000000..26105c3c333
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java
@@ -0,0 +1,20 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+
+import java.util.Collection;
+
+public interface Queue
+{
+    public Object remove();                     // NOTE(review): which element is handed out is up to the implementation - confirm there
+    public void insert(Object o);               // presumably adds the single element o
+    public void insertMultiple(Collection c);   // presumably adds every element of c
+    public int size();                          // presumably the number of elements currently held
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java
new file mode 100644
index 00000000000..2e1cfd4c903
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java
@@ -0,0 +1,285 @@
+/*
+ * @(#)SimpleCharArrayReader.java	1.35 00/02/02
+ * 
+ */
+
+package de.lanlab.larm.util;
+import java.io.*;
+
+/**
+ * A <code>SimpleCharArrayReader</code> contains
+ * an internal buffer that contains characters that
+ * may be read from the stream. An internal
+ * counter keeps track of the next character to
+ * be supplied by the <code>read</code> method.
+ * <br>
+ * In contrast to the original <code>CharArrayReader</code> this
+ * version is not thread safe. The monitor on the read()-function caused programs
+ * to slow down much, because this function is called for every character. This
+ * class can thus only be used if only one thread is accessing the stream
+ * @author  Clemens Marschner
+ * @version 1.00
+ * @see     java.io.CharArrayReader
+ */
+public
+class SimpleCharArrayReader extends Reader 
+{
+
+    /**
+     * A flag that is set to true when this stream is closed.
+     */
+    private boolean isClosed = false;
+
+    /**
+     * An array of bytes that was provided
+     * by the creator of the stream. Elements <code>buf[0]</code>
+     * through <code>buf[count-1]</code> are the
+     * only bytes that can ever be read from the
+     * stream;  element <code>buf[pos]</code> is
+     * the next byte to be read.
+     */
+    protected char buf[];
+
+    /**
+     * The index of the next character to read from the input stream buffer.
+     * This value should always be nonnegative
+     * and not larger than the value of <code>count</code>.
+     * The next byte to be read from the input stream buffer 
+     * will be <code>buf[pos]</code>.
+     */
+    protected int pos;
+
+    /**
+     * The currently marked position in the stream.
+     * SimpleCharArrayReader objects are marked at position zero by
+     * default when constructed.  They may be marked at another
+     * position within the buffer by the <code>mark()</code> method.
+     * The current buffer position is set to this point by the
+     * <code>reset()</code> method.
+     *
+     * @since   JDK1.1
+     */
+    protected int mark = 0;
+
+    /**
+     * The index one greater than the last valid character in the input 
+     * stream buffer. 
+     * This value should always be nonnegative
+     * and not larger than the length of <code>buf</code>.
+     * It  is one greater than the position of
+     * the last byte within <code>buf</code> that
+     * can ever be read  from the input stream buffer.
+     */
+    protected int count;
+
+    /**
+     * Creates a <code>SimpleCharArrayReader</code>
+     * so that it  uses <code>buf</code> as its
+     * buffer array. 
+     * The buffer array is not copied. 
+     * The initial value of <code>pos</code>
+     * is <code>0</code> and the initial value
+     * of  <code>count</code> is the length of
+     * <code>buf</code>.
+     *
+     * @param   buf   the input buffer.
+     */
+    public SimpleCharArrayReader(char buf[]) 
+	{
+		this.buf = buf;
+        this.pos = 0;
+		this.count = buf.length;
+    }
+
+    /**
+     * Creates <code>SimpleCharArrayReader</code>
+     * that uses <code>buf</code> as its
+     * buffer array. The initial value of <code>pos</code>
+     * is <code>offset</code> and the initial value
+     * of <code>count</code> is <code>offset+len</code>.
+     * The buffer array is not copied. 
+     * <p>
+     * Note that if bytes are simply read from
+     * the resulting input stream, elements <code>buf[pos]</code>
+     * through <code>buf[pos+len-1]</code> will
+     * be read; however, if a <code>reset</code>
+     * operation  is performed, then bytes <code>buf[0]</code>
+     * through b<code>uf[pos-1]</code> will then
+     * become available for input.
+     *
+     * @param   buf      the input buffer.
+     * @param   offset   the offset in the buffer of the first byte to read.
+     * @param   length   the maximum number of bytes to read from the buffer.
+     */
+    public SimpleCharArrayReader(char buf[], int offset, int length) 
+	{
+		this.buf = buf;
+        this.pos = offset;
+		this.count = Math.min(offset + length, buf.length);
+        this.mark = offset;
+    }
+
+    /**
+     * Reads the next byte of data from this input stream. The value 
+     * byte is returned as an <code>int</code> in the range 
+     * <code>0</code> to <code>255</code>. If no byte is available 
+     * because the end of the stream has been reached, the value 
+     * <code>-1</code> is returned. 
+     * <p>
+     *
+     * @return  the next byte of data, or <code>-1</code> if the end of the
+     *          stream has been reached.
+     */
+    public int read() 
+	{
+		return (pos < count) ? (buf[pos++] & 0xff) : -1;
+    }
+
+    /**
+     * Reads up to <code>len</code> bytes of data into an array of bytes 
+     * from this input stream. 
+     * If <code>pos</code> equals <code>count</code>,
+     * then <code>-1</code> is returned to indicate
+     * end of file. Otherwise, the  number <code>k</code>
+     * of bytes read is equal to the smaller of
+     * <code>len</code> and <code>count-pos</code>.
+     * If <code>k</code> is positive, then bytes
+     * <code>buf[pos]</code> through <code>buf[pos+k-1]</code>
+     * are copied into <code>b[off]</code>  through
+     * <code>b[off+k-1]</code> in the manner performed
+     * by <code>System.arraycopy</code>. The
+     * value <code>k</code> is added into <code>pos</code>
+     * and <code>k</code> is returned.
+     * <p>
+     * This <code>read</code> method cannot block. 
+     *
+     * @param   b     the buffer into which the data is read.
+     * @param   off   the start offset of the data.
+     * @param   len   the maximum number of bytes read.
+     * @return  the total number of bytes read into the buffer, or
+     *          <code>-1</code> if there is no more data because the end of
+     *          the stream has been reached.
+     */
+    public  int read(char b[], int off, int len) 
+	{
+		if (b == null) 
+		{
+		    throw new NullPointerException();
+		} 
+		else if ((off < 0) || (off > b.length) || (len < 0) ||
+		   ((off + len) > b.length) || ((off + len) < 0)) 
+		{
+	    	throw new IndexOutOfBoundsException();
+		}
+		if (pos >= count) 
+		{
+		    return -1;
+		}
+		if (pos + len > count) 
+		{
+		    len = count - pos;
+		}
+		if (len <= 0) 
+		{
+		    return 0;
+		}
+		System.arraycopy(buf, pos, b, off, len);
+		pos += len;
+		return len;
+    }
+
+    /**
+     * Skips <code>n</code> bytes of input from this input stream. Fewer 
+     * bytes might be skipped if the end of the input stream is reached. 
+     * The actual number <code>k</code>
+     * of bytes to be skipped is equal to the smaller
+     * of <code>n</code> and  <code>count-pos</code>.
+     * The value <code>k</code> is added into <code>pos</code>
+     * and <code>k</code> is returned.
+     *
+     * @param   n   the number of bytes to be skipped.
+     * @return  the actual number of bytes skipped.
+     */
+    public  long skip(long n) 
+	{
+		if (pos + n > count) 
+		{
+		    n = count - pos;
+		}
+		if (n < 0) 
+		{
+		    return 0;
+		}
+		pos += n;
+		return n;
+    }
+
+    /**
+     * Returns the number of bytes that can be read from this input 
+     * stream without blocking. 
+     * The value returned is
+     * <code>count&nbsp;- pos</code>, 
+     * which is the number of bytes remaining to be read from the input buffer.
+     *
+     * @return  the number of bytes that can be read from the input stream
+     *          without blocking.
+     */
+    public  int available() 
+	{
+		return count - pos;
+    }
+
+    /**
+     * Tests if SimpleCharArrayReader supports mark/reset.
+     *
+     * @since   JDK1.1
+     */
+    public boolean markSupported() 
+	{
+		return true;
+    }
+
+    /**
+     * Set the current marked position in the stream.
+     * SimpleCharArrayReader objects are marked at position zero by
+     * default when constructed.  They may be marked at another
+     * position within the buffer by this method.
+     *
+     * @since   JDK1.1
+     */
+    public void mark(int readAheadLimit) 
+	{
+		mark = pos;
+    }
+
+    /**
+     * Resets the buffer to the marked position.  The marked position
+     * is the beginning unless another position was marked.
+     * The value of <code>pos</code> is set to 0.
+     */
+    public  void reset() 
+	{
+		
+		pos = mark;
+    }
+
+    /**
+     * Closes this input stream and releases any system resources 
+     * associated with the stream. 
+     * <p>
+     */
+    public  void close() throws IOException 
+	{
+		isClosed = true;
+    }
+
+    /** Check to make sure that the stream has not been closed */
+    private void ensureOpen() 
+	{
+        /* This method does nothing for now.  Once we add throws clauses
+	 * to the I/O methods in this class, it will throw an IOException
+	 * if the stream has been closed.
+	 */
+    }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java
new file mode 100644
index 00000000000..60cd99b2b58
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java
@@ -0,0 +1,112 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+import java.io.*;
+import java.util.*;
+import java.text.*;
+
+/**
+ * this class is only used for SPEED. Its log function is not thread safe by
+ * default.
+ * It uses a BufferedWriter.
+ * It registers with a logger manager, which can be used to flush several loggers
+ * at once
+ * @todo: including the date slows down a lot
+ *
+ */
+public class SimpleLogger
+{
+    private SimpleDateFormat formatter = new SimpleDateFormat ("HH:mm:ss:SSSS");
+    Writer logFile;                                 // stays null if the log file could not be created
+    StringBuffer buffer = new StringBuffer(1000);   // reused for every line; the reason why log() is not thread safe
+
+    long startTime = System.currentTimeMillis();
+    boolean includeDate;                            // prefix each line with wall clock time and ms since startTime
+
+    public void setStartTime(long startTime)
+    {
+        this.startTime = startTime;
+    }
+
+    public synchronized void logThreadSafe(String text)
+    {
+        log(text);
+    }
+
+    public synchronized void logThreadSafe(Throwable t)
+    {
+        log(t);
+    }
+
+    public void log(String text)
+    {
+        try
+        {
+            buffer.setLength(0);
+            if(includeDate)
+            {
+                buffer.append(formatter.format(new Date())).append(": ").append(System.currentTimeMillis()-startTime).append(" ms: ");
+            }
+            buffer.append(text).append("\n");
+            logFile.write(buffer.toString());
+            if(flushAtOnce)
+            {
+                logFile.flush();
+            }
+        }
+        catch(IOException e)
+        {
+            System.out.println("Couldn't write to logfile");
+        }
+    }
+
+    public void log(Throwable t)
+    {
+        t.printStackTrace(new PrintWriter(logFile));
+    }
+
+    boolean flushAtOnce = false;
+
+    public void setFlushAtOnce(boolean flush)
+    {
+        this.flushAtOnce = flush;
+    }
+
+    public SimpleLogger(String name)
+    {
+        init(name, true);
+    }
+
+    public SimpleLogger(String name, boolean includeDate)
+    {
+        init(name, includeDate);
+    }
+
+    public void flush() throws IOException
+    {
+        logFile.flush();
+    }
+
+    private void init(String name, boolean includeDate)
+    {
+        // remember the flag; it used to be dropped here, so no line ever got its time stamp
+        this.includeDate = includeDate;
+        try
+        {
+           logFile = new BufferedWriter(new FileWriter("logs/" + name + ".log"));
+           SimpleLoggerManager.getInstance().register(this);
+        }
+        catch(IOException e)
+        {
+           System.out.println("IOException while creating logfile " + name + ":");
+           e.printStackTrace();
+        }
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java
new file mode 100644
index 00000000000..44717249305
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java
@@ -0,0 +1,65 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+
+import java.util.*;
+import java.io.IOException;
+
+/**
+ * this singleton manages all loggers. It can be used to flush all SimpleLoggers
+ * at once
+ */
+public class SimpleLoggerManager
+{
+    /** the one shared manager; created on the first call of getInstance() */
+    static SimpleLoggerManager instance = null;
+
+    /** all loggers that have been registered so far */
+    ArrayList logs;
+
+    private SimpleLoggerManager()
+    {
+        logs = new ArrayList();
+    }
+
+    /** adds a logger to the set that flush() works on */
+    public void register(SimpleLogger logger)
+    {
+        logs.add(logger);
+    }
+
+    /** flushes every registered logger; if any of them fail, the last IOException is rethrown at the end */
+    public void flush() throws IOException
+    {
+        IOException lastFailure = null;
+        for(Iterator loggers = logs.iterator(); loggers.hasNext(); )
+        {
+            SimpleLogger logger = (SimpleLogger)loggers.next();
+            try
+            {
+                logger.flush();
+            }
+            catch(IOException e)
+            {
+                lastFailure = e;
+            }
+        }
+        if(lastFailure != null)
+        {
+            throw lastFailure;
+        }
+    }
+
+    /** returns the shared instance and creates it on the first call (no synchronization) */
+    public static SimpleLoggerManager getInstance()
+    {
+        return (instance != null) ? instance : (instance = new SimpleLoggerManager());
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java
new file mode 100644
index 00000000000..a24f9f2e181
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java
@@ -0,0 +1,21 @@
+
+/**
+ * Title:        LARM Lanlab Retrieval Machine<p>
+ * Description:  <p>
+ * Copyright:    Copyright (c) <p>
+ * Company:      <p>
+ * @author
+ * @version 1.0
+ */
+package de.lanlab.larm.util;
+
+import java.util.Observable;
+
+public class SimpleObservable extends Observable
+{
+    // overridden only to widen the visibility of Observable.setChanged() from protected to public
+    public void setChanged()
+    {
+        super.setChanged();
+    }
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java
new file mode 100644
index 00000000000..87ae48fe1b6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java
@@ -0,0 +1,91 @@
+package de.lanlab.larm.util;
+
+import java.io.Serializable;
+/**
+ * Title:        LARM Lanlab Retrieval Machine
+ * Description:
+ * Copyright:    Copyright (c)
+ * Company:
+ * @author
+ * @version 1.0
+ */
+
+/**
+ * thread safe state information.
+ * The get methods are not synchronized. Clone the state object before using them
+ * If you use a state object in a class, always return a clone
+ * <pre>public class MyClass {
+ *     State state = new State("Running");
+ *     public State getState() { return state.cloneState() }</pre>
+ *
+ * note on serialization: if you deserialize a state, the state string will be newly created.
+ * that means you then have to compare the states via equal() and not ==
+ */
+public class State implements Cloneable, Serializable
+{
+    private String state;       // name of the state
+    private long stateSince;    // System.currentTimeMillis() at the moment the state was set
+    private Object info;        // optional extra information, may be null
+
+    /** creates a state that starts now and carries no additional info */
+    public State(String state)
+    {
+        setState(state);
+    }
+
+    /** not called anywhere in this class; same as the three-argument form with a null info */
+    private State(String state, long stateSince)
+    {
+        this(state, stateSince, null);
+    }
+
+    /** used by clone(): takes over all three values unchanged */
+    private State(String state, long stateSince, Object info)
+    {
+        this.state = state;
+        this.stateSince = stateSince;
+        this.info = info;
+    }
+
+    /** changes the state, restarts its time stamp and clears the info */
+    public void setState(String state)
+    {
+        setState(state, null);
+    }
+
+    /** changes state and info in one synchronized step and restarts the time stamp */
+    public synchronized void setState(String state, Object info)
+    {
+        this.state = state;
+        this.stateSince = System.currentTimeMillis();
+        this.info = info;
+    }
+
+    public String getState()
+    {
+        return state;
+    }
+
+    public long getStateSince()
+    {
+        return stateSince;
+    }
+
+    public Object getInfo()
+    {
+        return info;
+    }
+
+    /** returns a separate State object with the same values; the info object itself is shared, not copied */
+    public synchronized Object clone()
+    {
+        return new State(state, stateSince, info);
+    }
+
+    /** same as clone(), but already cast to State */
+    public State cloneState()
+    {
+        return (State)clone();
+    }
+
+}
\ No newline at end of file
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java
new file mode 100644
index 00000000000..1956e81886a
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java
@@ -0,0 +1,60 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
+ * Company:
+ *
+ * @author
+ * @version   1.0
+ */
+import java.net.URL;
+
+/**
+ * Static helper for rendering a {@link URL} as a string
+ * without its Ref (fragment) part.
+ *
+ * @author    Administrator
+ * @created   27. Januar 2002
+ */
+public class URLUtils
+{
+    /**
+     * does the same as URL.toExternalForm(), but leaves out the Ref part (which we would
+     * cut off anyway) and sizes the StringBuffer up front so that no call of
+     * expandCapacity() will be necessary, also when the file part is empty.
+     * only meaningful if the default URLStreamHandler is used (as is the case with http, https, or shttp)
+     *
+     * <p>Example: http://host/a?b=c#ref becomes http://host/a?b=c</p>
+     *
+     * @param u  the URL to be converted
+     * @return   the URL as String; an empty or missing file part is rendered as "/"
+     */
+    public static String toExternalFormNoRef(URL u)
+    {
+        final String protocol  = u.getProtocol();
+        final String authority = u.getAuthority();
+        final String file      = u.getFile();
+
+        final boolean hasAuthority = authority != null && authority.length() > 0;
+        final boolean hasFile      = file != null && file.length() > 0;
+
+        // 3 covers ":" and "//"; an empty file part still costs one char for "/"
+        int capacity = 3;
+        capacity += (protocol == null) ? 0 : protocol.length();
+        capacity += hasAuthority ? authority.length() : 0;
+        capacity += hasFile ? file.length() : 1;
+
+        StringBuffer result = new StringBuffer(capacity);
+        result.append(protocol);
+        result.append(":");
+        if (hasAuthority)
+        {
+            result.append("//");
+            result.append(authority);
+        }
+        result.append(hasFile ? file : "/");
+
+        return result.toString();
+    }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java
new file mode 100644
index 00000000000..e07b63ff58e
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java
@@ -0,0 +1,15 @@
+package de.lanlab.larm.util;
+
+/**
+ * Title:        LARM
+ * Description:
+ * Copyright:    Copyright (c) 2001
+ * Company:      LMU-IP
+ * @author Clemens Marschner
+ * @version 1.0
+ */
+
+/** Unchecked exception (extends RuntimeException) that adds no message or state of its own. */
+public class UnderflowException extends RuntimeException
+{
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java
new file mode 100644
index 00000000000..3287fd51f6b
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java
@@ -0,0 +1,94 @@
+package de.lanlab.larm.util;
+
+
+import java.net.URL;
+import de.lanlab.larm.fetcher.URLMessage;
+
+/**
+ *  A web document of whatever type, generated by a fetcher task.
+ */
+public class WebDocument extends URLMessage
+{
+    protected String mimeType;
+    protected byte[] document;
+    protected int resultCode;
+    protected int size;
+    protected String title;
+    /** Passes url and referer on to URLMessage and stores the remaining values as given; title may be null. */
+    public  WebDocument(URL url, String mimeType, byte[] document, int resultCode, URL referer, int size, String title)
+    {
+        super(url, referer, false);
+        this.url = url;
+        this.mimeType = mimeType;
+        this.document = document;
+        this.resultCode = resultCode;
+        this.size = size;
+        this.title = title;
+    }
+
+    public String getTitle()
+    {
+        return title;
+    }
+
+    public URL getUrl()
+    {
+        return url;
+    }
+
+    public int getSize()
+    {
+        return this.size;
+    }
+
+    public void setSize(int size)
+    {
+        this.size = size;
+    }
+
+
+    public void setDocument(byte[] document)
+    {
+        this.document = document;
+    }
+    public int getResultCode()
+    {
+        return resultCode;
+    }
+
+    public void setResultCode(int resultCode)
+    {
+        this.resultCode = resultCode;
+    }
+
+    public byte[] getDocumentBytes()
+    {
+        return this.document;
+    }
+
+    public void setUrl(URL url)
+    {
+        this.url = url;
+    }
+
+    public void setMimeType(String mimeType)
+    {
+        this.mimeType = mimeType;
+    }
+
+    public String getMimeType()
+    {
+        return mimeType;
+    }
+    /** @return one tab separated line: the URLMessage info, result code, MIME type, size and the quoted title */
+    public String getInfo()
+    {
+        // a document may carry no title; report it as an empty one instead of throwing a NullPointerException
+        String shown = (this.title == null) ? "" : this.title.replace('\"', (char)0xff ).replace('\n',' ').replace('\r',' ');
+        return super.getInfo() + "\t" +
+        this.resultCode + "\t" +
+        this.mimeType + "\t" +
+        this.size + "\t" +
+        "\"" + shown + "\"";
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/misc/ByteArray.java b/sandbox/contributions/webcrawler-LARM/src/hplb/misc/ByteArray.java
new file mode 100644
index 00000000000..73387d14ec4
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/misc/ByteArray.java
@@ -0,0 +1,294 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.misc;
+
+import java.io.*;
+import java.net.*;
+
+/**
+ * This class is a container for algorithms working on byte arrays - some
+ * of the algorithms are analogous to those in java.lang.String.
+ * @author      Anders Kristensen
+ */
+public class ByteArray {
+
+  /** Returns the 8 low-order bits of every char of s as a new byte array (no charset is involved). */
+  public static final byte[] getBytes(String s) {
+    int len = s.length();
+    byte[] b = new byte[len];
+    for (int i = 0; i < len; i++) b[i] = (byte) s.charAt(i);  // replaces deprecated String.getBytes(int,int,byte[],int)
+    return b;
+  }
+
+  /** Returns contents of the file named <i>filename</i> as byte array. */
+  public static byte[] loadFromFile(String filename) throws IOException {
+    return loadFromFile(new File(filename));
+  }
+
+  /** Returns contents of file <i>file</i> as byte array; the stream is closed on every path. */
+  public static byte[] loadFromFile(File file) throws IOException {
+    int n, nread = 0, len = (int) file.length();
+    byte[] content = new byte[len];
+    FileInputStream fin = new FileInputStream(file);
+    try {
+      while (nread < len) {
+        if ((n = fin.read(content, nread, len - nread)) == -1)
+          throw new IOException("Error loading Compound from file");
+        nread += n;
+      }
+    } finally { fin.close(); }  // the stream used to be left open
+    return content;
+  }
+
+  /**
+   * Reads up to n bytes from the given stream and keeps reading until n
+   * bytes have arrived or the stream ends. When the stream ends early the
+   * returned array is shorter than n, so callers should check its length.
+   */
+  public static byte[] readn(InputStream in, int n) throws IOException {
+    final byte[] data = new byte[n];
+    int filled = 0;
+
+    while (filled < n) {
+      final int got = in.read(data, filled, n - filled);
+      if (got >= 0) {
+        filled += got;
+        continue;
+      }
+      // end of stream before n bytes: hand back only the part that was read
+      final byte[] partial = new byte[filled];
+      System.arraycopy(data, 0, partial, 0, filled);
+      return partial;
+    }
+    return data;
+  }
+
+  /**
+   * Return contents of a WWW resource identified by a URL. The stream opened
+   * on the connection is closed before returning, also when reading fails.
+   * @param url the resource to retrieve
+   * @return    the resource contents as a byte array
+   */
+  public static byte[] getContent(URL url) throws IOException {
+    URLConnection conn = url.openConnection();
+    InputStream in = conn.getInputStream();
+    try {
+      /*
+       * N.B. URLConnection.getContentLength() is buggy for "http" resources
+       * (at least in JDK1.0.2) and won't work for "file" URLs either.
+       */
+      int length = conn.getContentLength();
+      if (length == -1)
+        length = conn.getHeaderFieldInt("Content-Length", -1);
+      if (length == -1) return readAll(in);  // length unknown: read to end of stream
+      return readn(in, length);
+    } finally { in.close(); }  // the stream used to be left open
+  }
+
+  /**
+   * Drains an InputStream into a byte array. Does not return before end of
+   * stream is reported; the result is exactly as long as the data read.
+   * @param in  the stream to read from
+   * @return    contents of the stream
+   */
+  public static byte[] readAll(InputStream in) throws IOException {
+    byte[] data = new byte[1024];
+    int used = 0;
+
+    for (;;) {
+      final int got = in.read(data, used, data.length - used);
+      if (got <= -1) break;
+      used += got;
+      if (used != data.length) continue;
+      // buffer is full: carry the bytes over into one twice the size
+      final byte[] bigger = new byte[data.length * 2];
+      System.arraycopy(data, 0, bigger, 0, data.length);
+      data = bigger;
+    }
+    if (used >= data.length) return data;
+    // drop the unused tail so that the result has no excess space
+    final byte[] exact = new byte[used];
+    System.arraycopy(data, 0, exact, 0, used);
+    return exact;
+  }
+
+  /**
+   * Pumps everything from the input stream into the output stream,
+   * stopping once the input reports end of file.
+   * @return    the total number of bytes written to the output stream
+   */
+  public static int cpybytes(InputStream in, OutputStream out)
+    throws IOException
+  {
+    final byte[] chunk = new byte[1024];
+    int copied = 0;
+    for (int got = in.read(chunk); got > -1; got = in.read(chunk)) {
+      out.write(chunk, 0, got);
+      copied += got;
+    }
+    return copied;
+  }
+
+  /**
+   * Pumps data from the input stream into the output stream until
+   * <em>n</em> bytes have been copied or end of file is met.
+   * @return    the total number of bytes written to the output stream
+   */
+  public static int cpybytes(InputStream in, OutputStream out, int n)
+    throws IOException
+  {
+    final int cap = Math.min(n, 1024);   // never buffer more than 1024 bytes at once
+    final byte[] chunk = new byte[cap];
+    int copied = 0;
+    int want = cap;                      // size of the next read request
+    while (copied < n) {
+      final int got = in.read(chunk, 0, want);
+      if (got <= -1) break;
+      out.write(chunk, 0, got);
+      copied += got;
+      want = Math.min(n - copied, cap);
+    }
+    return copied;
+  }
+
+  /**
+   * Returns the index within buf of the first occurrence of the
+   * specified character or -1 if the character is not found.
+   * @param buf        the buffer to search
+   * @param ch         the character to search for
+   */
+  public static final int indexOf(byte[] buf,
+                                  int ch) {
+    return indexOf(buf, ch, 0, buf.length);
+  }
+
+  /**
+   * Returns the index within buf of the first occurrence of the
+   * specified character, starting the search at fromIndex. This method
+   * returns -1 if the character is not found. ch may be given either as a
+   * signed byte value (-128..127) or as an unsigned one (0..255).
+   * @param buf        the buffer to search
+   * @param ch         the character to search for
+   * @param fromIndex  the index to start the search from
+   * @param toIndex    the highest possible index returned plus 1
+   */
+  public static final int indexOf(byte[] buf,
+                                  int ch,
+                                  int fromIndex,
+                                  int toIndex) {
+    // bytes are signed in Java: compared against the raw int, 0x80..0xff could never match
+    if (ch < Byte.MIN_VALUE || ch > 0xff) return -1;
+    final byte wanted = (byte) ch;
+
+    for (int i = fromIndex; i < toIndex; i++) {
+      if (buf[i] == wanted) return i;
+    }
+    return -1;
+  }
+
+  /**
+   * Returns the index of the first occurrence of s in the specified
+   * buffer or -1 if s is not found.
+   */
+  public static final int indexOf(byte[] buf, String s) {
+    return indexOf(buf, s, 0);
+  }
+
+  /**
+   * Returns the index of the first occurrence of s in the specified buffer,
+   * searching from fromIndex, or -1 if s is not found. Bytes are compared as
+   * unsigned values, so chars 0x80..0xff of s match the corresponding bytes.
+   */
+  public static final int indexOf(byte[] buf, String s, int fromIndex) {
+    int i;                  // index into buf
+    int j;                  // index into s
+    int max_i = buf.length;
+    int max_j = s.length();
+
+    for (i = fromIndex; i + max_j <= max_i; i++) {
+      for (j = 0; j < max_j; j++) {
+        if ((buf[j + i] & 0xff) != s.charAt(j))  // mask: a sign-extended byte never equals a char >= 0x80
+          break;
+      }
+      if (j == max_j) return i;
+    }
+    return -1;
+  }
+
+/*
+  // for testing indexOf(byte[], String, int)
+  public static void main(String[] args) {
+    byte[] buf = getBytes(args[0]);
+    System.out.println("IndexOf(arg0, arg1, 0) = " + indexOf(buf, args[1], 3));
+  }
+*/
+
+  /** Tells whether ch is a blank, tab, line feed or carriage return. */
+  public static final boolean isSpace(int ch) {
+    return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
+  }
+
+  /** Steps from fromIndex over white-space bytes and returns where it stopped (toIndex if only spaces were left). */
+  public static final int skipSpaces(byte[] buf, int fromIndex, int toIndex) {
+    int pos = fromIndex;
+    while (pos < toIndex && isSpace(buf[pos])) pos++;
+    return pos;
+  }
+  /**
+   * Find byte pattern ptrn in the len bytes of buf that start at index off.
+   * @return    index of first occurrence of ptrn in buf, -1 if no occurrence
+   */
+  public static final int findBytes(byte buf[],
+                                    int off,
+                                    int len,
+                                    byte ptrn[]) {
+    // Note: This code is completely incomprehensible without a drawing...
+    // NOTE(review): a pattern prefix that reaches off + len is returned like a full match (see the i == buf_len test) - confirm callers rely on it.
+    int buf_len = off + len;
+    int ptrn_len = ptrn.length;
+    int i;                       // index into buf
+    int j;                       // index into ptrn;
+    byte b = ptrn[0];            // next byte of interest (not read again in this method)
+
+    for (i = off; i < buf_len; ) {
+      j = 0;
+      while (i < buf_len && j < ptrn_len && buf[i] == ptrn[j]) {
+        i++;
+        j++;
+      }
+      if (i == buf_len || j == ptrn_len)
+        return i - j;
+      else {
+        // We have to go back a bit as there may be an overlapping
+        // match starting a bit later in buf...
+        i = i - j + 1;
+      }
+    }
+    return -1;
+  }
+
+/*
+  // for testing findBytes(byte[], int, int, byte[]) 
+  public static void main(String args[]) {
+    if (args.length < 4) {
+      System.err.println("Usage: s1 off len s2");
+      System.exit(1);
+    }
+    byte b1[] = new byte[args[0].length()];
+    byte b2[] = new byte[args[3].length()];
+    args[0].getBytes(0, args[0].length(), b1, 0);
+    args[3].getBytes(0, args[3].length(), b2, 0);
+    int off = Integer.parseInt(args[1]);
+    int len = Integer.parseInt(args[2]);
+    System.out.println("Index = " + findBytes(b1, off, len, b2));
+  }
+*/
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Attribute.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Attribute.java
new file mode 100644
index 00000000000..3d7a4dcbfc2
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Attribute.java
@@ -0,0 +1,20 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A named attribute whose value is a Node and that carries a boolean "specified" flag.
+ */
+public interface Attribute {
+    
+    public String   getName();
+    public Node     getValue();
+    public void     setValue(Node arg);
+    
+    public boolean  getSpecified();
+    public void     setSpecified(boolean arg);
+    
+    public String   toString();
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/AttributeList.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/AttributeList.java
new file mode 100644
index 00000000000..5339b89c18d
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/AttributeList.java
@@ -0,0 +1,16 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A collection of Attribute objects that can be addressed by attribute name or by integer index.
+ */
+public interface AttributeList {
+    public Attribute    getAttribute(String attrName);
+    public Attribute    setAttribute(Attribute attr);
+    public Attribute    remove(String attrName);
+    public Attribute    item(int index);
+    public int          getLength();
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Comment.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Comment.java
new file mode 100644
index 00000000000..17d54913e37
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Comment.java
@@ -0,0 +1,13 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * Represents the content of comments: &lt;!-- ... --&gt;. The text is read and replaced as a String.
+ */
+public interface Comment extends Node {
+    public String getData();
+    public void setData(String arg);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DOM.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DOM.java
new file mode 100644
index 00000000000..75608773cc0
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DOM.java
@@ -0,0 +1,13 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * Creates Document objects for a given type string and answers feature queries.
+ */
+public interface DOM {
+    public Document createDocument(String type);
+    public boolean hasFeature(String feature);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Document.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Document.java
new file mode 100644
index 00000000000..7c71b5e18e6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Document.java
@@ -0,0 +1,28 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A DocumentFragment that also declares factory methods for elements, text nodes, comments, PIs and attributes.
+ */
+public interface Document extends DocumentFragment {
+    public Node             getDocumentType();
+    public void             setDocumentType(Node arg);
+    
+    public Element          getDocumentElement();
+    public void             setDocumentElement(Element arg);
+    
+    public DocumentContext  getContextInfo();
+    public void             setContextInfo(DocumentContext arg);
+    
+    public DocumentContext  createDocumentContext();
+    public Element          createElement(String tagName, AttributeList attributes);
+    public Text             createTextNode(String data);
+    public Comment          createComment(String data);
+    public PI               createPI(String name, String data);
+    public Attribute        createAttribute(String name, Node value);
+    public AttributeList    createAttributeList();
+    public NodeIterator     getElementsByTagName();   // NOTE(review): takes no tag name parameter - confirm this is intended
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DocumentContext.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DocumentContext.java
new file mode 100644
index 00000000000..508c6292249
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DocumentContext.java
@@ -0,0 +1,14 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * Holds a reference to a Document that can be read and replaced.
+ */
+public interface DocumentContext {
+    
+    public Document getDocument();
+    public void setDocument(Document arg);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DocumentFragment.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DocumentFragment.java
new file mode 100644
index 00000000000..3cae0af68ed
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/DocumentFragment.java
@@ -0,0 +1,13 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A Node that carries a settable reference to a master Document.
+ */
+public interface DocumentFragment extends Node {
+    public Document getMasterDoc();
+    public void setMasterDoc(Document arg);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Element.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Element.java
new file mode 100644
index 00000000000..8240ffa5e98
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Element.java
@@ -0,0 +1,16 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A Node with a tag name and an AttributeList.
+ */
+public interface Element extends Node {
+    public String           getTagName();
+    public AttributeList    attributes();
+    public void             setAttribute(Attribute newAttr);
+    public void             normalize();
+    public NodeIterator     getElementsByTagName();   // NOTE(review): takes no tag name parameter - confirm this is intended
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Makefile b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Makefile
new file mode 100644
index 00000000000..946af9eb603
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Makefile
@@ -0,0 +1,38 @@
+# This Makefile generated by hplb.util.jmkmf
+# Java package is hplb.org.w3c.dom (the package declared by the sources in this directory)
+
+.SUFFIXES: .java .class .jj
+JPACKAGE     = hplb.org.w3c.dom
+JAVA         = java
+JAVAC        = javac
+JAVACC       = java COM.sun.labs.javacc.Main
+JFLAGS       = 
+OBJS         = \
+  Attribute.class \
+  AttributeList.class \
+  Comment.class \
+  DOM.class \
+  Document.class \
+  DocumentContext.class \
+  DocumentFragment.class \
+  Element.class \
+  Node.class \
+  NodeIterator.class \
+  PI.class \
+  Text.class \
+  TreeIterator.class
+JAVADOCFLAGS = -d ../../../doc/api -author -noindex -notree
+.PHONY: all doc clean
+all: $(OBJS)
+
+doc:
+	javadoc $(JAVADOCFLAGS)  $(JPACKAGE)
+# Suffix rules must not list prerequisites; GNU make would treat them as ordinary targets otherwise.
+.jj.java:
+	$(JAVACC) $<
+
+.java.class:
+	$(JAVAC) $(JFLAGS) $<
+
+clean:
+	rm -f *.class *~
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Node.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Node.java
new file mode 100644
index 00000000000..7587fce2830
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Node.java
@@ -0,0 +1,29 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * Base interface of the tree types: declares node type codes plus parent, child and sibling accessors and child list edits.
+ */
+public interface Node {
+    // NodeType codes; presumably the values returned by getNodeType() - TODO confirm
+    public static final int DOCUMENT             = 1;
+    public static final int ELEMENT              = 2;
+    public static final int ATTRIBUTE            = 3;
+    public static final int PI                   = 4;
+    public static final int COMMENT              = 5;
+    public static final int TEXT                 = 6;
+    
+    public int              getNodeType();
+    public Node             getParentNode();
+    public NodeIterator     getChildNodes();
+    public boolean          hasChildNodes();
+    public Node             getFirstChild();
+    public Node             getPreviousSibling();
+    public Node             getNextSibling();
+    public Node             insertBefore(Node newChild, Node refChild);
+    public Node             replaceChild(Node newChild, Node oldChild);
+    public Node             removeChild(Node oldChild);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/NodeIterator.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/NodeIterator.java
new file mode 100644
index 00000000000..9194fb74d31
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/NodeIterator.java
@@ -0,0 +1,19 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * Declares cursor-style navigation over nodes: each to...() method and getCurrent() returns a Node, getLength() an int.
+ */
+public interface NodeIterator {
+    public int  getLength();
+    public Node getCurrent();
+    public Node toNext();
+    public Node toPrevious();
+    public Node toFirst();
+    public Node toLast();
+    public Node toNth(int Nth);
+    public Node toNode(Node destNode);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/PI.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/PI.java
new file mode 100644
index 00000000000..af63d9f94d6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/PI.java
@@ -0,0 +1,16 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * Processing Instruction: a Node with a settable name and a settable data string.
+ */
+public interface PI extends Node {
+    public String   getName();
+    public void     setName(String arg);
+    
+    public String   getData();
+    public void     setData(String arg);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Text.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Text.java
new file mode 100644
index 00000000000..2490c9ecabe
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/Text.java
@@ -0,0 +1,19 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A Node holding a data string; declares append, insert, delete, replace and splice operations on it.
+ */
+public interface Text extends Node {
+    public String getData();
+    public void setData(String arg);
+
+    public void append(String data);
+    public void insert(int offset, String data);
+    public void delete(int offset, int count);
+    public void replace(int offset, int count, String data);
+    public void splice(Element element, int offset, int count);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/TreeIterator.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/TreeIterator.java
new file mode 100644
index 00000000000..bdb2339c286
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/w3c/dom/TreeIterator.java
@@ -0,0 +1,20 @@
+/*
+ * $Id$
+ */
+
+package hplb.org.w3c.dom;
+
+/**
+ * A NodeIterator extended with parent, child and sibling navigation and with child and sibling counts.
+ */
+public interface TreeIterator extends NodeIterator {
+    public int  numChildren();
+    public int  numPreviousSiblings();
+    public int  numNextSiblings();
+    public Node toParent();
+    public Node toPreviousSibling();
+    public Node toNextSibling();
+    public Node toFirstChild();
+    public Node toLastChild();
+    public Node toNthChild();   // NOTE(review): takes no index, unlike NodeIterator.toNth(int) - confirm this is intended
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/AttributeMap.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/AttributeMap.java
new file mode 100644
index 00000000000..ef71ebaccd6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/AttributeMap.java
@@ -0,0 +1,146 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+import java.util.Enumeration;
+
+/**
+  * A map of attributes for the current element.
+  * <p><em>This interface is part of the Java implementation of SAX, 
+  * the Simple API for XML.  It is free for both commercial and 
+  * non-commercial use, and is distributed with no warrantee, real 
+  * or implied.</em></p>
+  * <p>This map will be valid only during the invocation of the
+  * <code>startElement</code> callback: if you need to use attribute
+  * information elsewhere, you will need to make your own copies.</p>
+  * @author David Megginson, Microstar Software Ltd.
+  * @see hplb.org.xml.sax.DocumentHandler#startElement
+  */
+public interface AttributeMap {
+
+
+  /**
+    * Find the names of all available attributes for an element.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @return An enumeration of zero or more Strings.
+    * @see java.util.Enumeration
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public Enumeration getAttributeNames ();
+
+  /**
+    * Get the value of an attribute as a String.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param attributeName The name of the attribute whose value is wanted.
+    * @return The value as a String, or null if the attribute has no value.
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public String getValue (String attributeName);
+
+  /**
+    * Check if an attribute value is the name of an entity.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute to check.
+    * @return true if the attribute is an entity name.
+    * @see #getEntityPublicID
+    * @see #getEntitySystemID
+    * @see #getNotationName
+    * @see #getNotationPublicID
+    * @see #getNotationSystemID
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public boolean isEntity (String aname);
+
+  /**
+    * Check if an attribute value is the name of a notation.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute to check.
+    * @return true if the attribute is a notation name.
+    * @see #getNotationPublicID
+    * @see #getNotationSystemID
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public boolean isNotation (String aname);
+
+  /**
+    * Check if an attribute value is a unique identifier.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute to check.
+    * @return true if the attribute is a unique identifier.
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public boolean isId (String aname);
+
+  /**
+    * Check if an attribute value is a reference to an ID.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute to check.
+    * @return true if the attribute is a reference to an ID.
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public boolean isIdref (String aname);
+
+  /**
+    * Get the public identifier for an ENTITY attribute.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute.
+    * @return The public identifier or null if there is none (or if
+    *         the attribute value is not an entity name)
+    * @see #isEntity
+    */
+  public String getEntityPublicID (String aname);
+
+  /**
+    * Get the system identifier for an ENTITY attribute.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute.
+    * @return The system identifier or null if there is none (or if
+    *         the attribute value is not an entity name)
+    * @see #isEntity
+    */
+  public String getEntitySystemID (String aname);
+
+  /**
+    * Get the notation name for an ENTITY attribute.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute.
+    * @return The notation name or null if there is none (or if
+    *         the attribute value is not an entity name)
+    * @see #isEntity
+    */
+  public String getNotationName (String aname);
+
+  /**
+    * Get the notation public ID for an ENTITY or NOTATION attribute.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute.
+    * @return The public identifier or null if there is none (or if
+    *         the attribute value is not an entity or notation name)
+    * @see #isEntity
+    * @see #isNotation
+    */
+  public String getNotationPublicID (String aname);
+
+  /**
+    * Get the notation system ID for an ENTITY or NOTATION attribute.
+    * <p>This applies to the current element, and can be called only
+    * during an invocation of <code>startElement</code>.</p> 
+    * @param aname The name of the attribute.
+    * @return The system identifier or null if there is none (or if
+    *         the attribute value is not an entity or notation name)
+    * @see #isEntity
+    * @see #isNotation
+    */
+  public String getNotationSystemID (String aname);
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/DocumentHandler.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/DocumentHandler.java
new file mode 100644
index 00000000000..13b83ec3eb1
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/DocumentHandler.java
@@ -0,0 +1,129 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+
+/**
+  * A callback interface for basic XML document events.
+  * <p><em>This interface is part of the Java implementation of SAX,
+  * the Simple API for XML.  It is free for both commercial and
+  * non-commercial use, and is distributed with no warrantee, real
+  * or implied.</em></p>
+  * <p>This is the main handler for basic document events; it provides
+  * information on roughly the same level as the ESIS in full SGML,
+  * concentrating on logical structure rather than lexical
+  * representation.</p>
+  * <p>If you do not set a document handler, then by default all of these
+  * events will simply be ignored.</p>
+  * @author David Megginson, Microstar Software Ltd.
+  * @see hplb.org.xml.sax.Parser#setDocumentHandler
+  */
+public interface DocumentHandler {
+
+
+  /**
+    * Handle the start of a document.
+    * <p>This is the first event called by a
+    * SAX-conformant parser, so you can use it to allocate and
+    * initialise new objects for the document.</p>
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void startDocument ()
+    throws Exception;
+
+
+  /**
+    * Handle the end of a document.
+    * <p>This is the last event called by a
+    * SAX-conformant parser, so you can use it to finalize and
+    * clean up objects for the document.</p>
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void endDocument ()
+    throws Exception;
+
+
+  /**
+    * Handle the document type declaration.
+    * <p>This will appear only if the XML document contains a
+    * <code>DOCTYPE</code> declaration.</p>
+    * @param name The document type name.
+    * @param publicID The public identifier of the external DTD subset
+    *                 (if any), or null.
+    * @param systemID The system identifier of the external DTD subset
+    *                 (if any), or null.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void doctype (String name, String publicID, String systemID)
+    throws Exception;
+
+
+  /**
+    * Handle the start of an element.
+    * <p>Please note that the information in the <code>attributes</code>
+    * parameter will be accurate only for the duration of this handler:
+    * if you need to use the information elsewhere, you should copy
+    * it.</p>
+    * @param name The element type name.
+    * @param attributes The available attributes.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void startElement (String name, AttributeMap attributes)
+    throws Exception;
+
+
+  /**
+    * Handle the end of an element.
+    * @param name The element type name.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void endElement (String name)
+    throws Exception;
+
+
+  /**
+    * Handle significant character data.
+    * <p>Please note that the contents of the array will be
+    * accurate only for the duration of this handler: if you need to
+    * use them elsewhere, you should make your own copy, possibly
+    * by constructing a string:</p>
+    * <pre>
+    * String data = new String(ch, start, length);
+    * </pre>
+    * @param ch An array of characters.
+    * @param start The starting position in the array.
+    * @param length The number of characters to use in the array.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void characters (char ch[], int start, int length)
+    throws Exception;
+
+
+  /**
+    * Handle ignorable whitespace.
+    * <p>Please note that the contents of the array will be
+    * accurate only for the duration of this handler: if you need to
+    * use them elsewhere, you should make your own copy, possibly
+    * by constructing a string:</p>
+    * <pre>
+    * String whitespace = new String(ch, start, length);
+    * </pre>
+    * @param ch An array of whitespace characters.
+    * @param start The starting position in the array.
+    * @param length The number of characters to use in the array.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void ignorable (char ch[], int start, int length)
+    throws Exception;
+
+
+  /**
+    * Handle a processing instruction (a target name, optionally followed by data).
+    * @param name The target of the processing instruction.
+    * @param remainder The data following the target, if any.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void processingInstruction (String name, String remainder)
+    throws Exception;
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/EntityHandler.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/EntityHandler.java
new file mode 100644
index 00000000000..93faa3dcf73
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/EntityHandler.java
@@ -0,0 +1,48 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+
+/**
+  * A callback interface for basic XML entity-related events.
+  * <p><em>This interface is part of the Java implementation of SAX,
+  * the Simple API for XML.  It is free for both commercial and
+  * non-commercial use, and is distributed with no warrantee, real
+  * or implied.</em></p>
+  * <p>If you do not set an entity handler, then a parser will
+  * resolve all entities to the suggested system ID, and will take no
+  * action for entity changes.</p>
+  * @author David Megginson, Microstar Software Ltd.
+  * @see hplb.org.xml.sax.Parser#setEntityHandler
+  */
+public interface EntityHandler {
+
+
+  /**
+    * Resolve a system identifier.
+    * <p>Before loading any entity (including the document entity),
+    * SAX parsers will filter the system identifier through this
+    * callback, and you can return a different system identifier if you
+    * wish, or null to prevent the parser from reading the entity.</p>
+    * @param ename The name of the entity, "[document]" for the
+    *              document entity, or "[external DTD]" for the external
+    *              DTD subset.
+    * @param publicID The public identifier, or null if there is none.
+    * @param systemID The system identifier suggested in the XML document.
+    * @return A system identifier, or null to skip the entity.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public String resolveEntity (String ename, String publicID, String systemID)
+    throws Exception;
+
+  /**
+    * Handle a change in the current entity.
+    * <p>Whenever the parser switches the entity (URI) that it is reading
+    * from, it will call this handler to report the change.</p>
+    * @param systemID The URI of the new entity.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void changeEntity (String systemID)
+    throws Exception;
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/ErrorHandler.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/ErrorHandler.java
new file mode 100644
index 00000000000..4c8397029ee
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/ErrorHandler.java
@@ -0,0 +1,52 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+
+/**
+  * A callback interface for basic XML error events.
+  * <p><em>This interface is part of the Java implementation of SAX,
+  * the Simple API for XML.  It is free for both commercial and
+  * non-commercial use, and is distributed with no warrantee, real
+  * or implied.</em></p>
+  * <p>If you do not set an error handler, then a parser will report
+  * warnings to <code>System.err</code>, and will throw an (unspecified)
+  * exception for fatal errors.</p>
+  * @author David Megginson, Microstar Software Ltd.
+  * @see hplb.org.xml.sax.Parser#setErrorHandler
+  */
+public interface ErrorHandler {
+
+  /**
+    * Handle a non-fatal warning.
+    * <p>A SAX parser will use this callback to report a condition
+    * that is not serious enough to stop the parse (though you may
+    * still stop the parse if you wish).</p>
+    * @param message The warning message.
+    * @param systemID The URI of the entity that caused the warning, or
+    *                 null if not available.
+    * @param line The line number in the entity, or -1 if not available.
+    * @param column The column number in the entity, or -1 if not available.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void warning (String message, String systemID, int line, int column)
+    throws java.lang.Exception;
+
+  /**
+    * Handle a fatal error.
+    * <p>A SAX parser will use this callback to report a condition
+    * that is serious enough to invalidate the parse, and may not
+    * report all (or any) significant parse events after this.  Ordinarily,
+    * you should stop immediately with an exception, but you can continue
+    * to try to collect more errors if you wish.</p>
+    * @param message The error message.
+    * @param systemID The URI of the entity that caused the error, or
+    *                 null if not available.
+    * @param line The line number in the entity, or -1 if not available.
+    * @param column The column number in the entity, or -1 if not available.
+    * @exception java.lang.Exception You may throw any exception.
+    */
+  public void fatal (String message, String systemID, int line, int column)
+    throws Exception;
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/HandlerBase.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/HandlerBase.java
new file mode 100644
index 00000000000..1bf3f2a1099
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/HandlerBase.java
@@ -0,0 +1,201 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+
+/**
+  * A convenience base class from which SAX event handlers can be derived.
+  * <p><em>This class is part of the Java implementation of SAX,
+  * the Simple API for XML.  It is free for both commercial and
+  * non-commercial use, and is distributed with no warrantee, real
+  * or implied.</em></p>
+  * <p>Each method here supplies the behaviour that applies when no
+  * handler is specified (though parsers are not actually required to
+  * use this class).</p>
+  * @author David Megginson, Microstar Software Ltd.
+  * @see hplb.org.xml.sax.XmlException
+  * @see hplb.org.xml.sax.EntityHandler
+  * @see hplb.org.xml.sax.DocumentHandler
+  * @see hplb.org.xml.sax.ErrorHandler
+  */
+public class HandlerBase
+  implements EntityHandler, DocumentHandler, ErrorHandler
+{
+
+
+  //////////////////////////////////////////////////////////////////////
+  // Implementation of hplb.org.xml.sax.EntityHandler.
+  //////////////////////////////////////////////////////////////////////
+
+  /**
+    * Resolve an external entity.
+    * <p>This default hands back the supplied system ID unchanged.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.EntityHandler#resolveEntity
+    */
+  public String resolveEntity (String ename, String publicID, String systemID)
+      throws Exception {
+    // The entity name and the public ID are not consulted.
+    return systemID;
+  }
+
+
+  /**
+    * Handle an entity-change event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.EntityHandler#changeEntity
+    */
+  public void changeEntity (String systemID) throws Exception {
+    // Nothing to do by default: the change of entity is simply
+    // not acted upon.
+  }
+
+
+
+  //////////////////////////////////////////////////////////////////////
+  // Implementation of hplb.org.xml.sax.DocumentHandler.
+  //////////////////////////////////////////////////////////////////////
+
+
+  /**
+    * Handle a start document event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#startDocument
+    */
+  public void startDocument () throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle an end document event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#endDocument
+    */
+  public void endDocument () throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle a document type declaration event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#doctype
+    */
+  public void doctype (String name, String publicID, String systemID) throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle a start element event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#startElement
+    */
+  public void startElement (String name, AttributeMap attributes) throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle an end element event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#endElement
+    */
+  public void endElement (String name) throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle a character data event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#characters
+    */
+  public void characters (char ch[], int start, int length) throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle an ignorable whitespace event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#ignorable
+    */
+  public void ignorable (char ch[], int start, int length) throws Exception {
+    // Nothing to do by default.
+  }
+
+
+  /**
+    * Handle a processing instruction event.
+    * <p>This default does nothing.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.DocumentHandler#processingInstruction
+    */
+  public void processingInstruction (String name, String remainder) throws Exception {
+    // Nothing to do by default.
+  }
+
+
+
+  //////////////////////////////////////////////////////////////////////
+  // Implementation of ErrorHandler.
+  //////////////////////////////////////////////////////////////////////
+
+
+  /**
+    * Handle a non-fatal warning.
+    * <p>This default prints the warning to System.err.</p>
+    * @exception java.lang.Exception Not thrown here; a method that
+    *                                overrides this one may throw any.
+    * @see hplb.org.xml.sax.ErrorHandler#warning
+    */
+  public void warning (String message, String systemID, int line, int column)
+      throws Exception {
+    // Assemble "Warning (<systemID>,<line>,<column>): <message>" piece
+    // by piece; a null systemID or message is rendered as "null",
+    // exactly as string concatenation would render it.
+    StringBuffer report = new StringBuffer("Warning (");
+    report.append(systemID).append(',');
+    report.append(line).append(',');
+    report.append(column).append("): ");
+    report.append(message);
+    System.err.println(report.toString());
+  }
+
+
+  /**
+    * Handle a fatal error.
+    * <p>This default throws an XmlException built from the arguments.</p>
+    * @exception hplb.org.xml.sax.XmlException Always thrown here, to
+    *                signal the fatal parsing error.
+    * @exception java.lang.Exception A method that overrides this one
+    *                                may throw any exception.
+    * @see hplb.org.xml.sax.ErrorHandler#fatal
+    */
+  public void fatal (String message, String systemID, int line, int column)
+      throws XmlException, Exception {
+    XmlException problem = new XmlException(message, systemID, line, column);
+    throw problem;
+  }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/Makefile b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/Makefile
new file mode 100644
index 00000000000..e2ad29c1f0f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/Makefile
@@ -0,0 +1,32 @@
+# Makefile for Java package hplb.org.xml.sax (originally generated by jmkmf).
+# Suffix rules take no prerequisites; with any, GNU make treats them as plain targets.
+.SUFFIXES: .java .class .jj
+.PHONY: all doc clean
+JPACKAGE     = hplb.org.xml.sax
+JAVA         = java
+JAVAC        = javac
+JAVACC       = java COM.sun.labs.javacc.Main
+JFLAGS       =
+OBJS         = \
+  AttributeMap.class \
+  DocumentHandler.class \
+  EntityHandler.class \
+  ErrorHandler.class \
+  HandlerBase.class \
+  Parser.class \
+  XmlException.class
+JAVADOCFLAGS = -d ../../../doc/api -author -noindex -notree
+
+all: $(OBJS)
+
+doc:
+	javadoc $(JAVADOCFLAGS) $(JPACKAGE)
+
+.jj.java:
+	$(JAVACC) $<
+
+.java.class:
+	$(JAVAC) $(JFLAGS) $<
+
+clean:
+	rm -f *.class *~
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/Parser.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/Parser.java
new file mode 100644
index 00000000000..3033ef9d37f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/Parser.java
@@ -0,0 +1,71 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+
+/**
+  * A standard interface for event-driven XML parsers.
+  * <p><em>This interface is part of the Java implementation of SAX,
+  * the Simple API for XML.  It is free for both commercial and
+  * non-commercial use, and is distributed with no warrantee, real
+  * or implied.</em></p>
+  * <p>All SAX-conformant XML parsers (or their front-end SAX drivers)
+  * <em>must</em> implement this interface, together with a zero-argument
+  * constructor.</p>
+  * <p>You can plug three different kinds of callback interfaces into
+  * a basic SAX parser: one for entity handling, one for basic document
+  * events, and one for error reporting.  It is not an error to start
+  * a parse without setting any handlers.</p>
+  * @author David Megginson, Microstar Software Ltd.
+  */
+public interface Parser {
+
+
+  /**
+    * Register the handler for basic entity events.
+    * <p>If you begin a parse without setting an entity handler,
+    * the parser will by default resolve all entities to their
+    * default system IDs.</p>
+    * @param handler An object to receive callbacks for events.
+    * @see hplb.org.xml.sax.EntityHandler
+    */
+  public void setEntityHandler (EntityHandler handler);
+
+
+  /**
+    * Register the handler for basic document events.
+    * <p>You may begin the parse without setting a handler, but
+    * in that case no document events will be reported.</p>
+    * @param handler An object to receive callbacks for events.
+    * @see hplb.org.xml.sax.DocumentHandler
+    */
+  public void setDocumentHandler (DocumentHandler handler);
+
+
+  /**
+    * Register the handler for errors and warnings.
+    * <p>If you begin a parse without setting an error handler,
+    * warnings will be printed to System.err, and errors will
+    * throw an unspecified exception.</p>
+    * @param handler An object to receive callbacks for errors.
+    * @see hplb.org.xml.sax.ErrorHandler
+    */
+  public void setErrorHandler (ErrorHandler handler);
+
+
+  /**
+    * Parse an XML document.
+    * <p>Nothing exciting will happen unless you have set handlers.</p>
+    * @param publicID The public identifier for the document, or null
+    *                 if none is available.
+    * @param systemID The system identifier (URI) for the document.
+    * @exception java.lang.Exception This method may throw any exception,
+    *            but the parser itself
+    *            will throw only exceptions derived from java.io.IOException;
+    *            anything else will come from your handlers.
+    * @see #setEntityHandler
+    * @see #setDocumentHandler
+    * @see #setErrorHandler
+    */
+  void parse (String publicID, String systemID) throws java.lang.Exception;
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/XmlException.java b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/XmlException.java
new file mode 100644
index 00000000000..f7d4c244fd4
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/org/xml/sax/XmlException.java
@@ -0,0 +1,73 @@
+// $Id$
+
+package hplb.org.xml.sax;
+
+
+/**
+  * An exception for reporting XML parsing errors.
+  * <p><em>This class is part of the Java implementation of SAX,
+  * the Simple API for XML.  It is free for both commercial and
+  * non-commercial use, and is distributed with no warrantee, real
+  * or implied.</em></p>
+  * <p>This exception is not a required part of SAX, and it is not
+  * referenced in any of the core interfaces.  It is used only in
+  * the optional HandlerBase base class, as a means of signalling
+  * parsing errors.</p>
+  * @author David Megginson, Microstar Software Ltd.
+  * @see hplb.org.xml.sax.HandlerBase#fatal
+  */
+public class XmlException extends Exception {
+
+
+  //
+  // Internal state: where the error was found.
+  //
+
+  private String systemID;
+  private int line;
+  private int column;
+
+
+  /**
+    * Construct a new exception that carries the error message together
+    * with the location at which the error was found.
+    * @param message The error message, passed on to Exception.
+    * @param systemID The system identifier (URI) reported by getSystemID.
+    * @param line The line number reported by getLine.
+    * @param column The column number reported by getColumn.
+    */
+  public XmlException (String message, String systemID, int line, int column) {
+    super(message);
+    this.column = column;
+    this.line = line;
+    this.systemID = systemID;
+  }
+
+
+  /**
+    * Report the system identifier (URI) where the error occurred.
+    * @return A string representing the URI, or null if none is available.
+    */
+  public String getSystemID () {
+    return this.systemID;
+  }
+
+
+  /**
+    * Report the line number where the error occurred.
+    * @return The line number, or -1 if none is available.
+    */
+  public int getLine () {
+    return this.line;
+  }
+
+
+  /**
+    * Report the column number (line offset) where the error occurred.
+    * @return The column number, or -1 if none is available.
+    */
+  public int getColumn () {
+    return this.column;
+  }
+
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Atom.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Atom.java
new file mode 100644
index 00000000000..097b85ddd5f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Atom.java
@@ -0,0 +1,41 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import java.util.Hashtable;
+
+/**
+ * Interns strings as <em>atoms</em>: when two strings handed to getAtom()
+ * are equal in the sense of String.equals(), the strings returned for
+ * them are in fact the same Object. Element and attribute names are
+ * interned this way so that they can be compared using the more
+ * efficient reference equality, a la "s1==s2".
+ *
+ * @author  Anders Kristensen
+ */
+public final class Atom {
+    /** The atom table; every atom is stored as both key and value. */
+    private static final Hashtable atoms = new Hashtable();
+
+    /**
+     * Return the atom equal to the argument, registering the argument if new.
+     */
+    public static String getAtom(String s) {
+        synchronized (atoms) {
+            Object known = atoms.get(s);
+            if (known != null) {
+                return (String) known;
+            }
+            atoms.put(s, s);
+            return s;
+        }
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/AttrImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/AttrImpl.java
new file mode 100644
index 00000000000..4e84b2fb10c
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/AttrImpl.java
@@ -0,0 +1,57 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * An attribute: a name paired with a value that is held as a Node, plus
+ * a flag named <code>specified</code>.
+ *
+ * @author  Anders Kristensen
+ */
+public final class AttrImpl implements Attribute {
+    protected String name;
+    protected Node value;
+    protected boolean specified;
+
+    /** Creates an attribute whose value is a TextImpl of type Node.TEXT; specified is true. */
+    public AttrImpl(String name, String value) {
+        this(name, new TextImpl(Node.TEXT, value), true);
+    }
+
+    /** Creates an attribute from a name, an existing value node and the specified flag. */
+    public AttrImpl(String name, Node value, boolean specified) {
+        this.specified = specified;
+        this.value = value;
+        this.name = name;
+    }
+
+    /** Returns the name given at construction. */
+    public String getName() { return this.name; }
+
+    /** Returns the current value node. */
+    public Node getValue() { return this.value; }
+
+    /** Replaces the value node. */
+    public void setValue(Node arg) { this.value = arg; }
+
+    /** Returns the specified flag. */
+    public boolean getSpecified() { return this.specified; }
+
+    /** Sets the specified flag. */
+    public void setSpecified(boolean arg) { this.specified = arg; }
+
+    /** Returns the string form of the value node; the name is not included. */
+    public String toString() {
+        return this.value.toString();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/AttrListImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/AttrListImpl.java
new file mode 100644
index 00000000000..429626703da
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/AttrListImpl.java
@@ -0,0 +1,183 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * An ordered, array-backed list of attributes that are looked up by name.
+ * Attributes are kept in the order they were inserted and are stored
+ * linearly, so getAttribute(), setAttribute() and remove() are linear in
+ * the number of attributes held.
+ *
+ * <p>Setting an attribute whose name is already present replaces the
+ * existing entry in place, so a name occurs at most once.
+ *
+ * <p>The backing array and the element count are protected fields, which
+ * allows direct access to the elements for speed.
+ *
+ * @author  Anders Kristensen
+ */
+public class AttrListImpl implements AttributeList {
+    /** Backing storage; only the first <code>n</code> slots are in use. */
+    protected Attribute[] elms;
+
+    /**
+     * Number of elements. The elements are held at indices 0 to n-1 in elms.
+     */
+    protected int n = 0;
+
+    /** Create an AttrListImpl with an initial capacity of two. */
+    public AttrListImpl() {
+        this(2);
+    }
+
+    /**
+     * Create an AttrListImpl with the specified initial capacity.
+     * @param size The initial capacity; must be at least 1.
+     * @exception IllegalArgumentException If size is zero or negative.
+     */
+    public AttrListImpl(int size) {
+        if (size <= 0) throw new IllegalArgumentException(
+                "Initial size must be at least 1");
+        elms = new Attribute[size];
+    }
+
+    /** Returns the attribute with the given name, or null if there is none. */
+    public synchronized Attribute getAttribute(String attrName) {
+        int i = getIndex(attrName);
+        return (i < 0 ? null : elms[i]);
+    }
+
+    /** Returns the index of the attribute called name, or -1 if absent. */
+    protected int getIndex(String name) {
+        for (int i = 0; i < n; i++) {
+            if (elms[i].getName().equals(name)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Adds attr, or replaces the attribute that has the same name.
+     * @return The attribute that was replaced, or null if attr was added.
+     */
+    public synchronized Attribute setAttribute(Attribute attr) {
+        int i = getIndex(attr.getName());
+        if (i >= 0) {
+            Attribute old = elms[i];
+            elms[i] = attr;
+            return old;
+        }
+
+        int len = elms.length;
+        if (len == n) {
+            // Double the capacity. The new array must be an Attribute[]:
+            // an AttrImpl[] would make copying or storing any other
+            // Attribute implementation fail with ArrayStoreException.
+            Attribute[] e = new Attribute[len * 2];
+            System.arraycopy(elms, 0, e, 0, len);
+            elms = e;
+        }
+        elms[n] = attr;
+        n++;
+        return null;
+    }
+
+    /** Removes and returns the attribute with the given name, or returns null. */
+    public synchronized Attribute remove(String attrName) {
+        int i = getIndex(attrName);
+        if (i < 0) return null;
+        Attribute val = elms[i];
+        System.arraycopy(elms, i+1, elms, i, n-i-1);
+        n--;
+        elms[n] = null;    // clear the vacated slot so it holds no stale reference
+        return val;
+    }
+
+    /**
+     * Returns the attribute at the given position.
+     * @exception IndexOutOfBoundsException If index is not in 0..n-1.
+     */
+    public synchronized Attribute item(int index) {
+        if (index < 0 || index >= n) {
+            throw new IndexOutOfBoundsException(""+index);
+        }
+        return elms[index];
+    }
+
+    /** Returns the number of attributes in this list. */
+    public synchronized int getLength() {
+        return n;
+    }
+
+    /** Returns the list as "{ name=value, ... }" in insertion order. */
+    public synchronized String toString() {
+        StringBuffer sb = new StringBuffer();
+        int count = getLength();
+        sb.append("{ ");
+        for (int i = 0; i < count; i++) {
+            if (i > 0) sb.append(", ");
+            Attribute attr = item(i);
+            sb.append(attr.getName() + '=' + attr);
+        }
+        sb.append(" }");
+        return sb.toString();
+    }
+
+    /**/
+    // Interactive test driver: reads one operation per line from System.in.
+    // An optional first argument sets the initial capacity.
+    public static void main(String[] args) throws Exception {
+        // args[0] is present whenever at least one argument is given.
+        AttrListImpl alist = (args.length > 0
+                ? new AttrListImpl(Integer.parseInt(args[0]))
+                : new AttrListImpl());
+        Attribute attr;
+
+        System.out.println("Enter operations... op's are one of\n"
+            + "put <key> <val>\nget <key>\nrem <key>\nsize\nquit\n");
+
+        java.io.BufferedReader r = new java.io.BufferedReader(
+                new java.io.InputStreamReader(System.in));
+        while (true) {
+            System.out.print("doyourworst> ");
+            String input = r.readLine();
+            if (input == null) break;               // end of input
+            java.util.StringTokenizer tok = new java.util.StringTokenizer(input);
+            if (!tok.hasMoreTokens()) continue;     // blank line: prompt again
+            String op = tok.nextToken();
+            if ("put".equals(op)) {
+                attr = new AttrImpl(tok.nextToken(), tok.nextToken());
+                System.out.println("Value: " + alist.setAttribute(attr));
+            } else if ("get".equals(op)) {
+                attr = alist.getAttribute(tok.nextToken());
+                System.out.println("Value: " +
+                        (attr == null ? "No such element" : attr.toString()));
+            } else if ("rem".equals(op)) {
+                attr = alist.remove(tok.nextToken());
+                System.out.println("Value: " + attr);
+            } else if (op.startsWith("s")) {
+                System.out.println("Size: " + alist.getLength());
+            } else if (op.startsWith("q")) {
+                break;
+            } else {
+                System.out.println("Unrecognized op: " + op);
+            }
+
+            System.out.println("AttributeList: " + alist);
+            System.out.println("Size: " + alist.getLength());
+            System.out.println();
+        }
+    }
+    //*/
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/CharBuffer.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/CharBuffer.java
new file mode 100644
index 00000000000..5eee304178d
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/CharBuffer.java
@@ -0,0 +1,46 @@
+/*
+ * $Id$
+ *
+ * Copyright 1997 Hewlett-Packard Company
+ *
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+/**
+ * A java.io.CharArrayWriter with the additional property that users can get
+ * to the actual underlying storage. Hence it's very fast (and dangerous).
+ * @author      Anders Kristensen
+ */
+public final class CharBuffer extends java.io.CharArrayWriter {
+    /** Creates a buffer with the default initial capacity. */
+    public CharBuffer() { super(); }
+
+    /** Creates a buffer with room for <code>size</code> chars before it has to grow. */
+    public CharBuffer(int size) { super(size); }
+
+    /**
+     * Truncates the content to <code>size</code> chars. Use only to
+     * *decrement* the length: a larger value leaves the buffer unchanged.
+     * @throws IllegalArgumentException if <code>size</code> is negative
+     */
+    public void setLength(int size) {
+        if (size < 0) throw new IllegalArgumentException("negative size: " + size);
+        synchronized (lock) {
+            if (size < count) count = size;
+        }
+    }
+
+    /** Returns the live backing array, not a copy; only the first getLength() chars are content. */
+    public char[] getCharArray() {
+        synchronized (lock) { return buf; }
+    }
+
+    /** Returns the number of chars currently held. */
+    public int getLength() {
+        return count;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DOMImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DOMImpl.java
new file mode 100644
index 00000000000..5645c2c1ac7
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DOMImpl.java
@@ -0,0 +1,23 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.DOM;
+import hplb.org.w3c.dom.Document;
+
+/** DOM entry point of this implementation: hands out documents and answers feature queries. */
+public class DOMImpl implements DOM {
+    /** Returns a new DocumentImpl; the type argument is not used. */
+    public Document createDocument(String type) { return new DocumentImpl(); }
+
+    /** Answers false for every feature name. */
+    public boolean hasFeature(String feature) { return false; }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DocContextImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DocContextImpl.java
new file mode 100644
index 00000000000..fc2db0c8eb3
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DocContextImpl.java
@@ -0,0 +1,25 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/** Plain holder for the Document that a DocumentContext refers to. */
+public class DocContextImpl implements DocumentContext {
+    /** The document this context belongs to; null until one is assigned. */
+    Document doc;
+
+    /** Returns the document last stored with setDocument (possibly null). */
+    public Document getDocument() { return this.doc; }
+
+    /** Stores the document this context refers to. */
+    public void setDocument(Document arg) { this.doc = arg; }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DocumentImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DocumentImpl.java
new file mode 100644
index 00000000000..54badf69d2d
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/DocumentImpl.java
@@ -0,0 +1,106 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * The document node of a DOM tree. Besides holding the document-wide
+ * properties it is the factory for the other kinds of node.
+ */
+public class DocumentImpl extends NodeImpl implements Document {
+    DocumentContext context;
+    Document        masterDoc = this;  // a document starts out as its own master
+    Node            type;
+    Element         rootNode;
+
+    /** Creates a node of type Node.DOCUMENT. */
+    public DocumentImpl() {
+        super(Node.DOCUMENT);
+    }
+
+    /** Master document: this document itself unless setMasterDoc changed it. */
+    public Document getMasterDoc() { return this.masterDoc; }
+    public void setMasterDoc(Document arg) { this.masterDoc = arg; }
+
+    /** Document type node; null until setDocumentType stores one. */
+    public Node getDocumentType() { return this.type; }
+    public void setDocumentType(Node arg) { this.type = arg; }
+
+    /** Root element; null until setDocumentElement stores one. */
+    public Element getDocumentElement() { return this.rootNode; }
+    public void setDocumentElement(Element arg) { this.rootNode = arg; }
+
+    /** Context information; null until setContextInfo stores one. */
+    public DocumentContext getContextInfo() { return this.context; }
+    public void setContextInfo(DocumentContext arg) { this.context = arg; }
+
+    // ---- factory methods ------------------------------------------------
+
+    /** Returns a new, independent DocumentImpl. */
+    public Document createDocument() {
+        Document fresh = new DocumentImpl();
+        return fresh;
+    }
+
+    /** Returns a new DocContextImpl. */
+    public DocumentContext createDocumentContext() {
+        DocumentContext ctx = new DocContextImpl();
+        return ctx;
+    }
+
+    /** Returns a new ElementImpl with the given tag name and attribute list. */
+    public Element createElement(String tagName, AttributeList attributes) {
+        Element elm = new ElementImpl(tagName, attributes);
+        return elm;
+    }
+
+    /** Returns a TextImpl of type Node.TEXT holding <code>data</code>. */
+    public Text createTextNode(String data) {
+        Text text = new TextImpl(Node.TEXT, data);
+        return text;
+    }
+
+    /** Returns a TextImpl of type Node.COMMENT holding <code>data</code>. */
+    public Comment createComment(String data) {
+        Comment comment = new TextImpl(Node.COMMENT, data);
+        return comment;
+    }
+
+    /** Returns a TextImpl of type Node.PI holding <code>data</code>, with its name set to <code>name</code>. */
+    public PI createPI(String name, String data) {
+        PI instruction = new TextImpl(Node.PI, data);
+        instruction.setName(name);
+        return instruction;
+    }
+
+    /** Returns a new attribute built as AttrImpl(name, value, true). */
+    public Attribute createAttribute(String name, Node value) {
+        Attribute attribute = new AttrImpl(name, value, true);
+        return attribute;
+    }
+
+    /** Returns a new AttrListImpl. */
+    public AttributeList createAttributeList() {
+        AttributeList list = new AttrListImpl();
+        return list;
+    }
+
+    /** Not implemented: always throws a NullPointerException. */
+    public NodeIterator getElementsByTagName() {
+        throw new NullPointerException("NOT IMPLEMENTED");
+    }
+
+    /** Always the fixed label "ROOT"; the children are not rendered. */
+    public String toString() {
+        return "ROOT";
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/ElementImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/ElementImpl.java
new file mode 100644
index 00000000000..98b011cee9f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/ElementImpl.java
@@ -0,0 +1,55 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * An element node: a tag name plus an attribute list.
+ * @author  Anders Kristensen
+ */
+public class ElementImpl extends NodeImpl implements Element {
+    protected String tagName;
+    protected AttributeList attrs;  // parser is said to always supply one; null is still coped with
+
+    /** Creates a node of type Node.ELEMENT; both arguments are stored as passed. */
+    public ElementImpl(String tagName, AttributeList attributes) {
+        super(Node.ELEMENT);
+        this.attrs = attributes;
+        this.tagName = tagName;
+    }
+
+    /** Returns the tag name given at construction. */
+    public String getTagName() { return this.tagName; }
+    /** Returns the attribute list itself, not a copy. */
+    public AttributeList attributes() { return this.attrs; }
+
+    /** Stores newAttr in the attribute list, creating the list first if there is none. */
+    public void setAttribute(Attribute newAttr) {
+        if (this.attrs == null) this.attrs = new AttrListImpl();
+        this.attrs.setAttribute(newAttr);
+    }
+
+    public void normalize() {}  // no-op
+
+    /** Not implemented: always throws an IllegalArgumentException. */
+    public NodeIterator getElementsByTagName() {
+        throw new IllegalArgumentException(
+            "Why wasn't this method defined by the DOM WG to take an arg???");
+    }
+
+    /** Renders the start tag; it ends in " />" when there are no children and in ">" otherwise. */
+    public String toString() {
+        boolean childless = (children == null) || (children.getLength() == 0);
+        String attrText = (attrs == null) ? "{}" : attrs.toString();
+        return "<" + tagName + " " + attrText + (childless ? " />" : ">");
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/EntityManager.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/EntityManager.java
new file mode 100644
index 00000000000..c88b0e27c0a
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/EntityManager.java
@@ -0,0 +1,135 @@
+/*
+ * $Id$
+ *
+ * Copyright 1997 Hewlett-Packard Company
+ *
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+import java.util.Hashtable;
+import java.io.*;
+
+/**
+ * A very simple entity manager.
+ * @author  Anders Kristensen
+ */
+public class EntityManager {
+    protected Hashtable entities = new Hashtable();
+    private hplb.org.xml.sax.Parser tok;
+
+    /** Creates a manager that knows the five predefined XML entities. */
+    public EntityManager(hplb.org.xml.sax.Parser tok) {
+        this.tok = tok;
+        entities.put("amp",   "&");
+        entities.put("lt",    "<");
+        entities.put("gt",    ">");
+        entities.put("apos",  "'");
+        entities.put("quot", "\"");
+    }
+
+    /**
+     * Finds entity and character references in the provided buffer and
+     * decodes them. A buffer without any '&amp;' is returned as is; otherwise
+     * the decoded text comes back in a new CharBuffer. References that are
+     * unterminated or cannot be decoded are copied through verbatim.
+     */
+    public final CharBuffer entityDecode(CharBuffer buffer) throws Exception {
+        char[] buf = buffer.getCharArray();  // avoids method calls
+        int len = buffer.size();
+
+        // not fastest but certainly simplest:
+        if (indexOf(buf, '&', 0, len) == -1) return buffer;
+        CharBuffer newbuf = new CharBuffer(len);
+
+        for (int start = 0; ; ) {
+            int x = indexOf(buf, '&', start, len);
+            if (x == -1) {
+                newbuf.write(buf, start, len - start);
+                return newbuf;
+            } else {
+                newbuf.write(buf, start, x - start);
+                start = x+1;
+                x = indexOf(buf, ';', start, len);
+                if (x == -1) {
+                    //tok.warning("Entity reference not semicolon terminated");
+                    newbuf.write('&');
+                } else {
+                    try {
+                        writeEntityDef(buf, start, x-start, newbuf);
+                        start = x+1;
+                    } catch (Exception ex) {
+                        //tok.warning("Bad entity reference");
+                        // keep the '&' so the bad reference survives as plain text
+                        newbuf.write('&');
+                    }
+                }
+            }
+        }
+    }
+
+    // Writes the replacement for the reference text buf[off .. off+len-1]
+    // (the part between '&' and ';'); unknown names are written back as is.
+    // Char refs are rare enough that creating a String for them is fine.
+    public void writeEntityDef(char[] buf, int off, int len, Writer out)
+        throws Exception, IOException, NumberFormatException {
+        Integer ch;
+        if (buf[off] == '#') {  // character reference
+            off++;
+            len--;
+            if (buf[off] == 'x' || buf[off] == 'X') {
+                ch = Integer.valueOf(new String(buf, off+1, len-1), 16);
+            } else {
+                ch = Integer.valueOf(new String(buf, off, len));
+            }
+            out.write(ch.intValue());  // NB: Writer.write(int) keeps only the low 16 bits
+        } else {
+            String ent = new String(buf, off, len);
+            String val = (String) entities.get(ent);
+            if (val != null) {
+                out.write(val);
+            } else {
+                out.write("&" + ent + ";");
+                //tok.warning("unknown entity reference: " + ent);
+            }
+        }
+    }
+
+    /** Defines (or redefines) an entity; returns its previous value, or null. */
+    public String defTextEntity(String entity, String value) {
+        return (String) entities.put(entity, value);
+    }
+
+    /**
+     * Returns the index of the first occurrence of <code>ch</code> in
+     * <code>buf</code> at or after <code>from</code> and before
+     * <code>to</code>, or -1 if there is none.
+     * @param buf   the buffer to search
+     * @param ch    the character to search for
+     * @param from  the index to start the search from
+     * @param to    the highest possible index returned plus 1
+     * @throws IndexOutOfBoundsException  if the searched range reaches outside buf
+     */
+    public static final int indexOf(char[] buf, int ch, int from, int to) {
+        for (int i = from; i < to; i++) {
+            if (buf[i] == ch) return i;
+        }
+        return -1;
+    }
+
+    // FOR TESTING
+    /*
+    public static void main(String[] args) throws Exception {
+        Parser tok = new Parser();
+        tst.xml.TokArgs.args(args, tok);
+        CharBuffer buf1 = new CharBuffer();
+        buf1.write(args[0]);
+        CharBuffer buf2 = tok.entMngr.entityDecode(buf1);
+
+        System.out.println("Changed: " + (buf1 != buf2));
+        System.out.println("Result: [" + buf2 + "]");
+    }
+    */
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/HTML.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/HTML.java
new file mode 100644
index 00000000000..7884315466a
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/HTML.java
@@ -0,0 +1,281 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+/**
+ * Parser customizations for correctly parsing HTML.
+ * Defines a set of empty elements (&lt;hr&gt;, &lt;br&gt;, etc.)
+ * and for some elements it defines which other start tags
+ * implicitly end them. As an example, an &lt;li&gt; element within
+ * a &lt;ul&gt; list is terminated by either a &lt;/ul&gt; end tag
+ * or another &lt;li&gt; start tag.
+ *
+ * @author  Anders Kristensen
+ */
+public class HTML {
+    public static String A          = a("a");//
+    public static String ACRONYM    = a("acronym");//
+    public static String ADDRESS    = a("address");
+    public static String APPLET     = a("applet");//
+    public static String AREA       = a("area");
+    public static String B          = a("b");//
+    public static String BASE       = a("base");
+    public static String BASEFONT   = a("basefont");//
+    public static String BDO        = a("bdo");//
+    public static String BIG        = a("big");//
+    public static String BLOCKQUOTE = a("blockquote");
+    public static String BODY       = a("body");//
+    public static String BR         = a("br");
+    public static String BUTTON     = a("button");//
+    public static String CAPTION    = a("caption");//
+    public static String CENTER     = a("center");
+    public static String CITE       = a("cite");//
+    public static String CODE       = a("code");//
+    public static String COL        = a("col");
+    public static String COLGROUP   = a("colgroup");//
+    public static String DD         = a("dd");
+    public static String DEL        = a("del");//
+    public static String DFN        = a("dfn");//
+    public static String DIR        = a("dir");
+    public static String DIV        = a("div");
+    public static String DL         = a("dl");
+    public static String DT         = a("dt");
+    public static String EM         = a("em");//
+    public static String FIELDSET   = a("fieldset");
+    public static String FONT       = a("font");//
+    public static String FORM       = a("form");
+    public static String FRAME      = a("frame");
+    public static String FRAMESET   = a("frameset");//
+    public static String H1         = a("h1");
+    public static String H2         = a("h2");
+    public static String H3         = a("h3");
+    public static String H4         = a("h4");
+    public static String H5         = a("h5");
+    public static String H6         = a("h6");
+    public static String HEAD       = a("head");
+    public static String HR         = a("hr");
+    public static String HTML       = a("html");
+    public static String I          = a("i");//
+    public static String IFRAME     = a("iframe");//
+    public static String IMG        = a("img");
+    public static String INPUT      = a("input");
+    public static String INS        = a("ins");//
+    public static String ISINDEX    = a("isindex");//
+    public static String KBD        = a("kbd");//
+    public static String LABEL      = a("label");//
+    public static String LEGEND     = a("legend");//
+    public static String LI         = a("li");
+    public static String LINK       = a("link");
+    public static String MAP        = a("map");//
+    public static String MENU       = a("menu");
+    public static String META       = a("meta");
+    public static String NOFRAMES   = a("noframes");//
+    public static String NOSCRIPT   = a("noscript");
+    public static String OBJECT     = a("object");//
+    public static String OL         = a("ol");
+    public static String OPTION     = a("option");//
+    public static String P          = a("p");
+    public static String PARAM      = a("param");
+    public static String PRE        = a("pre");
+    public static String Q          = a("q");//
+    public static String S          = a("s");//
+    public static String SAMP       = a("samp");//
+    public static String SCRIPT     = a("script");//
+    public static String SELECT     = a("select");//
+    public static String SMALL      = a("small");//
+    public static String SPAN       = a("span");//
+    public static String STRIKE     = a("strike");//
+    public static String STRONG     = a("strong");//
+    public static String STYLE      = a("style");//
+    public static String SUB        = a("sub");//
+    public static String SUP        = a("sup");//
+    public static String TABLE      = a("table");
+    public static String TBODY      = a("tbody");//
+    public static String TD         = a("td");//
+    public static String TEXTAREA   = a("textarea");//
+    public static String TFOOT      = a("tfoot");//
+    public static String TH         = a("th");//
+    public static String THEAD      = a("thead");//
+    public static String TITLE      = a("title");//
+    public static String TR         = a("tr");
+    public static String TT         = a("tt");//
+    public static String U          = a("u");//
+    public static String UL         = a("ul");
+    public static String VAR        = a("var");//
+    // Shorthand for Atom.getAtom(s); every name constant above is created through it.
+    private static String a(String s) { return Atom.getAtom(s); }
+    
+    /** The full set of HTML4.0 element names. */
+    public static final String[] elements = {
+        A, ACRONYM, ADDRESS, APPLET, AREA , B, BASE, BASEFONT, BDO, BIG,
+        BLOCKQUOTE, BODY, BR, BUTTON, CAPTION, CENTER, CITE, CODE, COL,
+        COLGROUP, DD, DEL, DFN, DIR, DIV, DL, DT, EM, FIELDSET, FONT, FORM,
+        FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME,
+        IMG, INPUT, INS, ISINDEX, KBD, LABEL, LEGEND, LI, LINK, MAP, MENU,
+        META, NOFRAMES, NOSCRIPT, OBJECT, OL, OPTION, P, PARAM, PRE, Q, S,
+        SAMP, SCRIPT, SELECT, SMALL, SPAN, STRIKE, STRONG, STYLE, SUB, SUP,
+        TABLE, TBODY, TD, TEXTAREA, TFOOT, TH, THEAD, TITLE, TR, TT, U, UL, VAR
+    };
+    
+    // FIXME: the parser kindof supports optional end tags but not
+    //        at all optional start tags (eg <html>, <head>)
+    // FIXME: add support for HTML entities not in HTML (lots of those)
+
+    // FIXME: this list probably not complete!!!
+    /** Empty elements in HTML4.0: <em>br</em>, <em>img</em>, etc. */
+    public static final String[] emptyElms = {
+        AREA, BASE, BR, COL, FRAME, HR, IMG, LINK, META, PARAM };
+
+    public static final String[] li_terminators = { LI };
+    public static final String[] dt_terminators = { DT, DD };
+    public static final String[] dd_terminators = dt_terminators;
+    // <head> terminators: <body> and just about everything else
+
+    /** Block-level HTML4.0 elements. */
+    public static final String[] block_level = {
+        ADDRESS, BLOCKQUOTE, CENTER, DIR, DIV, DL, FIELDSET, FORM,
+        H1, H2, H3, H4, H5, H6, HR, MENU, NOSCRIPT, OL, P, PRE, TABLE, UL };
+
+    // The P element can contain any *inline* markup - hence it is
+    // terminated by any *blocklevel* markup (incl. other P elements):
+    public static final String[] p_terminators = block_level;
+
+    // elements which cannot contain PCDATA don't care about whitespace
+    // FIXME: ignore_ws probably not complete  [don't include empty elements]
+    public static final String[] ignore_ws = {
+        HEAD, HTML, OL, MENU, TABLE, TR , UL };
+    /** Passes every element name to tok.ignoreCase and defines the HTML text entities on tok.entMngr. */
+    public static void applyHacks(Tokenizer tok) {
+        for (int i = 0; i < elements.length; i++) {
+            tok.ignoreCase(elements[i]);
+        }
+        
+        EntityManager entMngr = tok.entMngr;
+        
+        // standard SGML entities (the EntityManager constructor defines the same four)
+        entMngr.defTextEntity("amp", "&");    // ampersand
+        entMngr.defTextEntity("gt", ">");     // greater than
+        entMngr.defTextEntity("lt", "<");     // less than
+        entMngr.defTextEntity("quot", "\"");  // double quote
+
+        // PUBLIC ISO 8879-1986//    entities Added Latin 1//EN//HTML
+        entMngr.defTextEntity("AElig",  "\u00c6");   // capital AE diphthong (ligature)
+        entMngr.defTextEntity("Aacute", "\u00c1");  // capital A, acute accent
+        entMngr.defTextEntity("Acirc",  "\u00c2");   // capital A, circumflex accent
+        entMngr.defTextEntity("Agrave", "\u00c0");  // capital A, grave accent
+        entMngr.defTextEntity("Aring",  "\u00c5");   // capital A, ring
+        entMngr.defTextEntity("Atilde", "\u00c3");  // capital A, tilde
+        entMngr.defTextEntity("Auml",   "\u00c4");    // capital A, dieresis or umlaut mark
+        entMngr.defTextEntity("Ccedil", "\u00c7");  // capital C, cedilla
+        entMngr.defTextEntity("ETH",    "\u00d0");     // capital Eth, Icelandic
+        entMngr.defTextEntity("Eacute", "\u00c9");  // capital E, acute accent
+        entMngr.defTextEntity("Ecirc",  "\u00ca");   // capital E, circumflex accent
+        entMngr.defTextEntity("Egrave", "\u00c8");  // capital E, grave accent
+        entMngr.defTextEntity("Euml",   "\u00cb");    // capital E, dieresis or umlaut mark
+        entMngr.defTextEntity("Iacute", "\u00cd");  // capital I, acute accent
+        entMngr.defTextEntity("Icirc",  "\u00ce");   // capital I, circumflex accent
+        entMngr.defTextEntity("Igrave", "\u00cc");  // capital I, grave accent
+        entMngr.defTextEntity("Iuml",   "\u00cf");    // capital I, dieresis or umlaut mark
+        entMngr.defTextEntity("Ntilde", "\u00d1");  // capital N, tilde
+        entMngr.defTextEntity("Oacute", "\u00d3");  // capital O, acute accent
+        entMngr.defTextEntity("Ocirc",  "\u00d4");   // capital O, circumflex accent
+        entMngr.defTextEntity("Ograve", "\u00d2");  // capital O, grave accent
+        entMngr.defTextEntity("Oslash", "\u00d8");  // capital O, slash
+        entMngr.defTextEntity("Otilde", "\u00d5");  // capital O, tilde
+        entMngr.defTextEntity("Ouml",   "\u00d6");    // capital O, dieresis or umlaut mark
+        entMngr.defTextEntity("THORN",  "\u00de");   // capital THORN, Icelandic
+        entMngr.defTextEntity("Uacute", "\u00da");  // capital U, acute accent
+        entMngr.defTextEntity("Ucirc",  "\u00db");   // capital U, circumflex accent
+        entMngr.defTextEntity("Ugrave", "\u00d9");  // capital U, grave accent
+        entMngr.defTextEntity("Uuml",   "\u00dc");    // capital U, dieresis or umlaut mark
+        entMngr.defTextEntity("Yacute", "\u00dd");  // capital Y, acute accent
+        entMngr.defTextEntity("aacute", "\u00e1");  // small a, acute accent
+        entMngr.defTextEntity("acirc",  "\u00e2");   // small a, circumflex accent
+        entMngr.defTextEntity("aelig",  "\u00e6");   // small ae diphthong (ligature)
+        entMngr.defTextEntity("agrave", "\u00e0");  // small a, grave accent
+        entMngr.defTextEntity("aring",  "\u00e5");   // small a, ring
+        entMngr.defTextEntity("atilde", "\u00e3");  // small a, tilde
+        entMngr.defTextEntity("auml",   "\u00e4");    // small a, dieresis or umlaut mark
+        entMngr.defTextEntity("ccedil", "\u00e7");  // small c, cedilla
+        entMngr.defTextEntity("eacute", "\u00e9");  // small e, acute accent
+        entMngr.defTextEntity("ecirc",  "\u00ea");   // small e, circumflex accent
+        entMngr.defTextEntity("egrave", "\u00e8");  // small e, grave accent
+        entMngr.defTextEntity("eth",    "\u00f0");     // small eth, Icelandic
+        entMngr.defTextEntity("euml",   "\u00eb");    // small e, dieresis or umlaut mark
+        entMngr.defTextEntity("iacute", "\u00ed");  // small i, acute accent
+        entMngr.defTextEntity("icirc",  "\u00ee");   // small i, circumflex accent
+        entMngr.defTextEntity("igrave", "\u00ec");  // small i, grave accent
+        entMngr.defTextEntity("iuml",   "\u00ef");    // small i, dieresis or umlaut mark
+        entMngr.defTextEntity("ntilde", "\u00f1");  // small n, tilde
+        entMngr.defTextEntity("oacute", "\u00f3");  // small o, acute accent
+        entMngr.defTextEntity("ocirc",  "\u00f4");   // small o, circumflex accent
+        entMngr.defTextEntity("ograve", "\u00f2");  // small o, grave accent
+        entMngr.defTextEntity("oslash", "\u00f8");  // small o, slash
+        entMngr.defTextEntity("otilde", "\u00f5");  // small o, tilde
+        entMngr.defTextEntity("ouml",   "\u00f6");    // small o, dieresis or umlaut mark
+        entMngr.defTextEntity("szlig",  "\u00df");   // small sharp s, German (sz ligature)
+        entMngr.defTextEntity("thorn",  "\u00fe");   // small thorn, Icelandic
+        entMngr.defTextEntity("uacute", "\u00fa");  // small u, acute accent
+        entMngr.defTextEntity("ucirc",  "\u00fb");   // small u, circumflex accent
+        entMngr.defTextEntity("ugrave", "\u00f9");  // small u, grave accent
+        entMngr.defTextEntity("uuml",   "\u00fc");    // small u, dieresis or umlaut mark
+        entMngr.defTextEntity("yacute", "\u00fd");  // small y, acute accent
+        entMngr.defTextEntity("yuml",   "\u00ff");    // small y, dieresis or umlaut mark
+
+        // Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
+        entMngr.defTextEntity("nbsp",   "\u00a0");  // non breaking space
+        entMngr.defTextEntity("reg",    "\u00ae");   // registered sign
+        entMngr.defTextEntity("copy",   "\u00a9");  // copyright sign
+
+        // Additional ISO-8859/1     entities listed in rfc1866 (section 14)
+        entMngr.defTextEntity("iexcl",  "\u00a1");
+        entMngr.defTextEntity("cent",   "\u00a2");
+        entMngr.defTextEntity("pound",  "\u00a3");
+        entMngr.defTextEntity("curren", "\u00a4");
+        entMngr.defTextEntity("yen",    "\u00a5");
+        entMngr.defTextEntity("brvbar", "\u00a6");
+        entMngr.defTextEntity("sect",   "\u00a7");
+        entMngr.defTextEntity("uml",    "\u00a8");
+        entMngr.defTextEntity("ordf",   "\u00aa");
+        entMngr.defTextEntity("laquo",  "\u00ab");
+        entMngr.defTextEntity("not",    "\u00ac");
+        entMngr.defTextEntity("shy",    "\u00ad");  // soft hyphen
+        entMngr.defTextEntity("macr",   "\u00af");
+        entMngr.defTextEntity("deg",    "\u00b0");
+        entMngr.defTextEntity("plusmn", "\u00b1");
+        entMngr.defTextEntity("sup1",   "\u00b9");
+        entMngr.defTextEntity("sup2",   "\u00b2");
+        entMngr.defTextEntity("sup3",   "\u00b3");
+        entMngr.defTextEntity("acute",  "\u00b4");
+        entMngr.defTextEntity("micro",  "\u00b5");
+        entMngr.defTextEntity("para",   "\u00b6");
+        entMngr.defTextEntity("middot", "\u00b7");
+        entMngr.defTextEntity("cedil",  "\u00b8");
+        entMngr.defTextEntity("ordm",   "\u00ba");
+        entMngr.defTextEntity("raquo",  "\u00bb");
+        entMngr.defTextEntity("frac14", "\u00bc");
+        entMngr.defTextEntity("frac12", "\u00bd");
+        entMngr.defTextEntity("frac34", "\u00be");
+        entMngr.defTextEntity("iquest", "\u00bf");
+        entMngr.defTextEntity("times",  "\u00d7");
+        entMngr.defTextEntity("divide", "\u00f7");
+    }
+    /** Registers the empty elements and the li/dt/dd/p terminators with parser, then applies the tokenizer hacks. */
+    public static void applyHacks(Parser parser) {
+        parser.addEmptyElms(emptyElms);
+        parser.setElmTerminators(LI, li_terminators);
+        parser.setElmTerminators(DT, dt_terminators);
+        parser.setElmTerminators(DD, dd_terminators);
+        parser.setElmTerminators(P, p_terminators);
+        //parser.ignoreWS(ignore_ws);
+        applyHacks(parser.getTokenizer());
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/HtmlXmlParser.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/HtmlXmlParser.java
new file mode 100644
index 00000000000..d2563367eb5
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/HtmlXmlParser.java
@@ -0,0 +1,34 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+/**
+ * The HtmlXmlParser is a Parser with some HTML specific <i>hacks</i>
+ * applied to it, which means it will more or less correctly parse most
+ * HTML pages, even when they contain arbitrary embedded XML markup.
+ * It is very forgiving, as is commonly the case with HTML parsers.
+ *
+ * @author  Anders Kristensen
+ */
+public class HtmlXmlParser extends Parser {
+    /** Builds a Parser, runs HTML.applyHacks on it and switches tok.rcgnzCDATA off. */
+    public HtmlXmlParser() {
+        HTML.applyHacks(this);
+        tok.rcgnzCDATA = false;
+    }
+
+    /** For debugging: parses standard input and passes the document to Utils.pp with System.out. */
+    public static void main(String[] args) throws Exception {
+        hplb.org.w3c.dom.Document doc = new HtmlXmlParser().parse(System.in);
+        Utils.pp(doc, System.out);
+    }
+}
+
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/NodeImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/NodeImpl.java
new file mode 100644
index 00000000000..60ea04ad890
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/NodeImpl.java
@@ -0,0 +1,88 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * Base of the node implementations: node type, parent link and child list.
+ * @author  Anders Kristensen
+ */
+public abstract class NodeImpl implements Node {
+    protected int type;
+    protected NodeImpl parent;
+    protected NodeListImpl children = new NodeListImpl();
+
+    /** Creates a node of the given type with no parent and a fresh child list. */
+    public NodeImpl(int type) {
+        this.type = type;
+    }
+
+    /** Returns the child list itself, not a copy. */
+    public NodeListImpl getChildren() { return this.children; }
+
+    /** Returns the type code given to the constructor. */
+    public int getNodeType() { return this.type; }
+
+    /** Returns the parent; null while this node is not attached to one. */
+    public Node getParentNode() { return this.parent; }
+
+    /** Returns an iterator obtained from the child list. */
+    public NodeIterator getChildNodes() { return this.children.getIterator(); }
+
+    /** True when the child list holds at least one node. */
+    public boolean hasChildNodes() { return this.children.getLength() > 0; }
+
+    /** Returns whatever the child list holds at index 0. */
+    public Node getFirstChild() { return this.children.item(0); }
+
+    /** Returns the sibling before this node; null when there is no parent. */
+    public Node getPreviousSibling() {
+        return (parent == null) ? null : parent.children.getPreviousNode(this);
+    }
+
+    /** Returns the sibling after this node; null when there is no parent. */
+    public Node getNextSibling() {
+        return (parent == null) ? null : parent.children.getNextNode(this);
+    }
+
+    /** Inserts newChild before refChild; a non-null result from the list makes this its parent. */
+    public Node insertBefore(Node newChild, Node refChild) {
+        NodeImpl inserted = (NodeImpl) children.insertBefore(newChild, refChild);
+        if (inserted == null) return null;
+        ((NodeImpl) newChild).parent = this;
+        return inserted;
+    }
+
+    /** Inserts newChild after refChild; a non-null result from the list makes this its parent. */
+    public Node insertAfter(Node newChild, Node refChild) {
+        NodeImpl inserted = (NodeImpl) children.insertAfter(newChild, refChild);
+        if (inserted == null) return null;
+        ((NodeImpl) newChild).parent = this;
+        return inserted;
+    }
+
+    /** Replaces oldChild by newChild; a returned node is detached and newChild attached here. */
+    public Node replaceChild(Node newChild, Node oldChild) {
+        NodeImpl replaced = (NodeImpl) children.replace(newChild, oldChild);
+        if (replaced == null) return null;
+        replaced.parent = null;
+        ((NodeImpl) newChild).parent = this;
+        return replaced;
+    }
+
+    /** Removes oldChild; the node returned by the child list is detached. */
+    public Node removeChild(Node oldChild) {
+        NodeImpl removed = (NodeImpl) children.remove(oldChild);
+        if (removed != null) removed.parent = null;
+        return removed;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/NodeListImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/NodeListImpl.java
new file mode 100644
index 00000000000..92271b549df
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/NodeListImpl.java
@@ -0,0 +1,223 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * 
+ * @author  Anders Kristensen
+ */
+public class NodeListImpl {
+    protected Node[] elms;
+    protected int count = 0;
+    
+    public NodeListImpl() {
+        this(5);    // default initial capacity; add() and insert() double the array when it is full
+    }
+    
+    public NodeListImpl(int size) {
+        // growth works by doubling the array length, so the capacity must start at 1 or more
+        if (size < 1) throw new IllegalArgumentException("Initial size of must be at least 1");
+        elms = new Node[size];
+    }
+    
+    public synchronized void add(Node node) {
+        if (count == elms.length) {
+            // full: move the existing entries into an array twice as long
+            Node[] bigger = new Node[elms.length * 2];
+            System.arraycopy(elms, 0, bigger, 0, elms.length);
+            elms = bigger;
+        }
+        elms[count++] = node;
+    }
+    
+    /**
+     * Replaces the node at index and returns the node previously stored there.
+     * @throws IndexOutOfBoundsException unless 0 <= index < count
+     */
+    public synchronized Node replace(int index, Node replaceNode) {
+        // index == count is rejected as well: replace never appends
+        if (index < 0 || index >= count)
+            throw new IndexOutOfBoundsException(""+index);
+        Node previous = elms[index];
+        elms[index] = replaceNode;
+        return previous;
+    }
+    
+    // XXX: TEST THIS METHOD!!!
+    /**
+     * Inserts newNode at index, shifting later nodes up by one.
+     * @return the node previously at index (now at index+1); null when appending
+     * @throws IndexOutOfBoundsException unless 0 <= index <= count
+     */
+    public synchronized Node insert(int index, Node newNode) {
+        if (index < 0 || index > count)
+            throw new IndexOutOfBoundsException(""+index);
+        if (index == count) {
+            add(newNode);
+            return null;
+        }
+        // Read the displaced node first: after growing, slot index of the new array is empty.
+        Node res = elms[index];
+        Node[] dest = (elms.length == count) ? new Node[elms.length * 2] : elms;
+        if (dest != elms) System.arraycopy(elms, 0, dest, 0, index);
+        System.arraycopy(elms, index, dest, index+1, count-index);
+        elms = dest;
+        elms[index] = newNode;
+        count++;
+        return res;
+    }
+    
+    public NodeIterator getIterator() {
+        return new NodeIteratorImpl(this);  // new cursor over this live list, positioned before the first node
+    }
+    
+    /** Removes and returns the node at index, shifting later nodes down by one. */
+    public synchronized Node remove(int index) {
+        if (index < 0 || index >= count)
+            throw new IndexOutOfBoundsException(""+index);
+        Node node = elms[index];
+        System.arraycopy(elms, index+1, elms, index, count-index-1);
+        elms[--count] = null;   // clear the vacated last slot so it no longer pins a node in memory
+        return node;
+    }
+    
+    public synchronized Node item(int index) {
+        if (index < 0 || index >= count) {
+            return null;
+        }
+        return elms[index];
+    }
+    
+    /** Returns the number of nodes currently held in this list. */
+    public synchronized int getLength() {
+        return count;
+    }
+    
+    public Node getPreviousNode(Node node) {
+        for (int i = 1; i < count; i++) {
+            if (elms[i] == node) return elms[i-1];
+        }
+        return null;
+    }
+    
+    public Node getNextNode(Node node) {
+        for (int i = 0; i < count-1; i++) {
+            if (elms[i] == node) return elms[i+1];
+        }
+        return null;
+    }
+    
+    public Node insertBefore(Node node, Node ref) {
+        int idx = index(ref);
+        if (idx > -1) insert(idx, node);
+        else add(node);
+        return node;
+    }
+    
+    public Node insertAfter(Node node, Node ref) {
+        int idx = index(ref);
+        if (idx > -1) insert(idx+1, node);
+        else add(node);
+        return node;
+    }
+    
+    public Node replace(Node node, Node ref) {
+        return replace(index(ref), node);   // index(ref) is -1 when ref is absent, which replace(int, Node) rejects with IndexOutOfBoundsException
+    }
+    
+    public Node remove(Node node) {
+        int at = index(node);
+        return (at < 0) ? null : remove(at);   // null when node is not in the list
+    }
+    
+    public int index(Node node) {
+        for (int i = 0; i < count; i++) {
+            if (elms[i] == node) return i;
+        }
+        return -1;
+    }
+    
+    public synchronized String toString() {
+        StringBuffer sb = new StringBuffer();
+        boolean f = true;
+        int count = getLength();
+        
+        sb.append("{ ");
+        for (int i = 0; i < count; i++) {
+            if (f) { f = false; }
+            else { sb.append(", "); }
+            sb.append(item(i).toString());
+        }
+        sb.append(" }");
+        return sb.toString();
+    }
+}
+
+
+// FIXME: doesn't work properly when list changed underneath iterator
+// proper thing would be to use observer pattern on current element--if
+// this is removed we get callback and reposition the cursor... THISISAHACK!
+// FIXME synchronize on the list itself.
+class NodeIteratorImpl implements NodeIterator {
+    NodeListImpl nlist;   // the list being traversed; read live, never copied
+    int index;            // cursor: -1 = before the first node, nlist.count = past the last
+    
+    /**
+     * Create iterator over the specified NodeList. The initial position
+     * will be one *before* the first element. Calling toNext() will
+     * position the iterator at the first element.
+     */
+    public NodeIteratorImpl(NodeListImpl list) {
+        nlist = list;
+        index = -1;
+    }
+    
+    public int getLength() {
+        return nlist.getLength();
+    }
+    
+    public Node getCurrent() {   // null whenever the cursor is outside the list
+        return (index >= 0 && index < nlist.count) ? nlist.item(index) : null;
+    }
+    
+    public Node toNext() {       // advances at most one step past the last node
+        if (index < nlist.count) index++;
+        return getCurrent();
+    }
+    
+    public Node toPrevious() {   // retreats at most one step before the first node
+        if (index >= 0) index--;
+        return getCurrent();
+    }
+    
+    public Node toFirst() {
+        index = 0;
+        return getCurrent();
+    }
+    
+    public Node toLast() {
+        index = nlist.count - 1;   // last valid slot (count itself is past the end); -1 for an empty list
+        return getCurrent();
+    }
+    
+    public Node toNth(int Nth) {
+        index = Nth;               // not range-checked: an out-of-range Nth makes getCurrent() return null
+        return getCurrent();
+    }
+    
+    // FIXME: multi-threading problems here... (race condition)
+    public Node toNode(Node destNode) {
+        int idx = nlist.index(destNode);
+        return (idx >= 0 ? toNth(idx) : null);   // cursor stays where it was when destNode is not in the list
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Parser.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Parser.java
new file mode 100644
index 00000000000..aa76e03f3c6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Parser.java
@@ -0,0 +1,208 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+package hplb.xml;
+
+import hplb.org.xml.sax.*;
+import hplb.org.w3c.dom.*;
+import java.util.*;
+import java.io.*;
+
+/**
+ * Parses a stream of MarkupTokens into a tree structure.
+ * Uses Tokenizer.
+ * 
+ * <p>This class has very shallow (no) understanding of HTML. Correct
+ * handling of &lt;p&gt; tags requires some special code as does correct
+ * handling of &lt;li&gt;. This parser doesn't know that an "li" tag can
+ * be terminated by another "li" tag or a "ul" end tag. Hence "li" is
+ * treated as an empty tag here which means that in the generated parse
+ * tree the children of the "li" element are represented as siblings of it.
+ * 
+ * @see Tokenizer
+ * @author  Anders Kristensen
+ */
+public class Parser implements DocumentHandler {
+    // FIXME: add support for discriminate per-element whitespace handling
+    
+    /**
+     * Set of elements which the parser will expect to be empty, i.e. it
+     * will not expect an end tag (e.g. IMG, META HTML elements).
+     * End tags for any of these are ignored...
+     */
+    protected Hashtable emptyElms = new Hashtable();
+    
+    /**
+     * Maps element names to a list of names of other elements which
+     * terminate that element. So for example "dt" might be mapped to
+     * ("dt", "dd") and "p" might be mapped to all blocklevel HTML
+     * elements.
+     */
+    protected Hashtable terminators = new Hashtable();
+    protected Tokenizer tok;
+    protected DOM dom;
+    protected Document root;
+    protected Node current;
+    
+    /**
+     * Non-fatal errors are written to this PrintStream. Fatal errors
+     * are reported as Exceptions.
+     */
+    PrintStream err = System.err;
+    
+    public Parser() {
+        tok = new Tokenizer();
+        tok.setDocumentHandler(this);   // tokenizer events are delivered to this parser's callbacks
+        dom = new DOMImpl();            // default node factory; replaceable through setDOM()
+    }
+    
+    public DOM setDOM(DOM dom) {     // returns the DOM that was installed before this call
+        DOM old = this.dom;          // read the field: the bare name "dom" is the parameter here
+        this.dom = dom;
+        return old;
+    }
+    
+    public Tokenizer getTokenizer() {
+        return tok;     // the tokenizer instance created in the constructor (not a copy)
+    }
+    
+    /**
+     * Add the given element names to the set of tags recognized
+     * as empty tags (see isEmptyElm).
+     */
+    public void addEmptyElms(String[] elms) {
+        for (int i = 0; i < elms.length; i++) {
+            emptyElms.put(elms[i], elms[i]);    // Hashtable used as a set: key and value are the same name
+        }
+    }
+    
+    public void clearEmptyElmSet() {
+        emptyElms.clear();      // afterwards isEmptyElm() is false for every name
+    }
+    
+    public boolean isEmptyElm(String elmName) {
+        return emptyElms.containsKey(elmName);  // names are registered through addEmptyElms()
+    }
+    
+    public void setElmTerminators(String elmName, String[] elmTerms) {
+        terminators.put(elmName, putIds(new Hashtable(), elmTerms));    // replaces any terminator set registered earlier for elmName
+    }
+    
+    public void addTerminator(String elmName, String elmTerm) {
+        Hashtable terms = (Hashtable) terminators.get(elmName);
+        if (terms == null) { terms = new Hashtable(); terminators.put(elmName, terms); }   // first terminator for this element
+        terms.put(elmTerm, elmTerm);
+    }
+    
+    public static final Dictionary putIds(Dictionary dict, String[] sary) {
+        for (int i = 0; i < sary.length; i++) {
+            dict.put(sary[i], sary[i]);
+        }
+        return dict;
+    }
+    
+    protected Document root() {
+        return root;    // the document created by the most recent parse(); null before the first one
+    }
+    
+    public Document parse(InputStream in) throws Exception {
+        root = dom.createDocument(null);    // a fresh document node for every parse
+        current = root;                     // the insertion point starts at the document node
+        tok.parse(in);                      // the tokenizer calls back into this object's DocumentHandler methods
+        return root();
+    }
+   
+    public void startDocument() {}   // no-op: parse() has already created the document node
+    public void endDocument() {}     // no-op
+    
+    // FIXME: record in root DOCUMENT the id's of elements which have one
+    
+    public void doctype(String name, String publicID, String systemID) {
+    }   // empty body: the doctype declaration is not recorded anywhere
+    
+    public void startElement(String name, AttributeMap attributes) {
+        //System.out.println("CURRENT: " + current);
+        // Does this new element implicitly terminate the element we are in?
+        // (The document node is not an element and is never terminated.)
+        if (current != root) {
+            String openTag = ((Element) current).getTagName();
+            Hashtable closers = (openTag == null)
+                                ? null : (Hashtable) terminators.get(openTag);
+            if (closers != null && closers.get(name) != null) {
+                current = current.getParentNode();  // FIXME: could be null
+            }
+        }
+        
+        // FIXME: <hr> gets written as <hr></hr> - the insert below changes
+        // this to <hr/> which is even worse - we should distinguish between
+        // those two types of empty elements.
+        Element elm = root.createElement(name, getDOMAttrs(attributes));
+        current.insertBefore(elm, null);   // null reference node (see NodeListImpl.insertBefore: not found => append)
+        if (isEmptyElm(name)) return;      // empty elements never become the insertion point
+        current = elm;
+    }
+    
+    public void endElement(String name) {
+        // we go up the parse tree till we find the node which matches
+        // this end tag. This mechanism elegantly handles "implicitly
+        // closed" elements such as <li> being terminated by an
+        // enclosing <ul> being ended.
+        
+        //System.out.println("CURRENT: " + current);
+        
+        Node node = current;
+        while (node != root) {
+            if (name.equals(((Element) node).getTagName())) {
+                // matching open element found: continue in its parent
+                current = node.getParentNode();
+                return;
+            }
+            node = node.getParentNode();
+        }
+        
+        // climbed all the way to the document node without a match
+        err.println("Stray end tag ignored: " + name +
+                    " line " + tok.line + " column " + tok.column);
+    }
+    
+    public void characters(char[] ch, int start, int length) {
+        current.insertBefore(   // new text node under the current insertion point; the reference node is null
+            root.createTextNode(new String(ch, start, length)), null);
+    }
+    
+    public void ignorable (char ch[], int start, int length) {
+        System.out.println("Ignorable ws: " + new String(ch, start, length));   // NOTE(review): prints to System.out, not the err stream used for other diagnostics - confirm intent
+    }
+    
+    public void processingInstruction(String target, String remainder) {
+        // FIXME: the DOM says 2nd arg should be everything between "<?" and "?>"
+        current.insertBefore(root.createPI(target, remainder), null);   // PI node goes under the current insertion point
+    }
+    
+    /**
+     * Copies SAX attributes into a DOM attribute list, wrapping each value in a text node.
+     */
+    public AttributeList getDOMAttrs(AttributeMap attrs) {
+        AttributeList result = root.createAttributeList();
+        Enumeration names = attrs.getAttributeNames();
+        while (names.hasMoreElements()) {
+            String attrName = (String) names.nextElement();
+            Node text = root.createTextNode(attrs.getValue(attrName));
+            result.setAttribute(root.createAttribute(attrName, text));
+        }
+        return result;
+    }
+    
+    // for debugging
+    public static void main(String[] args) throws Exception {
+        Parser parser = new Parser();
+        Document doc = parser.parse(System.in);    // the document is read from standard input
+        Utils.pp(doc, System.out);                 // Utils.pp is defined elsewhere; presumably prints the tree - verify
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/SAXAttributeMap.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/SAXAttributeMap.java
new file mode 100644
index 00000000000..69bee4117e4
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/SAXAttributeMap.java
@@ -0,0 +1,229 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.xml.sax.AttributeMap;
+import java.util.Enumeration;
+
+/**
+ * An ordered Dictionary. keys() and elements() returns Enumerations
+ * which enumerate over elements in the order they were inserted.
+ * Elements are stored linearly. Operations put(), get(), and remove()
+ * are linear in the number of elements in the Dictionary.
+ * 
+ * <p>Allows direct access to elements (as an alternative to using
+ * Enumerators) for speed.
+ * 
+ * <p>Can function as a <em>bag</em>, i.e. it can be created with a mode
+ * which allows the same key to map to multiple entries. In this case 
+ * operations get() and remove() operate on the <em>first</em> pair in
+ * the map. Hence to get hold of all values associated with a key it is
+ * necessary to use the direct access to underlying arrays.
+ * 
+ * @author  Anders Kristensen
+ */
+public class SAXAttributeMap implements AttributeMap {
+    
+    /** The list of keys. */
+    public String[] keys;
+    
+    /** List of values associated with keys. */
+    public String[] elms;
+    
+    /**
+     * Number of elements in the Dictionary.
+     * The elements are held at indices 0 to n-1 in the keys and elms arrays.
+     */
+    public int n = 0;
+    
+    public SAXAttributeMap() {
+        this(5);    // default initial capacity; put() doubles the arrays when they are full
+    }
+    
+    /**
+     * Create a SAXAttributeMap with the specified initial capacity.
+     */
+    public SAXAttributeMap(int size) {
+        if (size <= 0) throw new IllegalArgumentException(
+                "Initial size must be at least 1");
+        keys = new String[size];    // keys[i] and elms[i] together form entry i
+        elms = new String[size];
+    }
+    
+    /** Returns the number of keys in this dictionary. */
+    public synchronized int size() {
+        return n;   // n counts the live entries at the front of keys/elms
+    }
+    
+    /** Returns true if this dictionary maps no keys to value. */
+    public synchronized boolean isEmpty() {
+        return size() == 0;     // goes through size(), so an overriding size() is honoured
+    }
+    
+    /**
+     * Returns an enumeration of the keys in this dictionary. 
+     */
+    public Enumeration getAttributeNames() {
+        return new SAXAttributeEnum(keys, n);   // hands over the current array and count; not synchronized, unlike its neighbours
+    }
+
+    /**
+     * Looks up key and returns its value, or null when the key has no entry.
+     */
+    public synchronized String getValue(String key) {
+        int at = getIndex(key);
+        return (at >= 0) ? elms[at] : null;
+    }
+    
+    protected int getIndex(String key) {
+        // linear scan over the live entries; the first equal key wins,
+        // and -1 reports that no entry matched
+        int i = 0;
+        while (i < n && !keys[i].equals(key)) i++;
+        return (i < n) ? i : -1;
+    }
+
+    /**
+     * Maps the specified key to the specified value in this dictionary.
+     * Neither the key nor the value can be null. 
+     * 
+     * <p>The value can be retrieved by calling getValue with a key
+     * that is equal to the original key. 
+     * @return  the previous value to which the key was mapped in
+     *          this dictionary, or null if the key did not have a
+     *          previous mapping.
+     * @throws NullPointerException  if the key or value is null
+     */
+    public synchronized String put(String key, String value) {
+        if (key == null) throw new NullPointerException("key is null");     // a stored null key would break keys[i].equals() in getIndex()
+        if (value == null) throw new NullPointerException("value is null");
+        int i = getIndex(key);
+        if (i >= 0) {
+            String old = elms[i];
+            elms[i] = value;
+            return old;
+        }
+        int len = keys.length;
+        if (len == n) {
+            // double size of key,elms arrays
+            String[] k = new String[len * 2];
+            String[] e = new String[len * 2];
+            System.arraycopy(keys, 0, k, 0, len);
+            System.arraycopy(elms, 0, e, 0, len);
+            keys = k;
+            elms = e;
+        }
+        keys[n] = key;
+        elms[n] = value;
+        n++;
+        return null;
+    }
+  
+  public void clear() {
+    n = 0;    // only the count is reset: old keys/values stay in the arrays until overwritten
+  }
+    
+  public boolean isEntity (String aname) { return false; }          // constant answer: false for every name
+  public boolean isNotation (String aname) { return false; }        // constant answer: false for every name
+  public boolean isId (String aname) { return false; }              // constant answer: false for every name
+  public boolean isIdref (String aname) { return false; }           // constant answer: false for every name
+  public String getEntityPublicID (String aname) { return null; }   // constant answer: null for every name
+  public String getEntitySystemID (String aname) { return null; }   // constant answer: null for every name
+  public String getNotationName (String aname) { return null; }     // constant answer: null for every name
+  public String getNotationPublicID (String aname) { return null; } // constant answer: null for every name
+  public String getNotationSystemID (String aname) { return null; } // constant answer: null for every name
+
+    public synchronized String toString() {
+        StringBuffer sb = new StringBuffer();
+        boolean f = true;
+        
+        sb.append("{ ");
+        for (Enumeration e = getAttributeNames(); e.hasMoreElements(); ) {
+            if (f) { f = false; }
+            else { sb.append(", "); }
+            String key = (String) e.nextElement();
+            sb.append("" + key + '=' + getValue(key));
+        }
+        sb.append(" }");
+        return sb.toString();
+    }
+    
+    /*
+    // for testing
+    public static void main(String[] args) throws Exception {
+        SAXAttributeMap d;
+        java.io.BufferedReader r;
+        java.util.StringTokenizer tok;
+        String op;
+        
+        if (args.length > 1) {
+            d = new SAXAttributeMap(Integer.parseInt(args[0]));
+        } else {
+            d = new SAXAttributeMap();
+        }
+        
+        System.out.println(
+            "Enter operations... op's are one of\n"+
+            "put <key> <val>\n"+
+            "get <key>\n"+
+            "enum\n"+
+            "size\n"+
+            "quit\n");
+        
+        r = new java.io.BufferedReader(
+                new java.io.InputStreamReader(System.in)); 
+        while (true) {
+            System.out.print("doyourworst> ");
+            tok = new java.util.StringTokenizer(r.readLine());
+            op = tok.nextToken();
+            if ("put".equals(op)) {
+                System.out.println("Value: " +
+                                   d.put(tok.nextToken(), tok.nextToken()));
+            } else if ("get".equals(op)) {
+                System.out.println("Value: " + d.getValue(tok.nextToken()));
+            } else if ("enum".equals(op)) {
+                for (Enumeration e = d.getAttributeNames();
+                     e.hasMoreElements(); ) {
+                    System.out.println("" + e.nextElement() + " ");
+                }
+            } else if (op.startsWith("s")) {
+                System.out.println("Size: " + d.size());
+            } else if (op.startsWith("q")) {
+                break;
+            } else {
+                System.out.println("Unrecognized op: " + op);
+            }
+            
+            System.out.println("Dictionary: " + d);
+            System.out.println("Size: " + d.size());
+            System.out.println();
+        }
+    }
+    */
+}
+
+/** Enumerates the first n entries of a String array, in index order. */
+class SAXAttributeEnum implements Enumeration {
+    String[] objs;   // backing array, shared with the creator (not copied)
+    int i = 0, n;    // i: next index to hand out; n: number of valid entries
+    
+    public SAXAttributeEnum(String[] objs, int n) {
+        this.objs = objs;
+        this.n = n;
+    }
+    
+    public boolean hasMoreElements() { return i < n; }
+    
+    public Object nextElement() {
+        if (i >= n) throw new java.util.NoSuchElementException();   // Enumeration contract once exhausted
+        return objs[i++];
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/TextImpl.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/TextImpl.java
new file mode 100644
index 00000000000..9df683f75a0
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/TextImpl.java
@@ -0,0 +1,112 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+// FIXME: check parameters reasonable [within bounds]
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+
+/**
+ * Class whose instances represent PCDATA, comments, and PIs (processing
+ * instructions).
+ * @author  Anders Kristensen
+ */
+public class TextImpl extends NodeImpl implements Text, Comment, PI {
+    protected String data;
+    protected String name; // only valid for PIs
+    
+    /**
+     * Construct new leaf node whose value is textual.
+     * @param type  one of Node.PI, Node.COMMENT, and Node.TEXT.
+     * @param data  the PCDATA, CDATA, comment, whatever
+     */
+    public TextImpl(int type, String data) {
+        super(type);
+        this.data = data;   // name is left null here; it is only assigned through setName()
+    }
+    
+    // getData/setData common for the three interfaces
+    public String getData() {
+        return data;    // returned as stored; null if the node was given null data
+    }
+    public void setData(String arg) {
+        data = arg;     // stored as given; no null check
+    }
+    
+    // Text specific methods:
+    
+    public void append(String data) {
+        this.data += data;   // the parameter shadows the field, hence the explicit "this."
+    }
+    
+    public void insert(int offset, String data) {
+        this.data = this.data.substring(0, offset)
+                    + data
+                    + this.data.substring(offset);
+    }
+    
+    public void delete(int offset, int count) {
+        this.data = this.data.substring(0, offset)
+                    + this.data.substring(offset + count);
+    }
+    
+    public void replace(int offset, int count, String data) {
+        this.data = this.data.substring(0, offset)
+                    + data
+                    + this.data.substring(offset + count);
+    }
+    
+    public void splice(Element element, int offset, int count) {
+        if (offset <= 0) {
+            parent.insertBefore(element, this);
+        } else if (offset+count > data.length()) {
+            parent.insertAfter(element, this);
+        } else {
+            // Move the [offset, offset+count) slice into element, keep the head here,
+            // and place element plus a new tail node after this one: this, element, tail.
+            element.insertBefore(new TextImpl(Node.TEXT, data.substring(offset, offset+count)), null);
+            parent.insertAfter(new TextImpl(Node.TEXT, data.substring(offset+count)), this);
+            parent.insertAfter(element, this);   // attach element right after this node, ahead of the tail
+            data = data.substring(0, offset);
+        }
+    }
+    
+    // PI specific methods:
+    public String getName() {
+        return name;    // null unless setName() has been called
+    }
+    public void setName(String arg) {
+        name = arg;     // stored as given; toString() uses it for PI nodes
+    }
+    
+    protected String typeAsString() {
+        // label for the three node types this class represents;
+        // any other type code is reported as UNKNOWN
+        if (type == Node.PI)      return "PI";
+        if (type == Node.COMMENT) return "COMMENT";
+        if (type == Node.TEXT)    return "TEXT";
+        return "UNKNOWN";
+    }
+    
+    public String toDebugString() {
+        return typeAsString() +     // type label, directly followed (no separator) by the data
+               (data == null ? "" : Utils.compact(data));   // Utils.compact is defined elsewhere; presumably shortens the text - verify
+    }
+    
+    public String toString() {
+        // markup form of the node, chosen by node type
+        // (TEXT data is returned verbatim, without any escaping)
+        if (type == Node.PI)      return "<?" + name + " " + data + "?>";
+        if (type == Node.COMMENT) return "<!--" + data + "-->";
+        if (type == Node.TEXT)    return data;
+        return "UNKNOWN";
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Tokenizer.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Tokenizer.java
new file mode 100644
index 00000000000..9f77289b04f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Tokenizer.java
@@ -0,0 +1,690 @@
+/*
+ * $Id$
+ *
+ * Copyright 1997 Hewlett-Packard Company
+ *
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+/*
+ * FIXME:
+ *   - use java.io.Reader and Unicode chars...
+ *   - recognize PIs and CDATA
+ *   - recognize PEs and CEs (optionally)
+ *   - Do NOT map element and attr names to lower (or upper) case
+ */
+
+package hplb.xml;
+
+import hplb.org.xml.sax.*;
+import java.util.Dictionary;
+import java.util.Hashtable;
+import java.io.*;
+import hplb.misc.ByteArray;
+import java.net.URL;
+
+/**
+ * This is a hand-written lexical analyzer for XML/HTML Markup.
+ * The parser is simple, fast and quite robust.
+ * Element and attribute names are mapped to lower case.
+ * Comments are returned as (part of) PCDATA tokens.
+ * Markup elements within comments are not recognized as markup.
+ *
+ * @author      Anders Kristensen
+ */
+public class Tokenizer implements hplb.org.xml.sax.Parser {
+
+    /** The value of boolean attributes is this string. */
+    public static final String BOOLATTR = Atom.getAtom("BOOLATTR");
+
+    // FSM states:
+    static final int ST_START           = 1;
+    static final int ST_TAG_LT          = 3;
+    static final int ST_TAG_NAME        = 4;
+    static final int ST_TAG_WS          = 5;
+    static final int ST_EMPTY_TAG_SLASH = 6;
+    static final int ST_NAME            = 7;
+    static final int ST_NAME_WS         = 8;
+    static final int ST_EQ              = 9;
+    static final int ST_VALUE           = 10;
+    static final int ST_VALUE_QUOTED    = 11;
+    static final int ST_PCDATA          = 21;
+    static final int ST_COMMENT         = 22;
+
+    HandlerBase    dfltHandler = new HandlerBase();
+    EntityHandler   entHandler = dfltHandler;
+    DocumentHandler docHandler = dfltHandler;
+    ErrorHandler    errHandler = dfltHandler;
+    SAXAttributeMap attrs = new SAXAttributeMap();
+    String sysID;
+
+    protected Hashtable noCaseElms;
+    public boolean rcgnzWS       = true;   // is white space chars recognized as PCDATA
+                                           // even when preceeding tags?
+    public boolean rcgnzEntities = true;
+    public boolean rcgnzCDATA    = true;
+    public boolean rcgnzComments = true;   //
+    public boolean atomize       = false;  // make element and attr names atoms
+
+    CharBuffer buf       = new CharBuffer();
+    boolean isStartTag   = true;
+    /**
+     * Signals whether a non-empty element has any children. If not we
+     * must generate an artificial empty-string child [characters(buf, 0, 0)].
+     */
+    boolean noChildren;
+    CharBuffer tagname   = new CharBuffer();
+    CharBuffer attrName  = new CharBuffer();
+    CharBuffer attrValue = new CharBuffer();
+    Reader in;
+
+    public final EntityManager entMngr = new EntityManager(this);
+    protected int state = ST_START;
+    protected int _line = 1;
+    protected int _column = 0;
+    public int line;          // can be used in Handler callbacks
+    public int column;        // can be used in Handler callbacks
+    protected int qchar;      // <'> or <"> when parsing quoted attr values
+    // we recognize attribute name-value pairs for XML PI by setting
+    // the inXMLDecl flag and going to state ST_TAG_WS
+    boolean inXMLDecl = false;  // see
+
+    public Tokenizer() {
+        pos();  // publish the initial position (_line = 1, _column = 0) in the public line/column fields
+    }
+
+    public void setEntityHandler(EntityHandler handler) {
+        entHandler = handler;   // replaces the default HandlerBase installed at construction
+    }
+
+    public void setDocumentHandler(DocumentHandler handler) {
+        docHandler = handler;   // replaces the default HandlerBase installed at construction
+    }
+
+    public void setErrorHandler(ErrorHandler handler) {
+        errHandler = handler;   // replaces the default HandlerBase installed at construction
+    }
+
+    public void parse(String publicID, String sysID) throws Exception {
+        this.sysID = sysID;
+        parse(new URL(sysID).openStream());
+    }
+
+    public void parse(InputStream in) throws Exception
+	{
+		parse(new InputStreamReader(in));   // no charset given: bytes are decoded with the platform default encoding
+    }
+
+	public void parse(Reader in) throws Exception
+	{
+        this.in = in;               // kept in a field for the rest of the tokenizer
+        docHandler.startDocument();
+        tokenize();                 // endDocument() below is skipped if this throws
+        docHandler.endDocument();
+	}
+
+    // invoked to remember current position
+    protected void pos() {
+        line = _line;       // copy the internal counters into the public fields
+        column = _column;   // that are declared for use in Handler callbacks
+    }
+
+    public void ignoreCase(String elementName) {
+        if (noCaseElms == null) noCaseElms = new Hashtable();       // the table is created on first use
+        noCaseElms.put(elementName.toLowerCase(), elementName);     // key: lower-cased name, value: the name as given
+    }
+
+    public void rcgnzWS(boolean b) {
+        rcgnzWS = b;    // setter for the public flag of the same name
+    }
+
+    // invoked after doing any Handler callback - resets state
+    protected void toStart() {
+        state = ST_START;
+        buf.reset();        // discard the text collected for the token just delivered
+        tagname.reset();
+        attrName.reset();
+        attrValue.reset();
+        attrs.clear();      // the attribute map is reused from one tag to the next
+        isStartTag = true;  // until proven wrong
+        pos();              // the next token starts at the current position
+    }
+
+  public void tokenize() throws Exception {
+    int c;
+
+    while ((c = read()) != -1) {
+      switch (state) {
+        case ST_START:
+          switch (c) {
+            case '<':
+              state = ST_TAG_LT;
+              isStartTag = true;  // until proven wrong
+              tagname.reset();
+              break;
+            case ' ': case '\t': case '\r': case '\n':
+              if (!rcgnzWS) break;
+              // else fall through
+            default:
+              state = ST_PCDATA;
+          }
+          break;
+
+        case ST_PCDATA:
+          if (c == '<') {
+            gotPCDATA(true);
+            state = ST_TAG_LT;
+          }
+          break;
+
+        case ST_TAG_LT:
+          switch (c) {
+            case '/':
+              isStartTag = false;
+              state = ST_TAG_NAME;
+              break;
+            case '!':
+              c = read();
+              if ((c == '-' && !rcgnzComments) || (c == '[' && !rcgnzCDATA)) {
+                state = ST_PCDATA;
+                break;
+              }
+              if (c == '-') state = ST_COMMENT;
+              else if (c == '[') parseCDATA();
+              else {
+                // FIXME: shouldn't be delivered as PCDATA
+                warning("Bad markup " + buf);
+                state = ST_PCDATA;
+              }
+              break;
+            case '?':
+              parsePI();
+              break;
+            case ' ': case '\t': case '\r': case '\n':
+              state = ST_TAG_WS;
+              break;
+            default:
+              tagname.write(c);
+              state = ST_TAG_NAME;
+          }
+          break;
+
+        case ST_TAG_NAME:
+          switch (c) {
+            case ' ': case '\t': case '\r': case '\n':
+              state = ST_TAG_WS;
+              break;
+            case '/': state = ST_EMPTY_TAG_SLASH; break;
+            case '>': gotTag(false); break;
+            default:  tagname.write(c);
+          }
+          break;
+
+        case ST_TAG_WS:
+          switch (c) {
+            case ' ': case '\t': case '\r': case '\n': break;
+            case '/': state = ST_EMPTY_TAG_SLASH; break;
+            case '>': gotTag(false); break;
+            case '?':
+              if (inXMLDecl) {
+                if ((c = read()) != '>') {
+                errHandler.warning("XML PI not terminated properly",
+                                   sysID, _line, _column);
+                  //err_continue("XML PI not terminated properly");
+                }
+                //handler.gotXMLDecl(attrs);  // FIXME(?)
+                toStart();
+                break;
+              }
+              // NOTE: if !inXMLDecl we fall through to default case
+            default:
+              if (!isStartTag) {
+                // bit of a hack this...
+                errHandler.warning("Malformed tag: "+buf, sysID, _line, _column);
+                //err_continue("Malformed tag: "+buf);
+                if (c == '<') {
+                    gotPCDATA(true);
+                    state = ST_TAG_LT;
+                } else {
+                    // we get here e.g. if there's an end tag with attributes
+                    state = ST_PCDATA;
+                }
+              } else {
+                // FIXME: this accepts way too many first chars for attr name
+                attrName.write(c);
+                state = ST_NAME;
+              }
+          }
+          break;
+
+        case ST_EMPTY_TAG_SLASH:
+          if (c == '>') {
+            //tagtype = TAG_EMPTY;
+            gotTag(true);
+            break;
+          } else {
+            // ERROR !? - can't throw Exception here - we go to next tag...
+            state = ST_PCDATA;
+          }
+          break;
+
+        case ST_NAME:
+          switch (c) {
+            case ' ': case '\t': case '\r': case '\n':
+              if (attrName.size() > 0) {
+                state = ST_NAME_WS;
+              }
+              break;
+            case '>':
+              if (attrName.size() > 0) gotAttr(true);
+              gotTag(false);
+              break;
+            case '=':
+              state = ST_EQ;
+              break;
+            default:
+              if (isCtlOrTspecial(c)) {
+                state = ST_PCDATA;
+              } else {
+                attrName.write(c);
+              }
+          }
+          break;
+
+        case ST_NAME_WS:   // white-space between name and '='
+          switch (c) {
+            case ' ': case '\t': case '\r': case '\n': break;
+            case '=': state = ST_EQ; break;
+            case '>': gotAttr(true); gotTag(false); break;
+            default:
+              if (isNameChar(c)) {
+                gotAttr(true);
+                attrName.write(c);
+                state = ST_TAG_WS;
+              } else {
+                state = ST_PCDATA;
+              }
+          }
+          break;
+
+        case ST_EQ:        // white-space between '=' and value
+          switch (c) {
+            case ' ': case '\t': case '\r': case '\n': break;
+            case '"':  qchar = '"';  state = ST_VALUE_QUOTED; break;
+            case '\'': qchar = '\''; state = ST_VALUE_QUOTED; break;
+            default:
+              if (isCtlOrTspecial(c)) {
+                state = ST_PCDATA;
+              } else {
+                attrValue.write(c);
+                state = ST_VALUE;
+              }
+          }
+          break;
+
+        case ST_VALUE:
+          switch (c) {
+            case ' ': case '\t': case '\r': case '\n':
+              gotAttr(false);
+              state = ST_TAG_WS;
+              break;
+            case '>':
+              gotAttr(false);
+              gotTag(false);
+              break;
+            case '/':
+              gotAttr(false);
+              state = ST_EMPTY_TAG_SLASH;
+              break;
+            default:
+              if (isCtlOrTspecial(c)) {
+                state = ST_PCDATA;
+              } else {
+                attrValue.write(c);
+              }
+          }
+          break;
+
+        case ST_VALUE_QUOTED:
+          if (c == qchar) {
+            gotAttr(false);
+            state = ST_TAG_WS;
+          } else {
+            attrValue.write(c);
+          }
+          break;
+
+        case ST_COMMENT:
+          // we've seen "...<!-" by now
+          try {
+            if (c != '-') {
+              warning("Bad comment");
+              state = ST_PCDATA;
+              break;
+            }
+            // we're within comment - read till we see "--"
+            while (true) {
+              while (read_ex() != '-') ;
+              if (read_ex() == '-') break;
+            }
+            // seen "--" - gotComment() reads past next '>'
+            gotComment();
+            //while (read_ex() != '>') ;
+            //state = ST_PCDATA;
+          } catch (EmptyInputStream ex) {
+            gotPCDATA(false);
+            break;
+          }
+      }
+    }
+    /* TODO: catch EmptyInputStream exception only here!
+    } catch (EmptyInputStream ex) {
+        err_continue("EOF while parsing " + token[state]);
+    }
+    */
+
+    // input stream ended - return rest, if any, as PCDATA
+    if (buf.size() > 0) {
+        gotPCDATA(false);
+        buf.reset();
+        }
+    }
+
+    // Reads the next char from the input, appends it to buf and updates the
+    // _line/_column counters used in error reporting. Returns -1 at end of input.
+    int cc; // char returned by the previous read; lets "\r\n" count as one line
+    public final int read() throws IOException {
+        final int ch = in.read();
+        if (ch == -1) return ch;        // end of input: nothing is recorded
+        buf.write(ch);
+        // a line ends with a single \r, a single \n, or the pair \r\n
+        if (ch == '\r') {
+            _line++;
+            _column = 0;
+        } else if (ch == '\n') {
+            // the \n of a \r\n pair was already counted when \r was read
+            if (cc != '\r') _line++;
+            _column = 0;
+        } else {
+            _column++;
+        }
+        cc = ch;
+        return ch;
+    }
+
+    public final int read_ex() throws IOException, EmptyInputStream {
+        final int ch = read();             // same bookkeeping as read()
+        if (ch != -1) return ch;
+        throw new EmptyInputStream();      // end of input becomes an exception
+    }
+
+    // Stores the attribute collected in attrName/attrValue into attrs and
+    // clears both buffers. Value-less ("boolean") attributes get BOOLATTR.
+    protected final void gotAttr(boolean isBoolean) throws Exception {
+        String key = attrName.toString();
+        if (atomize) key = Atom.getAtom(key);
+        String value = BOOLATTR;
+        if (!isBoolean)         // real value: entity-decoded only when enabled
+            value = (rcgnzEntities ? entMngr.entityDecode(attrValue) : attrValue).toString();
+        attrName.reset();
+        attrValue.reset();
+        attrs.put(key, value);
+    }
+
+    protected void gotTag(boolean isEmpty) throws Exception {  // reports the tag held in tagname/attrs to docHandler; isEmpty marks "<x/>"
+        String nm = tagname.toString();
+        String nm_lc = nm.toLowerCase();
+        if (noCaseElms != null && noCaseElms.get(nm_lc) != null) {  // name is listed in noCaseElms: normalize to lower case
+            nm = nm_lc;
+            keysToLowerCase(attrs);
+        }
+        if (atomize) nm = Atom.getAtom(nm);
+        if (isStartTag) {
+            docHandler.startElement(nm, attrs);
+            // an empty-element tag is closed right away
+            if (isEmpty) docHandler.endElement(nm);
+            noChildren = !isEmpty;  // an open element has reported no content yet
+        } else {
+            if (noChildren) {  // end tag directly after its start tag: report zero-length text first
+                docHandler.characters(buf.getCharArray(), 0, 0);
+                noChildren = false;
+            }
+            docHandler.endElement(nm);
+            // (an older handler.gotETag(nm, getBuffer()) call used to follow here)
+        }
+        toStart();
+    }
+
+    public final void keysToLowerCase(SAXAttributeMap attrs) {
+        for (int idx = 0; idx < attrs.n; idx++) {   // rewrite every key in place
+            final String lowered = attrs.keys[idx].toLowerCase();
+            attrs.keys[idx] = atomize ? Atom.getAtom(lowered) : lowered;
+        }
+    }
+
+    // Reports the text collected in buf to docHandler; toomuch is true iff we already read the '<' of the next token
+    protected void gotPCDATA(boolean toomuch) throws Exception {
+        noChildren = false;
+        if (toomuch) {
+            buf.setLength(buf.size() - 1);  // drop the trailing '<' so it is not reported as text
+        }
+        CharBuffer buf1 = rcgnzEntities ? entMngr.entityDecode(buf) : buf;  // decode entities only when enabled
+        docHandler.characters(buf1.getCharArray(), 0, buf1.size());
+        //handler.gotText(getBuffer());
+        toStart();
+        if (toomuch) {
+            buf.write('<');  // put the '<' back for the tag that follows
+            column--;  // NOTE(review): the rest of this code updates _column - confirm this is the intended field
+        }
+    }
+
+    // Skips input up to and including the next '>'. XXX: pass the comment on as docHandler.ignorable() ??
+    protected void gotComment() throws IOException, EmptyInputStream {
+        int ch;
+        do { ch = read_ex(); } while (ch != '>');   // end of input surfaces as EmptyInputStream
+        toStart();
+    }
+
+    // Processing Instruction: reads "target data?>" and reports it via docHandler.processingInstruction()
+    protected void parsePI() throws Exception {
+        int i;
+        String target;
+
+        noChildren = false;
+        inXMLDecl = false;
+        i = buf.size();  // buf index at which the target name starts
+        try {
+        while (!isWS(read_ex())) ;  // read the target name up to the first whitespace char
+        target = buf.toString();
+        target = target.substring(i, target.length() - 1);  // cut off that whitespace char
+
+        if ("XML".equals(target)) {  // NOTE(review): case-sensitive, so a lower-case "xml" target is handled as an ordinary PI
+            inXMLDecl = true;
+            state = ST_TAG_WS;  // continue in the tag states, which handle the closing '?' when inXMLDecl is set
+            return;
+        }
+
+        while (isWS(read_ex())) ;  // skip whitespace between target and data
+        i = buf.size() - 1;  // buf index of the first data char (the one just read)
+        while (true) {  // scan for the closing "?>"
+            while (read_ex() != '?') ;
+            if (read_ex() == '>') {
+                String s = buf.toString();
+                docHandler.processingInstruction(
+                        Atom.getAtom(target), s.substring(i, s.length()-2));  // data without the trailing "?>"
+                //handler.gotPI(Atom.getAtom(target),
+                //              s.substring(i, s.length()-2));
+                break;
+            }
+        }
+        } catch (EmptyInputStream ex) {  // input ended inside the PI
+            gotPCDATA(false);  // hand what was read so far on as character data
+            errHandler.warning("EOF while parsing PI", sysID, _line, _column);
+            //err_continue("EOF while parsing PI");
+        }
+        toStart();
+    }
+
+    // CDATA section: reports the text between "<![CDATA[" and the first "]]>" as characters
+    // XXX: should contents be amalgamated with surrounding PCDATA?
+    protected void parseCDATA() throws Exception {
+        // we've seen "<![" by now
+        try {
+            if (read_ex() == 'C' && read_ex() == 'D' && read_ex() == 'A' &&
+                read_ex() == 'T' && read_ex() == 'A' && read_ex() == '[') {
+                int i1 = buf.size();    // buf index of the first content char
+                int ch, run = 0;        // run = number of consecutive ']' read just before ch
+                while ((ch = read_ex()) != '>' || run < 2)   // stop only at '>' preceded by "]]",
+                    run = (ch == ']') ? run + 1 : 0;          // so content ending in ']' ("]]]>") is found too
+                docHandler.characters(buf.getCharArray(), i1, buf.size()-3-i1);  // content without "]]>"
+            } else {
+                warning("Bad CDATA markup");
+                state = ST_PCDATA;
+            }
+        } catch (EmptyInputStream ex) {
+            warning("EOF while parsing CDATA section");   // input ended inside the section
+            gotPCDATA(false);
+        }
+        toStart();
+    }
+
+    public boolean isWS(int c) {
+        // whitespace here means exactly: space, tab, carriage return, line feed
+        return c == ' '
+            || c == '\t'
+            || c == '\r' || c == '\n';
+    }
+
+    /**
+     * Returns true if c is either an ascii control character or
+     * a tspecial according to the HTTP specification.
+     */
+  //   private static final boolean[] isCtlOrTSpecial = new boolean[]
+//     {
+//        /* 0 */     true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 14 */    true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 28 */    true , true , true , true , true , false, true , false, false, false, false, false, true , true ,
+//        /* 42 */    false, false, true , false, false, true , false, false, false, false, false, false, false, false,
+//        /* 56 */    false, false, true , true , true , true , true , true , true , false, false, false, false, false,
+//        /* 70 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 84 */    false, false, false, false, false, false, false, true , true , true , false, false, false, false,
+//        /* 98 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 112 */   false, false, false, false, false, false, false, false, false, false, false, true , false, true ,
+//        /* 126 */   false, true , false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 140 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 154 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 168 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 182 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 196 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 210 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 224 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 238 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 252 */   false, false, false, false
+//    };
+
+    /**
+     * Returns true if c is an ASCII control character (0-31 or 127) or one of
+     * the "tspecial" characters listed below (space included). Tab is not in
+     * that list but still yields true, because it is a control character.
+     */
+    public static final boolean isCtlOrTspecial(int c) {
+        // control characters (0-31 and 127):
+        if ((0 <= c && c <= 31) || c == 127) {
+            return true;
+        }
+        // tspecials:
+        switch (c) {
+          case '(': case ')': case '<': case '>': case '@':
+          case ',': case ';': case ':': case '\\': case '"':
+          case '/': case '[': case ']': case '?': case '=':
+          case '{': case '}': case ' ':
+            return true;
+          default:
+            return false;
+        }
+    }
+
+/*    public static void main(String[])
+    {
+    System.out.println("private static final boolean[] isCtlOrTSpecial = \n{");  // bzw. isNameChar
+        for(int i=0; i<256; i++)
+        {
+            if(i>0)
+                System.out.print(", ");
+            if(i % 14 == 0)
+            {
+                System.out.print("\n/* " + i + " *" + "/   ");
+            }
+            if(Tokenizer.isCtlOrTspecial(i))  // bzw. isNameChar(i)
+            {
+                System.out.print("true ");
+            }
+            else
+            {
+                System.out.print("false");
+            }
+
+
+        }
+        System.out.print("};\n\n");
+    }
+    */
+
+//    public static final boolean isCtlOrTspecial(int c)
+//    {
+//        return (c < 256 ? isCtlOrTSpecial[c] : false);
+//    }
+//
+//    private static final boolean[] isNameChar =
+//    {
+//        /* 0 */     false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 14 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 28 */    false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 42 */    false, false, false, true , true , false, true , true , true , true , true , true , true , true ,
+//        /* 56 */    true , true , false, false, false, false, false, false, false, true , true , true , true , true ,
+//        /* 70 */    true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 84 */    true , true , true , true , true , true , true , false, false, false, false, true , false, true ,
+//        /* 98 */    true , true , true , true , true , true , true , true , true , true , true , true , true , true ,
+//        /* 112 */   true , true , true , true , true , true , true , true , true , true , true , false, false, false,
+//        /* 126 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 140 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 154 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 168 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 182 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 196 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 210 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 224 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 238 */   false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+//        /* 252 */   false, false, false, false
+//    };
+//    public static final boolean isNameChar(int c)
+//    {
+//        return (c < 256 ? isNameChar[c] : false);
+//    }
+//
+    // Accepts ASCII letters, ASCII digits, '.', '-' and '_'. (Original
+    // author's caveat: probably not a very standard definition of what can
+    // go into tag and attribute names.)
+    public static final boolean isNameChar(int c) {
+        if (c >= 'a' && c <= 'z') return true;
+        if (c >= 'A' && c <= 'Z') return true;
+        if (c >= '0' && c <= '9') return true;
+        return c == '.' || c == '-' || c == '_';
+    }
+
+
+
+    protected final void warning(String s) throws Exception {  // forwards s to errHandler.warning with sysID and the current line/column
+        errHandler.warning(s, sysID, _line, _column);
+    }
+
+    protected final void fatal(String s) throws Exception {  // forwards s to errHandler.fatal with sysID and the current line/column
+        errHandler.fatal(s, sysID, _line, _column);
+    }
+}
+
+class EmptyInputStream extends Exception {   // signals end of input; thrown by Tokenizer.read_ex()
+    EmptyInputStream() { super(); }           // carries no message
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Utils.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Utils.java
new file mode 100644
index 00000000000..66eda2511f6
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/Utils.java
@@ -0,0 +1,98 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml;
+
+import hplb.org.w3c.dom.*;
+import java.io.*;
+import java.util.*;
+
+public class Utils {
+    /** Pretty-prints node and, recursively, its children to out, starting at indent 0. */
+    public static void pp(Node node, PrintStream out) {
+        pp(node, out, 0);
+    }
+    
+    public static void pp(Node node, PrintStream out, int indent) {
+        // this node: `indent` spaces, then its string form, on a line of its own
+        indent(out, indent);
+        out.println(String.valueOf(node));
+        // then each child, two columns further in
+        final int childIndent = indent + 2;
+        NodeIterator kids = node.getChildNodes();
+        for (Node kid = kids.toNext(); kid != null; kid = kids.toNext()) {
+            pp(kid, out, childIndent);
+        }
+    }
+    
+    public static String compact(String s) {
+        // fewer than 18 chars: show the whole string
+        if (s.length() < 18) return "[" + noCRLF(s) + "]";
+        // otherwise keep the first and the last 7 chars around "..."
+        String head = noCRLF(s.substring(0, 7));
+        String tail = noCRLF(s.substring(s.length() - 7));
+        return "[" + head + "..." + tail + "]";
+    }
+    
+    public static String noCRLF(String s) {  // every '\r' and every '\n' becomes a single space
+        return s.replace('\n', ' ').replace('\r', ' ');
+    }
+    
+    public static void indent(PrintStream out, int indent) {  // prints `indent` spaces; nothing when indent <= 0
+        for (int left = indent; left > 0; left--) out.print(' ');
+    }
+    
+    /**
+     * Encodes an XML attribute value: every &lt;"&gt; becomes "&amp;quot;".
+     * @param val the raw attribute value; must not be null
+     * @return val itself when it contains no double quote, else an encoded copy
+     */
+    public static String encAttrVal(String val) {
+        if (val.indexOf('"') < 0) return val;    // nothing to encode
+        StringBuffer sbuf = new StringBuffer();
+        int offset = 0, i;
+        while ((i = val.indexOf('"', offset)) > -1) {
+            sbuf.append(val.substring(offset, i));   // text before the quote
+            sbuf.append("&quot;");    // was "&quote;", which is not a predefined XML entity
+            offset = i+1;
+        }
+        sbuf.append(val.substring(offset));          // text after the last quote
+        return sbuf.toString();
+    }
+    
+    /**
+     * Encode the specified String as XML PCDATA, i.e. "&lt;" is
+     * encoded as "&amp;lt;" and "&amp;" is encoded as "&amp;amp;".
+     * @param s the text to encode; must not be null
+     * @return s itself when it contains neither character, else an encoded copy
+     */
+    public static String encPCDATA(String s) {
+        if (s.indexOf('<') < 0 && s.indexOf('&') < 0) return s;   // nothing to encode
+        StringBuffer sbuf = new StringBuffer();
+        int offset = 0;                  // start of the text not yet copied
+        int i = s.indexOf('<', offset);  // next '<' at or after offset, or -1
+        int j = s.indexOf('&', offset);  // next '&' at or after offset, or -1
+        while (i > -1 || j > -1) {
+            if (j < 0 || (i > -1 && i < j)) {   // always handle whichever comes first
+                sbuf.append(s.substring(offset, i));
+                sbuf.append("&lt;");
+                offset = i+1;
+                i = s.indexOf('<', offset);
+            } else {
+                sbuf.append(s.substring(offset, j));
+                sbuf.append("&amp;");
+                offset = j+1;
+                j = s.indexOf('&', offset);
+            }
+        }
+        sbuf.append(s.substring(offset));
+        return sbuf.toString();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/HtmlObserver.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/HtmlObserver.java
new file mode 100644
index 00000000000..ab8cbdbac5f
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/HtmlObserver.java
@@ -0,0 +1,40 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml.util;
+
+import java.net.URL;
+
+/**
+ * Callback interface for HtmlScanner: the scanner invokes one of these
+ * methods for each URL-carrying attribute it reports, which lets a client
+ * act on the links of an HTML document - for example robot code which
+ * crawls a hypertext graph. Every callback receives the attribute value,
+ * the scanner's current context URL and the client's own data object.
+ * This interface is similar to Jeff Poskanzer's Acme.HtmlObserver.
+ * @see     HtmlScanner
+ * @author  Anders Kristensen
+ */
+public interface HtmlObserver {
+    /** Called with the href value of an &lt;a&gt; element. */
+    void gotAHref(String urlStr, URL contextUrl, Object data);
+
+    /** Called with the src value of an &lt;img&gt; element. */
+    void gotImgSrc(String urlStr, URL contextUrl, Object data);
+
+    /** Called with the href value of a &lt;base&gt; element. */
+    void gotBaseHref(String urlStr, URL contextUrl, Object data);
+
+    /** Called with the href value of an &lt;area&gt; element. */
+    void gotAreaHref(String urlStr, URL contextUrl, Object data);
+
+    /** Called with the src value of a &lt;frame&gt; element. */
+    void gotFrameSrc(String urlStr, URL contextUrl, Object data);
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/HtmlScanner.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/HtmlScanner.java
new file mode 100644
index 00000000000..c1e9a4f142d
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/HtmlScanner.java
@@ -0,0 +1,177 @@
+/*
+ * $Id$
+ *
+ * Copyright 1997 Hewlett-Packard Company
+ *
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml.util;
+
+import hplb.org.xml.sax.HandlerBase;
+import hplb.org.xml.sax.AttributeMap;
+import hplb.org.xml.sax.XmlException;
+import hplb.org.xml.sax.ErrorHandler;
+import hplb.org.xml.sax.EntityHandler;
+import hplb.org.xml.sax.DocumentHandler;
+import hplb.xml.*;
+import java.net.*;
+import java.io.*;
+
+/**
+ * The HtmlScanner parses an HTML document for elements containing links.
+ * For each link found it will invoke a client-provided callback method.
+ * It knows about most HTML4.0 links and also knows about the &lt;base&gt;.
+ *
+ * <p>For an example use see UrlScanner.
+ *
+ * @see     HtmlObserver
+ * @see     UrlScanner
+ * @author  Anders Kristensen
+ */
+public class HtmlScanner extends HandlerBase {
+    HtmlObserver observer;
+    URL contextURL;
+    Object data;
+    Tokenizer tok;
+    Reader in;
+
+    /**
+     * Creates a scanner for the document at the given URL, which also serves
+     * as the context URL reported to the observer. The stream is decoded with
+     * the platform default charset; callbacks receive null as client data.
+     * Nothing is parsed until {@link #parse()} is called.
+     * @param url   the URL to open and scan for links
+     * @param observer  the callback object
+     * @throws Exception    for example when the URL's stream cannot be opened
+     * @see #parse()
+     */
+    public HtmlScanner(URL url, HtmlObserver observer ) throws Exception {
+        this(new BufferedReader(new InputStreamReader(url.openStream())), url, observer);
+    }
+
+    /**
+     * Creates a scanner over a byte stream, which is decoded with the
+     * platform default charset. Callbacks receive null as client data.
+     * Nothing is parsed until {@link #parse()} is called.
+     * @param in    the input stream
+     * @param url   the URL corresponding to this document
+     * @param observer  the callback object
+     * @throws Exception    declared by the constructor this one delegates to
+     * @deprecated use {@link #HtmlScanner(Reader, URL, HtmlObserver)} instead
+     */
+    public HtmlScanner(InputStream in, URL url, HtmlObserver observer)
+        throws Exception
+    {
+        this(new BufferedReader(new InputStreamReader(in)), url, observer, null);
+    }
+
+	    /**
+     * Creates a scanner over the given reader; callbacks receive null as
+     * client data. Nothing is parsed until {@link #parse()} is called.
+     * @param in    the Reader supplying the HTML text
+     * @param url   the URL corresponding to this document
+     * @param observer  the callback object
+     * @throws Exception    declared by the constructor this one delegates to
+     * @see #parse()
+     */
+    public HtmlScanner(Reader in, URL url, HtmlObserver observer)
+        throws Exception
+    {
+        this(in, url, observer, null);
+    }
+
+	/**
+     * Creates a scanner over a byte stream, which is decoded with the platform
+     * default charset. Nothing is parsed until {@link #parse()} is called.
+     * Parameters are as for the (Reader, URL, HtmlObserver, Object) constructor.
+     * @deprecated use {@link #HtmlScanner(Reader, URL, HtmlObserver, Object)} instead
+     */
+    public HtmlScanner(InputStream in, URL url, HtmlObserver observer, Object data)
+        throws Exception
+    {
+		this(new BufferedReader(new InputStreamReader(in)), url, observer, data);
+	}
+
+    /**
+     * Creates a scanner over the given reader. The tokenizer is configured
+     * here, but nothing is parsed until {@link #parse()} is called.
+     * @param in    the reader supplying the HTML text
+     * @param url   the URL corresponding to this document
+     * @param observer  the callback object
+     * @param data  client-specific data; this is passed back to the
+     *              client in callbacks; this scanner doesn't use it
+     * @throws Exception    NOTE(review): which call can throw is not visible here
+     */
+    public HtmlScanner(Reader in, URL url, HtmlObserver observer, Object data)
+            throws Exception {
+        this.data = data;
+        this.contextURL = url;
+        this.observer = observer;
+        this.in = in;
+
+        tok = new Tokenizer();
+        setDocumentHandler(this);   // this scanner handles the tokenizer's document events itself
+        HTML.applyHacks(tok);
+        // the switches below are still set after applyHacks()
+        tok.atomize = true;         // startElement() compares element names with ==
+        tok.rcgnzCDATA = false;
+        tok.rcgnzEntities = false;
+    }
+
+    /** Passes the given document handler on to the underlying tokenizer. */
+    public void setDocumentHandler(DocumentHandler doc) {
+        tok.setDocumentHandler(doc);
+    }
+
+    /** Passes the given entity handler on to the underlying tokenizer. */
+    public void setEntityHandler(EntityHandler ent) {
+        tok.setEntityHandler(ent);
+    }
+
+    /** Passes the given error handler on to the underlying tokenizer. */
+    public void setErrorHandler(ErrorHandler err) {
+        tok.setErrorHandler(err);
+    }
+
+    /** Runs the tokenizer over the reader handed to the constructor. */
+    public void parse() throws Exception {
+        tok.parse(in);
+    }
+
+    /**
+     * Reports link-carrying start tags to the observer: the href of a, base and
+     * area elements and the src of img and frame elements. Tags without that
+     * attribute, and all other elements, are ignored. Names are compared by
+     * identity (==) with the HTML constants, which relies on the tokenizer
+     * handing out atomized names (tok.atomize is switched on in the constructor).
+     */
+    public void startElement(String name, AttributeMap attributes) {
+        if (name == HTML.A) {
+            String href = attributes.getValue("href");
+            if (href != null) observer.gotAHref(href, contextURL, data);
+        } else if (name == HTML.IMG) {
+            String src = attributes.getValue("src");
+            if (src != null) observer.gotImgSrc(src, contextURL, data);
+        } else if (name == HTML.BASE) {
+            String href = attributes.getValue("href");
+            if (href == null) return;
+            observer.gotBaseHref(href, contextURL, data);   // reported with the old context
+            if (contextURL == null) return;
+            try {
+                contextURL = new URL(contextURL, href);     // later callbacks get the new context
+            } catch (MalformedURLException ex) {
+                System.err.println("Bad <base> URL: " + href + ".");
+                System.err.println(ex.getMessage());
+            }
+        } else if (name == HTML.AREA) {
+            String href = attributes.getValue("href");
+            if (href != null) observer.gotAreaHref(href, contextURL, data);
+        } else if (name == HTML.FRAME) {
+            String src = attributes.getValue("src");
+            if (src != null) observer.gotFrameSrc(src, contextURL, data);
+        }
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/NormalizeHtml.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/NormalizeHtml.java
new file mode 100644
index 00000000000..b590edab365
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/NormalizeHtml.java
@@ -0,0 +1,143 @@
+/*
+ * $Id$
+ *
+ * Copyright 1997 Hewlett-Packard Company
+ *
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml.util;
+
+import hplb.xml.*;
+import hplb.org.w3c.dom.*;
+import java.io.*;
+
+/**
+ * Reads an HTML document from System.in, "normalizes" it in a couple of ways, and
+ * writes it to System.out. In the process HTML 4.0 element names are converted to
+ * upper case, attribute names are converted to lower case, all attribute values
+ * are enclosed in double quotes, and all non-empty elements with an optional and
+ * omitted end tag are given an end tag.
+ *
+ * @author      Anders Kristensen
+ */
+public class NormalizeHtml {
+    static PrintStream out = System.out;
+
+    public static void usage() {
+        System.exit(1);
+    }
+
+    public static void main(String[] args) throws Exception {
+        /*
+        Tokenizer tok = new Tokenizer();
+        tok.setDocumentHandler(new NormalizeHtml());
+        HTML.applyHacks(tok);
+        //tok.rcgnzEntities = false;
+        tok.rcgnzCDATA = false;
+        tok.atomize = true;
+        tok.parse(System.in);
+        */
+        HtmlXmlParser parser = new HtmlXmlParser();
+        Tokenizer tok = parser.getTokenizer();
+        tok.rcgnzEntities = false;
+        tok.rcgnzCDATA = false;
+        tok.rcgnzComments = false;
+        tok.atomize = true;
+        print(parser.parse(System.in));
+    }
+
+    public static void print(Document doc) {
+        //print(doc.getDocumentElement());
+        NodeIterator iter = doc.getChildNodes();
+        while (iter.toNext() != null) {
+            printNode(iter.getCurrent());
+        }
+    }
+
+    public static void printNode(Node node) {
+        if (node instanceof Document) print((Document) node);
+        else if (node instanceof Element) print((Element) node);
+        else if (node instanceof Text) print((Text) node);
+        else System.err.println("Error: non-text, non-element node ignored.");
+    }
+
+    public static void print(Text text) {
+        //out.print(encodeText(text.getData(), false));
+        out.print(text.getData());
+    }
+
+    public static void print(Element elm) {
+        String tagName      = elm.getTagName();
+        AttributeList attrs = elm.attributes();
+        boolean isHtmlElm = isHtmlElm(tagName);
+        boolean isEmpty = (elm.getFirstChild() == null);
+        boolean isHtmlEmptyElm =
+              (tagName == HTML.AREA
+            || tagName == HTML.BASE
+            || tagName == HTML.BR
+            || tagName == HTML.COL
+            || tagName == HTML.FRAME
+            || tagName == HTML.HR
+            || tagName == HTML.IMG
+            || tagName == HTML.LINK
+            || tagName == HTML.META
+            || tagName == HTML.PARAM);
+
+        if (isHtmlElm) tagName = tagName.toUpperCase();
+
+        // print start tag and attribute name-value pairs
+        out.print("<" + tagName);
+        int len = attrs.getLength();
+        for (int i = 0; i < len; i++) {
+            print(attrs.item(i), isHtmlElm);
+        }
+        if (isEmpty && !isHtmlEmptyElm) out.print("/");
+        out.print(">");
+        if (isEmpty) return;
+
+        // print content
+        NodeIterator iter = elm.getChildNodes();
+        while (iter.toNext() != null) {
+            printNode(iter.getCurrent());
+        }
+
+        // print end tag
+        out.print("</" + tagName + ">");
+    }
+
+    public static void print(Attribute attr, boolean toLower) {
+        String a = attr.getName();
+        out.print(" " + (toLower ? a.toLowerCase() : a)
+                + "=\"" + encodeText(attr.toString(), true) +'"');
+    }
+
+    public static String encodeText(String s, boolean attr) {
+        StringBuffer sb = new StringBuffer();
+        int ch, len = s.length();
+
+        for (int i = 0; i < len; i++) {
+            ch = s.charAt(i);
+            if (ch == '"') sb.append("&quot;");
+            /* cause we don't recognize markup within PCDATA and attr values
+            else if (ch == '&') sb.append("&amp;");
+            else if (!attr && ch == '<') sb.append("&lt;");
+            else if (!attr && ch == '>') sb.append("&gt;");
+            else if ((" \r\n\t".indexOf((char) ch) != -1)
+                     && (ch <= 31 || ch >= 127)) sb.append("&#"+ch+";");
+            */
+            else sb.append((char) ch);
+        }
+        return sb.toString();
+    }
+
+    public static boolean isHtmlElm(String tagName) {
+        int len = HTML.elements.length;
+        for (int i = 0; i < len; i++) {
+            if (tagName == HTML.elements[i]) return true;
+        }
+        return false;
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/RmMarkup.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/RmMarkup.java
new file mode 100644
index 00000000000..dcb64434834
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/RmMarkup.java
@@ -0,0 +1,32 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml.util;
+
+import hplb.xml.Tokenizer;
+import hplb.org.xml.sax.*;
+import java.io.*;
+
+public class RmMarkup extends HandlerBase {
+    static Tokenizer tok;
+    static Writer out = new OutputStreamWriter(System.out);
+
+    public void characters (char ch[], int start, int length) throws IOException {
+        out.write(ch, start, length);
+    }
+
+    public static void main(String[] args) throws Exception {
+        tok = new Tokenizer();
+        tok.setDocumentHandler(new RmMarkup());
+        TokTest.args(args, tok);
+        tok.parse(System.in);
+        out.flush();
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/TokTest.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/TokTest.java
new file mode 100644
index 00000000000..07d34574cbf
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/TokTest.java
@@ -0,0 +1,107 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml.util;
+
+import hplb.xml.*;
+import hplb.org.xml.sax.*;
+import java.io.*;
+
+/**
+ * Test of Tokenizer.
+ * Usage: TokTest [-w] [-d] [-c] [-e] [-h] < html-file
+ * @author  Anders Kristensen
+ */
+public class TokTest implements DocumentHandler {
+    static Tokenizer tok;
+    static PrintStream out = System.out;   // every callback logs one line here
+    int n = 60;                            // compact() abbreviates from this length on
+    int n2 = (n-3)/2;                      // chars kept on each side of the "..."
+
+    public void startDocument () {
+        out.println("START DOC");
+    }
+
+    public void endDocument () {
+        out.println("END DOC");
+    }
+
+    public void doctype (String name, String publicID, String systemID) {
+        out.println("DOC TYPE " + name + ", " + publicID + ", " + systemID);
+    }
+
+    public void startElement (String name, AttributeMap attributes) {
+        out.println("START " + name + ", " + attributes);
+    }
+
+    public void endElement (String name) {
+        out.println("END   " + name);
+    }
+
+    public void characters (char ch[], int start, int length) {
+        out.println("Chars: " + compact(new String(ch, start, length)));
+    }
+
+    public void ignorable (char ch[], int start, int length) {
+        out.println("Ignorable: " + compact(new String(ch, start, length)));
+    }
+
+    public void processingInstruction (String name, String remainder) {
+        out.println("PI: " + name + ", " + compact(remainder));
+    }
+
+    // Returns short description of PCDATA argument.
+    public String compact(char[] buf) {
+        return compact(new String(buf));
+    }
+
+    // Brackets s with CR/LF blanked out; strings of n or more chars keep only
+    // their first and last n2 chars. A null s (PI without data) gives "[]".
+    public String compact(String s) {
+        if (s == null) return "[]";
+        if (s.length() < n) return "[" + noCRLF(s) + "]";
+        return "[" + noCRLF(s.substring(0, n2)) + "..." +
+               noCRLF(s.substring(s.length() - n2)) + "]";
+    }
+
+    private static String noCRLF(String s) {
+        return s.replace('\r', ' ').replace('\n', ' ');
+    }
+
+    /**
+     * Process options in 'args' vector and apply to the supplied Tokenizer.
+     */
+    public static void args(String[] args, Tokenizer tok) {
+        // case mapping: tags/attr names/attr values, upper/lower/depends...
+        for (int i = 0; i < args.length; i++) {
+            if ("-w".equals(args[i])) {
+                tok.rcgnzWS = true;
+            } else if ("-d".equals(args[i])) {
+                tok.rcgnzComments = false;
+            } else if ("-c".equals(args[i])) {
+                tok.rcgnzCDATA = false;
+            } else if ("-e".equals(args[i])) {
+                tok.rcgnzEntities = false;
+            } else if ("-h".equals(args[i])) {
+                HTML.applyHacks(tok);
+            } else {
+                System.err.println("Unrecognized option: " + args[i]);
+            }
+        }
+    }
+
+    /** Tokenizes System.in with the options from args, logging every event. */
+    public static void main(String[] args) throws Exception {
+        tok = new Tokenizer();
+        tok.setDocumentHandler(new TokTest());
+        args(args, tok);
+        tok.parse(System.in);
+    }
+}
diff --git a/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/UrlScanner.java b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/UrlScanner.java
new file mode 100644
index 00000000000..b86f8f5e078
--- /dev/null
+++ b/sandbox/contributions/webcrawler-LARM/src/hplb/xml/util/UrlScanner.java
@@ -0,0 +1,166 @@
+/*
+ * $Id$
+ * 
+ * Copyright 1997 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ * 
+ * Copyright 1998 Hewlett-Packard Company
+ * 
+ * This file may be copied, modified and distributed only in
+ * accordance with the terms of the limited licence contained
+ * in the accompanying file LICENSE.TXT.
+ */
+
+package hplb.xml.util;
+
+import java.net.*;
+import java.io.*;
+import java.util.Date;
+
+/**
+ * Scans an HTML Web object for embedded links and prints them on stdout.
+ * <b>Usage</b>:
+ * <pre>
+ *  java hplb.xml.util.UrlScanner [-t] [-v] [-h proxy-host] [-p proxy-port] URL
+ *  where -t means test validity of embedded URLs and
+ *        -v means be verbose
+ * </pre>
+ * 
+ * @author      Anders Kristensen
+ */
+public class UrlScanner implements HtmlObserver {
+
+    // should use getenv and/or getProperty for these:
+    static String   proxyHost;
+    static String   proxyPort;
+    static boolean  test;
+    static boolean  verbose;
+    
+    public static void usage() {
+        PrintStream out = System.out;
+        out.println("Usage: UrlScan [-v] [-t] <baseurl>");
+        out.println("Extracts URLs from System.in and writes them on stdout.");
+        out.println("  -v  verbose mode");
+        out.println("  -t  test links (using HTTP HEAD requests)");
+        
+        System.exit(1);
+    }
+    
+    public static void main(String[] args) throws Exception {
+        URL url = null;
+        //HttpClient cl;
+        //HttpResponse res = null;
+    
+        try {
+            url = new URL(args[args.length-1]);
+            for (int i = 0; i < args.length - 1; i++) {
+                if ("-t".equals(args[i])) {
+                    test = true;
+                } else if ("-v".equals(args[i])) {
+                    verbose = true;
+                } else if ("-h".equals(args[i])) {
+                    proxyHost = args[++i];
+                } else if ("-p".equals(args[i])) {
+                    proxyPort = args[++i];
+                } else {
+                    usage();
+                }
+            }
+        } catch (Exception e) {
+            usage();
+        }
+
+        //cl = new HttpClient(url);
+        if (proxyHost != null) {
+            System.getProperties().put("http.proxyHost", proxyHost);
+        }
+        if (proxyPort != null) {
+            System.getProperties().put("http.proxyPort", proxyPort);
+        }
+        /*
+        try {
+            res = cl.get();
+        } catch (UnknownHostException e) {
+            panic("Couldn't connect to host " + e.getMessage());
+        } catch (IOException e) {
+            panic("I/O exception");
+        } catch (Exception e) {
+            panic("Error: " + e.getMessage());
+        }
+        */
+        
+        new HtmlScanner(url, new UrlScanner());
+    }
+
+    public static void panic(String reason) {
+        System.out.println(reason);
+        System.exit(1);
+    }
+
+    public void gotAHref(String urlStr, URL contextUrl, Object data) {
+        try {
+            URL url = new URL(contextUrl, urlStr);
+            System.out.print(url.toExternalForm());
+            if (test) testLink(url);
+            System.out.println();
+        } catch (Exception e) {
+            if (verbose) e.printStackTrace();
+        }
+    }
+
+    /** Invoked when the scanner finds an &lt;img src=""&gt; URL. */
+    public void gotImgSrc(String urlStr, URL contextUrl, Object data) {
+        try {
+            URL url = new URL(contextUrl, urlStr);
+            System.out.print(url.toExternalForm());
+            if (test) testLink(url);
+            System.out.println();
+        } catch (Exception e) {
+            if (verbose) e.printStackTrace();
+        }
+    }
+
+    /** Invoked when the scanner finds a &lt;base href=""&gt; URL. */
+    public void gotBaseHref(String urlStr, URL contextUrl, Object data ) {
+        if (verbose) {
+            System.out.println("gotBASEHREF: " + urlStr);
+            System.out.println("               " + contextUrl);
+        }
+    }
+
+    /** Invoked when the scanner finds a &lt;area href=""&gt; URL. */
+    public void gotAreaHref(String urlStr, URL contextUrl, Object data ) {
+        if (verbose) {
+            System.out.println("gotAreaHref:   " + urlStr);
+            System.out.println("               " + contextUrl);
+        }
+    }
+
+    /** Invoked when the scanner finds an &lt;frame src=""&gt; URL. */
+    public void gotFrameSrc(String urlStr, URL contextUrl, Object data ) {
+        try {
+            URL url = new URL(contextUrl, urlStr);
+            System.out.print(url.toExternalForm());
+            if (test) testLink(url);
+            System.out.println();
+        } catch (Exception e) {
+            if (verbose) e.printStackTrace();
+        }
+    }
+    
+    public static void testLink(URL url) throws IOException {
+        throw new IOException("Not implemented");
+        /*
+        HttpClient cl = new HttpClient(url);
+        if (proxyHost != null)
+            cl.setProxyAddr(proxyHost, proxyPort);
+        HttpResponse res = cl.head();
+
+        System.out.print(" " + res.getStatusCode());
+        if (verbose) System.out.print(" " + res.getReason());
+        */
+    }
+}