From 7a3b5acf374361023979b529d6daf0a5e66e35f1 Mon Sep 17 00:00:00 2001 From: cmarschner Date: Wed, 22 May 2002 23:09:22 +0000 Subject: [PATCH] added license info; added anchor text extraction git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150775 13f79535-47bb-0310-9956-ffa450edef68 --- .../contributions/webcrawler-LARM/CHANGES.txt | 11 + .../contributions/webcrawler-LARM/build.bat | 21 + .../contributions/webcrawler-LARM/build.sh | 4 +- sandbox/contributions/webcrawler-LARM/run.bat | 3 + sandbox/contributions/webcrawler-LARM/run.sh | 2 +- .../src/de/lanlab/larm/fetcher/Constants.java | 63 +- .../de/lanlab/larm/fetcher/DNSResolver.java | 62 +- .../src/de/lanlab/larm/fetcher/Fetcher.java | 56 +- .../larm/fetcher/FetcherGUIController.java | 187 ++-- .../de/lanlab/larm/fetcher/FetcherMain.java | 64 +- .../de/lanlab/larm/fetcher/FetcherTask.java | 60 +- .../lanlab/larm/fetcher/FetcherTaskQueue.java | 85 +- .../de/lanlab/larm/fetcher/FetcherThread.java | 62 +- .../larm/fetcher/FetcherThreadFactory.java | 62 +- .../src/de/lanlab/larm/fetcher/Filter.java | 72 +- .../src/de/lanlab/larm/fetcher/GZipTest.java | 61 +- .../src/de/lanlab/larm/fetcher/HostInfo.java | 61 +- .../de/lanlab/larm/fetcher/HostManager.java | 61 +- .../lanlab/larm/fetcher/KnownPathsFilter.java | 63 +- .../src/de/lanlab/larm/fetcher/Message.java | 55 + .../lanlab/larm/fetcher/MessageHandler.java | 55 + .../lanlab/larm/fetcher/MessageListener.java | 54 +- .../larm/fetcher/RobotExclusionFilter.java | 55 +- .../de/lanlab/larm/fetcher/ThreadMonitor.java | 62 +- .../lanlab/larm/fetcher/URLLengthFilter.java | 67 +- .../de/lanlab/larm/fetcher/URLMessage.java | 68 +- .../lanlab/larm/fetcher/URLScopeFilter.java | 105 +- .../lanlab/larm/fetcher/URLVisitedFilter.java | 57 +- .../de/lanlab/larm/graph/DistanceCount.java | 492 ++++++++- .../src/de/lanlab/larm/gui/AboutDialog.java | 298 +++--- .../src/de/lanlab/larm/gui/FetcherFrame.java | 940 +++++++++--------- 
.../lanlab/larm/gui/FetcherSummaryFrame.java | 109 +- .../src/de/lanlab/larm/gui/QuitDialog.java | 338 ++++--- .../de/lanlab/larm/net/HttpClientTimeout.java | 258 +++-- .../lanlab/larm/net/HttpTimeoutFactory.java | 90 +- .../lanlab/larm/net/HttpTimeoutHandler.java | 162 ++- .../larm/net/HttpURLConnectionTimeout.java | 54 + .../de/lanlab/larm/parser/LinkHandler.java | 69 +- .../src/de/lanlab/larm/parser/Tokenizer.java | 186 +++- .../lanlab/larm/storage/DocumentStorage.java | 56 +- .../de/lanlab/larm/storage/LogStorage.java | 65 +- .../de/lanlab/larm/storage/NullStorage.java | 61 +- .../lanlab/larm/storage/SQLServerStorage.java | 61 +- .../larm/threads/InterruptableTask.java | 59 +- .../de/lanlab/larm/threads/ServerThread.java | 54 + .../src/de/lanlab/larm/threads/TaskQueue.java | 55 + .../larm/threads/TaskReadyListener.java | 56 +- .../de/lanlab/larm/threads/ThreadFactory.java | 61 +- .../de/lanlab/larm/threads/ThreadPool.java | 53 + .../larm/threads/ThreadPoolObserver.java | 58 +- .../larm/threads/ThreadingStrategy.java | 60 +- .../src/de/lanlab/larm/util/CachingQueue.java | 56 +- .../src/de/lanlab/larm/util/ClassInfo.java | 63 +- .../larm/util/HashedCircularLinkedList.java | 59 +- .../lanlab/larm/util/InputStreamObserver.java | 61 +- .../src/de/lanlab/larm/util/Logger.java | 51 +- .../larm/util/ObservableInputStream.java | 61 +- .../src/de/lanlab/larm/util/Observer.java | 54 + .../lanlab/larm/util/OverflowException.java | 54 + .../src/de/lanlab/larm/util/Queue.java | 54 + .../larm/util/SimpleCharArrayReader.java | 241 +++-- .../src/de/lanlab/larm/util/SimpleLogger.java | 62 +- .../lanlab/larm/util/SimpleLoggerManager.java | 63 +- .../de/lanlab/larm/util/SimpleObservable.java | 63 +- .../src/de/lanlab/larm/util/State.java | 54 + .../src/de/lanlab/larm/util/URLUtils.java | 61 +- .../lanlab/larm/util/UnderflowException.java | 54 + .../src/de/lanlab/larm/util/WebDocument.java | 134 ++- 68 files changed, 5113 insertions(+), 1465 deletions(-) create mode 100644 
sandbox/contributions/webcrawler-LARM/CHANGES.txt create mode 100755 sandbox/contributions/webcrawler-LARM/build.bat create mode 100755 sandbox/contributions/webcrawler-LARM/run.bat diff --git a/sandbox/contributions/webcrawler-LARM/CHANGES.txt b/sandbox/contributions/webcrawler-LARM/CHANGES.txt new file mode 100644 index 00000000000..2234b405863 --- /dev/null +++ b/sandbox/contributions/webcrawler-LARM/CHANGES.txt @@ -0,0 +1,11 @@ +$id: $ + +2002-05-23 (cmarschner) + * removed 0x0d0d from the source files (Otis?) + * included Apache License into all of the source files in de.lanlab.larm.* directories + * added anchor text deparsing to the Tokenizer + * split store.log in two files: + - store.log contains the page file index: <PageFileNo> <PageFileOffset> + - links.log contains link information: <referer> <URL> <isFrame> <AnchorText> + * changed lib to libs in the startup scripts + * added .bat files for Windows diff --git a/sandbox/contributions/webcrawler-LARM/build.bat b/sandbox/contributions/webcrawler-LARM/build.bat new file mode 100755 index 00000000000..e0f42b1d8ce --- /dev/null +++ b/sandbox/contributions/webcrawler-LARM/build.bat @@ -0,0 +1,21 @@ +@echo off + +rem clean +echo cleaning +rmdir /s /q build +rmdir /s /q classes +rmdir /s /q cachingqueue +rmdir /s /q logs + +rem build +echo making build directory +mkdir build +cd build +echo extracting http client +jar xvf ../libs/HTTPClient.zip >nul +cd .. 
+xcopy /s src\*.java build +mkdir classes +echo compiling +javac -g -d classes -sourcepath build build/HTTPClient/*.java +javac -g -classpath ./libs/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java diff --git a/sandbox/contributions/webcrawler-LARM/build.sh b/sandbox/contributions/webcrawler-LARM/build.sh index 384c3ab9e68..0b86a7cfa91 100755 --- a/sandbox/contributions/webcrawler-LARM/build.sh +++ b/sandbox/contributions/webcrawler-LARM/build.sh @@ -12,12 +12,12 @@ echo making build directory mkdir build cd build echo extracting http client -jar xvf ../lib/HTTPClient.zip >/dev/nul +jar xvf ../libs/HTTPClient.zip >/dev/nul cd .. cp -r src/* build mkdir classes echo compiling javac -g -d classes -sourcepath build build/HTTPClient/*.java -javac -g -classpath ./lib/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java +javac -g -classpath ./libs/jakarta-oro-2.0.5.jar -d classes -sourcepath build build/de/lanlab/larm/fetcher/FetcherMain.java diff --git a/sandbox/contributions/webcrawler-LARM/run.bat b/sandbox/contributions/webcrawler-LARM/run.bat new file mode 100755 index 00000000000..71022a9daec --- /dev/null +++ b/sandbox/contributions/webcrawler-LARM/run.bat @@ -0,0 +1,3 @@ +rmdir /s /q -r logs +mkdir logs +java -server -Xmx400mb -classpath classes;libs/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto http://.*\.uni-muenchen\.de.* -threads 15 diff --git a/sandbox/contributions/webcrawler-LARM/run.sh b/sandbox/contributions/webcrawler-LARM/run.sh index 4af92d2fed6..00c22e9ad03 100755 --- a/sandbox/contributions/webcrawler-LARM/run.sh +++ b/sandbox/contributions/webcrawler-LARM/run.sh @@ -1,4 +1,4 @@ #!/bin/sh rm -r logs mkdir logs -java -server -Xmx400mb -classpath classes:lib/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://www.cis.uni-muenchen.de/ -restrictto http://[^/]*\.uni-muenchen\.de.* 
-threads 15 +java -server -Xmx400mb -classpath classes:libs/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://your.server.here/ -restrictto http://[^/]*\.your\.server\.here.* -threads 15 diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java index b8b4d7e3a36..a49f1bf0950 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Constants.java @@ -1,16 +1,63 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c) <p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.fetcher; /** * contains all global constants used in this package + * @version $Id$ + * */ public class Constants { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java index a724066daff..aa0db2a447b 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/DNSResolver.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.fetcher; import java.util.*; @@ -17,6 +62,7 @@ import java.net.*; * the other parts of the application * since URLs cache their IP addresses themselves, and HTTP 1.1 needs the * host names to be sent to the server, this class is not used anymore + * @version $Id$ */ public class DNSResolver implements MessageListener { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java index e1ca56c2355..ec4854cae50 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Fetcher.java @@ -1,10 +1,58 @@ -/* - * LARM - LANLab Retrieval Machine +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * $history: $ + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; import de.lanlab.larm.threads.ThreadPool; @@ -24,7 +72,7 @@ import de.lanlab.larm.fetcher.FetcherTask; * so that all filtering can be made beforehand. 
* * @author Clemens Marschner - * + * @version $Id$ */ public class Fetcher implements MessageListener diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java index 43b19768245..b1303d80e63 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherGUIController.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.fetcher; import java.awt.event.ActionListener; @@ -12,64 +66,65 @@ import de.lanlab.larm.threads.*; /** * this was used to connect the GUI to the fetcher * @TODO put this into the GUI package, probably? 
+ * @version $Id$ */ public class FetcherGUIController implements ActionListener { - FetcherMain fetcherMain; - FetcherSummaryFrame fetcherFrame; + FetcherMain fetcherMain; + FetcherSummaryFrame fetcherFrame; - public FetcherGUIController(FetcherMain fetcherMainPrg, FetcherSummaryFrame fetcherFrameWin, String defaultStartURL) - { - this.fetcherMain = fetcherMainPrg; - this.fetcherFrame = fetcherFrameWin; + public FetcherGUIController(FetcherMain fetcherMainPrg, FetcherSummaryFrame fetcherFrameWin, String defaultStartURL) + { + this.fetcherMain = fetcherMainPrg; + this.fetcherFrame = fetcherFrameWin; - fetcherFrame.setRestrictTo(fetcherMain.urlScopeFilter.getRexString()); - fetcherFrame.setStartURL(defaultStartURL); + fetcherFrame.setRestrictTo(fetcherMain.urlScopeFilter.getRexString()); + fetcherFrame.setStartURL(defaultStartURL); - fetcherMain.fetcher.addThreadPoolObserver( - new ThreadPoolObserver() - { - public void threadUpdate(int threadNr, String action, String info) - { - String status = threadNr + ": " + action + ": " + info; - fetcherFrame.setIdleThreadsCount(fetcherMain.fetcher.getIdleThreadsCount()); - fetcherFrame.setBusyThreadsCount(fetcherMain.fetcher.getBusyThreadsCount()); - fetcherFrame.setWorkingThreadsCount(fetcherMain.fetcher.getWorkingThreadsCount()); - } + fetcherMain.fetcher.addThreadPoolObserver( + new ThreadPoolObserver() + { + public void threadUpdate(int threadNr, String action, String info) + { + String status = threadNr + ": " + action + ": " + info; + fetcherFrame.setIdleThreadsCount(fetcherMain.fetcher.getIdleThreadsCount()); + fetcherFrame.setBusyThreadsCount(fetcherMain.fetcher.getBusyThreadsCount()); + fetcherFrame.setWorkingThreadsCount(fetcherMain.fetcher.getWorkingThreadsCount()); + } - public void queueUpdate(String info, String action) - { - fetcherFrame.setRequestQueueCount(fetcherMain.fetcher.getQueueSize()); - } - } - ); + public void queueUpdate(String info, String action) + { + 
fetcherFrame.setRequestQueueCount(fetcherMain.fetcher.getQueueSize()); + } + } + ); - fetcherMain.monitor.addObserver(new Observer() - { - public void update(Observable o, Object arg) - { - // der ThreadMonitor wurde geupdated - //fetcherFrame.setStalledThreads(fetcherMain.monitor.getStalledThreadCount(10, 500.0)); - //fetcherFrame.setBytesPerSecond(fetcherMain.monitor.getAverageReadCount(5)); - // fetcherFrame.setDocsPerSecond(fetcherMain.monitor.getDocsPerSecond(5)); - // wir nutzen die Gelegenheit, den aktuellen Speicherbestand auszugeben - fetcherFrame.setFreeMem(Runtime.getRuntime().freeMemory()); - fetcherFrame.setTotalMem(Runtime.getRuntime().totalMemory()); + fetcherMain.monitor.addObserver(new Observer() + { + public void update(Observable o, Object arg) + { + // der ThreadMonitor wurde geupdated + //fetcherFrame.setStalledThreads(fetcherMain.monitor.getStalledThreadCount(10, 500.0)); + //fetcherFrame.setBytesPerSecond(fetcherMain.monitor.getAverageReadCount(5)); + // fetcherFrame.setDocsPerSecond(fetcherMain.monitor.getDocsPerSecond(5)); + // wir nutzen die Gelegenheit, den aktuellen Speicherbestand auszugeben + fetcherFrame.setFreeMem(Runtime.getRuntime().freeMemory()); + fetcherFrame.setTotalMem(Runtime.getRuntime().totalMemory()); - } + } - }); + }); - /* fetcherMain.reFilter.addObserver( - new Observer() - { - public void update(Observable o, Object arg) - { - fetcherFrame.setRobotsTxtCount(fetcherMain.reFilter.getExcludingHostsCount()); - } - } - );*/ + /* fetcherMain.reFilter.addObserver( + new Observer() + { + public void update(Observable o, Object arg) + { + fetcherFrame.setRobotsTxtCount(fetcherMain.reFilter.getExcludingHostsCount()); + } + } + );*/ fetcherMain.messageHandler.addMessageQueueObserver(new Observer() { @@ -83,8 +138,8 @@ public class FetcherGUIController implements ActionListener } ); - // this observer will be called if a filter has decided to throw a - // message away. 
+ // this observer will be called if a filter has decided to throw a + // message away. fetcherMain.messageHandler.addMessageProcessorObserver(new Observer() { public void update(Observable o, Object arg) @@ -101,31 +156,31 @@ public class FetcherGUIController implements ActionListener { fetcherFrame.setURLsCaughtCount(fetcherMain.reFilter.getFiltered()); } - else // it's the fetcher - { - fetcherFrame.setDocsRead(fetcherMain.fetcher.getDocsRead()); - } + else // it's the fetcher + { + fetcherFrame.setDocsRead(fetcherMain.fetcher.getDocsRead()); + } } } ); - fetcherFrame.addWindowListener( - new WindowAdapter() - { - public void windowClosed(WindowEvent e) - { - System.out.println("window Closed"); - System.exit(0); - } + fetcherFrame.addWindowListener( + new WindowAdapter() + { + public void windowClosed(WindowEvent e) + { + System.out.println("window Closed"); + System.exit(0); + } - } - ); + } + ); fetcherFrame.addStartButtonListener((ActionListener)this); - } + } - /** + /** * will be called when the start button is pressed */ public void actionPerformed(ActionEvent e) @@ -135,8 +190,8 @@ public class FetcherGUIController implements ActionListener { // urlVisitedFilter.printAllURLs(); // urlVisitedFilter.clearHashtable(); - fetcherMain.setRexString(fetcherFrame.getRestrictTo()); - fetcherMain.startMonitor(); + fetcherMain.setRexString(fetcherFrame.getRestrictTo()); + fetcherMain.startMonitor(); fetcherMain.putURL(new URL(fetcherFrame.getStartURL()), false); } catch(Exception ex) diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java index 2da43b08f68..7604c378283 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java @@ -1,9 +1,57 @@ -/* - * LARM - LANLab Retrieval Machine +/* 
==================================================================== + * The Apache Software License, Version 1.1 * - * $history: $ + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; import de.lanlab.larm.threads.ThreadPoolObserver; @@ -25,6 +73,7 @@ import org.apache.oro.text.regex.MalformedPatternException; * * @author Clemens Marschner * @created December 16, 2000 + * @version $Id$ */ public class FetcherMain { @@ -128,8 +177,9 @@ public class FetcherMain // it also saves all documents to page files. 
Probably this single storage // could also be replaced by a pipeline; or even incorporated into the // existing message pipeline - SimpleLogger log = new SimpleLogger("store", false); - this.storage = new LogStorage(log, true, "logs/pagefile"); + SimpleLogger storeLog = new SimpleLogger("store", false); + SimpleLogger linksLog = new SimpleLogger("links", false); + this.storage = new LogStorage(storeLog, true, "logs/pagefile"); // a third example would be the NullStorage, which converts the documents into // heat, which evaporates above the processor @@ -138,7 +188,7 @@ public class FetcherMain // create the filters and add them to the message queue urlScopeFilter = new URLScopeFilter(); - urlVisitedFilter = new URLVisitedFilter(100000, log); + urlVisitedFilter = new URLVisitedFilter(100000, linksLog); // dnsResolver = new DNSResolver(); hostManager = new HostManager(1000); @@ -219,7 +269,7 @@ public class FetcherMain { try { - messageHandler.putMessage(new URLMessage(url, null, isFrame)); + messageHandler.putMessage(new URLMessage(url, null, isFrame, null)); } catch (Exception e) { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java index 9f6edf904e4..f55de4ec018 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTask.java @@ -1,9 +1,57 @@ -/* - * LARM - LANLab Retrieval Machine +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * $history: $ + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; import java.net.URL; @@ -31,7 +79,7 @@ import de.lanlab.larm.parser.LinkHandler; * be put into the message handler again. * * @author Clemens Marschner - * + * @version $Id$ */ public class FetcherTask implements InterruptableTask, LinkHandler, Serializable @@ -477,7 +525,7 @@ public class FetcherTask * * @param link Description of the Parameter */ - public void handleLink(String link, boolean isFrame) + public void handleLink(String link, String anchor, boolean isFrame) { try { @@ -508,7 +556,7 @@ public class FetcherTask url = new URL(base, link); } - URLMessage urlMessage = new URLMessage(url, contextUrl, isFrame); + URLMessage urlMessage = new URLMessage(url, contextUrl, isFrame, anchor); String urlString = urlMessage.getURLString(); diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java index f2c9083708b..f299e348ae8 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherTaskQueue.java @@ -1,4 +1,58 @@ -package de.lanlab.larm.fetcher; +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + + package de.lanlab.larm.fetcher; import de.lanlab.larm.threads.*; import de.lanlab.larm.util.*; @@ -13,6 +67,7 @@ import java.net.URL; * * @author Clemens Marschner * @created 23. November 2001 + * @version $Id$ */ public class FetcherTaskQueue extends TaskQueue { @@ -134,13 +189,13 @@ public class FetcherTaskQueue extends TaskQueue System.out.println("Test 1. put in 4 yahoos and 3 lmus. pull out LMU/Yahoo/LMU/Yahoo/LMU/Yahoo/Yahoo"); try { - q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false, null))); + q.insert(new 
FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false, null))); } catch (Throwable t) { @@ -162,9 +217,9 @@ public class FetcherTaskQueue extends TaskQueue try { System.out.println("put 3 lmus."); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/1"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/2"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.lmu.de/3"), null, false, null))); System.out.print("pull out 1st element [lmu/1]: "); System.out.println(((FetcherTask) q.remove()).getInfo()); System.out.println("size now [2]: " + q.size()); @@ -172,9 +227,9 @@ public class FetcherTaskQueue extends TaskQueue System.out.println(((FetcherTask) q.remove()).getInfo()); System.out.println("size now [1]: " + q.size()); System.out.println("put in 3 yahoos"); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false))); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/1"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/2"), null, false, null))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/3"), null, false, null))); System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo()); System.out.println("Size now [3]: " + q.size()); System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo()); @@ -182,7 +237,7 @@ public class FetcherTaskQueue extends TaskQueue 
System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo()); System.out.println("Size now [1]: " + q.size()); System.out.println("put in another Yahoo"); - q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false))); + q.insert(new FetcherTask(new URLMessage(new URL("http://www.yahoo.de/4"), null, false, null))); System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo()); System.out.println("Size now [1]: " + q.size()); System.out.println("remove [?]: " + ((FetcherTask) q.remove()).getInfo()); diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java index 54930fa9fc3..3473a273ee8 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThread.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.fetcher; import de.lanlab.larm.threads.ServerThread; @@ -17,6 +62,7 @@ import de.lanlab.larm.util.State; * of bytes read and the number of tasks run * mainly for statistical purposes and to keep most of the information a task needs * static + * @version $Id$ */ public class FetcherThread extends ServerThread { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java index 99035c24ee0..24a47d31fc5 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherThreadFactory.java @@ -1,18 +1,64 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; import de.lanlab.larm.threads.*; /** * this factory simply creates fetcher threads. 
It's passed * to the ThreadPool because the pool is creating the threads on its own + * @version $Id$ */ public class FetcherThreadFactory extends ThreadFactory { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java index 0a3be1c0e7e..2c83845881a 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Filter.java @@ -1,24 +1,70 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ -package de.lanlab.larm.fetcher; + + package de.lanlab.larm.fetcher; /** * base class of all filter classes + * @version $Id$ */ public abstract class Filter { - /** - * number of items filtered. augmented directly by - * the inheriting classes - */ + /** + * number of items filtered. 
augmented directly by + * the inheriting classes + */ protected int filtered = 0; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java index ad6d5b3ed32..2f6d0749823 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/GZipTest.java @@ -1,13 +1,59 @@ -package de.lanlab.larm.fetcher; - -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * @author - * @version 1.0 + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ +package de.lanlab.larm.fetcher; + import java.io.*; import java.util.zip.*; import java.net.*; @@ -17,6 +63,7 @@ import java.net.*; * * @author Administrator * @created 28. 
Januar 2002 + * @version $Id$ */ public class GZipTest { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java index ff48f26f31f..dd28bba7beb 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostInfo.java @@ -1,13 +1,59 @@ -package de.lanlab.larm.fetcher; - -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * @author Clemens Marschner - * @version 1.0 + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ +package de.lanlab.larm.fetcher; + import java.util.HashMap; import java.net.*; import de.lanlab.larm.util.CachingQueue; @@ -20,6 +66,7 @@ import de.lanlab.larm.util.Queue; * * @author Clemens Marschner * @created 16. 
Februar 2002 + * @version $Id$ */ public class HostInfo { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java index dc892d8f4fa..6a854968860 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/HostManager.java @@ -1,13 +1,59 @@ -package de.lanlab.larm.fetcher; - -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * @author - * @version 1.0 + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. 
+ * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ +package de.lanlab.larm.fetcher; + import java.util.HashMap; /** @@ -15,6 +61,7 @@ import java.util.HashMap; * * @author Administrator * @created 16. 
Februar 2002 + * @version $Id$ */ public class HostManager { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java index 3984bafe990..251f27b6755 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/KnownPathsFilter.java @@ -1,18 +1,65 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.fetcher; -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: - * - * @author - * @created 17. 
Februar 2002 - * @version 1.0 - */ import java.net.*; /** * this can be considered a hack * @TODO implement this as a fast way to filter out different URL endings or beginnings + * @version $Id$ */ public class KnownPathsFilter extends Filter implements MessageListener { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java index fccf11877b3..42eea35b3f7 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/Message.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.fetcher; import java.io.*; @@ -5,6 +59,7 @@ import java.io.*; /** * Marker interface. * represents a simple message. 
+ * @version $Id$ */ public interface Message { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java index 5f507e700d6..14ed2be1bb5 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageHandler.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.fetcher; import java.util.*; @@ -15,6 +69,7 @@ import de.lanlab.larm.util.UnderflowException; * object, usually the one they got.<br> * The filters will run synchronously within the message handler thread<br> * This implements a chain of responsibility-style message handling + * @version $Id$ */ public class MessageHandler implements Runnable { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java index f39681cbdbf..b9df91949f7 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/MessageListener.java @@ -1,10 +1,57 @@ -/* - * LARM - LANLab Retrieval Machine +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * $history: $ + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; /** @@ -14,6 +61,7 @@ package de.lanlab.larm.fetcher; * * @author Administrator * @created 24. 
November 2001 + * @version $Id$ */ public interface MessageListener { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java index 35158d4f53d..76e1a363dee 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/RobotExclusionFilter.java @@ -1,17 +1,57 @@ -/** - * Title: LARM Lanlab Retrieval Machine<p> +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * Description: <p> + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * - * Copyright: Copyright (c)<p> + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * Company: <p> + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. * - * @author Clemens Marschner - * @version 1.0 + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; import de.lanlab.larm.util.SimpleObservable; @@ -30,6 +70,7 @@ import HTTPClient.*; * * @author Administrator * @created 17. 
Februar 2002 + * @version $Id$ */ class REFThreadFactory extends ThreadFactory { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java index 140924ab81a..3e07a0401a8 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/ThreadMonitor.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.fetcher; @@ -25,6 +70,7 @@ import de.lanlab.larm.util.SimpleLoggerManager; * to log files and to the console * @TODO this can be done better. 
Probably with an agent where different services * can be registered to be called every X seconds + * @version $Id$ */ public class ThreadMonitor extends Observable implements Runnable { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java index 61f49c448f4..cf5ebeb59d6 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java @@ -1,23 +1,66 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.fetcher; /** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: - * - * @author - * @created 28. Januar 2002 - * @version 1.0 - */ - -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: - * * kills URLs longer than X characters. Used to prevent endless loops where * the page contains the current URL + some extension * * @author Clemens Marschner * @created 28. 
Januar 2002 + * @version $Id$ */ public class URLLengthFilter extends Filter implements MessageListener diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java index 24973f93929..c70a7370c34 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLMessage.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.fetcher; import java.net.*; @@ -6,6 +60,7 @@ import de.lanlab.larm.util.URLUtils; /** * represents a URL which is passed around in the messageHandler + * @version $Id$ */ public class URLMessage implements Message, Serializable { @@ -18,8 +73,9 @@ public class URLMessage implements Message, Serializable protected URL referer; protected String refererString; boolean isFrame; + protected String anchor; - public URLMessage(URL url, URL referer, boolean isFrame) + public URLMessage(URL url, URL referer, boolean isFrame, String anchor) { //super(); this.url = url; @@ -28,6 +84,7 @@ public class URLMessage implements Message, Serializable this.referer = referer; this.refererString = referer != null ? URLUtils.toExternalFormNoRef(referer) : null; this.isFrame = isFrame; + this.anchor = anchor != null ? anchor : ""; //System.out.println("" + refererString + " -> " + urlString); } @@ -57,6 +114,11 @@ public class URLMessage implements Message, Serializable return refererString; } + public String getAnchor() + { + return anchor; + } + public int hashCode() { @@ -68,6 +130,7 @@ public class URLMessage implements Message, Serializable out.writeObject(url); out.writeObject(referer); out.writeBoolean(isFrame); + out.writeUTF(anchor); } private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException @@ -77,11 +140,12 @@ public class URLMessage implements Message, Serializable urlString = url.toExternalForm(); refererString = referer.toExternalForm(); isFrame = in.readBoolean(); + anchor = in.readUTF(); } public String getInfo() { - return (referer != null ? refererString : "<start>") + "\t" + urlString + "\t" + (isFrame ? "1" : "0"); + return (referer != null ? refererString : "<start>") + "\t" + urlString + "\t" + (isFrame ? 
"1" : "0") + "\t" + anchor; } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java index 66d66fd5a94..2928c78cc50 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLScopeFilter.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.fetcher; import org.apache.oro.text.regex.Perl5Matcher; @@ -5,10 +59,9 @@ import org.apache.oro.text.regex.Perl5Compiler; import org.apache.oro.text.regex.Pattern; /** - * Filter-Klasse; prüft eine eingegangene Message auf Einhaltung eines - * regulären Ausdrucks. Wenn die URL diesem Ausdruck - * nicht entspricht, wird sie verworfen + * filter class. 
Tries to match a regular expression with an incoming URL * @author Clemens Marschner + * @version $Id$ */ class URLScopeFilter extends Filter implements MessageListener { @@ -19,8 +72,8 @@ class URLScopeFilter extends Filter implements MessageListener MessageHandler messageHandler; /** - * the regular expression which describes a valid URL - */ + * the regular expression which describes a valid URL + */ private Pattern pattern; private Perl5Matcher matcher; private Perl5Compiler compiler; @@ -36,10 +89,10 @@ class URLScopeFilter extends Filter implements MessageListener return pattern.toString(); } - /** - * set the regular expression - * @param rexString the expression - */ + /** + * set the regular expression + * @param rexString the expression + */ public void setRexString(String rexString) throws org.apache.oro.text.regex.MalformedPatternException { this.pattern = compiler.compile(rexString, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK); @@ -49,27 +102,27 @@ class URLScopeFilter extends Filter implements MessageListener /** * this method will be called by the message handler. 
Tests the URL - * and throws it out if it's not in the scope + * and throws it out if it's not in the scope */ - public Message handleRequest(Message message) - { - if(message instanceof URLMessage) - { - String urlString = ((URLMessage)message).toString(); - int length = urlString.length(); - char buffer[] = new char[length]; - urlString.getChars(0,length,buffer,0); + public Message handleRequest(Message message) + { + if(message instanceof URLMessage) + { + String urlString = ((URLMessage)message).toString(); + int length = urlString.length(); + char buffer[] = new char[length]; + urlString.getChars(0,length,buffer,0); //System.out.println("using pattern: " + pattern); - boolean match = matcher.matches(buffer, pattern); - if(!match) - { - //System.out.println("not in Scope: " + urlString); + boolean match = matcher.matches(buffer, pattern); + if(!match) + { + //System.out.println("not in Scope: " + urlString); filtered++; - return null; - } - } + return null; + } + } return message; - } + } } \ No newline at end of file diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java index 0c9ba7cb75b..e972930c7ac 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLVisitedFilter.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.fetcher; import java.net.URL; @@ -13,6 +67,7 @@ import de.lanlab.larm.util.SimpleLogger; * * @author Clemens Marschner * @created 3. Januar 2002 + * @version $Id$ */ class URLVisitedFilter extends Filter implements MessageListener { @@ -79,7 +134,7 @@ class URLVisitedFilter extends Filter implements MessageListener filtered++; if(log != null) { - log.logThreadSafe(urlMessage.getInfo()); + log.log(urlMessage.getInfo()); } return null; } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java index 444523ff6b2..9598bca84b3 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/graph/DistanceCount.java @@ -1,70 +1,148 @@ -package de.lanlab.larm.graph; - -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * @author - * @version 1.0 + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ +package de.lanlab.larm.graph; + import java.io.*; + import java.util.*; /** - * Description of the Class + * this is just a test to use some graph algorithms on the URL graph * * @author Administrator * @created 30. Januar 2002 + * @version $Id$ */ + class Node implements Comparable { + LinkedList incoming; + // 16 + 4 per entry + //HashSet incomingNodes; // 16 + 16 per entry, 11 x 16 default size = 192 + LinkedList outgoing; + // 16 + 4 per entry + //Object o; + //HashSet outgoingNodes; // 16 + 16 per entry, 11 x 16 default size = 192 //LinkedList shortestIncoming; + int id; + // 4 + float distance; + // 8 + String name; + // 4 + String object + String title; + // 4 + String object + float nodeRank[] = new float[2]; + // 16 + // 470 bytes + 2 string objects + /** * Description of the Field */ + public static int sortType = 0; + /** * Description of the Method * * @param n Description of the Parameter * @return Description of the Return Value */ + public int compareTo(Object n) { + if (sortType < 2) { + double diff = ((Node) n).nodeRank[sortType] - nodeRank[sortType]; + return diff < 0 ? -1 : diff > 0 ? 
1 : 0; } + else { + return (((Node) n).incoming.size() - incoming.size()); } + } + /** * Constructor for the Node object * @@ -72,164 +150,225 @@ class Node implements Comparable * @param name Description of the Parameter * @param title Description of the Parameter */ + public Node(int id, String name, String title) { + this.id = id; + this.name = name; + this.title = title; + this.incoming = new LinkedList(); + this.outgoing = new LinkedList(); + //this.incomingNodes = new HashSet(); + //this.outgoingNodes = new HashSet(); + this.distance = Float.MAX_VALUE; + this.nodeRank[0] = this.nodeRank[1] = 1; + } + /** * Adds a feature to the Incoming attribute of the Node object * * @param incomingT The feature to be added to the Incoming attribute * @return Description of the Return Value */ + public boolean addIncoming(Transition incomingT) { + Integer id = new Integer(incomingT.getFrom().id); + if (!incoming.contains(id)) { + // attn: doesn't scale well, but also saves memory incoming.addLast(incomingT); + //incomingNodes.add(id); + return true; } + else { + return false; } + } + /** * Adds a feature to the Outgoing attribute of the Node object * * @param outgoingT The feature to be added to the Outgoing attribute * @return Description of the Return Value */ + public boolean addOutgoing(Transition outgoingT) { + Integer id = new Integer(outgoingT.getTo().id); + if (!outgoing.contains(id)) { + outgoing.addLast(outgoingT); + //outgoingNodes.add(id); + return true; } + else { + return false; } + } + /** * Gets the incoming attribute of the Node object * * @return The incoming value */ + public LinkedList getIncoming() { + return incoming; } + /** * Gets the outgoing attribute of the Node object * * @return The outgoing value */ + public LinkedList getOutgoing() { + return outgoing; } + /** * Sets the distance attribute of the Node object * * @param distance The new distance value */ + public void setDistance(float distance) { + this.distance = distance; + } + /** * Gets 
the distance attribute of the Node object * * @return The distance value */ + public float getDistance() { + return distance; } + /** * Gets the name attribute of the Node object * * @return The name value */ + public String getName() { + return name; } + /** * Sets the title attribute of the Node object * * @param title The new title value */ + public void setTitle(String title) { + this.title = title; + } + /** * Gets the title attribute of the Node object * * @return The title value */ + public String getTitle() { + return title; } + /** * Gets the nodeRank attribute of the Node object * * @param idx Description of the Parameter * @return The nodeRank value */ + public float getNodeRank(int idx) { + return nodeRank[idx]; } + /** * Sets the nodeRank attribute of the Node object * * @param nodeRank The new nodeRank value * @param idx The new nodeRank value */ + public void setNodeRank(float nodeRank, int idx) { + this.nodeRank[idx] = nodeRank; + } } @@ -240,17 +379,24 @@ class Node implements Comparable * @author Administrator * @created 30. 
Januar 2002 */ + class Transition { + Node from; + Node to; + float distance; + float linkRank[] = new float[2]; + boolean isFrame; + /** * Constructor for the Transition object * @@ -258,105 +404,147 @@ class Transition * @param to Description of the Parameter * @param isFrame Description of the Parameter */ + public Transition(Node from, Node to, boolean isFrame) { + LinkedList l = from.getOutgoing(); + Iterator i = l.iterator(); - while(i.hasNext()) + + while (i.hasNext()) { - Transition t = (Transition)i.next(); - if(t.getTo() == to) + + Transition t = (Transition) i.next(); + + if (t.getTo() == to) { - return; // schon enthalten + + return; + // schon enthalten + } + } + this.from = from; + this.to = to; + from.addOutgoing(this); + to.addIncoming(this); + this.distance = Integer.MAX_VALUE; + this.isFrame = isFrame; + this.linkRank[0] = this.linkRank[1] = 1; + } + /** * Gets the to attribute of the Transition object * * @return The to value */ + public Node getTo() { + return to; } + /** * Gets the from attribute of the Transition object * * @return The from value */ + public Node getFrom() { + return from; } + /** * Gets the distance attribute of the Transition object * * @return The distance value */ + public float getDistance() { + return distance; } + /** * Sets the distance attribute of the Transition object * * @param distance The new distance value */ + public void setDistance(float distance) { + this.distance = distance; + } + /** * Gets the frame attribute of the Transition object * * @return The frame value */ + public boolean isFrame() { + return isFrame; } + /** * Gets the linkRank attribute of the Transition object * * @param idx Description of the Parameter * @return The linkRank value */ + public float getLinkRank(int idx) { + return linkRank[idx]; } + /** * Sets the linkRank attribute of the Transition object * * @param linkRank The new linkRank value * @param idx The new linkRank value */ + public void setLinkRank(float linkRank, int idx) { + 
this.linkRank[idx] = linkRank; + } + } /** @@ -365,15 +553,20 @@ class Transition * @author Administrator * @created 30. Januar 2002 */ + public class DistanceCount { + HashMap nodes = new HashMap(100000); + LinkedList nodesToDo = new LinkedList(); + static int id = 0; + /** * Gets the orCreateNode attribute of the DistanceCount object * @@ -381,73 +574,121 @@ public class DistanceCount * @param title Description of the Parameter * @return The orCreateNode value */ + Node getOrCreateNode(String name, String title) { + Node node = (Node) nodes.get(name); + if (node != null) { + if (title != null) { + node.setTitle(title); + } + return node; } + else { + node = new Node(id++, name, title); + nodes.put(name, node); + return node; } + } + /** * Constructor for the DistanceCount object * * @param filename Description of the Parameter * @exception IOException Description of the Exception */ + public DistanceCount(String filename) throws IOException { + System.out.println("reading file..."); + long t1 = System.currentTimeMillis(); + BufferedReader b = new BufferedReader(new FileReader(filename)); + String line; + boolean firstNotFound = true; + Node firstNode = null; + int lines = 0; + while ((line = b.readLine()) != null) { + lines++; + String title = null; + try { + //StringTokenizer st = new StringTokenizer(line, " "); + StringTokenizer st = new StringTokenizer(line, "\t"); + String from = st.nextToken(); + if (from.endsWith("/")) { + from = from.substring(0, from.length() - 1); + } + from = from.toLowerCase(); + String to = st.nextToken(); + if (to.endsWith("/")) { + to = to.substring(0, to.length() - 1); + } + to = to.toLowerCase(); + boolean isFrame = (Integer.parseInt(st.nextToken()) == 1); + if (st.countTokens() > 3) { + title = "<untitled>"; + //StringBuffer sb = new StringBuffer(); + st.nextToken(); + // result + st.nextToken(); + // Mime Type + st.nextToken(); + // Size + /* * while(st.hasMoreTokens()) * { @@ -455,20 +696,31 @@ public class DistanceCount * } */ 
title = st.nextToken(); + if (title.length() > 2) { title = title.substring(1, title.length() - 1); + int indexOfPara = title.indexOf("\""); + if (indexOfPara > -1) { + title = title.substring(0, indexOfPara); + } + } + } + Node fromNode = getOrCreateNode(from, null); + Node toNode = getOrCreateNode(to, title); + Transition t = new Transition(fromNode, toNode, isFrame); + /* * if(firstNotFound && to.equals("http://127.0.0.1")) * { @@ -478,20 +730,33 @@ public class DistanceCount */ if (lines % 10000 == 0) { + System.out.println("" + lines + " Lines; " + nodes.size() + " nodes"); + } + } + catch (NoSuchElementException e) { + System.out.println("Malformed line " + lines + ": field number doesn't match"); + } + catch (NumberFormatException e) { + System.out.println("Malformed line " + lines + ": NumberFormat wrong"); + } + } + System.out.println("finished; b" + lines + " Lines; " + nodes.size() + " nodes"); + long t2 = System.currentTimeMillis(); + System.out.println("" + (t2 - t1) + " ms"); /* @@ -504,51 +769,81 @@ public class DistanceCount } + /** * Description of the Method * * @param firstNode Description of the Parameter */ + public void calculateShortestDistance(Node firstNode) { + clearDistances(); + firstNode.setDistance(0); + nodesToDo.addLast(firstNode); + int calculations = 0; + while (!nodesToDo.isEmpty()) { + if (calculations % 100000 == 0) { + System.out.println("Calculations: " + calculations + "; nodes to go: " + nodesToDo.size() + " total Mem: " + Runtime.getRuntime().totalMemory() + "; free mem: " + Runtime.getRuntime().freeMemory()); + } + calculations++; Node act = (Node) nodesToDo.removeFirst(); + LinkedList outTrans = act.getOutgoing(); + float distance = act.getDistance(); + Iterator i = outTrans.iterator(); + //distance++; while (i.hasNext()) { + Transition t = (Transition) i.next(); + float transDistance = t.getDistance(); - /*if (t.isFrame()) - { - System.out.println("Frame from " + t.from.getName() + " to " + t.to.getName()); - }*/ + + /* 
+ * if (t.isFrame()) + * { + * System.out.println("Frame from " + t.from.getName() + " to " + t.to.getName()); + * } + */ float newDistance = distance + (t.isFrame() ? 0.25f : 1f); + if (transDistance > newDistance) { + t.setDistance(newDistance); + Node to = t.getTo(); + if (to.distance > distance) { + to.setDistance(newDistance); + nodesToDo.addLast(to); + } + } + } + /* * if(looksGood) * { @@ -556,53 +851,81 @@ public class DistanceCount * } */ } - System.out.println("Calculations: " + calculations ); + + System.out.println("Calculations: " + calculations); } + + /** + * Description of the Method + */ public void clearDistances() { + System.out.println("Clearing distance data..."); + Iterator it = nodes.values().iterator(); + int nr = 0; + while (it.hasNext()) { + Node n = (Node) it.next(); + nr++; + n.setDistance(Float.MAX_VALUE); + } + System.out.println("cleared " + nr + " nodes. done"); } + + /** * Description of the Method * * @param nodeFrom Description of the Parameter * @param nodeTo Description of the Parameter */ + public void printDistance(String nodeFrom, String nodeTo) { Node firstNode = (Node) nodes.get(nodeFrom); + if (firstNode == null) { + System.out.println("FROM node not found"); + return; } + Node toNode = (Node) nodes.get(nodeTo); + if (toNode == null) { + System.out.println("TO node not found"); + return; } + //System.out.println("resetting node distance..."); + //clearDistances(); System.out.println("calculating..."); + calculateShortestDistance(firstNode); //t1 = System.currentTimeMillis(); + //System.out.println("" + (t1-t2) + " ms"); @@ -708,150 +1031,244 @@ public class DistanceCount * System.out.println("Referer Median: " + ((Node)(nodeArray[Math.round(nodeArray.length/2)])).incoming.size()); * System.out.println("\nSamples:"); */ - - printShortestRoute(toNode, 0,0); + printShortestRoute(toNode, 0, 0); } + /** * Description of the Method */ + public void printRandomRoute() { + Random r = new 
java.util.Random(System.currentTimeMillis()); + Collection nodeColl = nodes.values(); - Object[] nodeArray = (Object[])nodeColl.toArray(); + + Object[] nodeArray = (Object[]) nodeColl.toArray(); + int rnd = (int) (r.nextDouble() * nodeArray.length); + Node from = (Node) nodeArray[rnd]; - rnd = (int) (r.nextDouble() * nodeArray.length); + + rnd = (int) (r.nextDouble() * nodeArray.length); + Node to = (Node) nodeArray[rnd]; + System.out.println("Calculating distance..."); + calculateShortestDistance(from); + System.out.println("printing..."); - printShortestRoute(to, 0,0); + + printShortestRoute(to, 0, 0); + } + /** * Description of the Method * - * @param n Description of the Parameter - * @param indent Description of the Parameter + * @param n Description of the Parameter + * @param indent Description of the Parameter + * @param linkCount Description of the Parameter */ + public void printShortestRoute(Node n, int indent, int linkCount) { + String spaces = " ".substring(0, indent); if (n.getIncoming().isEmpty()) { + System.out.println(spaces + "<start>"); + } + else { - System.out.print(spaces + "+- " + n.name + " (" + (n.getTitle() != null ? n.getTitle().substring(0,Math.min(n.getTitle().length(),25)) : "") + "\") D:" + n.distance + "; L:" + n.getIncoming().size() + "; C:" + linkCount); + + System.out.print(spaces + "+- " + n.name + " (" + (n.getTitle() != null ? 
n.getTitle().substring(0, Math.min(n.getTitle().length(), 25)) : "") + "\") D:" + n.distance + "; L:" + n.getIncoming().size() + "; C:" + linkCount); + Iterator it = n.getIncoming().iterator(); + float dist = n.distance; + if (dist > 10000000) { + System.out.println(spaces + "\n--no link--"); + return; } + while (it.hasNext()) { + Transition t = (Transition) it.next(); + if (t.distance <= dist) { + if (t.isFrame()) { + System.out.println(" **F** ->"); + } + else { + System.out.println(" -> "); + } + printShortestRoute(t.getFrom(), indent + 1, linkCount + n.getIncoming().size()); + } + } + } + //System.out.println(""); + } + /** - * this class reads in store.log, constructs a graph of the crawled web and is able - * to perform a breadth-first search for the shortest distance between two nodes<br> - * Note: this is experimental stuff. get into the source code to see how it works + * this class reads in store.log, constructs a graph of the crawled web and + * is able to perform a breadth-first search for the shortest distance + * between two nodes<br> + * Note: this is experimental stuff. get into the source code to see how it + * works + * * @param args args[0] must point to the store.log file */ + public static void main(String[] args) { + // Syntax: DistanceCount <store.log> + try { + DistanceCount dc = new DistanceCount(args[0]); + boolean running = true; - BufferedReader in = new BufferedReader(new InputStreamReader(System.in),400); + + BufferedReader in = new BufferedReader(new InputStreamReader(System.in), 400); + while (running) { + System.out.print("\n\nCommand (? 
for help) > "); + String newL; + String input = ""; + //while((newL = in.readLine()) != null) + //{ - input = in.readLine(); - StringTokenizer st = new StringTokenizer(input," "); + + input = in.readLine(); + + StringTokenizer st = new StringTokenizer(input, " "); + String command; + boolean printHelp = false; if (!st.hasMoreTokens()) { + printHelp = true; + command = "?"; + } + else { + command = st.nextToken(); + } try { + if ("?".equals(command)) { + printHelp = true; + } + else if ("d".equals(command)) { + String from = st.nextToken(); + String to = st.nextToken(); - dc.printDistance(from ,to); + + dc.printDistance(from, to); + } + else if ("q".equals(command)) { + running = false; + } + else if ("r".equals(command)) { + dc.printRandomRoute(); + } + else { + System.out.println("unknown command '" + command + "'"); + } + } + catch (java.util.NoSuchElementException e) { + System.out.println("Syntax error"); + e.printStackTrace(); + printHelp = true; + } - catch(Exception e) + + catch (Exception e) { + e.printStackTrace(); + } if (printHelp) { + System.out.println("\nSyntax\n" + "? print this help message\n" + "d <page1> <page2> print shortest route from page1 to page2\n" + @@ -859,17 +1276,26 @@ public class DistanceCount "q quit"); } + } } + catch (IOException e) { + e.printStackTrace(); + } + catch (ArrayIndexOutOfBoundsException e) { + System.out.println("Syntax: java ... store.log"); + } } + } + diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java index e2a1137faaa..01a5548c6cd 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/AboutDialog.java @@ -1,154 +1,208 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. 
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.gui; /* - A basic extension of the java.awt.Dialog class + A basic extension of the java.awt.Dialog class */ import java.awt.*; public class AboutDialog extends Dialog { - public AboutDialog(Frame parent, boolean modal) - { - super(parent, modal); + public AboutDialog(Frame parent, boolean modal) + { + super(parent, modal); - // This code is automatically generated by Visual Cafe when you add - // components to the visual environment. It instantiates and initializes - // the components. To modify the code, only use code syntax that matches - // what Visual Cafe can generate, or Visual Cafe may be unable to back - // parse your Java file into its visual environment. 
- - //{{INIT_CONTROLS - setLayout(null); - setSize(249,150); - setVisible(false); - label1.setText("LARM - LANLab Retrieval Machine"); - add(label1); - label1.setBounds(12,12,228,24); - okButton.setLabel("OK"); - add(okButton); - okButton.setBounds(95,85,66,27); - label2.setText("(C) 2000 Clemens Marschner"); - add(label2); - label2.setBounds(12,36,228,24); - setTitle("AWT-Anwendung - Info"); - //}} - - //{{REGISTER_LISTENERS - SymWindow aSymWindow = new SymWindow(); - this.addWindowListener(aSymWindow); - SymAction lSymAction = new SymAction(); - okButton.addActionListener(lSymAction); - //}} + // This code is automatically generated by Visual Cafe when you add + // components to the visual environment. It instantiates and initializes + // the components. To modify the code, only use code syntax that matches + // what Visual Cafe can generate, or Visual Cafe may be unable to back + // parse your Java file into its visual environment. - } - - public AboutDialog(Frame parent, String title, boolean modal) - { - this(parent, modal); - setTitle(title); - } + //{{INIT_CONTROLS + setLayout(null); + setSize(249,150); + setVisible(false); + label1.setText("LARM - LANLab Retrieval Machine"); + add(label1); + label1.setBounds(12,12,228,24); + okButton.setLabel("OK"); + add(okButton); + okButton.setBounds(95,85,66,27); + label2.setText("(C) 2000 Clemens Marschner"); + add(label2); + label2.setBounds(12,36,228,24); + setTitle("AWT-Anwendung - Info"); + //}} - public void addNotify() - { - // Record the size of the window prior to calling parents addNotify. + //{{REGISTER_LISTENERS + SymWindow aSymWindow = new SymWindow(); + this.addWindowListener(aSymWindow); + SymAction lSymAction = new SymAction(); + okButton.addActionListener(lSymAction); + //}} + + } + + public AboutDialog(Frame parent, String title, boolean modal) + { + this(parent, modal); + setTitle(title); + } + + public void addNotify() + { + // Record the size of the window prior to calling parents addNotify. 
Dimension d = getSize(); - super.addNotify(); + super.addNotify(); - // Only do this once. - if (fComponentsAdjusted) - return; + // Only do this once. + if (fComponentsAdjusted) + return; - // Adjust components according to the insets - Insets insets = getInsets(); - setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height); - Component components[] = getComponents(); - for (int i = 0; i < components.length; i++) - { - Point p = components[i].getLocation(); - p.translate(insets.left, insets.top); - components[i].setLocation(p); - } + // Adjust components according to the insets + Insets insets = getInsets(); + setSize(insets.left + insets.right + d.width, insets.top + insets.bottom + d.height); + Component components[] = getComponents(); + for (int i = 0; i < components.length; i++) + { + Point p = components[i].getLocation(); + p.translate(insets.left, insets.top); + components[i].setLocation(p); + } - // Used for addNotify check. - fComponentsAdjusted = true; - } + // Used for addNotify check. 
+ fComponentsAdjusted = true; + } - public void setVisible(boolean b) - { - if (b) - { - Rectangle bounds = getParent().getBounds(); - Rectangle abounds = getBounds(); + public void setVisible(boolean b) + { + if (b) + { + Rectangle bounds = getParent().getBounds(); + Rectangle abounds = getBounds(); - setLocation(bounds.x + (bounds.width - abounds.width)/ 2, - bounds.y + (bounds.height - abounds.height)/2); - } + setLocation(bounds.x + (bounds.width - abounds.width)/ 2, + bounds.y + (bounds.height - abounds.height)/2); + } - super.setVisible(b); - } + super.setVisible(b); + } + + //{{DECLARE_CONTROLS + java.awt.Label label1 = new java.awt.Label(); + java.awt.Button okButton = new java.awt.Button(); + java.awt.Label label2 = new java.awt.Label(); + //}} - //{{DECLARE_CONTROLS - java.awt.Label label1 = new java.awt.Label(); - java.awt.Button okButton = new java.awt.Button(); - java.awt.Label label2 = new java.awt.Label(); - //}} - // Used for addNotify check. - boolean fComponentsAdjusted = false; - - class SymAction implements java.awt.event.ActionListener - { - public void actionPerformed(java.awt.event.ActionEvent event) - { - Object object = event.getSource(); - if (object == okButton) - okButton_ActionPerformed(event); - } - } + boolean fComponentsAdjusted = false; - void okButton_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. - - okButton_ActionPerformed_Interaction1(event); - } + class SymAction implements java.awt.event.ActionListener + { + public void actionPerformed(java.awt.event.ActionEvent event) + { + Object object = event.getSource(); + if (object == okButton) + okButton_ActionPerformed(event); + } + } + + void okButton_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. 
+ + okButton_ActionPerformed_Interaction1(event); + } - void okButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) - { - try { - this.dispose(); - } catch (Exception e) { - } - } + void okButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) + { + try { + this.dispose(); + } catch (Exception e) { + } + } - class SymWindow extends java.awt.event.WindowAdapter - { - public void windowClosing(java.awt.event.WindowEvent event) - { - Object object = event.getSource(); - if (object == AboutDialog.this) - AboutDialog_WindowClosing(event); - } - } + class SymWindow extends java.awt.event.WindowAdapter + { + public void windowClosing(java.awt.event.WindowEvent event) + { + Object object = event.getSource(); + if (object == AboutDialog.this) + AboutDialog_WindowClosing(event); + } + } - void AboutDialog_WindowClosing(java.awt.event.WindowEvent event) - { - // to do: code goes here. - - AboutDialog_WindowClosing_Interaction1(event); - } + void AboutDialog_WindowClosing(java.awt.event.WindowEvent event) + { + // to do: code goes here. + + AboutDialog_WindowClosing_Interaction1(event); + } - void AboutDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event) - { - try { - this.dispose(); - } catch (Exception e) { - } - } + void AboutDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event) + { + try { + this.dispose(); + } catch (Exception e) { + } + } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java index a3d8dd242ee..41006eaf710 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherFrame.java @@ -1,9 +1,63 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. 
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.gui; /* - This simple extension of the java.awt.Frame class - contains all the elements necessary to act as the - main window of an application. + This simple extension of the java.awt.Frame class + contains all the elements necessary to act as the + main window of an application. */ import java.awt.*; @@ -12,470 +66,470 @@ import java.awt.event.ActionListener; public class FetcherFrame extends Frame { - public FetcherFrame() - { - // This code is automatically generated by Visual Cafe when you add - // components to the visual environment. It instantiates and initializes - // the components. To modify the code, only use code syntax that matches - // what Visual Cafe can generate, or Visual Cafe may be unable to back - // parse your Java file into its visual environment. 
- - //{{INIT_CONTROLS - setLayout(new BorderLayout(0,0)); - setSize(800,600); - setVisible(false); - openFileDialog1.setMode(FileDialog.LOAD); - openFileDialog1.setTitle("Öffnen"); - //$$ openFileDialog1.move(24,312); - mainPanelWithBorders.setLayout(new BorderLayout(0,0)); - add("Center", mainPanelWithBorders); - mainPanelWithBorders.setBounds(0,0,800,600); - northBorder.setLayout(null); - mainPanelWithBorders.add("North", northBorder); - northBorder.setBackground(java.awt.Color.lightGray); - northBorder.setBounds(0,0,800,3); - southBorder.setLayout(null); - mainPanelWithBorders.add("South", southBorder); - southBorder.setBackground(java.awt.Color.lightGray); - southBorder.setBounds(0,597,800,3); - westBorder.setLayout(null); - mainPanelWithBorders.add("West", westBorder); - westBorder.setBackground(java.awt.Color.lightGray); - westBorder.setBounds(0,3,3,594); - eastBorder.setLayout(null); - mainPanelWithBorders.add("East", eastBorder); - eastBorder.setBackground(java.awt.Color.lightGray); - eastBorder.setBounds(797,3,3,594); - mainPanel.setLayout(new BorderLayout(0,3)); - mainPanelWithBorders.add("Center", mainPanel); - mainPanel.setBackground(java.awt.Color.lightGray); - mainPanel.setBounds(3,3,794,594); - upperPanel.setLayout(new GridLayout(1,2,0,0)); - mainPanel.add("North", upperPanel); - upperPanel.setBounds(0,0,794,150); - preferencesPanel.setLayout(null); - upperPanel.add(preferencesPanel); - preferencesPanel.setBounds(0,0,397,150); - startURLlabel.setText("Start-URL"); - preferencesPanel.add(startURLlabel); - startURLlabel.setBounds(12,0,121,24); - startURL.setText("uni-muenchen.de"); - preferencesPanel.add(startURL); - startURL.setBounds(132,0,133,24); - startButton.setLabel("Start"); - preferencesPanel.add(startButton); - startButton.setFont(new Font("Dialog", Font.BOLD, 12)); - startButton.setBounds(288,36,99,24); - restrictToLabel.setText("Restrict host to"); - preferencesPanel.add(restrictToLabel); - restrictToLabel.setBounds(12,36,121,28); - 
preferencesPanel.add(restrictTo); - restrictTo.setBounds(133,36,133,24); - logPanel.setLayout(new BorderLayout(0,0)); - upperPanel.add(logPanel); - logPanel.setBounds(397,0,397,150); - logPanel.add("Center", logList); - logList.setBackground(java.awt.Color.white); - logList.setBounds(0,0,397,150); - lowerPanel.setLayout(new GridLayout(1,3,3,3)); - mainPanel.add("Center", lowerPanel); - lowerPanel.setBounds(0,153,794,441); - urlQueuePanel.setLayout(new BorderLayout(0,0)); - lowerPanel.add(urlQueuePanel); - urlQueuePanel.setBounds(0,0,196,441); - urlQueueLabel.setText("URLQueue"); - urlQueuePanel.add("North", urlQueueLabel); - urlQueueLabel.setBounds(0,0,196,23); - urlQueuePanel.add("Center", urlQueueList); - urlQueueList.setBackground(java.awt.Color.white); - urlQueueList.setBounds(0,23,196,418); - urlThreadPanel.setLayout(new BorderLayout(0,0)); - lowerPanel.add(urlThreadPanel); - urlThreadPanel.setBounds(199,0,196,441); - urlThreadLabel.setText("URLThreads"); - urlThreadPanel.add("North", urlThreadLabel); - urlThreadLabel.setBounds(0,0,196,23); - urlThreadPanel.add("Center", urlThreadList); - urlThreadList.setBackground(java.awt.Color.white); - urlThreadList.setBounds(0,23,196,418); - docQueuePanel.setLayout(new BorderLayout(0,0)); - lowerPanel.add(docQueuePanel); - docQueuePanel.setBounds(398,0,196,441); - docQueueLabel.setText("DocQueue"); - docQueuePanel.add("North", docQueueLabel); - docQueueLabel.setBounds(0,0,196,23); - docQueuePanel.add("Center", docQueueList); - docQueueList.setBackground(java.awt.Color.white); - docQueueList.setBounds(0,23,196,418); - docThreadPanel.setLayout(new BorderLayout(0,0)); - lowerPanel.add(docThreadPanel); - docThreadPanel.setBounds(597,0,196,441); - docThreadLabel.setText("DocThreads"); - docThreadPanel.add("North", docThreadLabel); - docThreadLabel.setBounds(0,0,196,23); - docThreadPanel.add("Center", docThreadList); - docThreadList.setBackground(java.awt.Color.white); - docThreadList.setBounds(0,23,196,418); - setTitle("LARM 
- Fetcher"); - //}} - - //{{INIT_MENUS - menu1.setLabel("Datei"); - menu1.add(newMenuItem); - newMenuItem.setEnabled(false); - newMenuItem.setLabel("Neu"); - newMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_N,false)); - menu1.add(openMenuItem); - openMenuItem.setLabel("Öffnen..."); - openMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_O,false)); - menu1.add(saveMenuItem); - saveMenuItem.setEnabled(false); - saveMenuItem.setLabel("Speichern"); - saveMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_S,false)); - menu1.add(saveAsMenuItem); - saveAsMenuItem.setEnabled(false); - saveAsMenuItem.setLabel("Speichern unter..."); - menu1.add(separatorMenuItem); - separatorMenuItem.setLabel("-"); - menu1.add(exitMenuItem); - exitMenuItem.setLabel("Beenden"); - mainMenuBar.add(menu1); - menu2.setLabel("Bearbeiten"); - menu2.add(cutMenuItem); - cutMenuItem.setEnabled(false); - cutMenuItem.setLabel("Ausschneiden"); - cutMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_X,false)); - menu2.add(copyMenuItem); - copyMenuItem.setEnabled(false); - copyMenuItem.setLabel("Kopieren"); - copyMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_C,false)); - menu2.add(pasteMenuItem); - pasteMenuItem.setEnabled(false); - pasteMenuItem.setLabel("Einfügen"); - pasteMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_V,false)); - mainMenuBar.add(menu2); - menu3.setLabel("Hilfe"); - menu3.add(aboutMenuItem); - aboutMenuItem.setLabel("Info..."); - mainMenuBar.add(menu3); - //$$ mainMenuBar.move(0,312); - setMenuBar(mainMenuBar); - //}} - - //{{REGISTER_LISTENERS - SymWindow aSymWindow = new SymWindow(); - this.addWindowListener(aSymWindow); - SymAction lSymAction = new SymAction(); - openMenuItem.addActionListener(lSymAction); - exitMenuItem.addActionListener(lSymAction); - aboutMenuItem.addActionListener(lSymAction); - startButton.addActionListener(lSymAction); - //}} - } - - public FetcherFrame(String 
title) - { - this(); - setTitle(title); - } - + public FetcherFrame() + { + // This code is automatically generated by Visual Cafe when you add + // components to the visual environment. It instantiates and initializes + // the components. To modify the code, only use code syntax that matches + // what Visual Cafe can generate, or Visual Cafe may be unable to back + // parse your Java file into its visual environment. + + //{{INIT_CONTROLS + setLayout(new BorderLayout(0,0)); + setSize(800,600); + setVisible(false); + openFileDialog1.setMode(FileDialog.LOAD); + openFileDialog1.setTitle("Öffnen"); + //$$ openFileDialog1.move(24,312); + mainPanelWithBorders.setLayout(new BorderLayout(0,0)); + add("Center", mainPanelWithBorders); + mainPanelWithBorders.setBounds(0,0,800,600); + northBorder.setLayout(null); + mainPanelWithBorders.add("North", northBorder); + northBorder.setBackground(java.awt.Color.lightGray); + northBorder.setBounds(0,0,800,3); + southBorder.setLayout(null); + mainPanelWithBorders.add("South", southBorder); + southBorder.setBackground(java.awt.Color.lightGray); + southBorder.setBounds(0,597,800,3); + westBorder.setLayout(null); + mainPanelWithBorders.add("West", westBorder); + westBorder.setBackground(java.awt.Color.lightGray); + westBorder.setBounds(0,3,3,594); + eastBorder.setLayout(null); + mainPanelWithBorders.add("East", eastBorder); + eastBorder.setBackground(java.awt.Color.lightGray); + eastBorder.setBounds(797,3,3,594); + mainPanel.setLayout(new BorderLayout(0,3)); + mainPanelWithBorders.add("Center", mainPanel); + mainPanel.setBackground(java.awt.Color.lightGray); + mainPanel.setBounds(3,3,794,594); + upperPanel.setLayout(new GridLayout(1,2,0,0)); + mainPanel.add("North", upperPanel); + upperPanel.setBounds(0,0,794,150); + preferencesPanel.setLayout(null); + upperPanel.add(preferencesPanel); + preferencesPanel.setBounds(0,0,397,150); + startURLlabel.setText("Start-URL"); + preferencesPanel.add(startURLlabel); + 
startURLlabel.setBounds(12,0,121,24); + startURL.setText("uni-muenchen.de"); + preferencesPanel.add(startURL); + startURL.setBounds(132,0,133,24); + startButton.setLabel("Start"); + preferencesPanel.add(startButton); + startButton.setFont(new Font("Dialog", Font.BOLD, 12)); + startButton.setBounds(288,36,99,24); + restrictToLabel.setText("Restrict host to"); + preferencesPanel.add(restrictToLabel); + restrictToLabel.setBounds(12,36,121,28); + preferencesPanel.add(restrictTo); + restrictTo.setBounds(133,36,133,24); + logPanel.setLayout(new BorderLayout(0,0)); + upperPanel.add(logPanel); + logPanel.setBounds(397,0,397,150); + logPanel.add("Center", logList); + logList.setBackground(java.awt.Color.white); + logList.setBounds(0,0,397,150); + lowerPanel.setLayout(new GridLayout(1,3,3,3)); + mainPanel.add("Center", lowerPanel); + lowerPanel.setBounds(0,153,794,441); + urlQueuePanel.setLayout(new BorderLayout(0,0)); + lowerPanel.add(urlQueuePanel); + urlQueuePanel.setBounds(0,0,196,441); + urlQueueLabel.setText("URLQueue"); + urlQueuePanel.add("North", urlQueueLabel); + urlQueueLabel.setBounds(0,0,196,23); + urlQueuePanel.add("Center", urlQueueList); + urlQueueList.setBackground(java.awt.Color.white); + urlQueueList.setBounds(0,23,196,418); + urlThreadPanel.setLayout(new BorderLayout(0,0)); + lowerPanel.add(urlThreadPanel); + urlThreadPanel.setBounds(199,0,196,441); + urlThreadLabel.setText("URLThreads"); + urlThreadPanel.add("North", urlThreadLabel); + urlThreadLabel.setBounds(0,0,196,23); + urlThreadPanel.add("Center", urlThreadList); + urlThreadList.setBackground(java.awt.Color.white); + urlThreadList.setBounds(0,23,196,418); + docQueuePanel.setLayout(new BorderLayout(0,0)); + lowerPanel.add(docQueuePanel); + docQueuePanel.setBounds(398,0,196,441); + docQueueLabel.setText("DocQueue"); + docQueuePanel.add("North", docQueueLabel); + docQueueLabel.setBounds(0,0,196,23); + docQueuePanel.add("Center", docQueueList); + docQueueList.setBackground(java.awt.Color.white); + 
docQueueList.setBounds(0,23,196,418); + docThreadPanel.setLayout(new BorderLayout(0,0)); + lowerPanel.add(docThreadPanel); + docThreadPanel.setBounds(597,0,196,441); + docThreadLabel.setText("DocThreads"); + docThreadPanel.add("North", docThreadLabel); + docThreadLabel.setBounds(0,0,196,23); + docThreadPanel.add("Center", docThreadList); + docThreadList.setBackground(java.awt.Color.white); + docThreadList.setBounds(0,23,196,418); + setTitle("LARM - Fetcher"); + //}} + + //{{INIT_MENUS + menu1.setLabel("Datei"); + menu1.add(newMenuItem); + newMenuItem.setEnabled(false); + newMenuItem.setLabel("Neu"); + newMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_N,false)); + menu1.add(openMenuItem); + openMenuItem.setLabel("Öffnen..."); + openMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_O,false)); + menu1.add(saveMenuItem); + saveMenuItem.setEnabled(false); + saveMenuItem.setLabel("Speichern"); + saveMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_S,false)); + menu1.add(saveAsMenuItem); + saveAsMenuItem.setEnabled(false); + saveAsMenuItem.setLabel("Speichern unter..."); + menu1.add(separatorMenuItem); + separatorMenuItem.setLabel("-"); + menu1.add(exitMenuItem); + exitMenuItem.setLabel("Beenden"); + mainMenuBar.add(menu1); + menu2.setLabel("Bearbeiten"); + menu2.add(cutMenuItem); + cutMenuItem.setEnabled(false); + cutMenuItem.setLabel("Ausschneiden"); + cutMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_X,false)); + menu2.add(copyMenuItem); + copyMenuItem.setEnabled(false); + copyMenuItem.setLabel("Kopieren"); + copyMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_C,false)); + menu2.add(pasteMenuItem); + pasteMenuItem.setEnabled(false); + pasteMenuItem.setLabel("Einfügen"); + pasteMenuItem.setShortcut(new MenuShortcut(java.awt.event.KeyEvent.VK_V,false)); + mainMenuBar.add(menu2); + menu3.setLabel("Hilfe"); + menu3.add(aboutMenuItem); + aboutMenuItem.setLabel("Info..."); + 
mainMenuBar.add(menu3); + //$$ mainMenuBar.move(0,312); + setMenuBar(mainMenuBar); + //}} + + //{{REGISTER_LISTENERS + SymWindow aSymWindow = new SymWindow(); + this.addWindowListener(aSymWindow); + SymAction lSymAction = new SymAction(); + openMenuItem.addActionListener(lSymAction); + exitMenuItem.addActionListener(lSymAction); + aboutMenuItem.addActionListener(lSymAction); + startButton.addActionListener(lSymAction); + //}} + } + + public FetcherFrame(String title) + { + this(); + setTitle(title); + } + /** * Shows or hides the component depending on the boolean flag b. * @param b if true, show the component; otherwise, hide the component. * @see java.awt.Component#isVisible */ public void setVisible(boolean b) - { - if(b) - { - setLocation(50, 50); - } - super.setVisible(b); - } - - static public void main(String args[]) - { - try - { - //Create a new instance of our application's frame, and make it visible. - (new FetcherFrame()).setVisible(true); - } - catch (Throwable t) - { - System.err.println(t); - t.printStackTrace(); - //Ensure the application exits with an error condition. - System.exit(1); - } - } - - public void addNotify() - { - // Record the size of the window prior to calling parents addNotify. - Dimension d = getSize(); - - super.addNotify(); - - if (fComponentsAdjusted) - return; - - // Adjust components according to the insets - setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height); - Component components[] = getComponents(); - for (int i = 0; i < components.length; i++) - { - Point p = components[i].getLocation(); - p.translate(getInsets().left, getInsets().top); - components[i].setLocation(p); - } - fComponentsAdjusted = true; - } - - // Used for addNotify check. 
- boolean fComponentsAdjusted = false; - - //{{DECLARE_CONTROLS - java.awt.FileDialog openFileDialog1 = new java.awt.FileDialog(this); - java.awt.Panel mainPanelWithBorders = new java.awt.Panel(); - java.awt.Panel northBorder = new java.awt.Panel(); - java.awt.Panel southBorder = new java.awt.Panel(); - java.awt.Panel westBorder = new java.awt.Panel(); - java.awt.Panel eastBorder = new java.awt.Panel(); - java.awt.Panel mainPanel = new java.awt.Panel(); - java.awt.Panel upperPanel = new java.awt.Panel(); - java.awt.Panel preferencesPanel = new java.awt.Panel(); - java.awt.Label startURLlabel = new java.awt.Label(); - java.awt.TextField startURL = new java.awt.TextField(30); - java.awt.Button startButton = new java.awt.Button(); - java.awt.Label restrictToLabel = new java.awt.Label(); - java.awt.TextField restrictTo = new java.awt.TextField(); - java.awt.Panel logPanel = new java.awt.Panel(); - java.awt.List logList = new java.awt.List(8); - java.awt.Panel lowerPanel = new java.awt.Panel(); - java.awt.Panel urlQueuePanel = new java.awt.Panel(); - java.awt.Label urlQueueLabel = new java.awt.Label(); - java.awt.List urlQueueList = new java.awt.List(5); - java.awt.Panel urlThreadPanel = new java.awt.Panel(); - java.awt.Label urlThreadLabel = new java.awt.Label(); - java.awt.List urlThreadList = new java.awt.List(4); - java.awt.Panel docQueuePanel = new java.awt.Panel(); - java.awt.Label docQueueLabel = new java.awt.Label(); - java.awt.List docQueueList = new java.awt.List(4); - java.awt.Panel docThreadPanel = new java.awt.Panel(); - java.awt.Label docThreadLabel = new java.awt.Label(); - java.awt.List docThreadList = new java.awt.List(4); - //}} - - //{{DECLARE_MENUS - java.awt.MenuBar mainMenuBar = new java.awt.MenuBar(); - java.awt.Menu menu1 = new java.awt.Menu(); - java.awt.MenuItem newMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem openMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem saveMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem 
saveAsMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem separatorMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem exitMenuItem = new java.awt.MenuItem(); - java.awt.Menu menu2 = new java.awt.Menu(); - java.awt.MenuItem cutMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem copyMenuItem = new java.awt.MenuItem(); - java.awt.MenuItem pasteMenuItem = new java.awt.MenuItem(); - java.awt.Menu menu3 = new java.awt.Menu(); - java.awt.MenuItem aboutMenuItem = new java.awt.MenuItem(); - //}} - - class SymWindow extends java.awt.event.WindowAdapter - { - public void windowClosing(java.awt.event.WindowEvent event) - { - Object object = event.getSource(); - if (object == FetcherFrame.this) - FetcherFrame_WindowClosing(event); - } - } - - void FetcherFrame_WindowClosing(java.awt.event.WindowEvent event) - { - // to do: code goes here. - - FetcherFrame_WindowClosing_Interaction1(event); - } + { + if(b) + { + setLocation(50, 50); + } + super.setVisible(b); + } + + static public void main(String args[]) + { + try + { + //Create a new instance of our application's frame, and make it visible. + (new FetcherFrame()).setVisible(true); + } + catch (Throwable t) + { + System.err.println(t); + t.printStackTrace(); + //Ensure the application exits with an error condition. + System.exit(1); + } + } + + public void addNotify() + { + // Record the size of the window prior to calling parents addNotify. + Dimension d = getSize(); + + super.addNotify(); + + if (fComponentsAdjusted) + return; + + // Adjust components according to the insets + setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height); + Component components[] = getComponents(); + for (int i = 0; i < components.length; i++) + { + Point p = components[i].getLocation(); + p.translate(getInsets().left, getInsets().top); + components[i].setLocation(p); + } + fComponentsAdjusted = true; + } + + // Used for addNotify check. 
+ boolean fComponentsAdjusted = false; + + //{{DECLARE_CONTROLS + java.awt.FileDialog openFileDialog1 = new java.awt.FileDialog(this); + java.awt.Panel mainPanelWithBorders = new java.awt.Panel(); + java.awt.Panel northBorder = new java.awt.Panel(); + java.awt.Panel southBorder = new java.awt.Panel(); + java.awt.Panel westBorder = new java.awt.Panel(); + java.awt.Panel eastBorder = new java.awt.Panel(); + java.awt.Panel mainPanel = new java.awt.Panel(); + java.awt.Panel upperPanel = new java.awt.Panel(); + java.awt.Panel preferencesPanel = new java.awt.Panel(); + java.awt.Label startURLlabel = new java.awt.Label(); + java.awt.TextField startURL = new java.awt.TextField(30); + java.awt.Button startButton = new java.awt.Button(); + java.awt.Label restrictToLabel = new java.awt.Label(); + java.awt.TextField restrictTo = new java.awt.TextField(); + java.awt.Panel logPanel = new java.awt.Panel(); + java.awt.List logList = new java.awt.List(8); + java.awt.Panel lowerPanel = new java.awt.Panel(); + java.awt.Panel urlQueuePanel = new java.awt.Panel(); + java.awt.Label urlQueueLabel = new java.awt.Label(); + java.awt.List urlQueueList = new java.awt.List(5); + java.awt.Panel urlThreadPanel = new java.awt.Panel(); + java.awt.Label urlThreadLabel = new java.awt.Label(); + java.awt.List urlThreadList = new java.awt.List(4); + java.awt.Panel docQueuePanel = new java.awt.Panel(); + java.awt.Label docQueueLabel = new java.awt.Label(); + java.awt.List docQueueList = new java.awt.List(4); + java.awt.Panel docThreadPanel = new java.awt.Panel(); + java.awt.Label docThreadLabel = new java.awt.Label(); + java.awt.List docThreadList = new java.awt.List(4); + //}} + + //{{DECLARE_MENUS + java.awt.MenuBar mainMenuBar = new java.awt.MenuBar(); + java.awt.Menu menu1 = new java.awt.Menu(); + java.awt.MenuItem newMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem openMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem saveMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem 
saveAsMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem separatorMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem exitMenuItem = new java.awt.MenuItem(); + java.awt.Menu menu2 = new java.awt.Menu(); + java.awt.MenuItem cutMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem copyMenuItem = new java.awt.MenuItem(); + java.awt.MenuItem pasteMenuItem = new java.awt.MenuItem(); + java.awt.Menu menu3 = new java.awt.Menu(); + java.awt.MenuItem aboutMenuItem = new java.awt.MenuItem(); + //}} + + class SymWindow extends java.awt.event.WindowAdapter + { + public void windowClosing(java.awt.event.WindowEvent event) + { + Object object = event.getSource(); + if (object == FetcherFrame.this) + FetcherFrame_WindowClosing(event); + } + } + + void FetcherFrame_WindowClosing(java.awt.event.WindowEvent event) + { + // to do: code goes here. + + FetcherFrame_WindowClosing_Interaction1(event); + } - void FetcherFrame_WindowClosing_Interaction1(java.awt.event.WindowEvent event) - { - try { - // QuitDialog Create and show as modal - (new QuitDialog(this, true)).setVisible(true); - } catch (Exception e) { - } - } - - - class SymAction implements java.awt.event.ActionListener - { - public void actionPerformed(java.awt.event.ActionEvent event) - { - Object object = event.getSource(); - if (object == openMenuItem) - openMenuItem_ActionPerformed(event); - else if (object == aboutMenuItem) - aboutMenuItem_ActionPerformed(event); - else if (object == exitMenuItem) - exitMenuItem_ActionPerformed(event); - else if (object == startButton) - startButton_ActionPerformed(event); - } - } - - void openMenuItem_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. 
- - openMenuItem_ActionPerformed_Interaction1(event); - } + void FetcherFrame_WindowClosing_Interaction1(java.awt.event.WindowEvent event) + { + try { + // QuitDialog Create and show as modal + (new QuitDialog(this, true)).setVisible(true); + } catch (Exception e) { + } + } - void openMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) - { - try { - // OpenFileDialog Create and show as modal - int defMode = openFileDialog1.getMode(); - String defTitle = openFileDialog1.getTitle(); - String defDirectory = openFileDialog1.getDirectory(); - String defFile = openFileDialog1.getFile(); + class SymAction implements java.awt.event.ActionListener + { + public void actionPerformed(java.awt.event.ActionEvent event) + { + Object object = event.getSource(); + if (object == openMenuItem) + openMenuItem_ActionPerformed(event); + else if (object == aboutMenuItem) + aboutMenuItem_ActionPerformed(event); + else if (object == exitMenuItem) + exitMenuItem_ActionPerformed(event); + else if (object == startButton) + startButton_ActionPerformed(event); + } + } - openFileDialog1 = new java.awt.FileDialog(this, defTitle, defMode); - openFileDialog1.setDirectory(defDirectory); - openFileDialog1.setFile(defFile); - openFileDialog1.setVisible(true); - } catch (Exception e) { - } - } + void openMenuItem_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. + + openMenuItem_ActionPerformed_Interaction1(event); + } - void aboutMenuItem_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. 
- - aboutMenuItem_ActionPerformed_Interaction1(event); - } + void openMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) + { + try { + // OpenFileDialog Create and show as modal + int defMode = openFileDialog1.getMode(); + String defTitle = openFileDialog1.getTitle(); + String defDirectory = openFileDialog1.getDirectory(); + String defFile = openFileDialog1.getFile(); + + openFileDialog1 = new java.awt.FileDialog(this, defTitle, defMode); + openFileDialog1.setDirectory(defDirectory); + openFileDialog1.setFile(defFile); + openFileDialog1.setVisible(true); + } catch (Exception e) { + } + } - void aboutMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) - { - try { - // AboutDialog Create and show as modal - (new AboutDialog(this, true)).setVisible(true); - } catch (Exception e) { - } - } - - - void exitMenuItem_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. - - exitMenuItem_ActionPerformed_Interaction1(event); - } + void aboutMenuItem_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. + + aboutMenuItem_ActionPerformed_Interaction1(event); + } - void exitMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) - { - try { - // QuitDialog Create and show as modal - (new QuitDialog(this, true)).setVisible(true); - } catch (Exception e) { - } - } + void aboutMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) + { + try { + // AboutDialog Create and show as modal + (new AboutDialog(this, true)).setVisible(true); + } catch (Exception e) { + } + } - public void startButton_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. 
- - } - - public void addUrlQueueItem(String item) - { - urlQueueList.add(item); - } - - public void removeUrlQueueItem(String item) - { - urlQueueList.remove(item); - } - public void addDocQueueItem(String item) - { - docQueueList.add(item); - } - - public void removeDocQueueItem(String item) - { - docQueueList.remove(item); - } - - public synchronized int addUrlThreadItem(String item) - { - urlThreadList.add(item); - return urlThreadList.getItemCount(); - } + void exitMenuItem_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. - public synchronized int addUrlThreadItem(String item, int pos) - { - urlThreadList.add(item,pos); - return urlThreadList.getItemCount(); - } - - public void replaceUrlThreadItem(String item, int index) - { - urlThreadList.replaceItem(item,index); - } - - public synchronized int addDocThreadItem(String item) - { - docThreadList.add(item); - return docThreadList.getItemCount(); - } + exitMenuItem_ActionPerformed_Interaction1(event); + } - public void replaceDocThreadItem(String item, int index) - { - docThreadList.replaceItem(item,index); - } - - - - public void addLogEntry(String entry) - { - logList.add(entry); - logList.makeVisible(logList.getItemCount()-1); - } - - public void clearLog() - { - logList.removeAll(); - } - - public void addStartButtonListener(ActionListener a) - { - startButton.addActionListener(a); - } - - public String getRestrictTo() - { - return restrictTo.getText(); - } - public void setRestrictTo(String restrictTo) - { - this.restrictTo.setText(restrictTo); - } - public String getStartURL() - { - return startURL.getText(); - } + + void exitMenuItem_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) + { + try { + // QuitDialog Create and show as modal + (new QuitDialog(this, true)).setVisible(true); + } catch (Exception e) { + } + } + + + public void startButton_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. 
+ + } + + public void addUrlQueueItem(String item) + { + urlQueueList.add(item); + } + + public void removeUrlQueueItem(String item) + { + urlQueueList.remove(item); + } + public void addDocQueueItem(String item) + { + docQueueList.add(item); + } + + public void removeDocQueueItem(String item) + { + docQueueList.remove(item); + } + + public synchronized int addUrlThreadItem(String item) + { + urlThreadList.add(item); + return urlThreadList.getItemCount(); + } + + public synchronized int addUrlThreadItem(String item, int pos) + { + urlThreadList.add(item,pos); + return urlThreadList.getItemCount(); + } + + public void replaceUrlThreadItem(String item, int index) + { + urlThreadList.replaceItem(item,index); + } + + public synchronized int addDocThreadItem(String item) + { + docThreadList.add(item); + return docThreadList.getItemCount(); + } + + public void replaceDocThreadItem(String item, int index) + { + docThreadList.replaceItem(item,index); + } + + + + public void addLogEntry(String entry) + { + logList.add(entry); + logList.makeVisible(logList.getItemCount()-1); + } + + public void clearLog() + { + logList.removeAll(); + } + + public void addStartButtonListener(ActionListener a) + { + startButton.addActionListener(a); + } + + public String getRestrictTo() + { + return restrictTo.getText(); + } + public void setRestrictTo(String restrictTo) + { + this.restrictTo.setText(restrictTo); + } + public String getStartURL() + { + return startURL.getText(); + } public void setStartURL(String startURL) { this.startURL.setText(startURL); - } + } //public void setInfoText(String text) //{ diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java index 405f9db7839..06f3ee454ca 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java +++ 
b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/FetcherSummaryFrame.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c) <p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.gui; import javax.swing.*; @@ -54,8 +99,8 @@ public class FetcherSummaryFrame extends JFrame JLabel bpsText = new JLabel(); JLabel docsLabel = new JLabel(); JLabel docsText = new JLabel(); - JLabel docsReadLabel = new JLabel(); - JLabel docsReadText = new JLabel(); + JLabel docsReadLabel = new JLabel(); + JLabel docsReadText = new JLabel(); JProgressBar urlsCaughtProgress = new JProgressBar(0,100); JLabel urlsCaughtText = new JLabel(); JLabel robotsTxtsText = new JLabel(); @@ -190,8 +235,8 @@ public class FetcherSummaryFrame extends JFrame lowerPanel.add(rightPanel, null); rightPanel.add(docsLabel, null); rightPanel.add(docsText, null); - rightPanel.add(docsReadLabel, null); - rightPanel.add(docsReadText, null); + rightPanel.add(docsReadLabel, null); + rightPanel.add(docsReadText, null); rightPanel.add(bpsLabel, null); rightPanel.add(bpsText, null); rightPanel.add(totalMemLabel, null); @@ -268,25 +313,25 @@ public class FetcherSummaryFrame extends JFrame setCounterProgressBar(this.urlQueuedProgress, count); } - public void addStartButtonListener(ActionListener a) - { - startButton.addActionListener(a); - } + public void addStartButtonListener(ActionListener a) + { + startButton.addActionListener(a); + } - public String getRestrictTo() - { - return restrictTo.getText(); - } - public void setRestrictTo(String restrictTo) - { - this.restrictTo.setText(restrictTo); - } - public String getStartURL() - { - return startURL.getText(); - } + public String getRestrictTo() + { + return restrictTo.getText(); + } + public void setRestrictTo(String restrictTo) + { + this.restrictTo.setText(restrictTo); + } + public String getStartURL() + { + return startURL.getText(); + } public void setStartURL(String startURL) { this.startURL.setText(startURL); @@ -323,10 +368,10 @@ public class FetcherSummaryFrame extends JFrame setCounterProgressBar(robotsTxtsProgress, robotsTxtCount); } - public void setDocsRead(int docs) - { - bpsText.setText("" + docs); - } + 
public void setDocsRead(int docs) + { + bpsText.setText("" + docs); + } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java index d06b91642f9..03bda60dcc0 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/gui/QuitDialog.java @@ -1,6 +1,60 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.gui; /* - A basic extension of the java.awt.Dialog class + A basic extension of the java.awt.Dialog class */ import java.awt.*; @@ -8,73 +62,73 @@ import java.awt.event.*; public class QuitDialog extends Dialog { - public QuitDialog(Frame parent, boolean modal) - { - super(parent, modal); + public QuitDialog(Frame parent, boolean modal) + { + super(parent, modal); //Keep a local reference to the invoking frame frame = parent; - - // This code is automatically generated by Visual Cafe when you add - // components to the visual environment. It instantiates and initializes - // the components. 
To modify the code, only use code syntax that matches - // what Visual Cafe can generate, or Visual Cafe may be unable to back - // parse your Java file into its visual environment. - //{{INIT_CONTROLS - setLayout(null); - setSize(337,135); - setVisible(false); - yesButton.setLabel(" Ja "); - add(yesButton); - yesButton.setFont(new Font("Dialog", Font.BOLD, 12)); - yesButton.setBounds(72,80,79,22); - noButton.setLabel(" Nein "); - add(noButton); - noButton.setFont(new Font("Dialog", Font.BOLD, 12)); - noButton.setBounds(185,80,79,22); - label1.setText("Möchten Sie LARM beenden?"); - label1.setAlignment(java.awt.Label.CENTER); - add(label1); - label1.setBounds(68,33,220,23); - setTitle("LARM - Beenden"); - //}} - //{{REGISTER_LISTENERS - SymWindow aSymWindow = new SymWindow(); - this.addWindowListener(aSymWindow); - SymAction lSymAction = new SymAction(); - noButton.addActionListener(lSymAction); - yesButton.addActionListener(lSymAction); - //}} - } + // This code is automatically generated by Visual Cafe when you add + // components to the visual environment. It instantiates and initializes + // the components. To modify the code, only use code syntax that matches + // what Visual Cafe can generate, or Visual Cafe may be unable to back + // parse your Java file into its visual environment. + //{{INIT_CONTROLS + setLayout(null); + setSize(337,135); + setVisible(false); + yesButton.setLabel(" Ja "); + add(yesButton); + yesButton.setFont(new Font("Dialog", Font.BOLD, 12)); + yesButton.setBounds(72,80,79,22); + noButton.setLabel(" Nein "); + add(noButton); + noButton.setFont(new Font("Dialog", Font.BOLD, 12)); + noButton.setBounds(185,80,79,22); + label1.setText("Möchten Sie LARM beenden?"); + label1.setAlignment(java.awt.Label.CENTER); + add(label1); + label1.setBounds(68,33,220,23); + setTitle("LARM - Beenden"); + //}} - public void addNotify() - { - // Record the size of the window prior to calling parents addNotify. 
- Dimension d = getSize(); - - super.addNotify(); + //{{REGISTER_LISTENERS + SymWindow aSymWindow = new SymWindow(); + this.addWindowListener(aSymWindow); + SymAction lSymAction = new SymAction(); + noButton.addActionListener(lSymAction); + yesButton.addActionListener(lSymAction); + //}} + } - if (fComponentsAdjusted) - return; + public void addNotify() + { + // Record the size of the window prior to calling parents addNotify. + Dimension d = getSize(); - // Adjust components according to the insets - setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height); - Component components[] = getComponents(); - for (int i = 0; i < components.length; i++) - { - Point p = components[i].getLocation(); - p.translate(getInsets().left, getInsets().top); - components[i].setLocation(p); - } - fComponentsAdjusted = true; - } + super.addNotify(); - public QuitDialog(Frame parent, String title, boolean modal) - { - this(parent, modal); - setTitle(title); - } + if (fComponentsAdjusted) + return; + + // Adjust components according to the insets + setSize(getInsets().left + getInsets().right + d.width, getInsets().top + getInsets().bottom + d.height); + Component components[] = getComponents(); + for (int i = 0; i < components.length; i++) + { + Point p = components[i].getLocation(); + p.translate(getInsets().left, getInsets().top); + components[i].setLocation(p); + } + fComponentsAdjusted = true; + } + + public QuitDialog(Frame parent, String title, boolean modal) + { + this(parent, modal); + setTitle(title); + } /** * Shows or hides the component depending on the boolean flag b. 
@@ -82,103 +136,103 @@ public class QuitDialog extends Dialog * @see java.awt.Component#isVisible */ public void setVisible(boolean b) - { - if(b) - { - Rectangle bounds = getParent().getBounds(); - Rectangle abounds = getBounds(); - - setLocation(bounds.x + (bounds.width - abounds.width)/ 2, - bounds.y + (bounds.height - abounds.height)/2); - Toolkit.getDefaultToolkit().beep(); - } - super.setVisible(b); - } + { + if(b) + { + Rectangle bounds = getParent().getBounds(); + Rectangle abounds = getBounds(); + + setLocation(bounds.x + (bounds.width - abounds.width)/ 2, + bounds.y + (bounds.height - abounds.height)/2); + Toolkit.getDefaultToolkit().beep(); + } + super.setVisible(b); + } // Used for addNotify check. - boolean fComponentsAdjusted = false; - // Invoking frame - Frame frame = null; + boolean fComponentsAdjusted = false; + // Invoking frame + Frame frame = null; - //{{DECLARE_CONTROLS - java.awt.Button yesButton = new java.awt.Button(); - java.awt.Button noButton = new java.awt.Button(); - java.awt.Label label1 = new java.awt.Label(); - //}} + //{{DECLARE_CONTROLS + java.awt.Button yesButton = new java.awt.Button(); + java.awt.Button noButton = new java.awt.Button(); + java.awt.Label label1 = new java.awt.Label(); + //}} - class SymAction implements java.awt.event.ActionListener - { - public void actionPerformed(java.awt.event.ActionEvent event) - { - Object object = event.getSource(); - if (object == yesButton) - yesButton_ActionPerformed(event); - else if (object == noButton) - noButton_ActionPerformed(event); - } - } + class SymAction implements java.awt.event.ActionListener + { + public void actionPerformed(java.awt.event.ActionEvent event) + { + Object object = event.getSource(); + if (object == yesButton) + yesButton_ActionPerformed(event); + else if (object == noButton) + noButton_ActionPerformed(event); + } + } - void yesButton_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. 
- - yesButton_ActionPerformed_Interaction1(event); - } + void yesButton_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. + + yesButton_ActionPerformed_Interaction1(event); + } - void yesButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) - { - try { - frame.setVisible(false); // Hide the invoking frame - frame.dispose(); // Free system resources - this.dispose(); // Free system resources - System.exit(0); // close the application - } catch (Exception e) { - } - } + void yesButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) + { + try { + frame.setVisible(false); // Hide the invoking frame + frame.dispose(); // Free system resources + this.dispose(); // Free system resources + System.exit(0); // close the application + } catch (Exception e) { + } + } - void noButton_ActionPerformed(java.awt.event.ActionEvent event) - { - // to do: code goes here. - - noButton_ActionPerformed_Interaction1(event); - } + void noButton_ActionPerformed(java.awt.event.ActionEvent event) + { + // to do: code goes here. + + noButton_ActionPerformed_Interaction1(event); + } - void noButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) - { - try { - this.dispose(); - } catch (Exception e) { - } - } + void noButton_ActionPerformed_Interaction1(java.awt.event.ActionEvent event) + { + try { + this.dispose(); + } catch (Exception e) { + } + } - class SymWindow extends java.awt.event.WindowAdapter - { - public void windowClosing(java.awt.event.WindowEvent event) - { - Object object = event.getSource(); - if (object == QuitDialog.this) - QuitDialog_WindowClosing(event); - } - } + class SymWindow extends java.awt.event.WindowAdapter + { + public void windowClosing(java.awt.event.WindowEvent event) + { + Object object = event.getSource(); + if (object == QuitDialog.this) + QuitDialog_WindowClosing(event); + } + } - void QuitDialog_WindowClosing(java.awt.event.WindowEvent event) - { - // to do: code goes here. 
- - QuitDialog_WindowClosing_Interaction1(event); - } + void QuitDialog_WindowClosing(java.awt.event.WindowEvent event) + { + // to do: code goes here. + + QuitDialog_WindowClosing_Interaction1(event); + } - void QuitDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event) - { - try { - this.dispose(); - } catch (Exception e) { - } - } + void QuitDialog_WindowClosing_Interaction1(java.awt.event.WindowEvent event) + { + try { + this.dispose(); + } catch (Exception e) { + } + } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java index b2dd21fc353..d25f532ed21 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpClientTimeout.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.net; // whatever package you want @@ -16,121 +70,121 @@ import java.io.*; *@created 2. 
Mai 2001 */ public class HttpClientTimeout extends HttpClient { - private int timeout = -1; + private int timeout = -1; - /** - * Constructor for the HttpClientTimeout object - * - *@param url Description of Parameter - *@param proxy Description of Parameter - *@param proxyPort Description of Parameter - *@exception IOException Description of Exception - */ - public HttpClientTimeout(URL url, String proxy, int proxyPort) throws IOException { - super(url, proxy, proxyPort); - } + /** + * Constructor for the HttpClientTimeout object + * + *@param url Description of Parameter + *@param proxy Description of Parameter + *@param proxyPort Description of Parameter + *@exception IOException Description of Exception + */ + public HttpClientTimeout(URL url, String proxy, int proxyPort) throws IOException { + super(url, proxy, proxyPort); + } - /** - * Constructor for the HttpClientTimeout object - * - *@param url Description of Parameter - *@exception IOException Description of Exception - */ - public HttpClientTimeout(URL url) throws IOException { - super(url, null, -1); - } + /** + * Constructor for the HttpClientTimeout object + * + *@param url Description of Parameter + *@exception IOException Description of Exception + */ + public HttpClientTimeout(URL url) throws IOException { + super(url, null, -1); + } - /** - * Sets the Timeout attribute of the HttpClientTimeout object - * - *@param i The new Timeout value - *@exception SocketException Description of Exception - */ - public void setTimeout(int i) throws SocketException { - this.timeout = -1; - serverSocket.setSoTimeout(i); - } + /** + * Sets the Timeout attribute of the HttpClientTimeout object + * + *@param i The new Timeout value + *@exception SocketException Description of Exception + */ + public void setTimeout(int i) throws SocketException { + this.timeout = -1; + serverSocket.setSoTimeout(i); + } - /** - * Gets the Socket attribute of the HttpClientTimeout object - * - *@return The Socket value - */ - public 
Socket getSocket() { - return serverSocket; - } + /** + * Gets the Socket attribute of the HttpClientTimeout object + * + *@return The Socket value + */ + public Socket getSocket() { + return serverSocket; + } - /** - * Description of the Method - * - *@param header Description of Parameter - *@param entry Description of Parameter - *@return Description of the Returned Value - *@exception java.io.IOException Description of Exception - */ - public boolean parseHTTP(MessageHeader header, ProgressEntry entry) throws java.io.IOException { - if (this.timeout != -1) { - try { - serverSocket.setSoTimeout(this.timeout); - } - catch (SocketException e) { - throw new java.io.IOException("unable to set socket timeout!"); - } - } - return super.parseHTTP(header, entry); - } + /** + * Description of the Method + * + *@param header Description of Parameter + *@param entry Description of Parameter + *@return Description of the Returned Value + *@exception java.io.IOException Description of Exception + */ + public boolean parseHTTP(MessageHeader header, ProgressEntry entry) throws java.io.IOException { + if (this.timeout != -1) { + try { + serverSocket.setSoTimeout(this.timeout); + } + catch (SocketException e) { + throw new java.io.IOException("unable to set socket timeout!"); + } + } + return super.parseHTTP(header, entry); + } - /** - * Description of the Method - * - *@exception IOException Description of Exception - */ - public void close() throws IOException { - serverSocket.close(); - } + /** + * Description of the Method + * + *@exception IOException Description of Exception + */ + public void close() throws IOException { + serverSocket.close(); + } - /* - * public void SetTimeout(int i) throws SocketException { - * serverSocket.setSoTimeout(i); - * } - */ - /* - * This class has no public constructor for HTTP. This method is used to - * get an HttpClient to the specifed URL. If there's currently an - * active HttpClient to that server/port, you'll get that one. 
- * - * no longer syncrhonized -- it slows things down too much - * synchronize at a higher level - */ - /** - * Gets the New attribute of the HttpClientTimeout class - * - *@param url Description of Parameter - *@return The New value - *@exception IOException Description of Exception - */ - public static HttpClientTimeout getNew(URL url) throws IOException { - /* - * see if one's already around - */ - HttpClientTimeout ret = (HttpClientTimeout) kac.get(url); - if (ret == null) { - ret = new HttpClientTimeout(url); - // CTOR called openServer() - } - else { - ret.url = url; - } - // don't know if we're keeping alive until we parse the headers - // for now, keepingAlive is false - return ret; - } + /* + * public void SetTimeout(int i) throws SocketException { + * serverSocket.setSoTimeout(i); + * } + */ + /* + * This class has no public constructor for HTTP. This method is used to + * get an HttpClient to the specifed URL. If there's currently an + * active HttpClient to that server/port, you'll get that one. 
+ * + * no longer syncrhonized -- it slows things down too much + * synchronize at a higher level + */ + /** + * Gets the New attribute of the HttpClientTimeout class + * + *@param url Description of Parameter + *@return The New value + *@exception IOException Description of Exception + */ + public static HttpClientTimeout getNew(URL url) throws IOException { + /* + * see if one's already around + */ + HttpClientTimeout ret = (HttpClientTimeout) kac.get(url); + if (ret == null) { + ret = new HttpClientTimeout(url); + // CTOR called openServer() + } + else { + ret.url = url; + } + // don't know if we're keeping alive until we parse the headers + // for now, keepingAlive is false + return ret; + } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java index aff661cb6c1..c2a141267e7 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutFactory.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.net; import java.net.*; @@ -9,28 +63,28 @@ import java.net.*; *@created 2. Mai 2001 */ public class HttpTimeoutFactory implements URLStreamHandlerFactory { - int fiTimeoutVal; + int fiTimeoutVal; - /** - * Constructor for the HttpTimeoutFactory object - * - *@param iT Description of Parameter - */ - public HttpTimeoutFactory(int iT) { - fiTimeoutVal = iT; - } + /** + * Constructor for the HttpTimeoutFactory object + * + *@param iT Description of Parameter + */ + public HttpTimeoutFactory(int iT) { + fiTimeoutVal = iT; + } - /** - * Description of the Method - * - *@param str Description of Parameter - *@return Description of the Returned Value - */ - public URLStreamHandler createURLStreamHandler(String str) { - return new HttpTimeoutHandler(fiTimeoutVal); - } + /** + * Description of the Method + * + *@param str Description of Parameter + *@return Description of the Returned Value + */ + public URLStreamHandler createURLStreamHandler(String str) { + return new HttpTimeoutHandler(fiTimeoutVal); + } static HttpTimeoutFactory instance = null; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java index b551e4fa6c2..f4b590b6400 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpTimeoutHandler.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.net; import java.net.*; @@ -10,71 +64,71 @@ import java.io.IOException; *@created 2. Mai 2001 */ public class HttpTimeoutHandler extends sun.net.www.protocol.http.Handler { - int timeoutVal; - HttpURLConnectionTimeout fHUCT; + int timeoutVal; + HttpURLConnectionTimeout fHUCT; - /** - * Constructor for the HttpTimeoutHandler object - * - *@param iT Description of Parameter - */ - public HttpTimeoutHandler(int iT) { - timeoutVal = iT; - } + /** + * Constructor for the HttpTimeoutHandler object + * + *@param iT Description of Parameter + */ + public HttpTimeoutHandler(int iT) { + timeoutVal = iT; + } - /** - * Gets the Socket attribute of the HttpTimeoutHandler object - * - *@return The Socket value - */ - public Socket getSocket() { - return fHUCT.getSocket(); - } + /** + * Gets the Socket attribute of the HttpTimeoutHandler object + * + *@return The Socket value + */ + public Socket getSocket() { + return fHUCT.getSocket(); + } - /** - * Description of the Method - * - *@exception Exception Description of Exception - */ - public void close() throws Exception { - fHUCT.close(); - } + /** + * Description of the Method + * + *@exception Exception Description of Exception + */ + public void close() throws Exception { + fHUCT.close(); + } - /** - * Description of the Method - * - *@param u Description of Parameter - *@return Description of the Returned Value - *@exception IOException Description of Exception - */ - protected java.net.URLConnection openConnection(URL u) throws IOException { - return fHUCT = new HttpURLConnectionTimeout(u, this, timeoutVal); - } + /** + * Description of the Method + * + *@param u Description of Parameter + 
*@return Description of the Returned Value + *@exception IOException Description of Exception + */ + protected java.net.URLConnection openConnection(URL u) throws IOException { + return fHUCT = new HttpURLConnectionTimeout(u, this, timeoutVal); + } - /** - * Gets the Proxy attribute of the HttpTimeoutHandler object - * - *@return The Proxy value - */ - String getProxy() { - return proxy; - // breaking encapsulation - } + /** + * Gets the Proxy attribute of the HttpTimeoutHandler object + * + *@return The Proxy value + */ + String getProxy() { + return proxy; + // breaking encapsulation + } - /** - * Gets the ProxyPort attribute of the HttpTimeoutHandler object - * - *@return The ProxyPort value - */ - int getProxyPort() { - return proxyPort; - // breaking encapsulation - } + /** + * Gets the ProxyPort attribute of the HttpTimeoutHandler object + * + *@return The ProxyPort value + */ + int getProxyPort() { + return proxyPort; + // breaking encapsulation + } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java index 16b07ace098..58fb8e1dcb3 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HttpURLConnectionTimeout.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.net; import java.net.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java index 5f96063da54..7f4020b5c25 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/LinkHandler.java @@ -1,17 +1,68 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.parser; +/** + * + * + * @author Clemens Marschner + * @version $Id$ + */ public interface LinkHandler { - public void handleLink(String value, boolean isFrame); + public void handleLink(String value, String anchorText, boolean isFrame); public void handleBase(String value); public void handleTitle(String value); } \ No newline at end of file diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java index 9ccda662ed6..20bbc8c1f7b 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/parser/Tokenizer.java @@ -1,9 +1,57 @@ -/* - * $Id$ +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * Copyright 2000 LANLab + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.parser; import hplb.org.xml.sax.*; @@ -26,7 +74,9 @@ import java.net.URL; * @todo add handling of anchor texts * * @author Clemens Marschner + * $Id$ */ + public class Tokenizer implements hplb.org.xml.sax.Parser { /** @@ -71,6 +121,7 @@ public class Tokenizer implements hplb.org.xml.sax.Parser final static int ST_VALUE_QUOTED = 11; final static int ST_PCDATA = 21; final static int ST_COMMENT = 22; + final static int ST_IN_ANCHOR = 23; LinkHandler linkHandler; @@ -109,9 +160,10 @@ public class Tokenizer implements hplb.org.xml.sax.Parser private final static int ATTR_SRC = 2; private final static int LINKTYPE_NONE = 0; - private final static int LINKTYPE_LINK = 1; + private final static int LINKTYPE_A = 1; private final static int LINKTYPE_BASE = 2; private final static int LINKTYPE_FRAME = 3; + private final static int LINKTYPE_AREA = 4; private byte linkTagType; @@ -121,7 +173,6 @@ public class Tokenizer implements hplb.org.xml.sax.Parser private boolean keepPCData; private boolean isInTitleTag; private boolean isInAnchorTag; - CharBuffer buf = new CharBuffer(); boolean isStartTag = true; /** @@ -133,6 +184,12 @@ public class Tokenizer implements hplb.org.xml.sax.Parser CharBuffer attrName = new CharBuffer(); CharBuffer attrValue = new CharBuffer(1000); CharBuffer pcData = new CharBuffer(8000); + int pcDataLength; + + /** + * specifies how much pcData is kept when a link was found + */ + private final static int MAX_PCDATA_LENGTH = 200; Reader in; @@ -256,8 +313,11 @@ public class Tokenizer implements hplb.org.xml.sax.Parser attrName.reset(); attrValue.reset(); pcData.reset(); + pcDataLength = 0; //attrs.clear(); isStartTag = true; + isInAnchorTag = false; + // until proven wrong linkTagType = LINKTYPE_NONE; @@ -313,6 +373,12 @@ public class Tokenizer implements hplb.org.xml.sax.Parser if(keepPCData) { pcData.write(c); + pcDataLength++; + if(pcDataLength > MAX_PCDATA_LENGTH) + { + this.gotPCDATA(false); + keepPCData = false; + } } } @@ 
-336,6 +402,13 @@ public class Tokenizer implements hplb.org.xml.sax.Parser if(keepPCData) { pcData.write(c); + pcDataLength++; + if(pcDataLength > MAX_PCDATA_LENGTH) + { + this.gotPCDATA(false); + keepPCData = false; + } + } } break; @@ -643,6 +716,69 @@ public class Tokenizer implements hplb.org.xml.sax.Parser keepPCData = false; break; } + break; + case ST_IN_ANCHOR: + // we've seen <a href="...">. href is in linkValue. Read until + // the next end tag, at most 200 characters. + // (end tags are often omitted, i.e. <a ...>text</td>) + // regards other tags as text + // todo: read until next </a> or a couple other tags + try + { + short count = 0; + switch(c) + { + case '\t': + case '\n': + case '\r': + pcData.write(' '); + break; + default: + pcData.write(c); + } + while(count < MAX_PCDATA_LENGTH) + { + count++; + while(((c =read_ex()) != '<') && (count < MAX_PCDATA_LENGTH)) + { + switch(c) + { + case '\t': + case '\n': + case '\r': + pcData.write(' '); + break; + default: + pcData.write(c); + } + } + if(count >= MAX_PCDATA_LENGTH) + { + gotAnchor(false); + break; + } + else + { + pcData.write(c); + count++; + if((c=read_ex()) == '/') + { + gotAnchor(true); + isStartTag = false; + state = ST_TAG_NAME; + break; + } + else + { + pcData.write(c); + } + } + } + } + catch(EmptyInputStream ex) + { + + } } } @@ -749,6 +885,21 @@ public class Tokenizer implements hplb.org.xml.sax.Parser //attrs.put(nm, val); } + protected void gotAnchor(boolean tooMuch) + { + String anchor; + if(tooMuch) + { + anchor = pcData.toString().substring(0, pcData.getLength()-1); + } + else + { + anchor = pcData.toString(); + } + linkHandler.handleLink(linkValue, anchor, false); + toStart(); + } + /** * Description of the Method @@ -761,11 +912,10 @@ public class Tokenizer implements hplb.org.xml.sax.Parser { case 1: // A - if (tag[0] == 'a') + if (isStartTag && tag[0] == 'a') { - linkTagType = LINKTYPE_LINK; + linkTagType = LINKTYPE_A; linkAttrType = ATTR_HREF; - } break; // [case 3: // IMG]
@@ -780,7 +930,7 @@ public class Tokenizer implements hplb.org.xml.sax.Parser } else if (tag[0] == 'a' && tag[1] == 'r' && tag[2] == 'e' && tag[3] == 'a') { - linkTagType = LINKTYPE_LINK; + linkTagType = LINKTYPE_AREA; linkAttrType = ATTR_HREF; } } @@ -817,17 +967,20 @@ public class Tokenizer implements hplb.org.xml.sax.Parser { switch (linkTagType) { - case LINKTYPE_LINK: + case LINKTYPE_AREA: //System.out.println("got link " + linkValue); - linkHandler.handleLink(linkValue, false); + linkHandler.handleLink(linkValue, /*@todo altText*/ null, false); break; case LINKTYPE_FRAME: //System.out.println("got link " + linkValue); - linkHandler.handleLink(linkValue, true); + linkHandler.handleLink(linkValue, null, true); break; case LINKTYPE_BASE: linkHandler.handleBase(linkValue); break; + case LINKTYPE_A: + state = ST_IN_ANCHOR; + return; } } toStart(); @@ -867,6 +1020,11 @@ public class Tokenizer implements hplb.org.xml.sax.Parser linkHandler.handleTitle(pcData.toString()); isInTitleTag = false; } + else if(isInAnchorTag) + { + linkHandler.handleLink(this.linkValue, pcData.toString(), false); + isInAnchorTag = false; + } // ignore it toStart(); @@ -1288,9 +1446,9 @@ public class Tokenizer implements hplb.org.xml.sax.Parser int nr = 0; - public void handleLink(String link, boolean isFrame) + public void handleLink(String link, String anchor, boolean isFrame) { - System.out.println("found link " + (++nr) + ": " + link); + System.out.println("found link " + (++nr) + ": " + link + "; Text: " + anchor); } public void handleTitle(String title) { diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java index 26417c05449..6dc4efd7447 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/DocumentStorage.java @@ -1,19 +1,59 @@ - -/** - * 
Title: LARM Lanlab Retrieval Machine<p> +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * Description: <p> + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. * - * Copyright: Copyright (c)<p> + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * Company: <p> + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. * - * @author - * @version 1.0 + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.storage; + import de.lanlab.larm.util.*; /** diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java index 2b6507195c3..7d28a6015c6 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.storage; import de.lanlab.larm.util.WebDocument; @@ -5,17 +59,6 @@ import de.lanlab.larm.util.SimpleLogger; import java.io.*; -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: - * - * @author - * @created 11. Januar 2002 - * @version 1.0 - */ - - - /** * this class saves the documents into page files of 50 MB and keeps a record of all * the positions into a Logger. the log file contains URL, page file number, and diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java index 57037ce3d0f..641b74b7ef3 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/NullStorage.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.storage; import de.lanlab.larm.util.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java index 522a8760d24..64883632dfc 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/SQLServerStorage.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.storage; import java.sql.*; import de.lanlab.larm.util.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java index e31c53363b2..b8bf4ee427f 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/InterruptableTask.java @@ -1,9 +1,62 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ package de.lanlab.larm.threads; public interface InterruptableTask { - public void run(ServerThread thread); - public void interrupt(); - public String getInfo(); + public void run(ServerThread thread); + public void interrupt(); + public String getInfo(); } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java index 6f5f08df99c..b0a6bc86493 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ServerThread.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.threads; import java.util.Vector; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java index 568c4a01a37..f483d3e7c32 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskQueue.java @@ -1,4 +1,59 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.threads; + import de.lanlab.larm.util.Queue; import java.util.Collection; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java index 75196422b78..d57967c0154 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/TaskReadyListener.java @@ -1,9 +1,63 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.threads; import de.lanlab.larm.util.Observer; public interface TaskReadyListener extends Observer { - public void taskReady(ServerThread s); + public void taskReady(ServerThread s); } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java index bf22d939c33..721ab1ea54f 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadFactory.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c)<p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ + package de.lanlab.larm.threads; public class ThreadFactory diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java index 84c1ef57fa7..7c069d49830 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPool.java @@ -1,3 +1,56 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ package de.lanlab.larm.threads; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java index 47e11156265..51311a57a13 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadPoolObserver.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.threads; import de.lanlab.larm.util.Observer; @@ -7,6 +61,6 @@ import de.lanlab.larm.util.Observer; */ public interface ThreadPoolObserver extends Observer { - public void queueUpdate(String info, String action); - public void threadUpdate(int threadNr, String action, String info); + public void queueUpdate(String info, String action); + public void threadUpdate(int threadNr, String action, String info); } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java index ab78ae89dcb..4a3d37cdf46 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/threads/ThreadingStrategy.java @@ -1,8 +1,62 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.threads; public interface ThreadingStrategy { - public void doTask(InterruptableTask t, Object key); - public void interrupt(); - public void stop(); + public void doTask(InterruptableTask t, Object key); + public void interrupt(); + public void stop(); } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java index 2cb43ba8831..87c1431ab58 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/CachingQueue.java @@ -1,19 +1,59 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> +/* ==================================================================== + * The Apache Software License, Version 1.1 * - * Description: <p> + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. 
* - * Copyright: Copyright (c)<p> + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * Company: <p> + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. * - * @author - * @version 1.0 + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.util; + import java.io.*; import java.util.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java index 231c17d3f9f..cbff67abab9 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ClassInfo.java @@ -1,18 +1,63 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; import java.lang.reflect.*; import java.io.*; import java.util.*; -/** - * Title: LARM Lanlab Retrieval Machine - * Description: - * Copyright: Copyright (c) - * Company: - * @author - * @version 1.0 - */ - /** * prints class information with the reflection api * for debugging only diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java index 6b0d16fb6d1..8c980fa6140 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/HashedCircularLinkedList.java @@ -1,12 +1,57 @@ package de.lanlab.larm.util; -/** - * Title: - * Description: - * Copyright: Copyright (c) - * Company: - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ import java.util.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java index c16940ffac5..72edaf1c0d8 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/InputStreamObserver.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c) <p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.util; public interface InputStreamObserver diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java index 2564b661c14..8945e89bc54 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Logger.java @@ -1,8 +1,57 @@ -/* +/* ==================================================================== + * The Apache Software License, Version 1.1 * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. 
* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.util; import java.io.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java index d261d2bd75d..f7e2a64e3d0 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/ObservableInputStream.java @@ -1,12 +1,57 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c) <p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ + package de.lanlab.larm.util; import java.io.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java index a81095094da..17dbd842ca6 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Observer.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.util; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java index a1f427e667a..cd9f4554bce 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/OverflowException.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; /** diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java index 26105c3c333..3e7ee8f9199 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/Queue.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; /** diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java index 2e1cfd4c903..695e79d41e2 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleCharArrayReader.java @@ -1,6 +1,55 @@ -/* - * @(#)SimpleCharArrayReader.java 1.35 00/02/02 - * +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. */ package de.lanlab.larm.util; @@ -22,7 +71,7 @@ import java.io.*; * @see java.io.ByteArrayInputStream */ public -class SimpleCharArrayReader extends Reader +class SimpleCharArrayReader extends Reader { /** @@ -44,7 +93,7 @@ class SimpleCharArrayReader extends Reader * The index of the next character to read from the input stream buffer. * This value should always be nonnegative * and not larger than the value of <code>count</code>. - * The next byte to be read from the input stream buffer + * The next byte to be read from the input stream buffer * will be <code>buf[pos]</code>. */ protected int pos; @@ -62,8 +111,8 @@ class SimpleCharArrayReader extends Reader protected int mark = 0; /** - * The index one greater than the last valid character in the input - * stream buffer. + * The index one greater than the last valid character in the input + * stream buffer. * This value should always be nonnegative * and not larger than the length of <code>buf</code>. * It is one greater than the position of @@ -75,8 +124,8 @@ class SimpleCharArrayReader extends Reader /** * Creates a <code>SimpleCharArrayReader</code> * so that it uses <code>buf</code> as its - * buffer array. - * The buffer array is not copied. + * buffer array. + * The buffer array is not copied. * The initial value of <code>pos</code> * is <code>0</code> and the initial value * of <code>count</code> is the length of @@ -84,11 +133,11 @@ class SimpleCharArrayReader extends Reader * * @param buf the input buffer. 
*/ - public SimpleCharArrayReader(char buf[]) - { - this.buf = buf; + public SimpleCharArrayReader(char buf[]) + { + this.buf = buf; this.pos = 0; - this.count = buf.length; + this.count = buf.length; } /** @@ -97,7 +146,7 @@ class SimpleCharArrayReader extends Reader * buffer array. The initial value of <code>pos</code> * is <code>offset</code> and the initial value * of <code>count</code> is <code>offset+len</code>. - * The buffer array is not copied. + * The buffer array is not copied. * <p> * Note that if bytes are simply read from * the resulting input stream, elements <code>buf[pos]</code> @@ -111,33 +160,33 @@ class SimpleCharArrayReader extends Reader * @param offset the offset in the buffer of the first byte to read. * @param length the maximum number of bytes to read from the buffer. */ - public SimpleCharArrayReader(char buf[], int offset, int length) - { - this.buf = buf; + public SimpleCharArrayReader(char buf[], int offset, int length) + { + this.buf = buf; this.pos = offset; - this.count = Math.min(offset + length, buf.length); + this.count = Math.min(offset + length, buf.length); this.mark = offset; } /** - * Reads the next byte of data from this input stream. The value - * byte is returned as an <code>int</code> in the range - * <code>0</code> to <code>255</code>. If no byte is available - * because the end of the stream has been reached, the value - * <code>-1</code> is returned. + * Reads the next byte of data from this input stream. The value + * byte is returned as an <code>int</code> in the range + * <code>0</code> to <code>255</code>. If no byte is available + * because the end of the stream has been reached, the value + * <code>-1</code> is returned. * <p> * * @return the next byte of data, or <code>-1</code> if the end of the * stream has been reached. */ - public int read() - { - return (pos < count) ? (buf[pos++] & 0xff) : -1; + public int read() + { + return (pos < count) ? 
(buf[pos++] & 0xff) : -1; } /** - * Reads up to <code>len</code> bytes of data into an array of bytes - * from this input stream. + * Reads up to <code>len</code> bytes of data into an array of bytes + * from this input stream. * If <code>pos</code> equals <code>count</code>, * then <code>-1</code> is returned to indicate * end of file. Otherwise, the number <code>k</code> @@ -151,7 +200,7 @@ class SimpleCharArrayReader extends Reader * value <code>k</code> is added into <code>pos</code> * and <code>k</code> is returned. * <p> - * This <code>read</code> method cannot block. + * This <code>read</code> method cannot block. * * @param b the buffer into which the data is read. * @param off the start offset of the data. @@ -160,37 +209,37 @@ class SimpleCharArrayReader extends Reader * <code>-1</code> if there is no more data because the end of * the stream has been reached. */ - public int read(char b[], int off, int len) - { - if (b == null) - { - throw new NullPointerException(); - } - else if ((off < 0) || (off > b.length) || (len < 0) || - ((off + len) > b.length) || ((off + len) < 0)) - { - throw new IndexOutOfBoundsException(); - } - if (pos >= count) - { - return -1; - } - if (pos + len > count) - { - len = count - pos; - } - if (len <= 0) - { - return 0; - } - System.arraycopy(buf, pos, b, off, len); - pos += len; - return len; + public int read(char b[], int off, int len) + { + if (b == null) + { + throw new NullPointerException(); + } + else if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length) || ((off + len) < 0)) + { + throw new IndexOutOfBoundsException(); + } + if (pos >= count) + { + return -1; + } + if (pos + len > count) + { + len = count - pos; + } + if (len <= 0) + { + return 0; + } + System.arraycopy(buf, pos, b, off, len); + pos += len; + return len; } /** - * Skips <code>n</code> bytes of input from this input stream. Fewer - * bytes might be skipped if the end of the input stream is reached. 
+ * Skips <code>n</code> bytes of input from this input stream. Fewer + * bytes might be skipped if the end of the input stream is reached. * The actual number <code>k</code> * of bytes to be skipped is equal to the smaller * of <code>n</code> and <code>count-pos</code>. @@ -200,33 +249,33 @@ class SimpleCharArrayReader extends Reader * @param n the number of bytes to be skipped. * @return the actual number of bytes skipped. */ - public long skip(long n) - { - if (pos + n > count) - { - n = count - pos; - } - if (n < 0) - { - return 0; - } - pos += n; - return n; + public long skip(long n) + { + if (pos + n > count) + { + n = count - pos; + } + if (n < 0) + { + return 0; + } + pos += n; + return n; } /** - * Returns the number of bytes that can be read from this input - * stream without blocking. + * Returns the number of bytes that can be read from this input + * stream without blocking. * The value returned is - * <code>count - pos</code>, + * <code>count - pos</code>, * which is the number of bytes remaining to be read from the input buffer. * * @return the number of bytes that can be read from the input stream * without blocking. */ - public int available() - { - return count - pos; + public int available() + { + return count - pos; } /** @@ -234,9 +283,9 @@ class SimpleCharArrayReader extends Reader * * @since JDK1.1 */ - public boolean markSupported() - { - return true; + public boolean markSupported() + { + return true; } /** @@ -247,9 +296,9 @@ class SimpleCharArrayReader extends Reader * * @since JDK1.1 */ - public void mark(int readAheadLimit) - { - mark = pos; + public void mark(int readAheadLimit) + { + mark = pos; } /** @@ -257,29 +306,29 @@ class SimpleCharArrayReader extends Reader * is the beginning unless another position was marked. * The value of <code>pos</code> is set to 0. 
*/ - public void reset() - { - - pos = mark; + public void reset() + { + + pos = mark; } /** - * Closes this input stream and releases any system resources - * associated with the stream. + * Closes this input stream and releases any system resources + * associated with the stream. * <p> */ - public void close() throws IOException - { - isClosed = true; + public void close() throws IOException + { + isClosed = true; } /** Check to make sure that the stream has not been closed */ - private void ensureOpen() - { - /* This method does nothing for now. Once we add throws clauses - * to the I/O methods in this class, it will throw an IOException - * if the stream has been closed. - */ + private void ensureOpen() + { + /* This method does nothing for now. Once we add throws clauses + * to the I/O methods in this class, it will throw an IOException + * if the stream has been closed. + */ } } diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java index 60cd99b2b58..4b3e889510a 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLogger.java @@ -1,13 +1,59 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; -/** - * Title: LARM Lanlab Retrieval Machine - * Description: - * Copyright: Copyright (c) - * Company: - * @author - * @version 1.0 - */ import java.io.*; import java.util.*; import java.text.*; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java index 44717249305..ec70a57045f 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleLoggerManager.java @@ -1,14 +1,59 @@ -package de.lanlab.larm.util; - -/** - * Title: LARM Lanlab Retrieval Machine - * Description: - * Copyright: Copyright (c) - * Company: - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ +package de.lanlab.larm.util; + import java.util.*; import java.io.IOException; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java index a24f9f2e181..d568a3d8b6a 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/SimpleObservable.java @@ -1,13 +1,58 @@ - -/** - * Title: LARM Lanlab Retrieval Machine<p> - * Description: <p> - * Copyright: Copyright (c) <p> - * Company: <p> - * @author - * @version 1.0 +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. 
For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
*/ -package de.lanlab.larm.util; + + package de.lanlab.larm.util; import java.util.Observable; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java index 87ae48fe1b6..f1012c91242 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/State.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; import java.io.Serializable; diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java index 1956e81886a..e9e54832c69 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/URLUtils.java @@ -1,12 +1,59 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; -/** - * Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c) - * Company: - * - * @author - * @version 1.0 - */ import java.net.URL; /** diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java index e07b63ff58e..dd589c5ec6a 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/UnderflowException.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package de.lanlab.larm.util; /** diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java index 3287fd51f6b..7c4e4b5e215 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java @@ -1,3 +1,57 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + package de.lanlab.larm.util; @@ -9,32 +63,32 @@ import de.lanlab.larm.fetcher.URLMessage; */ public class WebDocument extends URLMessage { - protected String mimeType; - protected byte[] document; + protected String mimeType; + protected byte[] document; protected int resultCode; protected int size; protected String title; - public WebDocument(URL url, String mimeType, byte[] document, int resultCode, URL referer, int size, String title) - { - super(url, referer, false); - this.url = url; - this.mimeType = mimeType; + public WebDocument(URL url, String mimeType, byte[] document, int resultCode, URL referer, int size, String title) + { + super(url, referer, false, null); + this.url = url; + this.mimeType = mimeType; this.document = document; this.resultCode = resultCode; this.size = size; this.title = title; - } + } public String getTitle() { return title; } - public URL getUrl() - { - return url; - } + public URL getUrl() + { + return url; + } public int getSize() { @@ -47,39 +101,39 @@ public class WebDocument extends URLMessage } - public void setDocument(byte[] document) - { - this.document = document; - } - public int getResultCode() - { - return resultCode; - } + public void setDocument(byte[] document) + { + this.document = document; + } + public int getResultCode() + { + return resultCode; + } - public void setResultCode(int resultCode) - { - this.resultCode = resultCode; - } + public void setResultCode(int resultCode) + { + this.resultCode = resultCode; + } - public byte[] getDocumentBytes() - { - return this.document; - } + public byte[] getDocumentBytes() + { + return this.document; + } - public void setUrl(URL url) - { - this.url = url; - } + public void setUrl(URL url) + { + this.url = url; + } - public void setMimeType(String mimeType) - { - this.mimeType = mimeType; - } + public void setMimeType(String mimeType) + { + this.mimeType = mimeType; + } - public String getMimeType() - { - return mimeType; - } + public String getMimeType() + { + 
return mimeType; + } public String getInfo() {