mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 00:09:28 +00:00
improved logging
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150842 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c221f17ea5
commit
e9ebe848b8
@ -121,9 +121,9 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
*/
|
*/
|
||||||
public RobotExclusionFilter(HostManager hm)
|
public RobotExclusionFilter(HostManager hm)
|
||||||
{
|
{
|
||||||
log = new SimpleLogger("RobotExclusionFilter");
|
log = new SimpleLogger("RobotExclusionFilter", true);
|
||||||
hostManager = hm;
|
hostManager = hm;
|
||||||
rePool = new ThreadPool(2, new REFThreadFactory());
|
rePool = new ThreadPool(5, new REFThreadFactory());
|
||||||
rePool.init();
|
rePool.init();
|
||||||
log.setFlushAtOnce(true);
|
log.setFlushAtOnce(true);
|
||||||
log.log("refilter: initialized");
|
log.log("refilter: initialized");
|
||||||
@ -164,8 +164,12 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
// assert message instanceof URLMessage;
|
// assert message instanceof URLMessage;
|
||||||
URLMessage urlMsg = ((URLMessage) message);
|
URLMessage urlMsg = ((URLMessage) message);
|
||||||
URL url = urlMsg.getUrl();
|
URL url = urlMsg.getUrl();
|
||||||
|
// String urlString = urlMsg.getNormalizedURLString();
|
||||||
|
// URL nUrl = new URL(urlString);
|
||||||
//assert url != null;
|
//assert url != null;
|
||||||
HostInfo h = hostManager.getHostInfo(url.getHost().toLowerCase());
|
HostInfo h = hostManager.getHostInfo(url.getHost());
|
||||||
|
synchronized (h)
|
||||||
|
{
|
||||||
if (!h.isRobotTxtChecked() && !h.isLoadingRobotsTxt())
|
if (!h.isRobotTxtChecked() && !h.isLoadingRobotsTxt())
|
||||||
{
|
{
|
||||||
log.logThreadSafe("handleRequest: starting to get robots.txt");
|
log.logThreadSafe("handleRequest: starting to get robots.txt");
|
||||||
@ -175,8 +179,6 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
h.setLoadingRobotsTxt(true);
|
h.setLoadingRobotsTxt(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized (h)
|
|
||||||
{
|
|
||||||
// isLoading...() and queuedRequest.insert() must be atomic
|
// isLoading...() and queuedRequest.insert() must be atomic
|
||||||
if (h.isLoadingRobotsTxt())
|
if (h.isLoadingRobotsTxt())
|
||||||
{
|
{
|
||||||
@ -271,8 +273,16 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
*/
|
*/
|
||||||
public void run(ServerThread thread)
|
public void run(ServerThread thread)
|
||||||
{
|
{
|
||||||
// assert hostInfo != null;
|
|
||||||
String threadName = Thread.currentThread().getName();
|
String threadName = Thread.currentThread().getName();
|
||||||
|
synchronized(hostInfo)
|
||||||
|
{
|
||||||
|
if(hostInfo.isRobotTxtChecked())
|
||||||
|
{
|
||||||
|
log.logThreadSafe("task " + threadName + ": already loaded " + hostInfo.getHostName());
|
||||||
|
return; // may happen 'cause check is not synchronized
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// assert hostInfo != null;
|
||||||
|
|
||||||
log.logThreadSafe("task " + threadName + ": starting to load " + hostInfo.getHostName());
|
log.logThreadSafe("task " + threadName + ": starting to load " + hostInfo.getHostName());
|
||||||
//hostInfo.setLoadingRobotsTxt(true);
|
//hostInfo.setLoadingRobotsTxt(true);
|
||||||
@ -290,6 +300,7 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
if (res.getStatusCode() != 200)
|
if (res.getStatusCode() != 200)
|
||||||
{
|
{
|
||||||
errorOccured = true;
|
errorOccured = true;
|
||||||
|
log.log("task " + threadName + ": return code was " + res.getStatusCode());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -309,26 +320,26 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
catch (java.net.UnknownHostException e)
|
catch (java.net.UnknownHostException e)
|
||||||
{
|
{
|
||||||
hostInfo.setReachable(false);
|
hostInfo.setReachable(false);
|
||||||
log.logThreadSafe("task " + threadName + ": unknown host. setting to unreachable");
|
log.logThreadSafe("task " + threadName + ": unknown host '" + hostInfo.getHostName() + "'. setting to unreachable");
|
||||||
errorOccured = true;
|
errorOccured = true;
|
||||||
}
|
}
|
||||||
catch (java.net.NoRouteToHostException e)
|
catch (java.net.NoRouteToHostException e)
|
||||||
{
|
{
|
||||||
hostInfo.setReachable(false);
|
hostInfo.setReachable(false);
|
||||||
log.logThreadSafe("task " + threadName + ": no route to. setting to unreachable");
|
log.logThreadSafe("task " + threadName + ": no route to '"+hostInfo.getHostName()+"'. setting to unreachable");
|
||||||
errorOccured = true;
|
errorOccured = true;
|
||||||
}
|
}
|
||||||
catch (java.net.ConnectException e)
|
catch (java.net.ConnectException e)
|
||||||
{
|
{
|
||||||
hostInfo.setReachable(false);
|
hostInfo.setReachable(false);
|
||||||
log.logThreadSafe("task " + threadName + ": connect exception. setting to unreachable");
|
log.logThreadSafe("task " + threadName + ": connect exception while connecting to '"+hostInfo.getHostName()+"'. setting to unreachable");
|
||||||
errorOccured = true;
|
errorOccured = true;
|
||||||
}
|
}
|
||||||
catch (java.io.InterruptedIOException e)
|
catch (java.io.InterruptedIOException e)
|
||||||
{
|
{
|
||||||
// time out. fatal in this case
|
// time out. fatal in this case
|
||||||
hostInfo.setReachable(false);
|
hostInfo.setReachable(false);
|
||||||
log.logThreadSafe("task " + threadName + ": time out. setting to unreachable");
|
log.logThreadSafe("task " + threadName + ": time out while connecting to '" +hostInfo.getHostName() + "'. setting to unreachable");
|
||||||
errorOccured = true;
|
errorOccured = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -343,19 +354,20 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
{
|
{
|
||||||
if (errorOccured)
|
if (errorOccured)
|
||||||
{
|
{
|
||||||
|
log.logThreadSafe("task " + threadName + ": error occured. putback...");
|
||||||
synchronized (hostInfo)
|
synchronized (hostInfo)
|
||||||
{
|
{
|
||||||
hostInfo.setRobotsChecked(true, null);
|
hostInfo.setRobotsChecked(true, null);
|
||||||
// crawl everything
|
// crawl everything
|
||||||
hostInfo.setLoadingRobotsTxt(false);
|
hostInfo.setLoadingRobotsTxt(false);
|
||||||
log.logThreadSafe("task " + threadName + ": error occured");
|
|
||||||
log.logThreadSafe("task " + threadName + ": now put " + hostInfo.getQueueSize() + " queueud requests back");
|
log.logThreadSafe("task " + threadName + ": now put " + hostInfo.getQueueSize() + " queueud requests back");
|
||||||
hostInfo.setLoadingRobotsTxt(false);
|
//hostInfo.setLoadingRobotsTxt(false);
|
||||||
putBackURLs();
|
putBackURLs();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
log.logThreadSafe("task " + threadName + ": finished. putback...");
|
||||||
synchronized (hostInfo)
|
synchronized (hostInfo)
|
||||||
{
|
{
|
||||||
hostInfo.setRobotsChecked(true, disallows);
|
hostInfo.setRobotsChecked(true, disallows);
|
||||||
@ -374,11 +386,13 @@ public class RobotExclusionFilter extends Filter implements MessageListener
|
|||||||
*/
|
*/
|
||||||
private void putBackURLs()
|
private void putBackURLs()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
int qSize = hostInfo.getQueueSize();
|
||||||
while (hostInfo.getQueueSize() > 0)
|
while (hostInfo.getQueueSize() > 0)
|
||||||
{
|
{
|
||||||
messageHandler.putMessage((Message) hostInfo.removeFromQueue());
|
messageHandler.putMessage((Message) hostInfo.removeFromQueue());
|
||||||
}
|
}
|
||||||
log.logThreadSafe("task " + Thread.currentThread().getName() + ": finished");
|
log.logThreadSafe("task " + Thread.currentThread().getName() + ": finished. put " + qSize + " URLs back");
|
||||||
hostInfo.removeQueue();
|
hostInfo.removeQueue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user