URLScopeFilter now matches against the normalized URL string; added logging of URLs that do not match

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150845 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
cmarschner 2002-10-22 15:21:00 +00:00
parent a27b6a627a
commit 9f5185b44c
1 changed files with 8 additions and 3 deletions

View File

@ -57,6 +57,7 @@ package de.lanlab.larm.fetcher;
import org.apache.oro.text.regex.Perl5Matcher;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Pattern;
import de.lanlab.larm.util.*;
/**
* filter class. Tries to match a regular expression with an incoming URL
@ -77,11 +78,13 @@ class URLScopeFilter extends Filter implements MessageListener
private Pattern pattern;
private Perl5Matcher matcher;
private Perl5Compiler compiler;
SimpleLogger log;
/**
 * Constructor. Creates a new Perl5Matcher and a new Perl5Compiler for this
 * filter and stores the given logger in the {@code log} field.
 *
 * NOTE(review): this text is a diff rendering with the +/- markers stripped.
 * The zero-argument signature line directly below appears to be the
 * pre-change version, and the SimpleLogger signature after it the
 * replacement - confirm against the actual file.
 *
 * @param log logger stored as-is (no null check here); it is called when a
 *            URL does not match the pattern
 */
public URLScopeFilter()
public URLScopeFilter(SimpleLogger log)
{
matcher = new Perl5Matcher();
compiler = new Perl5Compiler();
// keep the logger so URLs that fail the match can be logged
this.log = log;
}
public String getRexString()
@ -108,7 +111,7 @@ class URLScopeFilter extends Filter implements MessageListener
{
if(message instanceof URLMessage)
{
String urlString = ((URLMessage)message).toString();
String urlString = ((URLMessage)message).getNormalizedURLString();
int length = urlString.length();
char buffer[] = new char[length];
urlString.getChars(0,length,buffer,0);
@ -117,8 +120,10 @@ class URLScopeFilter extends Filter implements MessageListener
boolean match = matcher.matches(buffer, pattern);
if(!match)
{
//System.out.println("not in Scope: " + urlString);
//log.log("URLScopeFilter: not in scope: " + urlString);
log.log(message.toString());
filtered++;
return null;
}
}