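URLScopeFilter now matches against the normalized URL string (getNormalizedURLString() instead of toString()); added a SimpleLogger constructor parameter and logging of messages that are filtered out as not in scope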

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150845 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
cmarschner 2002-10-22 15:21:00 +00:00
parent a27b6a627a
commit 9f5185b44c
1 changed file with 8 additions and 3 deletions

View File

@ -57,6 +57,7 @@ package de.lanlab.larm.fetcher;
import org.apache.oro.text.regex.Perl5Matcher; import org.apache.oro.text.regex.Perl5Matcher;
import org.apache.oro.text.regex.Perl5Compiler; import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Pattern; import org.apache.oro.text.regex.Pattern;
import de.lanlab.larm.util.*;
/** /**
* filter class. Tries to match a regular expression with an incoming URL * filter class. Tries to match a regular expression with an incoming URL
@ -77,11 +78,13 @@ class URLScopeFilter extends Filter implements MessageListener
private Pattern pattern; private Pattern pattern;
private Perl5Matcher matcher; private Perl5Matcher matcher;
private Perl5Compiler compiler; private Perl5Compiler compiler;
SimpleLogger log;
public URLScopeFilter() public URLScopeFilter(SimpleLogger log)
{ {
matcher = new Perl5Matcher(); matcher = new Perl5Matcher();
compiler = new Perl5Compiler(); compiler = new Perl5Compiler();
this.log = log;
} }
public String getRexString() public String getRexString()
@ -108,7 +111,7 @@ class URLScopeFilter extends Filter implements MessageListener
{ {
if(message instanceof URLMessage) if(message instanceof URLMessage)
{ {
String urlString = ((URLMessage)message).toString(); String urlString = ((URLMessage)message).getNormalizedURLString();
int length = urlString.length(); int length = urlString.length();
char buffer[] = new char[length]; char buffer[] = new char[length];
urlString.getChars(0,length,buffer,0); urlString.getChars(0,length,buffer,0);
@ -117,8 +120,10 @@ class URLScopeFilter extends Filter implements MessageListener
boolean match = matcher.matches(buffer, pattern); boolean match = matcher.matches(buffer, pattern);
if(!match) if(!match)
{ {
//System.out.println("not in Scope: " + urlString); //log.log("URLScopeFilter: not in scope: " + urlString);
log.log(message.toString());
filtered++; filtered++;
return null; return null;
} }
} }