mirror of https://github.com/apache/lucene.git
takes normalized URL string for comparisons; added logging
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150845 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a27b6a627a
commit
9f5185b44c
|
@ -57,6 +57,7 @@ package de.lanlab.larm.fetcher;
|
||||||
import org.apache.oro.text.regex.Perl5Matcher;
|
import org.apache.oro.text.regex.Perl5Matcher;
|
||||||
import org.apache.oro.text.regex.Perl5Compiler;
|
import org.apache.oro.text.regex.Perl5Compiler;
|
||||||
import org.apache.oro.text.regex.Pattern;
|
import org.apache.oro.text.regex.Pattern;
|
||||||
|
import de.lanlab.larm.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* filter class. Tries to match a regular expression with an incoming URL
|
* filter class. Tries to match a regular expression with an incoming URL
|
||||||
|
@ -77,11 +78,13 @@ class URLScopeFilter extends Filter implements MessageListener
|
||||||
private Pattern pattern;
|
private Pattern pattern;
|
||||||
private Perl5Matcher matcher;
|
private Perl5Matcher matcher;
|
||||||
private Perl5Compiler compiler;
|
private Perl5Compiler compiler;
|
||||||
|
SimpleLogger log;
|
||||||
|
|
||||||
public URLScopeFilter()
|
public URLScopeFilter(SimpleLogger log)
|
||||||
{
|
{
|
||||||
matcher = new Perl5Matcher();
|
matcher = new Perl5Matcher();
|
||||||
compiler = new Perl5Compiler();
|
compiler = new Perl5Compiler();
|
||||||
|
this.log = log;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getRexString()
|
public String getRexString()
|
||||||
|
@ -108,7 +111,7 @@ class URLScopeFilter extends Filter implements MessageListener
|
||||||
{
|
{
|
||||||
if(message instanceof URLMessage)
|
if(message instanceof URLMessage)
|
||||||
{
|
{
|
||||||
String urlString = ((URLMessage)message).toString();
|
String urlString = ((URLMessage)message).getNormalizedURLString();
|
||||||
int length = urlString.length();
|
int length = urlString.length();
|
||||||
char buffer[] = new char[length];
|
char buffer[] = new char[length];
|
||||||
urlString.getChars(0,length,buffer,0);
|
urlString.getChars(0,length,buffer,0);
|
||||||
|
@ -117,8 +120,10 @@ class URLScopeFilter extends Filter implements MessageListener
|
||||||
boolean match = matcher.matches(buffer, pattern);
|
boolean match = matcher.matches(buffer, pattern);
|
||||||
if(!match)
|
if(!match)
|
||||||
{
|
{
|
||||||
//System.out.println("not in Scope: " + urlString);
|
//log.log("URLScopeFilter: not in scope: " + urlString);
|
||||||
|
log.log(message.toString());
|
||||||
filtered++;
|
filtered++;
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue