mirror of https://github.com/apache/lucene.git
SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore
This commit is contained in:
parent
bca22d58e2
commit
c60685f9e4
|
@ -41,6 +41,10 @@ Upgrade Notes
|
|||
expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
|
||||
(Gus Heck).
|
||||
|
||||
* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the
|
||||
present code is unlikely to produce such an exception, it may be possible in future changes or in subclasses.
|
||||
Currently this change should only affect compatibility of custom code overriding this method (Gus Heck).
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
* SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on
|
||||
|
|
|
@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
|
|||
* and helping to produce values which may be used for boosting or filtering later.
|
||||
*/
|
||||
public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||
|
||||
|
||||
private static final String INPUT_FIELD_PARAM = "inputField";
|
||||
private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
|
||||
private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
|
||||
|
@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
"welcome.asp",
|
||||
"welcome.aspx"
|
||||
};
|
||||
|
||||
|
||||
/**
* Creates the processor and reads its settings from the supplied parameters.
*
* @param parameters configuration for this processor; may be null, in which case
*                   initParameters reads nothing
* @param request not used by this constructor
* @param response not used by this constructor
* @param nextProcessor handed to the UpdateRequestProcessor superclass constructor
*/
public URLClassifyProcessor(SolrParams parameters,
|
||||
SolrQueryRequest request,
|
||||
SolrQueryResponse response,
|
||||
UpdateRequestProcessor nextProcessor) {
|
||||
super(nextProcessor);
|
||||
|
||||
|
||||
this.initParameters(parameters);
|
||||
}
|
||||
|
||||
|
||||
private void initParameters(SolrParams parameters) {
|
||||
if (parameters != null) {
|
||||
this.setEnabled(parameters.getBool("enabled", true));
|
||||
|
@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand command) throws IOException {
|
||||
if (isEnabled()) {
|
||||
|
@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
}
|
||||
super.processAdd(command);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets a canonical form of the URL for use as main URL.
* If the URL ends in "/" followed by a landing page suffix (as reported by
* landingPageSuffix), that suffix is dropped so the result ends in "/".
|
||||
* @param url The input url
|
||||
* @return The URL object representing the canonical URL
* @throws MalformedURLException if the rewritten URL string cannot be parsed as a URL
|
||||
*/
|
||||
public URL getCanonicalUrl(URL url) {
|
||||
public URL getCanonicalUrl(URL url) throws MalformedURLException {
|
||||
// NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
|
||||
String urlString = url.toString();
|
||||
try {
|
||||
String lps = landingPageSuffix(url);
|
||||
return new URL(urlString.replaceFirst("/"+lps+"$", "/"));
|
||||
} catch (MalformedURLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return url;
|
||||
String lps = landingPageSuffix(url);
|
||||
// lps is used as a regex anchored at the end of the string; when it is "" the
// pattern is "/$" and the URL string is left as it was.
return new URL(urlString.replaceFirst("/" + lps + "$", "/"));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the length of the URL in characters
|
||||
* @param url The input URL
|
||||
|
@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
public int length(URL url) {
|
||||
// Measured on the URL's full string form (url.toString()), not only on its path.
return url.toString().length();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the number of path levels in the given URL
|
||||
* @param url The input URL
|
||||
|
@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
}
|
||||
return levels;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates whether a URL is a top level page
|
||||
* @param url The input URL
|
||||
|
@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
|
||||
return path.length() == 0 && url.getQuery() == null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates whether the URL is a landing page or not
|
||||
* @param url The input URL
|
||||
|
@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
return landingPageSuffix(url) != "";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
* Parses the given string as a URI, normalizes it with URI.normalize(), and
* converts the result to a URL.
*
* @param url The input URL as a string
* @return The normalized URL
* @throws MalformedURLException if the normalized URI cannot be converted to a URL
* @throws URISyntaxException if the string cannot be parsed as a URI
*/
public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
|
||||
return new URI(url).normalize().toURL();
|
||||
}
|
||||
|
||||
|
||||
/**
* Returns the current value of the enabled flag, which processAdd checks.
*
* @return true if this processor is enabled
*/
public boolean isEnabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
|
||||
/**
* Sets the enabled flag. initParameters calls this with the "enabled"
* parameter, which defaults to true.
*
* @param enabled The new value of the flag
*/
public void setEnabled(boolean enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
|
||||
private String landingPageSuffix(URL url) {
|
||||
String path = url.getPath().toLowerCase(Locale.ROOT);
|
||||
for(String suffix : landingPageSuffixes) {
|
||||
|
@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
|||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
/**
* Returns the path of the URL, lower-cased with Locale.ROOT, with the landing
* page suffix reported by landingPageSuffix removed from the end.
*
* @param url The input URL
* @return The lower-cased path without the landing page suffix
*/
private String getPathWithoutSuffix(URL url) {
|
||||
// When landingPageSuffix returns "" the pattern is just "$" and the path is unchanged.
// NOTE(review): the suffix is spliced into the regex unquoted, so a '.' in it
// matches any character - consider Pattern.quote; confirm against landingPageSuffix.
return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue