SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore

This commit is contained in:
Gus Heck 2019-03-24 19:07:26 -04:00
parent bca22d58e2
commit c60685f9e4
2 changed files with 22 additions and 23 deletions

View File

@ -41,6 +41,10 @@ Upgrade Notes
expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
(Gus Heck). (Gus Heck).
* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the
present code is unlikely to produce such an exception, it may be possible in future changes or in subclasses.
Currently, this change should only affect compatibility of custom code overriding this method (Gus Heck).
New Features New Features
---------------------- ----------------------
* SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on * SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on

View File

@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
* and helping to produce values which may be used for boosting or filtering later. * and helping to produce values which may be used for boosting or filtering later.
*/ */
public class URLClassifyProcessor extends UpdateRequestProcessor { public class URLClassifyProcessor extends UpdateRequestProcessor {
private static final String INPUT_FIELD_PARAM = "inputField"; private static final String INPUT_FIELD_PARAM = "inputField";
private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField"; private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField"; private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
"welcome.asp", "welcome.asp",
"welcome.aspx" "welcome.aspx"
}; };
public URLClassifyProcessor(SolrParams parameters, public URLClassifyProcessor(SolrParams parameters,
SolrQueryRequest request, SolrQueryRequest request,
SolrQueryResponse response, SolrQueryResponse response,
UpdateRequestProcessor nextProcessor) { UpdateRequestProcessor nextProcessor) {
super(nextProcessor); super(nextProcessor);
this.initParameters(parameters); this.initParameters(parameters);
} }
private void initParameters(SolrParams parameters) { private void initParameters(SolrParams parameters) {
if (parameters != null) { if (parameters != null) {
this.setEnabled(parameters.getBool("enabled", true)); this.setEnabled(parameters.getBool("enabled", true));
@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM); this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
} }
} }
@Override @Override
public void processAdd(AddUpdateCommand command) throws IOException { public void processAdd(AddUpdateCommand command) throws IOException {
if (isEnabled()) { if (isEnabled()) {
@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
} }
super.processAdd(command); super.processAdd(command);
} }
/** /**
* Gets a canonical form of the URL for use as main URL * Gets a canonical form of the URL for use as main URL
* @param url The input url * @param url The input url
* @return The URL object representing the canonical URL * @return The URL object representing the canonical URL
*/ */
public URL getCanonicalUrl(URL url) { public URL getCanonicalUrl(URL url) throws MalformedURLException {
// NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should) // NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
String urlString = url.toString(); String urlString = url.toString();
try { String lps = landingPageSuffix(url);
String lps = landingPageSuffix(url); return new URL(urlString.replaceFirst("/" + lps + "$", "/"));
return new URL(urlString.replaceFirst("/"+lps+"$", "/"));
} catch (MalformedURLException e) {
e.printStackTrace();
}
return url;
} }
/** /**
* Calculates the length of the URL in characters * Calculates the length of the URL in characters
* @param url The input URL * @param url The input URL
@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
public int length(URL url) { public int length(URL url) {
return url.toString().length(); return url.toString().length();
} }
/** /**
* Calculates the number of path levels in the given URL * Calculates the number of path levels in the given URL
* @param url The input URL * @param url The input URL
@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
} }
return levels; return levels;
} }
/** /**
* Calculates whether a URL is a top level page * Calculates whether a URL is a top level page
* @param url The input URL * @param url The input URL
@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
String path = getPathWithoutSuffix(url).replaceAll("/+$", ""); String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
return path.length() == 0 && url.getQuery() == null; return path.length() == 0 && url.getQuery() == null;
} }
/** /**
* Calculates whether the URL is a landing page or not * Calculates whether the URL is a landing page or not
* @param url The input URL * @param url The input URL
@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
return landingPageSuffix(url) != ""; return landingPageSuffix(url) != "";
} }
} }
public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException { public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
return new URI(url).normalize().toURL(); return new URI(url).normalize().toURL();
} }
public boolean isEnabled() { public boolean isEnabled() {
return enabled; return enabled;
} }
public void setEnabled(boolean enabled) { public void setEnabled(boolean enabled) {
this.enabled = enabled; this.enabled = enabled;
} }
private String landingPageSuffix(URL url) { private String landingPageSuffix(URL url) {
String path = url.getPath().toLowerCase(Locale.ROOT); String path = url.getPath().toLowerCase(Locale.ROOT);
for(String suffix : landingPageSuffixes) { for(String suffix : landingPageSuffixes) {
@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
} }
return ""; return "";
} }
private String getPathWithoutSuffix(URL url) { private String getPathWithoutSuffix(URL url) {
return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", ""); return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
} }