diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index bbf8333b314..834db2fd72d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -41,6 +41,10 @@ Upgrade Notes expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup (Gus Heck). +* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the + present code is unlikely to produce such an exception it may be possible in future changes or in subclasses. + Currently this change should only affect compatibility of custom code overriding this method (Gus Heck). + New Features ---------------------- * SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on diff --git a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java index 0844b6023fc..a99b7cb7dc6 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java @@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory; * and helping to produce values which may be used for boosting or filtering later. 
*/ public class URLClassifyProcessor extends UpdateRequestProcessor { - + private static final String INPUT_FIELD_PARAM = "inputField"; private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField"; private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField"; @@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { "welcome.asp", "welcome.aspx" }; - + public URLClassifyProcessor(SolrParams parameters, SolrQueryRequest request, SolrQueryResponse response, UpdateRequestProcessor nextProcessor) { super(nextProcessor); - + this.initParameters(parameters); } - + private void initParameters(SolrParams parameters) { if (parameters != null) { this.setEnabled(parameters.getBool("enabled", true)); @@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM); } } - + @Override public void processAdd(AddUpdateCommand command) throws IOException { if (isEnabled()) { @@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { } super.processAdd(command); } - + /** * Gets a canonical form of the URL for use as main URL * @param url The input url * @return The URL object representing the canonical URL */ - public URL getCanonicalUrl(URL url) { + public URL getCanonicalUrl(URL url) throws MalformedURLException { // NOTE: Do we want to make sure this URL is normalized? 
(Christian thinks we should) String urlString = url.toString(); - try { - String lps = landingPageSuffix(url); - return new URL(urlString.replaceFirst("/"+lps+"$", "/")); - } catch (MalformedURLException e) { - e.printStackTrace(); - } - return url; + String lps = landingPageSuffix(url); + return new URL(urlString.replaceFirst("/" + lps + "$", "/")); } - + /** * Calculates the length of the URL in characters * @param url The input URL @@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { public int length(URL url) { return url.toString().length(); } - + /** * Calculates the number of path levels in the given URL * @param url The input URL @@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { } return levels; } - + /** * Calculates whether a URL is a top level page * @param url The input URL @@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { String path = getPathWithoutSuffix(url).replaceAll("/+$", ""); return path.length() == 0 && url.getQuery() == null; } - + /** * Calculates whether the URL is a landing page or not * @param url The input URL @@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { return landingPageSuffix(url) != ""; } } - + public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException { return new URI(url).normalize().toURL(); } - + public boolean isEnabled() { return enabled; } - + public void setEnabled(boolean enabled) { this.enabled = enabled; } - + private String landingPageSuffix(URL url) { String path = url.getPath().toLowerCase(Locale.ROOT); for(String suffix : landingPageSuffixes) { @@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor { } return ""; } - + private String getPathWithoutSuffix(URL url) { return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", ""); }