SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore

2025-03-01 05:49:33 +00:00 · 2019-03-24 19:07:26 -04:00 · 2019-03-24 19:07:26 -04:00 · c60685f9e4
commit c60685f9e4
parent bca22d58e2
2 changed files with 22 additions and 23 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -41,6 +41,10 @@ Upgrade Notes
  expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
  (Gus Heck).

+* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the
+  present code is unlikely to produce such an exception, it may be possible in future changes or in subclasses.
+  Currently this change should only affect compatibility of custom code overriding this method (Gus Heck).
+
 New Features
 ----------------------
 * SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on
--- a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
 * and helping to produce values which may be used for boosting or filtering later.
 */
 public class URLClassifyProcessor extends UpdateRequestProcessor {
-  
+
  private static final String INPUT_FIELD_PARAM = "inputField";
  private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
  private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
      "welcome.asp",
      "welcome.aspx"
  };
-  
+
  public URLClassifyProcessor(SolrParams parameters,
      SolrQueryRequest request,
      SolrQueryResponse response,
      UpdateRequestProcessor nextProcessor) {
    super(nextProcessor);
-    
+
    this.initParameters(parameters);
  }
-  
+
  private void initParameters(SolrParams parameters) {
    if (parameters != null) {
      this.setEnabled(parameters.getBool("enabled", true));
@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
      this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
    }
  }
-  
+
  @Override
  public void processAdd(AddUpdateCommand command) throws IOException {
    if (isEnabled()) {
@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
    }
    super.processAdd(command);
  }
-  
+
  /**
   * Gets a canonical form of the URL for use as main URL
   * @param url The input url
   * @return The URL object representing the canonical URL
   */
-  public URL getCanonicalUrl(URL url) {
+  public URL getCanonicalUrl(URL url) throws MalformedURLException {
    // NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
    String urlString = url.toString();
-    try {
-      String lps = landingPageSuffix(url);
-      return new URL(urlString.replaceFirst("/"+lps+"$", "/"));
-    } catch (MalformedURLException e) {
-      e.printStackTrace();
-    }
-    return url;
+    String lps = landingPageSuffix(url);
+    return new URL(urlString.replaceFirst("/" + lps + "$", "/"));
  }
-  
+
  /**
   * Calculates the length of the URL in characters
   * @param url The input URL
@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
  public int length(URL url) {
    return url.toString().length();
  }
-  
+
  /**
   * Calculates the number of path levels in the given URL
   * @param url The input URL
@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
    }
    return levels;
  }
-  
+
  /**
   * Calculates whether a URL is a top level page
   * @param url The input URL
@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
    String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
    return path.length() == 0 && url.getQuery() == null;
  }
-  
+
  /**
   * Calculates whether the URL is a landing page or not
   * @param url The input URL
@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
      return landingPageSuffix(url) != "";
    }
  }
-  
+
  public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
    return new URI(url).normalize().toURL();
  }
-  
+
  public boolean isEnabled() {
    return enabled;
  }
-  
+
  public void setEnabled(boolean enabled) {
    this.enabled = enabled;
  }
-  
+
  private String landingPageSuffix(URL url) {
    String path = url.getPath().toLowerCase(Locale.ROOT);
    for(String suffix : landingPageSuffixes) {
@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
    }
    return "";
  }
-  
+
  private String getPathWithoutSuffix(URL url) {
    return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
  }