mirror of https://github.com/apache/lucene.git
SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore
This commit is contained in:
parent
bca22d58e2
commit
c60685f9e4
|
@ -41,6 +41,10 @@ Upgrade Notes
|
||||||
expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
|
expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
|
||||||
(Gus Heck).
|
(Gus Heck).
|
||||||
|
|
||||||
|
* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the
|
||||||
|
present code is unlikely to produce such an exception, it may be possible in future changes or in subclasses.
|
||||||
|
Currently this change should only affect compatibility of custom code overriding this method (Gus Heck).
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on
|
* SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on
|
||||||
|
|
|
@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* and helping to produce values which may be used for boosting or filtering later.
|
* and helping to produce values which may be used for boosting or filtering later.
|
||||||
*/
|
*/
|
||||||
public class URLClassifyProcessor extends UpdateRequestProcessor {
|
public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
|
|
||||||
private static final String INPUT_FIELD_PARAM = "inputField";
|
private static final String INPUT_FIELD_PARAM = "inputField";
|
||||||
private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
|
private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
|
||||||
private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
|
private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
|
||||||
|
@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
"welcome.asp",
|
"welcome.asp",
|
||||||
"welcome.aspx"
|
"welcome.aspx"
|
||||||
};
|
};
|
||||||
|
|
||||||
public URLClassifyProcessor(SolrParams parameters,
|
public URLClassifyProcessor(SolrParams parameters,
|
||||||
SolrQueryRequest request,
|
SolrQueryRequest request,
|
||||||
SolrQueryResponse response,
|
SolrQueryResponse response,
|
||||||
UpdateRequestProcessor nextProcessor) {
|
UpdateRequestProcessor nextProcessor) {
|
||||||
super(nextProcessor);
|
super(nextProcessor);
|
||||||
|
|
||||||
this.initParameters(parameters);
|
this.initParameters(parameters);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initParameters(SolrParams parameters) {
|
private void initParameters(SolrParams parameters) {
|
||||||
if (parameters != null) {
|
if (parameters != null) {
|
||||||
this.setEnabled(parameters.getBool("enabled", true));
|
this.setEnabled(parameters.getBool("enabled", true));
|
||||||
|
@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
|
this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void processAdd(AddUpdateCommand command) throws IOException {
|
public void processAdd(AddUpdateCommand command) throws IOException {
|
||||||
if (isEnabled()) {
|
if (isEnabled()) {
|
||||||
|
@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
}
|
}
|
||||||
super.processAdd(command);
|
super.processAdd(command);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a canonical form of the URL for use as main URL
|
* Gets a canonical form of the URL for use as main URL
|
||||||
* @param url The input url
|
* @param url The input url
|
||||||
* @return The URL object representing the canonical URL
|
* @return The URL object representing the canonical URL
|
||||||
*/
|
*/
|
||||||
public URL getCanonicalUrl(URL url) {
|
public URL getCanonicalUrl(URL url) throws MalformedURLException {
|
||||||
// NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
|
// NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
|
||||||
String urlString = url.toString();
|
String urlString = url.toString();
|
||||||
try {
|
String lps = landingPageSuffix(url);
|
||||||
String lps = landingPageSuffix(url);
|
return new URL(urlString.replaceFirst("/" + lps + "$", "/"));
|
||||||
return new URL(urlString.replaceFirst("/"+lps+"$", "/"));
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
return url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the length of the URL in characters
|
* Calculates the length of the URL in characters
|
||||||
* @param url The input URL
|
* @param url The input URL
|
||||||
|
@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
public int length(URL url) {
|
public int length(URL url) {
|
||||||
return url.toString().length();
|
return url.toString().length();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the number of path levels in the given URL
|
* Calculates the number of path levels in the given URL
|
||||||
* @param url The input URL
|
* @param url The input URL
|
||||||
|
@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
}
|
}
|
||||||
return levels;
|
return levels;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates whether a URL is a top level page
|
* Calculates whether a URL is a top level page
|
||||||
* @param url The input URL
|
* @param url The input URL
|
||||||
|
@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
|
String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
|
||||||
return path.length() == 0 && url.getQuery() == null;
|
return path.length() == 0 && url.getQuery() == null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates whether the URL is a landing page or not
|
* Calculates whether the URL is a landing page or not
|
||||||
* @param url The input URL
|
* @param url The input URL
|
||||||
|
@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
return landingPageSuffix(url) != "";
|
return landingPageSuffix(url) != "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
|
public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
|
||||||
return new URI(url).normalize().toURL();
|
return new URI(url).normalize().toURL();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEnabled() {
|
public boolean isEnabled() {
|
||||||
return enabled;
|
return enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setEnabled(boolean enabled) {
|
public void setEnabled(boolean enabled) {
|
||||||
this.enabled = enabled;
|
this.enabled = enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String landingPageSuffix(URL url) {
|
private String landingPageSuffix(URL url) {
|
||||||
String path = url.getPath().toLowerCase(Locale.ROOT);
|
String path = url.getPath().toLowerCase(Locale.ROOT);
|
||||||
for(String suffix : landingPageSuffixes) {
|
for(String suffix : landingPageSuffixes) {
|
||||||
|
@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getPathWithoutSuffix(URL url) {
|
private String getPathWithoutSuffix(URL url) {
|
||||||
return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
|
return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue