Changing url-check so that it allows different schemes but still requires the same domain
(cherry picked from commit d4ff78918dd9317a5686b9675cdade3cb6c2007e) Signed-off-by: Dan Fabulich <dan.fabulich@redfin.com>
This commit is contained in:
parent
95b23f2eb0
commit
5ea973b161
|
@ -7,7 +7,7 @@ import java.net.URL;
|
|||
// It makes sense, I swear! http://madbean.com/2004/mb2004-3/
|
||||
abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGeneratorOptions<THIS>> {
|
||||
File baseDir;
|
||||
String baseUrl;
|
||||
URL baseUrl;
|
||||
String fileNamePrefix = "sitemap";
|
||||
boolean allowMultipleSitemaps = true;
|
||||
W3CDateFormat dateFormat;
|
||||
|
@ -19,7 +19,7 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
|
|||
if (baseDir == null) throw new NullPointerException("baseDir may not be null");
|
||||
if (baseUrl == null) throw new NullPointerException("baseUrl may not be null");
|
||||
this.baseDir = baseDir;
|
||||
this.baseUrl = baseUrl.toString();
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
/** The prefix of the name of the sitemaps we'll create; by default this is "sitemap" */
|
||||
|
@ -62,6 +62,7 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
|
|||
this.gzip = gzip;
|
||||
return getThis();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
THIS getThis() {
|
||||
return (THIS)this;
|
||||
|
|
|
@ -17,7 +17,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
|
|||
/** 50000 URLs per sitemap maximum */
|
||||
public static final int MAX_URLS_PER_SITEMAP = 50000;
|
||||
|
||||
private final String baseUrl;
|
||||
private final URL baseUrl;
|
||||
private final File baseDir;
|
||||
private final String fileNamePrefix;
|
||||
private final String fileNameSuffix;
|
||||
|
@ -56,7 +56,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
|
|||
*/
|
||||
public THIS addUrl(U url) {
|
||||
if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
|
||||
UrlUtils.checkUrl(url.getUrl().toString(), baseUrl);
|
||||
UrlUtils.checkUrl(url.getUrl(), baseUrl);
|
||||
if (urls.size() == maxUrls) {
|
||||
if (!allowMultipleSitemaps) throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false. Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
|
||||
if (mapCount == 0) mapCount++;
|
||||
|
@ -169,12 +169,8 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
|
|||
public void writeSitemapsWithIndex() {
|
||||
if (!finished) throw new RuntimeException("Sitemaps not generated yet; call write() first");
|
||||
File outFile = new File(baseDir, "sitemap_index.xml");
|
||||
SitemapIndexGenerator sig;
|
||||
try {
|
||||
sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();
|
||||
} catch (MalformedURLException e) {
|
||||
throw new RuntimeException("bug", e);
|
||||
}
|
||||
SitemapIndexGenerator sig;
|
||||
sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();
|
||||
sig.addUrls(fileNamePrefix, fileNameSuffix, mapCount).write();
|
||||
}
|
||||
|
||||
|
|
|
@ -17,8 +17,7 @@ import org.xml.sax.SAXException;
|
|||
*
|
||||
*/
|
||||
public class SitemapIndexGenerator {
|
||||
private final URL baseUrl;
|
||||
private final String baseUrlString;
|
||||
private final URL baseUrl;
|
||||
private final File outFile;
|
||||
private final ArrayList<SitemapIndexUrl> urls = new ArrayList<SitemapIndexUrl>();
|
||||
private final int maxUrls;
|
||||
|
@ -115,8 +114,7 @@ public class SitemapIndexGenerator {
|
|||
}
|
||||
|
||||
private SitemapIndexGenerator(Options options) {
|
||||
this.baseUrl = options.baseUrl;
|
||||
this.baseUrlString = baseUrl.toString();
|
||||
this.baseUrl = options.baseUrl;
|
||||
this.outFile = options.outFile;
|
||||
this.maxUrls = options.maxUrls;
|
||||
W3CDateFormat dateFormat = options.dateFormat;
|
||||
|
@ -128,7 +126,7 @@ public class SitemapIndexGenerator {
|
|||
|
||||
/** Adds a single sitemap to the index */
|
||||
public SitemapIndexGenerator addUrl(SitemapIndexUrl url) {
|
||||
UrlUtils.checkUrl(url.url.toString(), baseUrlString);
|
||||
UrlUtils.checkUrl(url.url, baseUrl);
|
||||
if (urls.size() >= maxUrls) {
|
||||
throw new RuntimeException("More than " + maxUrls + " urls");
|
||||
}
|
||||
|
|
|
@ -1,13 +1,19 @@
|
|||
package com.redfin.sitemapgenerator;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.HashMap;
|
||||
|
||||
class UrlUtils {
|
||||
|
||||
// Validates a sitemap URL against the generator's base URL; throws RuntimeException on mismatch.
// This diff view interleaves two forms of the method with no +/- markers:
//   - the (String, String) signature with the startsWith() prefix test, and
//   - the (URL, URL) signature with the getHost() checks, which compare only the host
//     (case-insensitively) and never look at scheme, port or path.
// Presumably the String form is the removed one and the URL form the added one, per the
// commit message ("allows different schemes but still requires the same domain") — confirm.
// NOTE(review): the "base URL is null" message is thrown when baseUrl.getHost() is null,
// not when baseUrl itself is null — confirm the wording is what callers should see.
static void checkUrl(String url, String baseUrl) {
|
||||
static void checkUrl(URL url, URL baseUrl) {
|
||||
// Is there a better test to use here?
|
||||
if (!url.startsWith(baseUrl)) {
|
||||
throw new RuntimeException("Url " + url + " doesn't start with base URL " + baseUrl);
|
||||

||||
if (baseUrl.getHost() == null) {
|
||||
throw new RuntimeException("base URL is null");
|
||||
}
|
||||

||||
if (!baseUrl.getHost().equalsIgnoreCase(url.getHost())) {
|
||||
throw new RuntimeException("Domain of URL " + url + " doesn't match base URL " + baseUrl);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -161,6 +161,21 @@ public class SitemapGeneratorTest extends TestCase {
|
|||
fail("wrong domain allowed to be added");
|
||||
} catch (RuntimeException e) {}
|
||||
}
|
||||
|
||||
/**
 * Checks that a URL whose scheme differs from the base URL's (https vs. http) but whose
 * host is the same (www.example.com) can be added, and that the written sitemap contains
 * that URL unchanged in its loc element.
 */
public void testSameDomainDifferentSchemeOK() throws Exception {
|
||||
wsg = new WebSitemapGenerator("http://www.example.com", dir);
|
||||

||||
wsg.addUrl("https://www.example.com/index.html");
|
||||

||||
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
|
||||
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
|
||||
" <url>\n" +
|
||||
" <loc>https://www.example.com/index.html</loc>\n" +
|
||||
" </url>\n" +
|
||||
"</urlset>";
|
||||
// The whole document is compared, so any formatting change in the writer fails this test too.
String sitemap = writeSingleSiteMap(wsg);
|
||||
assertEquals(expected, sitemap);
|
||||
}
|
||||
|
||||
public void testDoubleWrite() throws Exception {
|
||||
testSimpleUrl();
|
||||
|
|
Loading…
Reference in New Issue