Changing url-check so that it allows different schemes but still requires the same domain

(cherry picked from commit d4ff78918dd9317a5686b9675cdade3cb6c2007e)
Signed-off-by: Dan Fabulich <dan.fabulich@redfin.com>
This commit is contained in:
Navtej Sadhal 2014-04-30 20:22:16 -07:00 committed by Dan Fabulich
parent 95b23f2eb0
commit 5ea973b161
5 changed files with 34 additions and 18 deletions

View File

@ -7,7 +7,7 @@ import java.net.URL;
// It makes sense, I swear! http://madbean.com/2004/mb2004-3/
abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGeneratorOptions<THIS>> {
File baseDir;
String baseUrl;
URL baseUrl;
String fileNamePrefix = "sitemap";
boolean allowMultipleSitemaps = true;
W3CDateFormat dateFormat;
@ -19,7 +19,7 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
if (baseDir == null) throw new NullPointerException("baseDir may not be null");
if (baseUrl == null) throw new NullPointerException("baseUrl may not be null");
this.baseDir = baseDir;
this.baseUrl = baseUrl.toString();
this.baseUrl = baseUrl;
}
/** The prefix of the name of the sitemaps we'll create; by default this is "sitemap" */
@ -62,6 +62,7 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
this.gzip = gzip;
return getThis();
}
@SuppressWarnings("unchecked")
THIS getThis() {
return (THIS)this;

View File

@ -17,7 +17,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
/** 50000 URLs per sitemap maximum */
public static final int MAX_URLS_PER_SITEMAP = 50000;
private final String baseUrl;
private final URL baseUrl;
private final File baseDir;
private final String fileNamePrefix;
private final String fileNameSuffix;
@ -56,7 +56,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
*/
public THIS addUrl(U url) {
if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
UrlUtils.checkUrl(url.getUrl().toString(), baseUrl);
UrlUtils.checkUrl(url.getUrl(), baseUrl);
if (urls.size() == maxUrls) {
if (!allowMultipleSitemaps) throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false. Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
if (mapCount == 0) mapCount++;
@ -169,12 +169,8 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
public void writeSitemapsWithIndex() {
if (!finished) throw new RuntimeException("Sitemaps not generated yet; call write() first");
File outFile = new File(baseDir, "sitemap_index.xml");
SitemapIndexGenerator sig;
try {
sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();
} catch (MalformedURLException e) {
throw new RuntimeException("bug", e);
}
SitemapIndexGenerator sig;
sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();
sig.addUrls(fileNamePrefix, fileNameSuffix, mapCount).write();
}

View File

@ -17,8 +17,7 @@ import org.xml.sax.SAXException;
*
*/
public class SitemapIndexGenerator {
private final URL baseUrl;
private final String baseUrlString;
private final URL baseUrl;
private final File outFile;
private final ArrayList<SitemapIndexUrl> urls = new ArrayList<SitemapIndexUrl>();
private final int maxUrls;
@ -115,8 +114,7 @@ public class SitemapIndexGenerator {
}
private SitemapIndexGenerator(Options options) {
this.baseUrl = options.baseUrl;
this.baseUrlString = baseUrl.toString();
this.baseUrl = options.baseUrl;
this.outFile = options.outFile;
this.maxUrls = options.maxUrls;
W3CDateFormat dateFormat = options.dateFormat;
@ -128,7 +126,7 @@ public class SitemapIndexGenerator {
/** Adds a single sitemap to the index */
public SitemapIndexGenerator addUrl(SitemapIndexUrl url) {
UrlUtils.checkUrl(url.url.toString(), baseUrlString);
UrlUtils.checkUrl(url.url, baseUrl);
if (urls.size() >= maxUrls) {
throw new RuntimeException("More than " + maxUrls + " urls");
}

View File

@ -1,13 +1,19 @@
package com.redfin.sitemapgenerator;
import java.net.URL;
import java.util.HashMap;
class UrlUtils {
static void checkUrl(String url, String baseUrl) {
static void checkUrl(URL url, URL baseUrl) {
// Is there a better test to use here?
if (!url.startsWith(baseUrl)) {
throw new RuntimeException("Url " + url + " doesn't start with base URL " + baseUrl);
if (baseUrl.getHost() == null) {
throw new RuntimeException("base URL is null");
}
if (!baseUrl.getHost().equalsIgnoreCase(url.getHost())) {
throw new RuntimeException("Domain of URL " + url + " doesn't match base URL " + baseUrl);
}
}

View File

@ -161,6 +161,21 @@ public class SitemapGeneratorTest extends TestCase {
fail("wrong domain allowed to be added");
} catch (RuntimeException e) {}
}
/**
 * Verifies that a URL whose scheme differs from the base URL's scheme (https vs. http)
 * is accepted when both share the same host, and that it appears unchanged in the
 * generated sitemap's {@code <loc>} element.
 */
public void testSameDomainDifferentSchemeOK() throws Exception {
// The generator is configured with an http base URL...
wsg = new WebSitemapGenerator("http://www.example.com", dir);
// ...and is given an https URL on the same host; the test fails if this call throws.
wsg.addUrl("https://www.example.com/index.html");
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
" <url>\n" +
" <loc>https://www.example.com/index.html</loc>\n" +
" </url>\n" +
"</urlset>";
// Compare the written sitemap against the expected XML, which keeps the https scheme.
String sitemap = writeSingleSiteMap(wsg);
assertEquals(expected, sitemap);
}
public void testDoubleWrite() throws Exception {
testSimpleUrl();