First checkin

git-svn-id: https://sitemapgen4j.googlecode.com/svn/trunk@2 aa787bee-eda5-11dd-ada0-abde575de245
This commit is contained in:
DanFabulich 2009-01-29 01:48:40 +00:00
parent c654290116
commit 56bd6d728b
40 changed files with 4004 additions and 0 deletions

18
TODO.txt Normal file
View File

@ -0,0 +1,18 @@
Ping search engines
Text file reader
Sitemap reader
Improve validator for basic sitemap case (gzip, 10MB, urls, encoding)
validate Mobile/Geo/Video/Code/News sitemaps
JS api
addUrl({url:"http://www.example.com",lastMod:"2007-08-01"});
new WebSitemapGenerator({});
new SitemapIndexGenerator({});
Google KML generation
GeoRSS generation
Google Code packagemap http://www.google.com/help/codesearch_packagemap.html

34
pom.xml Normal file
View File

@ -0,0 +1,34 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven build descriptor for the SitemapGen4J library, packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.redfin</groupId>
<artifactId>sitemapgen4j</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>SitemapGen4J</name>
<build>
<!-- Goal that runs when Maven is invoked without an explicit goal. -->
<defaultGoal>install</defaultGoal>
<plugins>
<!-- Compile with Java 5 source level and Java 5 bytecode target. -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.5</source>
<target>1.5</target>
</configuration>
</plugin>
<!-- Pins the version of the Eclipse project-file plugin. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.5.1</version>
</plugin>
</plugins>
</build>
<dependencies>
<!-- JUnit is on the classpath for tests only (test scope). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,69 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.URL;
// The self-referential type parameter lets every fluent setter below return the concrete
// subclass type, so subclass-specific options stay reachable in a call chain.
// Background on the pattern: http://madbean.com/2004/mb2004-3/
/**
 * Fluent, mutable holder for the settings accepted by the sitemap generators in this package.
 * Every setter returns {@link #getThis()} so calls can be chained.
 *
 * @param <THIS> the concrete options type returned by each setter
 */
abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGeneratorOptions<THIS>> {
    File baseDir;
    String baseUrl;
    String fileNamePrefix = "sitemap";
    boolean allowMultipleSitemaps = true;
    W3CDateFormat dateFormat;
    int maxUrls = SitemapGenerator.MAX_URLS_PER_SITEMAP;
    boolean autoValidate = false;
    boolean gzip = false;

    /**
     * Creates options for the two mandatory settings.
     *
     * @param baseUrl the base URL; stored in its string form
     * @param baseDir the directory the options refer to
     * @throws NullPointerException if either argument is null
     */
    public AbstractSitemapGeneratorOptions(URL baseUrl, File baseDir) {
        if (baseDir == null) throw new NullPointerException("baseDir may not be null");
        if (baseUrl == null) throw new NullPointerException("baseUrl may not be null");
        this.baseDir = baseDir;
        this.baseUrl = baseUrl.toString();
    }

    /**
     * The prefix of the name of the sitemaps we'll create; by default this is "sitemap"
     *
     * @throws NullPointerException if the prefix is null
     */
    public THIS fileNamePrefix(String fileNamePrefix) {
        if (fileNamePrefix == null) throw new NullPointerException("fileNamePrefix may not be null");
        this.fileNamePrefix = fileNamePrefix;
        return getThis();
    }

    /** When more than the maximum number of URLs are passed in, should we split into multiple sitemaps automatically, or just throw an exception? */
    public THIS allowMultipleSitemaps(boolean allowMultipleSitemaps) {
        this.allowMultipleSitemaps = allowMultipleSitemaps;
        return getThis();
    }

    /** The date formatter, typically configured with a {@link W3CDateFormat.Pattern} and/or a time zone */
    public THIS dateFormat(W3CDateFormat dateFormat) {
        this.dateFormat = dateFormat;
        return getThis();
    }

    /**
     * The maximum number of URLs to allow per sitemap; the default is the
     * maximum allowed (50,000), but you can decrease it if you wish (to make
     * your auto-generated sitemaps smaller)
     *
     * @throws IllegalArgumentException if {@code maxUrls} is zero or negative
     * @throws RuntimeException if {@code maxUrls} exceeds {@link SitemapGenerator#MAX_URLS_PER_SITEMAP}
     */
    public THIS maxUrls(int maxUrls) {
        // A sitemap that may hold zero (or fewer) URLs cannot be a meaningful limit; fail at
        // configuration time instead of letting the bad value reach the generator.
        if (maxUrls <= 0) {
            throw new IllegalArgumentException("maxUrls must be at least 1. You asked for " + maxUrls);
        }
        if (maxUrls > SitemapGenerator.MAX_URLS_PER_SITEMAP) {
            throw new RuntimeException("You can only have 50,000 URLs per sitemap; to use more, allowMultipleSitemaps and generate a sitemap index. You asked for " + maxUrls);
        }
        this.maxUrls = maxUrls;
        return getThis();
    }

    /**
     * Validate the sitemaps automatically after writing them; this takes time (and may fail for Google-specific sitemaps)
     */
    public THIS autoValidate(boolean autoValidate) {
        this.autoValidate = autoValidate;
        return getThis();
    }

    /** Gzip the sitemaps after they are written to disk */
    THIS gzip(boolean gzip) {
        this.gzip = gzip;
        return getThis();
    }

    /** Returns this object typed as the concrete options class, which is what makes the setters chainable. */
    @SuppressWarnings("unchecked")
    public THIS getThis() {
        // Safe as long as subclasses bind THIS to their own type, as the class bound intends.
        return (THIS)this;
    }
}

View File

@ -0,0 +1,112 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.util.Date;
/**
 * Container for optional URL parameters. Every setter returns {@link #getThis()} so calls can
 * be chained, and {@link #build()} turns the collected settings into a URL object.
 *
 * @param <U> the URL type produced by {@link #build()}
 * @param <THIS> the concrete options type returned by each setter
 */
// The self-referential THIS parameter is what lets setters declared here return the subclass type.
// Background on the pattern: http://madbean.com/2004/mb2004-3/
abstract class AbstractSitemapUrlOptions<U extends WebSitemapUrl, THIS extends AbstractSitemapUrlOptions<U,THIS>> {
    Date lastMod;
    ChangeFreq changeFreq;
    Double priority;
    URL url;
    Class<U> clazz;

    /**
     * @param url the URL in string form
     * @param clazz the URL class that {@link #build()} instantiates
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public AbstractSitemapUrlOptions(String url, Class<U> clazz) throws MalformedURLException {
        this(new URL(url), clazz);
    }

    /**
     * @param url the URL these options describe
     * @param clazz the URL class that {@link #build()} instantiates
     * @throws NullPointerException if {@code url} is null
     */
    public AbstractSitemapUrlOptions(URL url, Class<U> clazz) {
        if (url == null) throw new NullPointerException("URL may not be null");
        this.url = url;
        this.clazz = clazz;
    }

    /**
     * The date of last modification of the file. Note that this tag is
     * separate from the If-Modified-Since (304) header the server can
     * return, and search engines may use the information from both sources
     * differently.
     */
    public THIS lastMod(Date lastMod) {
        this.lastMod = lastMod;
        return getThis();
    }

    /**
     * The date of last modification of the file. Note that this tag is
     * separate from the If-Modified-Since (304) header the server can
     * return, and search engines may use the information from both sources
     * differently.
     * @throws ParseException if the string isn't a valid W3C date time
     * @see W3CDateFormat
     */
    public THIS lastMod(String lastMod) throws ParseException {
        this.lastMod = new W3CDateFormat().parse(lastMod);
        return getThis();
    }

    /**
     * How frequently the page is likely to change. This value provides
     * general information to search engines and may not correlate exactly
     * to how often they crawl the page. The value {@link ChangeFreq#ALWAYS} should be used to
     * describe documents that change each time they are accessed. The value
     * {@link ChangeFreq#NEVER} should be used to describe archived URLs.
     *
     * <p>Please note that the
     * value of this tag is considered a <em>hint</em> and not a command. Even though
     * search engine crawlers may consider this information when making
     * decisions, they may crawl pages marked {@link ChangeFreq#HOURLY} less frequently than
     * that, and they may crawl pages marked {@link ChangeFreq#YEARLY} more frequently than
     * that. Crawlers may periodically crawl pages marked {@link ChangeFreq#NEVER} so that
     * they can handle unexpected changes to those pages.</p>
     */
    public THIS changeFreq(ChangeFreq changeFreq) {
        this.changeFreq = changeFreq;
        return getThis();
    }

    /**
     * <p>The priority of this URL relative to other URLs on your site. Valid
     * values range from 0.0 to 1.0. This value does not affect how your
     * pages are compared to pages on other sites; it only lets the search
     * engines know which pages you deem most important for the crawlers.</p>
     *
     * <p>The default priority of a page is 0.5.</p>
     *
     * <p>Please note that the priority you assign to a page is not likely to
     * influence the position of your URLs in a search engine's result
     * pages. Search engines may use this information when selecting between
     * URLs on the same site, so you can use this tag to increase the
     * likelihood that your most important pages are present in a search
     * index.</p>
     *
     * <p>Also, please note that assigning a high priority to all of the URLs
     * on your site is not likely to help you. Since the priority is
     * relative, it is only used to select between URLs on your site.</p>
     *
     * @param priority a value from 0.0 to 1.0, or {@code null} to leave the priority unset
     * @throws IllegalArgumentException if the value is outside 0.0..1.0 or is not a number
     */
    public THIS priority(Double priority) {
        // null simply clears the setting; comparing it directly would fail with a bare
        // NullPointerException from auto-unboxing.
        if (priority != null) {
            double value = priority.doubleValue();
            if (value > 1.0) throw new IllegalArgumentException("Priority may not be greater than 1.0: " + priority);
            if (value < 0) throw new IllegalArgumentException("Priority may not be less than 0: " + priority);
            // NaN compares false against everything, so it slips past both range checks above.
            if (Double.isNaN(value)) throw new IllegalArgumentException("Priority must be a number: " + priority);
        }
        this.priority = priority;
        return getThis();
    }

    /** Returns this object typed as the concrete options class, which is what makes the setters chainable. */
    @SuppressWarnings("unchecked")
    THIS getThis() {
        return (THIS)this;
    }

    /**
     * Return an URL based on these settings. The URL class is instantiated reflectively through
     * its public constructor whose single parameter is this object's runtime class.
     *
     * @throws RuntimeException wrapping any failure to find or invoke that constructor
     */
    public U build() {
        try {
            return clazz.getConstructor(getClass()).newInstance(this);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -0,0 +1,48 @@
package com.redfin.sitemapgenerator;
import java.io.IOException;
import java.io.OutputStreamWriter;
/**
 * Base class for URL renderers: writes the standard {@code <url>} entry and offers a helper for
 * building namespaced extension tags.
 *
 * @param <T> the URL type handled by the concrete renderer
 */
abstract class AbstractSitemapUrlRenderer<T extends WebSitemapUrl> implements ISitemapUrlRenderer<T> {

    /**
     * Writes one {@code <url>} element containing the location and, when present, the last
     * modification date, change frequency and priority.
     *
     * @param url the URL to render
     * @param out destination writer
     * @param dateFormat used to format the last modification date
     * @param additionalData already-formed XML to place just before the closing tag; written
     *        verbatim (not escaped); may be null
     * @throws IOException if writing fails
     */
    public void render(WebSitemapUrl url, OutputStreamWriter out, W3CDateFormat dateFormat, String additionalData)
            throws IOException {
        out.write(" <url>\n");
        out.write(" <loc>");
        // URLs routinely contain '&' in their query string; written raw that makes the file malformed XML.
        out.write(escapeXml(url.getUrl().toString()));
        out.write("</loc>\n");
        if (url.getLastMod() != null) {
            out.write(" <lastmod>");
            out.write(dateFormat.format(url.getLastMod()));
            out.write("</lastmod>\n");
        }
        if (url.getChangeFreq() != null) {
            out.write(" <changefreq>");
            out.write(url.getChangeFreq().toString());
            out.write("</changefreq>\n");
        }
        if (url.getPriority() != null) {
            out.write(" <priority>");
            out.write(url.getPriority().toString());
            out.write("</priority>\n");
        }
        if (additionalData != null) out.write(additionalData);
        out.write(" </url>\n");
    }

    /**
     * Appends {@code <namespace:tagName>value</namespace:tagName>} to the buffer, with the value
     * XML-escaped. Does nothing when the value is null.
     *
     * @param sb buffer to append to
     * @param namespace namespace prefix of the tag
     * @param tagName local name of the tag
     * @param value tag content; its string form is used
     */
    public void renderTag(StringBuilder sb, String namespace, String tagName, Object value) {
        if (value == null) return;
        sb.append(" <");
        sb.append(namespace);
        sb.append(':');
        sb.append(tagName);
        sb.append('>');
        sb.append(escapeXml(String.valueOf(value)));
        sb.append("</");
        sb.append(namespace);
        sb.append(':');
        sb.append(tagName);
        sb.append(">\n");
    }

    /** Replaces the five XML special characters with their predefined entities. */
    private static String escapeXml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&': escaped.append("&amp;"); break;
                case '<': escaped.append("&lt;"); break;
                case '>': escaped.append("&gt;"); break;
                case '"': escaped.append("&quot;"); break;
                case '\'': escaped.append("&apos;"); break;
                default: escaped.append(c);
            }
        }
        return escaped.toString();
    }
}

View File

@ -0,0 +1,29 @@
package com.redfin.sitemapgenerator;
/**
 * How frequently the page is likely to change. This value provides
 * general information to search engines and may not correlate exactly
 * to how often they crawl the page. The value {@link #ALWAYS} should be used to
 * describe documents that change each time they are accessed. The value
 * {@link #NEVER} should be used to describe archived URLs.
 *
 * <p>Please note that the
 * value of this tag is considered a <em>hint</em> and not a command. Even though
 * search engine crawlers may consider this information when making
 * decisions, they may crawl pages marked {@link #HOURLY} less frequently than
 * that, and they may crawl pages marked {@link #YEARLY} more frequently than
 * that. Crawlers may periodically crawl pages marked {@link #NEVER} so that
 * they can handle unexpected changes to those pages.</p>
 *
 * <p>{@link #toString()} yields the constant's name in lower case.</p>
 */
public enum ChangeFreq {
    ALWAYS, HOURLY, DAILY, WEEKLY, MONTHLY, YEARLY, NEVER;

    /** Lower-case form of the constant name, computed once per constant. */
    final String lowerCase;

    private ChangeFreq() {
        // Use a fixed locale: with the default locale, a Turkish JVM lower-cases 'I' to a
        // dotless 'ı', turning DAILY into "daıly".
        lowerCase = this.name().toLowerCase(java.util.Locale.ENGLISH);
    }

    /** Returns the lower-case name of this constant, independent of the default locale. */
    @Override
    public String toString() {
        return lowerCase;
    }
}

View File

@ -0,0 +1,87 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Generator for Google Code Search sitemaps. When you need to set options, start from
 * {@link #builder(URL, File)}.
 * @author Dan Fabulich
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75224">Creating Code Search Sitemaps</a>
 */
public class GoogleCodeSitemapGenerator extends SitemapGenerator<GoogleCodeSitemapUrl,GoogleCodeSitemapGenerator> {

    /**
     * Starts a builder for a generator with custom options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> builder(URL baseUrl, File baseDir) {
        SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> result =
                new SitemapGeneratorBuilder<GoogleCodeSitemapGenerator>(baseUrl, baseDir, GoogleCodeSitemapGenerator.class);
        return result;
    }

    /**
     * Starts a builder for a generator with custom options, taking the base URL as a string.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException
     */
    public static SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> result =
                new SitemapGeneratorBuilder<GoogleCodeSitemapGenerator>(baseUrl, baseDir, GoogleCodeSitemapGenerator.class);
        return result;
    }

    /** Creates a generator from prepared options, rendering URLs with the Code Search renderer. */
    GoogleCodeSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }

    /**
     * Creates a generator with default options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleCodeSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Creates a generator with default options, taking the base URL as a string.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException
     */
    public GoogleCodeSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /** Renders a URL entry with a nested codesearch element carrying the Code Search fields. */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleCodeSitemapUrl> {

        public String getXmlNamespaces() {
            return "xmlns:codesearch=\"http://www.google.com/codesearch/schemas/sitemap/1.0\"";
        }

        public Class<GoogleCodeSitemapUrl> getUrlClass() {
            return GoogleCodeSitemapUrl.class;
        }

        public void render(GoogleCodeSitemapUrl url, OutputStreamWriter out,
                W3CDateFormat dateFormat) throws IOException {
            final String prefix = "codesearch";
            StringBuilder extension = new StringBuilder();
            extension.append(" <codesearch:codesearch>\n");
            // renderTag skips any field that is null, so only the configured ones appear.
            renderTag(extension, prefix, "filetype", url.getFileType());
            renderTag(extension, prefix, "license", url.getLicense());
            renderTag(extension, prefix, "filename", url.getFileName());
            renderTag(extension, prefix, "packageurl", url.getPackageUrl());
            renderTag(extension, prefix, "packagemap", url.getPackageMap());
            extension.append(" </codesearch:codesearch>\n");
            String additionalData = extension.toString();
            super.render(url, out, dateFormat, additionalData);
        }
    }
}

View File

@ -0,0 +1,319 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * One configurable Google Code Search URL. To configure, use {@link Options}
 * @author Dan Fabulich
 * @see Options
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75224">Creating Code Search Sitemaps</a>
 */
public class GoogleCodeSitemapUrl extends WebSitemapUrl {

    /** The type of code represented by this URL
     *
     * @author Dan Fabulich
     * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75252">Supported languages</a>
     */
    public enum FileType {
        /** A special value meaning that the URL is a compressed archive containing code.
         * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75259">Supported archive suffixes</a>
         */
        ARCHIVE("Archive"),
        ADA("Ada"),
        APPLESCRIPT("AppleScript"),
        ASP("ASP"),
        ASSEMBLY("Assembly"),
        AUTOCONF("Autoconf"),
        AUTOMAKE("Automake"),
        AWK("Awk"),
        BASIC("Basic/Visual Basic"),
        BAT("batch file"),
        C("C"),
        CPP("C++"),
        CSHARP("C#"),
        COBOL("COBOL"),
        COLDFUSION("ColdFusion"),
        CONFIGURE("configure script"),
        CPUDESCGCC("GCC machine description"),
        CPUDESCLCC("LCC machine description"),
        CPUDESCMONO("Mono machine description"),
        CPUDESCVCODE("VCODE machine description"),
        CSS("CSS"),
        CSV("CSV"),
        D("D"),
        EIFFEL("Eiffel"),
        ERLANG("Erlang"),
        FORTRAN("Fortran"),
        HASKELL("Haskell"),
        HTML("HTML"),
        JAVA("Java"),
        JAVASCRIPT("JavaScript"),
        JSP("JSP"),
        LEX("Lex"),
        LIMBO("Limbo"),
        LISP("Lisp"),
        LUA("Lua"),
        M4("m4"),
        MAKEFILE("Makefile"),
        MATHEMATICA("Mathematica"),
        MATLAB("Matlab"),
        MODULA2("Modula-2"),
        MODULA3("Modula-3"),
        OBJECTIVEC("Objective C"),
        OCAML("OCaml"),
        PASCAL("Pascal/Delphi"),
        PATCH("diff file"),
        PERL("Perl"),
        PHP("PHP"),
        POD("Plain Old Documentation"),
        MESSAGECATALOG("message catalog"),
        POSTSCRIPT("PostScript"),
        PROLOG("Prolog"),
        PYTHON("Python"),
        R("R"),
        REBOL("REBOL"),
        RUBY("Ruby"),
        SCHEME("Scheme"),
        SHELL("Shell"),
        SGML("SGML"),
        SMALLTALK("Smalltalk"),
        SQL("SQL"),
        SML("Standard ML"),
        SVG("SVG"),
        TCL("Tcl"),
        TEX("TeX/LaTeX"),
        TEXINFO("Texinfo"),
        TROFF("Troff"),
        VHDL("VHDL"),
        VIM("Vim script"),
        XML("XML"),
        XPM("XPM"),
        XSLT("XSLT"),
        XSUB("XSUB"),
        XUL("XUL"),
        YACC("Yacc");

        private final String longName;

        FileType(String longName) {
            this.longName = longName;
        }

        /** The pretty name for this filetype */
        public String getLongName() {
            return longName;
        }

        /**
         * The value written to the sitemap: "c++" and "c#" for {@link #CPP} and {@link #CSHARP},
         * otherwise the constant name in lower case.
         */
        @Override
        public String toString() {
            if (this == CPP) return "c++";
            if (this == CSHARP) return "c#";
            // Fixed locale: the default-locale overload turns 'I' into a dotless 'ı' on a
            // Turkish JVM, which would corrupt values such as "archive" and "vim".
            return this.name().toLowerCase(java.util.Locale.ENGLISH);
        }
    }

    /** The license of the code represented by this URL
     *
     * @author Dan Fabulich
     * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75256">Supported licenses</a>
     */
    public enum License {
        ALADDIN("Aladdin Public License"),
        ARTISTIC("Artistic License"),
        APACHE("Apache License"),
        APPLE("Apple Public Source License"),
        BSD("BSD License"),
        CPL("Common Public License"),
        GPL("GNU General Public License"),
        LGPL("GNU Lesser General Public License"),
        DISCLAIMER("Historical Permission Notice and Disclaimer"),
        IBM("IBM Public License"),
        LUCENT("Lucent Public License"),
        MIT("MIT License"),
        MOZILLA("Mozilla Public License"),
        NASA("NASA Open Source Agreement"),
        PYTHON("Python Software Foundation License"),
        QPL("Q Public License"),
        SLEEPYCAT("Sleepycat License"),
        ZOPE("Zope Public License");

        private final String longName;

        License(String longName) {
            this.longName = longName;
        }

        /** The pretty name for this license */
        public String getLongName() {
            return longName;
        }

        /** The value written to the sitemap: the constant name in lower case. */
        @Override
        public String toString() {
            // Fixed locale so that names containing 'I' (ARTISTIC, MIT, ...) lower-case
            // the same way on every JVM.
            return this.name().toLowerCase(java.util.Locale.ENGLISH);
        }
    }

    private final String fileType;
    private final String license;
    private final String fileName;
    private final URL packageUrl;
    private final String packageMap;

    /** Options to configure Google Code Search URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleCodeSitemapUrl, Options> {
        private String fileType;
        private String license;
        private String fileName;
        private URL packageUrl;
        private String packageMap;

        /** Specifies an url and a filetype (both mandatory in Google Code Search) */
        public Options(String url, FileType fileType) throws MalformedURLException {
            super(url, GoogleCodeSitemapUrl.class);
            this.fileType = fileType.toString();
        }

        /** Specifies an url and a filetype (both mandatory in Google Code Search) */
        public Options(URL url, FileType fileType) {
            super(url, GoogleCodeSitemapUrl.class);
            this.fileType = fileType.toString();
        }

        /** Specifies an url and a filetype (both mandatory in Google Code Search) */
        public Options(String url, String fileType) throws MalformedURLException {
            super(url, GoogleCodeSitemapUrl.class);
            this.fileType = fileType;
        }

        /** Specifies an url and a filetype (both mandatory in Google Code Search) */
        public Options(URL url, String fileType) {
            super(url, GoogleCodeSitemapUrl.class);
            this.fileType = fileType;
        }

        /** Specifies code license */
        public Options license(License license) {
            this.license = license.toString();
            return this;
        }

        /**
         * Specifies code license; when the value is not one of the recognized
         * licenses, this will cause Google to index the item as
         * "unknown license".
         */
        public Options license(String license) {
            this.license = license;
            return this;
        }

        /**
         * The name of the actual file; this is useful if the URL ends in
         * something like download.php?id=1234 instead of the actual filename.
         * The name can contain any character except "/". If the file is an
         * {@link FileType#ARCHIVE} file, it will be indexed only if it has one of the supported
         * archive suffixes.
         *
         * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75259">Supported archive suffixes</a>
         */
        public Options fileName(String fileName) {
            this.fileName = fileName;
            return this;
        }

        /**
         * The URL truncated at the top-level directory for the package; this
         * tells Google which files belong together. For use only when the
         * filetype is not {@link FileType#ARCHIVE}. For example, the file
         * http://path/Foo/1.23/bar/file.c could have the package URL
         * http://path/Foo/1.23. All files in a package should have the same
         * packageurl.
         */
        public Options packageUrl(URL packageUrl) {
            this.packageUrl = packageUrl;
            return this;
        }

        /**
         * The URL truncated at the top-level directory for the package; this
         * tells Google which files belong together. For use only when the
         * filetype is not {@link FileType#ARCHIVE}. For example, the file
         * http://path/Foo/1.23/bar/file.c could have the package URL
         * http://path/Foo/1.23. All files in a package should have the same
         * packageurl.
         *
         * @throws MalformedURLException if the string cannot be parsed as a URL
         */
        public Options packageUrl(String packageUrl) throws MalformedURLException {
            this.packageUrl = new URL(packageUrl);
            return this;
        }

        /**
         * The name of the packagemap file inside an {@link FileType#ARCHIVE};
         * just like a Sitemap is a list of files on a web site, a packagemap is
         * a list of files in a package. Case-sensitive. For use only when
         * filetype is {@link FileType#ARCHIVE}.
         *
         * @throws IllegalArgumentException if the configured file type is not "archive"
         */
        public Options packageMap(String packageMap) {
            if (!FileType.ARCHIVE.toString().equals(fileType)) {
                throw new IllegalArgumentException("You can only specify a packageMap when the fileType is 'archive'");
            }
            this.packageMap = packageMap;
            return this;
        }
    }

    /** Specifies an url and a filetype (both mandatory in Google Code Search) */
    public GoogleCodeSitemapUrl(URL url, FileType fileType) {
        this(new Options(url, fileType));
    }

    /** Specifies an url and a filetype (both mandatory in Google Code Search) */
    public GoogleCodeSitemapUrl(String url, FileType fileType) throws MalformedURLException {
        this(new Options(url, fileType));
    }

    /** Specifies an url and a filetype (both mandatory in Google Code Search) */
    public GoogleCodeSitemapUrl(URL url, String fileType) {
        this(new Options(url, fileType));
    }

    /** Specifies an url and a filetype (both mandatory in Google Code Search) */
    public GoogleCodeSitemapUrl(String url, String fileType) throws MalformedURLException {
        this(new Options(url, fileType));
    }

    /** Configures the URL with {@link Options}; every Code Search setting is copied from them. */
    public GoogleCodeSitemapUrl(Options options) {
        super(options);
        fileType = options.fileType;
        license = options.license;
        fileName = options.fileName;
        packageUrl = options.packageUrl;
        packageMap = options.packageMap;
    }

    /** Retrieves the file type given to the {@link Options} constructor */
    public String getFileType() {
        return fileType;
    }

    /** Retrieves the license set with {@link Options#license(String)}; null if none was set */
    public String getLicense() {
        return license;
    }

    /** Retrieves the file name set with {@link Options#fileName(String)}; null if none was set */
    public String getFileName() {
        return fileName;
    }

    /** Retrieves the package URL set with {@link Options#packageUrl(URL)}; null if none was set */
    public URL getPackageUrl() {
        return packageUrl;
    }

    /** Retrieves the package map set with {@link Options#packageMap(String)}; null if none was set */
    public String getPackageMap() {
        return packageMap;
    }
}

View File

@ -0,0 +1,82 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Generator for Google Geo sitemaps, whose entries point to KML or GeoRSS files. SitemapGen4j
 * itself can't generate the KML or GeoRSS content at this time (sorry).
 *
 * @author Dan Fabulich
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=94555">Creating Geo Sitemaps</a>
 */
public class GoogleGeoSitemapGenerator extends SitemapGenerator<GoogleGeoSitemapUrl,GoogleGeoSitemapGenerator> {

    /** Creates a generator from prepared options, rendering URLs with the Geo renderer. */
    GoogleGeoSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }

    /**
     * Creates a generator with default options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleGeoSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Creates a generator with default options, taking the base URL as a string.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException
     */
    public GoogleGeoSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Starts a builder for a generator with custom options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleGeoSitemapGenerator> builder(URL baseUrl, File baseDir) {
        SitemapGeneratorBuilder<GoogleGeoSitemapGenerator> result =
                new SitemapGeneratorBuilder<GoogleGeoSitemapGenerator>(baseUrl, baseDir, GoogleGeoSitemapGenerator.class);
        return result;
    }

    /**
     * Starts a builder for a generator with custom options, taking the base URL as a string.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleGeoSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        SitemapGeneratorBuilder<GoogleGeoSitemapGenerator> result =
                new SitemapGeneratorBuilder<GoogleGeoSitemapGenerator>(baseUrl, baseDir, GoogleGeoSitemapGenerator.class);
        return result;
    }

    /** Renders a URL entry with a nested geo element naming the format of the target file. */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleGeoSitemapUrl> {

        public String getXmlNamespaces() {
            return "xmlns:geo=\"http://www.google.com/geo/schemas/sitemap/1.0\"";
        }

        public Class<GoogleGeoSitemapUrl> getUrlClass() {
            return GoogleGeoSitemapUrl.class;
        }

        public void render(GoogleGeoSitemapUrl url, OutputStreamWriter out,
                W3CDateFormat dateFormat) throws IOException {
            StringBuilder extension = new StringBuilder();
            extension.append(" <geo:geo>\n");
            extension.append(" <geo:format>").append(url.getFormat()).append("</geo:format>\n");
            extension.append(" </geo:geo>\n");
            String additionalData = extension.toString();
            super.render(url, out, dateFormat, additionalData);
        }
    }
}

View File

@ -0,0 +1,63 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * A single Geo sitemap entry pointing at either a KML or a GeoRSS file. SitemapGen4j can't
 * generate the KML or GeoRSS content itself at this time (sorry). Configure instances through
 * {@link Options}.
 * @author Dan Fabulich
 * @see Options
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=94555">Creating Geo Sitemaps</a>
 */
public class GoogleGeoSitemapUrl extends WebSitemapUrl {

    /** The two Geo URL formats: KML and GeoRSS */
    public enum Format {
        KML,
        GEORSS;

        /** Returns the constant name in lower case. */
        @Override
        public String toString() {
            String constantName = this.name();
            return constantName.toLowerCase();
        }
    }

    /** Options to configure Geo URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleGeoSitemapUrl, Options> {
        private Format format;

        /** Specifies a Geo URL and its format */
        public Options(URL url, Format format) {
            super(url, GoogleGeoSitemapUrl.class);
            this.format = format;
        }

        /** Specifies a Geo URL (as a string) and its format */
        public Options(String url, Format format) throws MalformedURLException {
            super(url, GoogleGeoSitemapUrl.class);
            this.format = format;
        }
    }

    private final Format format;

    /** Configures the URL with {@link Options} */
    public GoogleGeoSitemapUrl(Options options) {
        super(options);
        this.format = options.format;
    }

    /** Specifies a Geo URL and its format */
    public GoogleGeoSitemapUrl(URL url, Format format) {
        this(new Options(url, format));
    }

    /** Specifies a Geo URL (as a string) and its format */
    public GoogleGeoSitemapUrl(String url, Format format) throws MalformedURLException {
        this(new Options(url, format));
    }

    /** Retrieves the URL {@link Format} */
    public Format getFormat() {
        return this.format;
    }
}

View File

@ -0,0 +1,78 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Generator for Google Mobile sitemaps, which consist of only mobile-friendly content. When you
 * need to set options, start from {@link #builder(URL, File)}.
 * @author Dan Fabulich
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=34648">Creating Mobile Sitemaps</a>
 */
public class GoogleMobileSitemapGenerator extends SitemapGenerator<GoogleMobileSitemapUrl,GoogleMobileSitemapGenerator> {

    /** Creates a generator from prepared options, rendering URLs with the Mobile renderer. */
    GoogleMobileSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }

    /**
     * Creates a generator with default options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleMobileSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Creates a generator with default options, taking the base URL as a string.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException
     */
    public GoogleMobileSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Starts a builder for a generator with custom options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleMobileSitemapGenerator> builder(URL baseUrl, File baseDir) {
        SitemapGeneratorBuilder<GoogleMobileSitemapGenerator> result =
                new SitemapGeneratorBuilder<GoogleMobileSitemapGenerator>(baseUrl, baseDir, GoogleMobileSitemapGenerator.class);
        return result;
    }

    /**
     * Starts a builder for a generator with custom options, taking the base URL as a string.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleMobileSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        SitemapGeneratorBuilder<GoogleMobileSitemapGenerator> result =
                new SitemapGeneratorBuilder<GoogleMobileSitemapGenerator>(baseUrl, baseDir, GoogleMobileSitemapGenerator.class);
        return result;
    }

    /** Renders a URL entry that carries an empty mobile marker element. */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleMobileSitemapUrl> {

        public String getXmlNamespaces() {
            return "xmlns:mobile=\"http://www.google.com/schemas/sitemap-mobile/1.0\"";
        }

        public Class<GoogleMobileSitemapUrl> getUrlClass() {
            return GoogleMobileSitemapUrl.class;
        }

        public void render(GoogleMobileSitemapUrl url, OutputStreamWriter out,
                W3CDateFormat dateFormat) throws IOException {
            // The marker is the same for every URL; there is nothing URL-specific to add.
            super.render(url, out, dateFormat, " <mobile:mobile/>\n");
        }
    }
}

View File

@ -0,0 +1,43 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * A single Google Mobile sitemap entry. Configure instances through {@link Options}.
 * @author Dan Fabulich
 * @see Options
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=34648">Creating Mobile Sitemaps</a>
 */
public class GoogleMobileSitemapUrl extends WebSitemapUrl {

    /** Options to configure mobile URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleMobileSitemapUrl, Options> {

        /** Specifies the url */
        public Options(URL url) {
            super(url, GoogleMobileSitemapUrl.class);
        }

        /** Specifies the url as a string */
        public Options(String url) throws MalformedURLException {
            // The superclass parses the string with new URL(url), exactly as this class would.
            super(url, GoogleMobileSitemapUrl.class);
        }
    }

    /** Configures the url with options */
    public GoogleMobileSitemapUrl(Options options) {
        super(options);
    }

    /** Specifies the url */
    public GoogleMobileSitemapUrl(URL url) {
        this(new Options(url));
    }

    /** Specifies the url as a string */
    public GoogleMobileSitemapUrl(String url) throws MalformedURLException {
        this(new Options(url));
    }
}

View File

@ -0,0 +1,95 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Builds a sitemap for Google News. To configure options, use {@link #builder(URL, File)}.
 * A News sitemap may hold at most {@link #MAX_URLS_PER_SITEMAP} URLs; every
 * way of creating a generator in this class applies that cap.
 * @author Dan Fabulich
 * @see <a href="http://www.google.com/support/news_pub/bin/answer.py?answer=74288">Creating a News Sitemap</a>
 */
public class GoogleNewsSitemapGenerator extends SitemapGenerator<GoogleNewsSitemapUrl,GoogleNewsSitemapGenerator> {

    /** 1000 URLs max in a Google News sitemap. */
    public static final int MAX_URLS_PER_SITEMAP = 1000;

    /** Configures a builder so you can specify sitemap generator options
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleNewsSitemapGenerator> builder(URL baseUrl, File baseDir) {
        SitemapGeneratorBuilder<GoogleNewsSitemapGenerator> builder =
            new SitemapGeneratorBuilder<GoogleNewsSitemapGenerator>(baseUrl, baseDir, GoogleNewsSitemapGenerator.class);
        // Use the constant, not a literal, so both builder overloads stay in sync.
        builder.maxUrls = GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP;
        return builder;
    }

    /** Configures a builder so you can specify sitemap generator options
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException if <code>baseUrl</code> cannot be parsed as a URL
     */
    public static SitemapGeneratorBuilder<GoogleNewsSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        SitemapGeneratorBuilder<GoogleNewsSitemapGenerator> builder =
            new SitemapGeneratorBuilder<GoogleNewsSitemapGenerator>(baseUrl, baseDir, GoogleNewsSitemapGenerator.class);
        builder.maxUrls = GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP;
        return builder;
    }

    /**
     * Package-private constructor taking an already-populated options object.
     *
     * @param options the generator options
     * @throws RuntimeException if <code>options.maxUrls</code> exceeds {@link #MAX_URLS_PER_SITEMAP}
     */
    GoogleNewsSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
        if (options.maxUrls > GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP) {
            throw new RuntimeException("Google News sitemaps can have only "
                + GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP
                + " URLs per sitemap: " + options.maxUrls);
        }
    }

    /** Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException if <code>baseUrl</code> cannot be parsed as a URL
     */
    public GoogleNewsSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(capMaxUrls(new SitemapGeneratorOptions(baseUrl, baseDir)));
    }

    /** Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleNewsSitemapGenerator(URL baseUrl, File baseDir) {
        this(capMaxUrls(new SitemapGeneratorOptions(baseUrl, baseDir)));
    }

    /**
     * Lowers <code>maxUrls</code> to the Google News limit when the freshly
     * created options carry a larger value. Without this the public
     * constructors would hand the options-constructor a value it rejects.
     * NOTE(review): this assumes the inherited default of maxUrls can exceed
     * 1000 (the builder methods override it explicitly) -- confirm against
     * AbstractSitemapGeneratorOptions.
     */
    private static SitemapGeneratorOptions capMaxUrls(SitemapGeneratorOptions options) {
        if (options.maxUrls > GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP) {
            options.maxUrls = GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP;
        }
        return options;
    }

    /**
     * Renders {@link GoogleNewsSitemapUrl} entries: the standard URL fields
     * plus a <code>news:news</code> element with publication date and keywords.
     */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleNewsSitemapUrl> implements ISitemapUrlRenderer<GoogleNewsSitemapUrl> {

        public Class<GoogleNewsSitemapUrl> getUrlClass() {
            return GoogleNewsSitemapUrl.class;
        }

        public void render(GoogleNewsSitemapUrl url, OutputStreamWriter out,
                W3CDateFormat dateFormat) throws IOException {
            StringBuilder sb = new StringBuilder();
            sb.append(" <news:news>\n");
            renderTag(sb, "news", "publication_date", dateFormat.format(url.getPublicationDate()));
            renderTag(sb, "news", "keywords", url.getKeywords());
            sb.append(" </news:news>\n");
            super.render(url, out, dateFormat, sb.toString());
        }

        public String getXmlNamespaces() {
            return "xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\"";
        }
    }
}

View File

@ -0,0 +1,95 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Date;
/**
 * One configurable Google News Search URL. To configure, use {@link Options}.
 * Instances carry a mandatory publication date and an optional
 * comma-delimited keyword string on top of the regular web sitemap fields.
 * @author Dan Fabulich
 * @see Options
 * @see <a href="http://www.google.com/support/news_pub/bin/answer.py?answer=74288">Creating a News Sitemap</a>
 */
public class GoogleNewsSitemapUrl extends WebSitemapUrl {

    private final Date publicationDate;
    private final String keywords;

    /** Options to configure Google News URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleNewsSitemapUrl, Options> {

        private Date publicationDate;
        private String keywords;

        /**
         * Specifies an URL and publication date (which is mandatory for Google News)
         *
         * @throws MalformedURLException if <code>url</code> cannot be parsed
         * @throws NullPointerException if <code>publicationDate</code> is null
         */
        public Options(String url, Date publicationDate) throws MalformedURLException {
            this(new URL(url), publicationDate);
        }

        /**
         * Specifies an URL and publication date (which is mandatory for Google News)
         *
         * @throws NullPointerException if <code>publicationDate</code> is null
         */
        public Options(URL url, Date publicationDate) {
            super(url, GoogleNewsSitemapUrl.class);
            if (publicationDate == null) {
                throw new NullPointerException("publicationDate must not be null");
            }
            this.publicationDate = publicationDate;
        }

        /** Specifies the keywords as one ready-made, comma-delimited string */
        public Options keywords(String keywords) {
            this.keywords = keywords;
            return this;
        }

        /** Specifies the keywords one by one; they are joined with <code>", "</code> */
        public Options keywords(Iterable<String> keywords) {
            StringBuilder joined = new StringBuilder();
            // Empty before the first element, ", " before every later one.
            String separator = "";
            for (String keyword : keywords) {
                joined.append(separator).append(keyword);
                separator = ", ";
            }
            this.keywords = joined.toString();
            return this;
        }

        /** Specifies the keywords one by one; they are joined with <code>", "</code> */
        public Options keywords(String... keywords) {
            return keywords(Arrays.asList(keywords));
        }
    }

    /** Specifies an URL and publication date (which is mandatory for Google News) */
    public GoogleNewsSitemapUrl(URL url, Date publicationDate) {
        this(new Options(url, publicationDate));
    }

    /** Specifies an URL and publication date (which is mandatory for Google News) */
    public GoogleNewsSitemapUrl(String url, Date publicationDate) throws MalformedURLException {
        this(new Options(url, publicationDate));
    }

    /** Configures an URL with options */
    public GoogleNewsSitemapUrl(Options options) {
        super(options);
        this.publicationDate = options.publicationDate;
        this.keywords = options.keywords;
    }

    /** Retrieves the publication date */
    public Date getPublicationDate() {
        return publicationDate;
    }

    /** Retrieves the list of comma-delimited keywords (null if none were set) */
    public String getKeywords() {
        return keywords;
    }
}

View File

@ -0,0 +1,105 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Builds a sitemap for Google Video search. To configure options, use {@link #builder(URL, File)}
 * @author Dan Fabulich
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=80472">Creating Video Sitemaps</a>
 */
public class GoogleVideoSitemapGenerator extends SitemapGenerator<GoogleVideoSitemapUrl,GoogleVideoSitemapGenerator> {

    /** Creates a builder for configuring generator options before building.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleVideoSitemapGenerator> builder(URL baseUrl, File baseDir) {
        SitemapGeneratorBuilder<GoogleVideoSitemapGenerator> videoBuilder =
            new SitemapGeneratorBuilder<GoogleVideoSitemapGenerator>(baseUrl, baseDir, GoogleVideoSitemapGenerator.class);
        return videoBuilder;
    }

    /** Creates a builder for configuring generator options before building.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException if <code>baseUrl</code> cannot be parsed as a URL
     */
    public static SitemapGeneratorBuilder<GoogleVideoSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        SitemapGeneratorBuilder<GoogleVideoSitemapGenerator> videoBuilder =
            new SitemapGeneratorBuilder<GoogleVideoSitemapGenerator>(baseUrl, baseDir, GoogleVideoSitemapGenerator.class);
        return videoBuilder;
    }

    /** Package-private constructor taking an already-populated options object. */
    GoogleVideoSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }

    /** Sets up the generator from a base URL string and an output directory.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException if <code>baseUrl</code> cannot be parsed as a URL
     */
    public GoogleVideoSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /** Sets up the generator from a base URL and an output directory.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleVideoSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Renders {@link GoogleVideoSitemapUrl} entries: the standard URL fields
     * plus a <code>video:video</code> element describing the video.
     */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleVideoSitemapUrl> implements ISitemapUrlRenderer<GoogleVideoSitemapUrl> {

        public String getXmlNamespaces() {
            return "xmlns:video=\"http://www.google.com/schemas/sitemap-video/1.1\"";
        }

        public Class<GoogleVideoSitemapUrl> getUrlClass() {
            return GoogleVideoSitemapUrl.class;
        }

        /**
         * Assembles the video element in a buffer, then hands it to the
         * inherited four-argument render method as additional data.
         */
        public void render(GoogleVideoSitemapUrl url, OutputStreamWriter out,
                W3CDateFormat dateFormat) throws IOException {
            StringBuilder video = new StringBuilder();
            video.append(" <video:video>\n");
            renderTag(video, "video", "content_loc", url.getContentUrl());

            // player_loc carries an attribute, so it is written by hand here.
            URL player = url.getPlayerUrl();
            if (player != null) {
                video.append(" <video:player_loc allow_embed=\"")
                     .append(url.getAllowEmbed())
                     .append("\">")
                     .append(player)
                     .append("</video:player_loc>\n");
            }

            renderTag(video, "video", "thumbnail_loc", url.getThumbnailUrl());
            renderTag(video, "video", "title", url.getTitle());
            renderTag(video, "video", "description", url.getDescription());
            renderTag(video, "video", "rating", url.getRating());
            renderTag(video, "video", "view_count", url.getViewCount());

            // The date needs formatting first, so a missing date is skipped here.
            if (url.getPublicationDate() != null) {
                renderTag(video, "video", "publication_date", dateFormat.format(url.getPublicationDate()));
            }

            // One element per tag.
            Iterable<String> tags = url.getTags();
            if (tags != null) {
                for (String tag : tags) {
                    renderTag(video, "video", "tag", tag);
                }
            }

            renderTag(video, "video", "category", url.getCategory());
            renderTag(video, "video", "family_friendly", url.getFamilyFriendly());
            renderTag(video, "video", "duration", url.getDurationInSeconds());
            video.append(" </video:video>\n");

            super.render(url, out, dateFormat, video.toString());
        }
    }
}

View File

@ -0,0 +1,351 @@
package com.redfin.sitemapgenerator;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
/** One configurable Google Video Search URL. To configure, use {@link Options}
 *
 * <p>An instance is an immutable snapshot of an {@link Options} object. At
 * least one of content URL and player URL must be present, and a player URL
 * requires an explicit allow-embed setting.</p>
 *
 * @author Dan Fabulich
 * @see Options
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=80472">Creating Video Sitemaps</a>
 */
public class GoogleVideoSitemapUrl extends WebSitemapUrl {

    private final URL playerUrl;
    private final URL contentUrl;
    private final URL thumbnailUrl;
    private final String title;
    private final String description;
    private final Double rating;
    private final Integer viewCount;
    private final Date publicationDate;
    private final ArrayList<String> tags;
    private final String category;
    // TODO can there be multiple categories?
    // "Usually a video will belong to a single category."
    // http://www.google.com/support/webmasters/bin/answer.py?answer=80472
    /** "Yes" / "No", or null when not specified */
    private final String familyFriendly;
    private final Integer durationInSeconds;
    /** "Yes" / "No", or null when not specified */
    private final String allowEmbed;

    /** Options to configure Google Video URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleVideoSitemapUrl, Options> {
        private URL playerUrl;
        private URL contentUrl;
        private URL thumbnailUrl;
        private String title;
        private String description;
        private Double rating;
        private Integer viewCount;
        private Date publicationDate;
        private ArrayList<String> tags;
        private String category;
        // TODO can there be multiple categories?
        // "Usually a video will belong to a single category."
        // http://www.google.com/support/webmasters/bin/answer.py?answer=80472
        private Boolean familyFriendly;
        private Integer durationInSeconds;
        private Boolean allowEmbed;

        /** Specifies a landing page URL, together with a "player" (e.g. SWF)
         *
         * @param url the landing page URL
         * @param playerUrl the URL of the "player" (e.g. SWF file)
         * @param allowEmbed when specifying a player, you must specify whether embedding is allowed
         */
        public Options(URL url, URL playerUrl, boolean allowEmbed) {
            super(url, GoogleVideoSitemapUrl.class);
            this.playerUrl = playerUrl;
            this.allowEmbed = allowEmbed;
        }

        /** Specifies a landing page URL, together with the URL of the underlying video (e.g. FLV)
         *
         * @param url the landing page URL
         * @param contentUrl the URL of the underlying video (e.g. FLV)
         */
        public Options(URL url, URL contentUrl) {
            super(url, GoogleVideoSitemapUrl.class);
            this.contentUrl = contentUrl;
        }

        /** Specifies a player URL (e.g. SWF)
         *
         * @param playerUrl the URL of the "player" (e.g. SWF file)
         * @param allowEmbed when specifying a player, you must specify whether embedding is allowed
         */
        public Options playerUrl(URL playerUrl, boolean allowEmbed) {
            this.playerUrl = playerUrl;
            this.allowEmbed = allowEmbed;
            return this;
        }

        /** Specifies the URL of the underlying video (e.g FLV) */
        public Options contentUrl(URL contentUrl) {
            this.contentUrl = contentUrl;
            return this;
        }

        /**
         * A URL pointing to the URL for the video thumbnail image file. This
         * allows you to suggest the thumbnail you want displayed in search
         * results. If you provide a {@link #contentUrl(URL)}, Google will attempt
         * to generate a set of representative thumbnail images from your actual
         * video content. However, we strongly recommended that you provide a
         * thumbnail URL to increase the likelihood of your video being included
         * in the video index.
         */
        public Options thumbnailUrl(URL thumbnailUrl) {
            this.thumbnailUrl = thumbnailUrl;
            return this;
        }

        /**
         * The title of the video. Limited to 100 characters.
         *
         * @throws RuntimeException if the title is longer than 100 characters
         */
        public Options title(String title) {
            if (title != null) {
                if (title.length() > 100) {
                    throw new RuntimeException("Video title is limited to 100 characters: " + title);
                }
            }
            this.title = title;
            return this;
        }

        /**
         * The description of the video. Descriptions longer than 2048
         * characters are rejected here; truncate them before calling.
         *
         * @throws RuntimeException if the description is longer than 2048 characters
         */
        public Options description(String description) {
            if (description != null) {
                if (description.length() > 2048) {
                    throw new RuntimeException("Truncate video descriptions to 2048 characters: " + description);
                }
            }
            this.description = description;
            return this;
        }

        /**
         * The rating of the video. The value must be number in the range 0.0-5.0.
         *
         * @throws RuntimeException if the rating is outside that range
         */
        public Options rating(Double rating) {
            if (rating != null) {
                if (rating < 0 || rating > 5.0) {
                    throw new RuntimeException("Rating must be between 0.0 and 5.0:" + rating);
                }
            }
            this.rating = rating;
            return this;
        }

        /** The number of times the video has been viewed */
        public Options viewCount(int viewCount) {
            this.viewCount = viewCount;
            return this;
        }

        /** The date the video was first published, in {@link W3CDateFormat}. */
        public Options publicationDate(Date publicationDate) {
            this.publicationDate = publicationDate;
            return this;
        }

        /**
         * Tag associated with the video; tags are generally very short
         * descriptions of key concepts associated with a video or piece of
         * content. A single video could have several tags, although it might
         * belong to only one category. For example, a video about grilling food
         * may belong in the Grilling category, but could be tagged "steak",
         * "meat", "summer", and "outdoor". Create a new {@code <video:tag>} element for
         * each tag associated with a video. A maximum of 32 tags is permitted.
         * The list is stored as given, without copying; the limit of 32 is
         * checked when the {@link GoogleVideoSitemapUrl} is constructed.
         */
        public Options tags(ArrayList<String> tags) {
            this.tags = tags;
            return this;
        }

        /**
         * Tag associated with the video; tags are generally very short
         * descriptions of key concepts associated with a video or piece of
         * content. A single video could have several tags, although it might
         * belong to only one category. For example, a video about grilling food
         * may belong in the Grilling category, but could be tagged "steak",
         * "meat", "summer", and "outdoor". Create a new {@code <video:tag>} element for
         * each tag associated with a video. A maximum of 32 tags is permitted.
         * The tags are copied into a new list.
         */
        public Options tags(Iterable<String> tags) {
            this.tags = new ArrayList<String>();
            for (String tag : tags) {
                this.tags.add(tag);
            }
            return this;
        }

        /**
         * Tag associated with the video; tags are generally very short
         * descriptions of key concepts associated with a video or piece of
         * content. A single video could have several tags, although it might
         * belong to only one category. For example, a video about grilling food
         * may belong in the Grilling category, but could be tagged "steak",
         * "meat", "summer", and "outdoor". Create a new {@code <video:tag>} element for
         * each tag associated with a video. A maximum of 32 tags is permitted.
         */
        public Options tags(String... tags) {
            return tags(Arrays.asList(tags));
        }

        /**
         * The video's category; for example, <code>cooking</code>. The value
         * should be a string no longer than 256 characters. In general,
         * categories are broad groupings of content by subject. Usually a video
         * will belong to a single category. For example, a site about cooking
         * could have categories for Broiling, Baking, and Grilling
         *
         * @throws RuntimeException if the category is longer than 256 characters
         */
        public Options category(String category) {
            if (category != null) {
                if (category.length() > 256) {
                    // Report the offending category itself (this used to print the title field).
                    throw new RuntimeException("Video category is limited to 256 characters: " + category);
                }
            }
            this.category = category;
            return this;
        }

        /** Whether the video is suitable for viewing by children */
        public Options familyFriendly(boolean familyFriendly) {
            this.familyFriendly = familyFriendly;
            return this;
        }

        /**
         * The duration of the video in seconds; value must be between 0 and 28800 (8 hours).
         *
         * @throws RuntimeException if the duration is outside that range
         */
        public Options durationInSeconds(int durationInSeconds) {
            if (durationInSeconds < 0 || durationInSeconds > 28800) {
                throw new RuntimeException("Duration must be between 0 and 28800 (8 hours):" + durationInSeconds);
            }
            this.durationInSeconds = durationInSeconds;
            return this;
        }
    }

    /** Specifies a landing page URL, together with a "player" (e.g. SWF)
     *
     * @param url the landing page URL
     * @param playerUrl the URL of the "player" (e.g. SWF file)
     * @param allowEmbed when specifying a player, you must specify whether embedding is allowed
     */
    public GoogleVideoSitemapUrl(URL url, URL playerUrl, boolean allowEmbed) {
        this(new Options(url, playerUrl, allowEmbed));
    }

    /** Specifies a landing page URL, together with the URL of the underlying video (e.g. FLV)
     *
     * @param url the landing page URL
     * @param contentUrl the URL of the underlying video (e.g. FLV)
     */
    public GoogleVideoSitemapUrl(URL url, URL contentUrl) {
        this(new Options(url, contentUrl));
    }

    /**
     * Configures the url with options
     *
     * @param options the options to copy into this URL
     * @throws RuntimeException if neither content URL nor player URL is set,
     *         if a player URL is set without an allow-embed value, or if more
     *         than 32 tags were supplied
     */
    public GoogleVideoSitemapUrl(Options options) {
        super(options);
        contentUrl = options.contentUrl;
        playerUrl = options.playerUrl;
        if (playerUrl == null && contentUrl == null) {
            throw new RuntimeException("You must specify either contentUrl or playerUrl or both; neither were specified");
        }
        allowEmbed = convertBooleanToYesOrNo(options.allowEmbed);
        if (playerUrl != null && allowEmbed == null) {
            throw new RuntimeException("allowEmbed must be specified if playerUrl is specified");
        }
        category = options.category;
        description = options.description;
        durationInSeconds = options.durationInSeconds;
        familyFriendly = convertBooleanToYesOrNo(options.familyFriendly);
        publicationDate = options.publicationDate;
        rating = options.rating;
        tags = options.tags;
        if (tags != null && tags.size() > 32) {
            throw new RuntimeException("A maximum of 32 tags is permitted");
        }
        thumbnailUrl = options.thumbnailUrl;
        title = options.title;
        viewCount = options.viewCount;
    }

    /** Maps true/false to "Yes"/"No" and passes null through unchanged. */
    private static String convertBooleanToYesOrNo(Boolean value) {
        if (value == null) return null;
        return value ? "Yes" : "No";
    }

    /** Retrieves the {@link Options#playerUrl(URL, boolean) player URL} */
    public URL getPlayerUrl() {
        return playerUrl;
    }

    /** Retrieves the {@link Options#contentUrl(URL) content URL} */
    public URL getContentUrl() {
        return contentUrl;
    }

    /** Retrieves the {@link Options#thumbnailUrl(URL) thumbnail URL} */
    public URL getThumbnailUrl() {
        return thumbnailUrl;
    }

    /** Retrieves the {@link Options#title(String) title} */
    public String getTitle() {
        return title;
    }

    /** Retrieves the {@link Options#description(String) description} */
    public String getDescription() {
        return description;
    }

    /** Retrieves the {@link Options#rating(Double) rating} */
    public Double getRating() {
        return rating;
    }

    /** Retrieves the {@link Options#viewCount(int) view count} */
    public Integer getViewCount() {
        return viewCount;
    }

    /** Retrieves the {@link Options#publicationDate(Date) publication date} */
    public Date getPublicationDate() {
        return publicationDate;
    }

    /** Retrieves the {@link Options#tags(ArrayList) tags} (the stored list itself, not a copy) */
    public ArrayList<String> getTags() {
        return tags;
    }

    /** Retrieves the {@link Options#category(String) category} */
    public String getCategory() {
        return category;
    }

    /** Retrieves whether the video is {@link Options#familyFriendly(boolean) family friendly}, as "Yes"/"No" or null */
    public String getFamilyFriendly() {
        return familyFriendly;
    }

    /** Retrieves the {@link Options#durationInSeconds(int) duration in seconds} */
    public Integer getDurationInSeconds() {
        return durationInSeconds;
    }

    /** Retrieves whether embedding is allowed, as "Yes"/"No" or null */
    public String getAllowEmbed() {
        return allowEmbed;
    }
}

View File

@ -0,0 +1,12 @@
package com.redfin.sitemapgenerator;
import java.net.URL;
import java.util.Date;
interface ISitemapUrl {
public abstract Date getLastMod();
public abstract URL getUrl();
}

View File

@ -0,0 +1,11 @@
package com.redfin.sitemapgenerator;
import java.io.IOException;
import java.io.OutputStreamWriter;
/**
 * Strategy that a sitemap generator uses to serialise one kind of URL entry.
 *
 * @param <T> the URL type this renderer handles
 */
interface ISitemapUrlRenderer<T extends ISitemapUrl> {

    /**
     * Writes one URL entry to the given writer.
     *
     * @param url the entry to write
     * @param out the writer receiving the output
     * @param dateFormat the format to use for dates
     * @throws IOException if writing fails
     */
    void render(T url, OutputStreamWriter out, W3CDateFormat dateFormat) throws IOException;

    /**
     * @return extra namespace declarations for the root element; the generator
     *         skips them when this is null
     */
    String getXmlNamespaces();

    /** @return the class object of the URL type this renderer handles */
    Class<T> getUrlClass();
}

View File

@ -0,0 +1,226 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;
import org.xml.sax.SAXException;
/**
 * Common machinery for all sitemap generators: collects URLs, splits them
 * into files of at most <code>maxUrls</code> entries, writes each file
 * (optionally gzipped and validated) and can emit a sitemap index afterwards.
 *
 * @param <U> the URL type accepted by this generator
 * @param <THIS> the concrete generator type, so chained calls return it
 */
abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGenerator<U,THIS>> {
    /** 50000 URLs per sitemap maximum */
    public static final int MAX_URLS_PER_SITEMAP = 50000;

    private final String baseUrl;
    private final File baseDir;
    private final String fileNamePrefix;
    private final String fileNameSuffix;
    private final boolean allowMultipleSitemaps;
    /** URLs collected for the sitemap file that has not been written yet */
    private final ArrayList<U> urls = new ArrayList<U>();
    private final W3CDateFormat dateFormat;
    private final int maxUrls;
    private final boolean autoValidate;
    private final boolean gzip;
    private final ISitemapUrlRenderer<U> renderer;
    /** 0 while everything fits in one un-numbered file; otherwise the number of the next/last file */
    private int mapCount = 0;
    private boolean finished = false;
    private final ArrayList<File> outFiles = new ArrayList<File>();

    /**
     * Copies the settings out of the options object.
     *
     * @param options the generator options; a null date format is replaced by a new {@link W3CDateFormat}
     * @param renderer the renderer for this generator's URL type
     */
    public SitemapGenerator(AbstractSitemapGeneratorOptions<?> options, ISitemapUrlRenderer<U> renderer) {
        baseDir = options.baseDir;
        baseUrl = options.baseUrl;
        fileNamePrefix = options.fileNamePrefix;
        W3CDateFormat dateFormat = options.dateFormat;
        if (dateFormat == null) dateFormat = new W3CDateFormat();
        this.dateFormat = dateFormat;
        allowMultipleSitemaps = options.allowMultipleSitemaps;
        maxUrls = options.maxUrls;
        autoValidate = options.autoValidate;
        gzip = options.gzip;
        this.renderer = renderer;
        fileNameSuffix = gzip ? ".xml.gz" : ".xml";
    }

    /** Add one URL of the appropriate type to this sitemap.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or else write out one sitemap immediately.
     * @param url the URL to add to this sitemap
     * @return this
     */
    public THIS addUrl(U url) {
        if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
        UrlUtils.checkUrl(url.getUrl().toString(), baseUrl);
        if (urls.size() == maxUrls) {
            if (!allowMultipleSitemaps) throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false.  Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
            // The first overflow switches from "sitemap.xml" to numbered files starting at 1.
            if (mapCount == 0) mapCount++;
            writeSiteMap();
            mapCount++;
            urls.clear();
        }
        urls.add(url);
        return getThis();
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     */
    public THIS addUrls(Iterable<? extends U> urls) {
        for (U url : urls) addUrl(url);
        return getThis();
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     */
    public THIS addUrls(U... urls) {
        for (U url : urls) addUrl(url);
        return getThis();
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     * @throws MalformedURLException declared by {@link #addUrl(String)}
     */
    public THIS addUrls(String... urls) throws MalformedURLException {
        for (String url : urls) addUrl(url);
        return getThis();
    }

    /** Add one URL of the appropriate type to this sitemap.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or else write out one sitemap immediately.
     * The URL object is created reflectively through the URL class's
     * single-String constructor; any failure of that step is rethrown wrapped
     * in a RuntimeException.
     * @param url the URL to add to this sitemap
     * @return this
     * @throws MalformedURLException part of the declared signature
     */
    public THIS addUrl(String url) throws MalformedURLException {
        U sitemapUrl;
        try {
            sitemapUrl = renderer.getUrlClass().getConstructor(String.class).newInstance(url);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return addUrl(sitemapUrl);
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     */
    public THIS addUrls(URL... urls) {
        for (URL url : urls) addUrl(url);
        return getThis();
    }

    /** Add one URL of the appropriate type to this sitemap.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * The URL object is created reflectively through the URL class's
     * single-URL constructor; any failure of that step is rethrown wrapped in
     * a RuntimeException.
     * @param url the URL to add to this sitemap
     * @return this
     */
    public THIS addUrl(URL url) {
        U sitemapUrl;
        try {
            sitemapUrl = renderer.getUrlClass().getConstructor(URL.class).newInstance(url);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return addUrl(sitemapUrl);
    }

    /** Returns this generator typed as the concrete subclass, for call chaining. */
    @SuppressWarnings("unchecked")
    THIS getThis() {
        return (THIS)this;
    }

    /** Write out remaining URLs; this method can only be called once.  This is necessary so we can keep an accurate count for {@link #writeSitemapsWithIndex()}.
     *
     * @return a list of files we wrote out to disk
     */
    public List<File> write() {
        if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
        if (urls.size() == 0 && mapCount == 0) throw new RuntimeException("No URLs added, sitemap would be empty; you must add some URLs with addUrls");
        writeSiteMap();
        finished = true;
        return outFiles;
    }

    /** After you've called {@link #write()}, call this to generate a sitemap index of all sitemaps you generated.
     * The index is written to "sitemap_index.xml" in the base directory.
     */
    public void writeSitemapsWithIndex() {
        if (!finished) throw new RuntimeException("Sitemaps not generated yet; call write() first");
        File outFile = new File(baseDir, "sitemap_index.xml");
        SitemapIndexGenerator sig;
        try {
            sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();
        } catch (MalformedURLException e) {
            throw new RuntimeException("bug", e);
        }
        sig.addUrls(fileNamePrefix, fileNameSuffix, mapCount).write();
    }

    /**
     * Writes the currently collected URLs to the next sitemap file; does
     * nothing when no URLs are pending. The file is always encoded as UTF-8,
     * matching the XML declaration, and the underlying file handle is
     * released even when writing fails part-way.
     */
    private void writeSiteMap() {
        if (urls.size() == 0) return;
        String name;
        if (mapCount > 0) {
            name = this.fileNamePrefix + mapCount;
        } else {
            name = this.fileNamePrefix;
        }
        File outFile = new File(baseDir, name + fileNameSuffix);
        outFiles.add(outFile);
        try {
            FileOutputStream fileStream = new FileOutputStream(outFile);
            try {
                OutputStreamWriter out;
                if (gzip) {
                    out = new OutputStreamWriter(new GZIPOutputStream(fileStream), "UTF-8");
                } else {
                    out = new OutputStreamWriter(fileStream, "UTF-8");
                }
                // Closes the writer, and with it the whole stream chain, on success.
                writeSiteMap(out);
            } finally {
                // No effect after a successful write; frees the file handle
                // when the gzip wrapper or the rendering threw.
                fileStream.close();
            }
            if (autoValidate) SitemapValidator.validateWebSitemap(outFile);
        } catch (IOException e) {
            throw new RuntimeException("Problem writing sitemap file " + outFile, e);
        } catch (SAXException e) {
            throw new RuntimeException("Sitemap file failed to validate (bug?)", e);
        }
    }

    /**
     * Writes the XML prolog, the urlset element with all pending URLs, and
     * closes the writer.
     *
     * @param out the writer to write to; it is closed before returning normally
     * @throws IOException if writing fails
     */
    private void writeSiteMap(OutputStreamWriter out) throws IOException {
        out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        out.write("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" ");
        if (renderer.getXmlNamespaces() != null) {
            out.write(renderer.getXmlNamespaces());
            out.write(' ');
        }
        out.write(">\n");
        for (U url : urls) {
            renderer.render(url, out, dateFormat);
        }
        out.write("</urlset>");
        out.close();
    }
}

View File

@ -0,0 +1,53 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
/** A convenience class to let you configure options straightforwardly; don't instantiate by hand.
 *
 * <p>Instead, get one statically from a SitemapGenerator class.  For example: <code>WebSitemapGenerator g =<br>
 * WebSitemapGenerator.builder("http://example.com", myDir).gzip(true).autoValidate(true).build()</code></p>
 *
 *
 * @author Dan Fabulich
 *
 * @param <G> the type of sitemap generator that {@link #build()} creates
 */
//that weird thing with generics is so sub-classed objects will return themselves
//It makes sense, I swear! http://madbean.com/2004/mb2004-3/
public class SitemapGeneratorBuilder<G extends SitemapGenerator<?,?>> extends AbstractSitemapGeneratorOptions<SitemapGeneratorBuilder<G>> {

    /** The generator class that {@link #build()} instantiates reflectively */
    Class<G> sitemapGeneratorClass;

    /**Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @param sitemapGeneratorClass the class of the generator the builder will create
     */
    public SitemapGeneratorBuilder(URL baseUrl, File baseDir, Class<G> sitemapGeneratorClass) {
        super(baseUrl, baseDir);
        this.sitemapGeneratorClass = sitemapGeneratorClass;
    }

    /**Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @param sitemapGeneratorClass the class of the generator the builder will create
     * @throws MalformedURLException if <code>baseUrl</code> cannot be parsed as a URL
     */
    public SitemapGeneratorBuilder(String baseUrl, File baseDir, Class<G> sitemapGeneratorClass) throws MalformedURLException {
        this(new URL(baseUrl), baseDir, sitemapGeneratorClass);
    }

    /**
     * Constructs a sitemap generator configured with the options you specified.
     * The generator is created through its constructor that takes an
     * {@link AbstractSitemapGeneratorOptions}.
     *
     * @return the new generator
     * @throws RuntimeException if the generator's constructor rejects the
     *         options (its own exception is rethrown as-is when it is
     *         unchecked), or if the constructor cannot be found or invoked
     */
    public G build() {
        try {
            return sitemapGeneratorClass.getDeclaredConstructor(AbstractSitemapGeneratorOptions.class).newInstance(this);
        } catch (java.lang.reflect.InvocationTargetException e) {
            // The constructor itself threw: surface its exception rather than
            // burying the message under two layers of wrappers.
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) throw (RuntimeException) cause;
            if (cause instanceof Error) throw (Error) cause;
            throw new RuntimeException(cause != null ? cause : e);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -0,0 +1,18 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
/** Plain, concrete options holder used when a generator is constructed directly rather than through a builder. */
class SitemapGeneratorOptions extends AbstractSitemapGeneratorOptions<SitemapGeneratorOptions> {

    /**
     * Creates options from a textual base URL.
     *
     * @param baseUrl the base URL, parsed with {@link URL#URL(String)}
     * @param baseDir the directory handed to the superclass constructor
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public SitemapGeneratorOptions(String baseUrl, File baseDir) throws MalformedURLException {
        this(new URL(baseUrl), baseDir);
    }

    /**
     * Creates options from an already-parsed base URL.
     *
     * @param baseUrl the base URL handed to the superclass constructor
     * @param baseDir the directory handed to the superclass constructor
     */
    public SitemapGeneratorOptions(URL baseUrl, File baseDir) {
        super(baseUrl, baseDir);
    }
}

View File

@ -0,0 +1,239 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import org.xml.sax.SAXException;
/**
* Builds a sitemap index, which points only to other sitemaps.
* @author Dan Fabulich
*
*/
public class SitemapIndexGenerator {

    private final URL baseUrl;
    private final String baseUrlString;
    private final File outFile;
    private final ArrayList<SitemapIndexUrl> urls = new ArrayList<SitemapIndexUrl>();
    private final int maxUrls;
    private final W3CDateFormat dateFormat;
    private final Date defaultLastMod;
    private final boolean autoValidate;

    /** Maximum 1,000 sitemaps per index allowed */
    public static final int MAX_SITEMAPS_PER_INDEX = 1000;

    /** Options to configure sitemap index generation */
    public static class Options {
        private URL baseUrl;
        private File outFile;
        private W3CDateFormat dateFormat = null;
        private int maxUrls = MAX_SITEMAPS_PER_INDEX;
        private Date defaultLastMod = new Date();
        private boolean autoValidate = false;
        // TODO GZIP? Is that legal for a sitemap index?

        /**Configures the generator with a base URL and destination to write the sitemap index file.
         *
         * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
         * @param outFile The sitemap index will be written out at this location
         */
        public Options(URL baseUrl, File outFile) {
            this.baseUrl = baseUrl;
            this.outFile = outFile;
        }

        /**Configures the generator with a base URL and destination to write the sitemap index file.
         *
         * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
         * @param outFile The sitemap index will be written out at this location
         * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
         */
        public Options(String baseUrl, File outFile) throws MalformedURLException {
            this(new URL(baseUrl), outFile);
        }

        /** The date formatter, typically configured with a {@link W3CDateFormat.Pattern} and/or a time zone */
        public Options dateFormat(W3CDateFormat dateFormat) {
            this.dateFormat = dateFormat;
            return this;
        }

        /**
         * The maximum number of sitemaps to allow per sitemap index; the default is the
         * maximum allowed (1,000), but you can decrease it if you wish (for testing)
         */
        Options maxUrls(int maxUrls) {
            if (maxUrls > MAX_SITEMAPS_PER_INDEX) {
                throw new RuntimeException("You can't have more than 1000 sitemaps per index");
            }
            this.maxUrls = maxUrls;
            return this;
        }

        /**
         * The default lastMod date for sitemap indexes; the default default is
         * now, but you can pass in null to omit a lastMod entirely. We don't
         * recommend this; Google may not like you as much.
         */
        public Options defaultLastMod(Date defaultLastMod) {
            this.defaultLastMod = defaultLastMod;
            return this;
        }

        /**
         * Validate the sitemap index automatically after writing it; this takes
         * time
         */
        public Options autoValidate(boolean autoValidate) {
            this.autoValidate = autoValidate;
            return this;
        }

        /** Constructs a sitemap index generator configured with the options you specified */
        public SitemapIndexGenerator build() {
            return new SitemapIndexGenerator(this);
        }
    }

    /**Configures the generator with a base URL and destination to write the sitemap index file.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param outFile The sitemap index will be written out at this location
     */
    public SitemapIndexGenerator(URL baseUrl, File outFile) {
        this(new Options(baseUrl, outFile));
    }

    /**Configures the generator with a base URL and destination to write the sitemap index file.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param outFile The sitemap index will be written out at this location
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public SitemapIndexGenerator(String baseUrl, File outFile) throws MalformedURLException {
        this(new Options(baseUrl, outFile));
    }

    /** Copies the configured options; falls back to an AUTO-mode {@link W3CDateFormat} when none was supplied. */
    private SitemapIndexGenerator(Options options) {
        this.baseUrl = options.baseUrl;
        this.baseUrlString = baseUrl.toString();
        this.outFile = options.outFile;
        this.maxUrls = options.maxUrls;
        W3CDateFormat dateFormat = options.dateFormat;
        if (dateFormat == null) dateFormat = new W3CDateFormat();
        this.dateFormat = dateFormat;
        this.defaultLastMod = options.defaultLastMod;
        this.autoValidate = options.autoValidate;
    }

    /**
     * Adds a single sitemap to the index.
     *
     * @throws RuntimeException if the URL does not start with the base URL, or
     *         if the index already holds the maximum number of sitemaps
     */
    public SitemapIndexGenerator addUrl(SitemapIndexUrl url) {
        UrlUtils.checkUrl(url.url.toString(), baseUrlString);
        if (urls.size() >= maxUrls) {
            throw new RuntimeException("More than " + maxUrls + " urls");
        }
        urls.add(url);
        return this;
    }

    /** Add multiple sitemaps to the index */
    public SitemapIndexGenerator addUrls(Iterable<? extends SitemapIndexUrl> urls) {
        for (SitemapIndexUrl url : urls) addUrl(url);
        return this;
    }

    /** Add multiple sitemaps to the index */
    public SitemapIndexGenerator addUrls(SitemapIndexUrl... urls) {
        for (SitemapIndexUrl url : urls) addUrl(url);
        return this;
    }

    /** Add multiple sitemaps to the index */
    public SitemapIndexGenerator addUrls(String... urls) throws MalformedURLException {
        for (String url : urls) addUrl(url);
        return this;
    }

    /** Adds a single sitemap to the index */
    public SitemapIndexGenerator addUrl(String url) throws MalformedURLException {
        return addUrl(new SitemapIndexUrl(url));
    }

    /** Add multiple sitemaps to the index */
    public SitemapIndexGenerator addUrls(URL... urls) {
        for (URL url : urls) addUrl(url);
        return this;
    }

    /** Adds a single sitemap to the index */
    public SitemapIndexGenerator addUrl(URL url) {
        return addUrl(new SitemapIndexUrl(url));
    }

    /** Adds a single sitemap to the index */
    public SitemapIndexGenerator addUrl(URL url, Date lastMod) {
        return addUrl(new SitemapIndexUrl(url, lastMod));
    }

    /** Adds a single sitemap to the index */
    public SitemapIndexGenerator addUrl(String url, Date lastMod) throws MalformedURLException {
        return addUrl(new SitemapIndexUrl(url, lastMod));
    }

    /** Add a numbered list of sitemaps to the index, e.g. "sitemap1.xml" "sitemap2.xml" "sitemap3.xml" etc.
     *
     * @param prefix the first part of the filename e.g. "sitemap"
     * @param suffix the last part of the filename e.g. ".xml" or ".xml.gz"
     * @param count the number of sitemaps (1-based)
     */
    public SitemapIndexGenerator addUrls(String prefix, String suffix, int count) {
        for (int i = 1; i <= count; i++) {
            String fileName = prefix + i + suffix;
            try {
                // Resolve the file name relative to the base URL.
                addUrl(new URL(baseUrl, fileName));
            } catch (MalformedURLException e) {
                throw new RuntimeException(e);
            }
        }
        return this;
    }

    /**
     * Writes out the sitemap index, then validates it if auto-validation is enabled.
     *
     * @throws RuntimeException if no URLs were added, if the file cannot be
     *         written, or if validation fails
     */
    public void write() {
        if (urls.size() == 0) throw new RuntimeException("No URLs added, sitemap index would be empty; you must add some URLs with addUrls");
        try {
            // TODO gzip? is that legal for a sitemap index?
            // The document declares encoding="UTF-8", so encode it as UTF-8
            // explicitly instead of relying on the platform default charset.
            OutputStreamWriter out = new OutputStreamWriter(new java.io.FileOutputStream(outFile), "UTF-8");
            writeSiteMap(out);
            if (autoValidate) SitemapValidator.validateSitemapIndex(outFile);
        } catch (IOException e) {
            throw new RuntimeException("Problem writing sitemap index file " + outFile, e);
        } catch (SAXException e) {
            throw new RuntimeException("Problem validating sitemap index file (bug?)", e);
        }
    }

    /**
     * Writes the {@code <sitemapindex>} document to {@code out} and closes it.
     * The writer is closed even when writing fails.
     */
    private void writeSiteMap(OutputStreamWriter out) throws IOException {
        boolean finished = false;
        try {
            out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
            out.write("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n");
            for (SitemapIndexUrl url : urls) {
                out.write(" <sitemap>\n");
                out.write(" <loc>");
                // URLs routinely contain '&'; it must be entity-escaped in XML.
                out.write(escapeXml(url.url.toString()));
                out.write("</loc>\n");
                Date lastMod = url.lastMod;
                if (lastMod == null) lastMod = defaultLastMod;
                if (lastMod != null) {
                    out.write(" <lastmod>");
                    out.write(dateFormat.format(lastMod));
                    out.write("</lastmod>\n");
                }
                out.write(" </sitemap>\n");
            }
            out.write("</sitemapindex>");
            finished = true;
        } finally {
            if (finished) {
                out.close();
            } else {
                // Keep the original failure; a secondary close error would hide it.
                try {
                    out.close();
                } catch (IOException ignored) {
                    // intentionally ignored, the original exception propagates
                }
            }
        }
    }

    /** Replaces the five XML special characters in {@code text} with their entity references. */
    private static String escapeXml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&': escaped.append("&amp;"); break;
                case '<': escaped.append("&lt;"); break;
                case '>': escaped.append("&gt;"); break;
                case '"': escaped.append("&quot;"); break;
                case '\'': escaped.append("&apos;"); break;
                default: escaped.append(c);
            }
        }
        return escaped.toString();
    }
}

View File

@ -0,0 +1,35 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
/**
* Represents a single sitemap for inclusion in a sitemap index.
* @author Dan Fabulich
*
*/
public class SitemapIndexUrl {
final URL url;
final Date lastMod;
/** Configures the sitemap URL with a specified lastMod */
public SitemapIndexUrl(URL url, Date lastMod) {
this.url = url;
this.lastMod = lastMod;
}
/** Configures the sitemap URL with a specified lastMod */
public SitemapIndexUrl(String url, Date lastMod) throws MalformedURLException {
this(new URL(url), lastMod);
}
/** Configures the sitemap URL with no specified lastMod; we'll use {@link SitemapIndexGenerator.Options#defaultLastMod(Date)} or leave it blank if no default is specified */
public SitemapIndexUrl(URL url) {
this(url, null);
}
/** Configures the sitemap URL with no specified lastMod; we'll use {@link SitemapIndexGenerator.Options#defaultLastMod(Date)} or leave it blank if no default is specified */
public SitemapIndexUrl(String url) throws MalformedURLException {
this(new URL(url));
}
}

View File

@ -0,0 +1,81 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.XMLConstants;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/** Validates sitemaps and sitemap indexes
*
* @author Dan Fabulich
*
*/
public class SitemapValidator {
    //TODO support gzip
    //TODO confirm < 10MB
    //TODO confirm single host
    //TODO confirm correct host
    //TODO confirm UTF-8
    //TODO support Mobile/Geo/Video/Code/News (sitemap.xsd doesn't support them)
    //TODO confirm mobile restrictions: no non-mobile urls
    //TODO confirm news restrictions: 3 days, 1000 URLs
    //TODO video restrictions: title, player_loc/content_loc, no non-video urls
    //IMO news should have no non-news urls, geo should have no non-geo urls, code should have no non-code urls

    /** Compiled schemas, loaded on first use by {@link #lazyLoad()}. */
    private static Schema sitemapSchema, sitemapIndexSchema;

    /**
     * Loads both schemas from the class-path resources "sitemap.xsd" and
     * "siteindex.xsd" the first time it is called.
     *
     * <p>The static fields are only assigned once both schemas compiled, so a
     * failure cannot leave one schema set and the other null.</p>
     */
    private synchronized static void lazyLoad() {
        if (sitemapSchema != null && sitemapIndexSchema != null) return;
        SchemaFactory factory =
            SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        try {
            Schema loadedSitemapSchema = loadSchema(factory, "sitemap.xsd");
            Schema loadedIndexSchema = loadSchema(factory, "siteindex.xsd");
            sitemapSchema = loadedSitemapSchema;
            sitemapIndexSchema = loadedIndexSchema;
        } catch (SAXException e) {
            throw new RuntimeException("BUG", e);
        }
    }

    /** Compiles the named class-path resource into a schema and closes the resource stream. */
    private static Schema loadSchema(SchemaFactory factory, String resourceName) throws SAXException {
        InputStream stream = SitemapValidator.class.getResourceAsStream(resourceName);
        if (stream == null) throw new RuntimeException("BUG Couldn't load " + resourceName);
        try {
            return factory.newSchema(new StreamSource(stream));
        } finally {
            closeQuietly(stream);
        }
    }

    /** Validates an ordinary web sitemap file (NOT a Google-specific sitemap) */
    public static void validateWebSitemap(File sitemap) throws SAXException {
        lazyLoad();
        validateXml(sitemap, sitemapSchema);
    }

    /** Validates a sitemap index file */
    public static void validateSitemapIndex(File sitemap) throws SAXException {
        lazyLoad();
        validateXml(sitemap, sitemapIndexSchema);
    }

    /**
     * Validates {@code sitemap} against {@code schema}.
     *
     * <p>The file is handed to the parser as a byte stream so that the parser
     * decodes it according to the XML declaration instead of the platform
     * default charset; the stream is always closed afterwards.</p>
     *
     * @throws SAXException if the document does not conform to the schema
     * @throws RuntimeException wrapping any {@link IOException} while reading
     */
    private static void validateXml(File sitemap, Schema schema) throws SAXException {
        Validator validator = schema.newValidator();
        InputStream in = null;
        try {
            in = new java.io.FileInputStream(sitemap);
            SAXSource source = new SAXSource(new InputSource(in));
            validator.validate(source);
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(in);
        }
    }

    /** Closes {@code resource} if non-null; a close failure on a read-only stream is not actionable. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) return;
        try {
            resource.close();
        } catch (IOException ignored) {
            // nothing useful to do: the data was already read
        }
    }
}

View File

@ -0,0 +1,18 @@
package com.redfin.sitemapgenerator;
import java.util.HashMap;
/** Small static helpers shared by the sitemap generators. */
class UrlUtils {

    /**
     * Verifies that {@code url} begins with {@code baseUrl} (plain string prefix comparison).
     *
     * @throws RuntimeException if {@code url} does not start with {@code baseUrl}
     */
    static void checkUrl(String url, String baseUrl) {
        // Is there a better test to use here?
        boolean underBase = url.startsWith(baseUrl);
        if (underBase) {
            return;
        }
        throw new RuntimeException("Url " + url + " doesn't start with base URL " + baseUrl);
    }

    /** Creates an empty {@link HashMap} whose type arguments are inferred at the call site. */
    static <K,V> HashMap<K,V> newHashMap() {
        HashMap<K,V> map = new HashMap<K,V>();
        return map;
    }
}

View File

@ -0,0 +1,173 @@
/**
*
*/
package com.redfin.sitemapgenerator;
import static java.util.Calendar.HOUR_OF_DAY;
import static java.util.Calendar.MILLISECOND;
import static java.util.Calendar.MINUTE;
import static java.util.Calendar.SECOND;
import java.text.DateFormat;
import java.text.FieldPosition;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.TimeZone;
/**
* <p>Formats and parses dates in the six defined W3C date time formats. These formats are described in
* "Date and Time Formats",
* <a href="http://www.w3.org/TR/NOTE-datetime">http://www.w3.org/TR/NOTE-datetime</a>.</p>
*
* <p>The formats are:
*
* <ol>
* <li>YEAR: YYYY (eg 1997)
* <li>MONTH: YYYY-MM (eg 1997-07)
* <li>DAY: YYYY-MM-DD (eg 1997-07-16)
* <li>MINUTE: YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
* <li>SECOND: YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
* <li>MILLISECOND: YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
* </ol>
*
* Note that W3C timezone designators (TZD) are either the letter "Z" (for GMT) or a pattern like "+00:30" or "-08:00". This is unlike
* RFC 822 timezones generated by SimpleDateFormat, which omit the ":" like this: "+0030" or "-0800".</p>
*
* <p>This class allows you to either specify which format pattern to use, or (by default) to
* automatically guess which pattern to use (AUTO mode). When parsing in AUTO mode, we'll try parsing using each pattern
* until we find one that works. When formatting in AUTO mode, we'll use this algorithm:
*
* <ol><li>If the date has fractional milliseconds (e.g. 2009-06-06T19:49:04.45Z) we'll use the MILLISECOND pattern
* <li>Otherwise, if the date has non-zero seconds (e.g. 2009-06-06T19:49:04Z) we'll use the SECOND pattern
* <li>Otherwise, if the date is not at exactly midnight (e.g. 2009-06-06T19:49Z) we'll use the MINUTE pattern
* <li>Otherwise, we'll use the DAY pattern. If you want to format using the MONTH or YEAR pattern, you must declare it explicitly.
* </ol>
*
* Finally note that, like all classes that inherit from DateFormat, <b>this class is not thread-safe</b>. Also note that you
* can explicitly specify the timezone to use for formatting using the {@link #setTimeZone(TimeZone)} method.
*
* @author Dan Fabulich
* @see <a href="http://www.w3.org/TR/NOTE-datetime">Date and Time Formats</a>
*/
public class W3CDateFormat extends SimpleDateFormat {
    private static final long serialVersionUID = -5733368073260485802L;

    /** The six patterns defined by W3C, plus {@link #AUTO} configuration */
    public enum Pattern {
        /** "yyyy-MM-dd'T'HH:mm:ss.SSSZ" */
        MILLISECOND("yyyy-MM-dd'T'HH:mm:ss.SSSZ", true),
        /** "yyyy-MM-dd'T'HH:mm:ssZ" */
        SECOND("yyyy-MM-dd'T'HH:mm:ssZ", true),
        /** "yyyy-MM-dd'T'HH:mmZ" */
        MINUTE("yyyy-MM-dd'T'HH:mmZ", true),
        /** "yyyy-MM-dd" */
        DAY("yyyy-MM-dd", false),
        /** "yyyy-MM" */
        MONTH("yyyy-MM", false),
        /** "yyyy" */
        YEAR("yyyy", false),
        /** Automatically compute the right pattern to use */
        AUTO("", true);

        /** The SimpleDateFormat pattern string for this W3C format. */
        private final String pattern;
        /** Whether the formatted text ends with a time zone designator. */
        private final boolean includeTimeZone;

        Pattern(String pattern, boolean includeTimeZone) {
            this.pattern = pattern;
            this.includeTimeZone = includeTimeZone;
        }
    }

    private final Pattern pattern;

    /** The GMT ("zulu") time zone, for your convenience */
    public static final TimeZone ZULU = TimeZone.getTimeZone("GMT");

    /** Build a formatter in AUTO mode */
    public W3CDateFormat() {
        this(Pattern.AUTO);
    }

    /** Build a formatter using the specified Pattern, or AUTO mode */
    public W3CDateFormat(Pattern pattern) {
        super(pattern.pattern);
        this.pattern = pattern;
    }

    /** This is what you override when you extend DateFormat; use {@link DateFormat#format(Date)} instead */
    @Override
    public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition pos) {
        boolean includeTimeZone = pattern.includeTimeZone;
        if (pattern == Pattern.AUTO) {
            // AUTO picks (and applies) the concrete pattern based on the date's precision.
            includeTimeZone = autoFormat(date);
        }
        super.format(date, toAppendTo, pos);
        if (includeTimeZone) convertRfc822TimeZoneToW3c(toAppendTo);
        return toAppendTo;
    }

    /** Applies the given pattern to this formatter and reports whether it carries a time zone. */
    private boolean applyPattern(Pattern pattern) {
        applyPattern(pattern.pattern);
        return pattern.includeTimeZone;
    }

    /**
     * Chooses the coarsest pattern that still represents {@code date} exactly
     * (MILLISECOND, SECOND, MINUTE, else DAY) and applies it.
     *
     * @return whether the chosen pattern includes a time zone
     */
    private boolean autoFormat(Date date) {
        if (calendar == null) calendar = new GregorianCalendar();
        calendar.setTime(date);
        boolean hasMillis = calendar.get(MILLISECOND) > 0;
        if (hasMillis) {
            return applyPattern(Pattern.MILLISECOND);
        }
        boolean hasSeconds = calendar.get(SECOND) > 0;
        if (hasSeconds) {
            return applyPattern(Pattern.SECOND);
        }
        boolean hasTime = (calendar.get(HOUR_OF_DAY) + calendar.get(MINUTE)) > 0;
        if (hasTime) {
            return applyPattern(Pattern.MINUTE);
        }
        return applyPattern(Pattern.DAY);
    }

    /** This is what you override when you extend DateFormat; use {@link DateFormat#parse(String)} instead */
    @Override
    public Date parse(String text, ParsePosition pos) {
        text = convertW3cTimeZoneToRfc822(text);
        if (pattern == Pattern.AUTO) {
            return autoParse(text, pos);
        }
        return super.parse(text, pos);
    }

    /** Tries every concrete pattern in declaration order and returns the first successful parse, or null. */
    private Date autoParse(String text, ParsePosition pos) {
        for (Pattern pattern : Pattern.values()) {
            if (pattern == Pattern.AUTO) continue;
            applyPattern(pattern);
            Date out = super.parse(text, pos);
            if (out != null) return out;
        }
        return null; // this will force a ParseException
    }

    /** Rewrites the trailing RFC 822 zone ("+0100") just appended to the buffer as a W3C designator ("+01:00" or "Z"). */
    private void convertRfc822TimeZoneToW3c(StringBuffer toAppendTo) {
        int length = toAppendTo.length();
        if (ZULU.equals(calendar.getTimeZone())) {
            toAppendTo.replace(length - 5, length, "Z");
        } else {
            toAppendTo.insert(length - 2, ':');
        }
    }

    /**
     * Rewrites a trailing W3C zone designator ("Z" or "+01:00") as the RFC 822
     * form ("+0000", "+0100") that SimpleDateFormat's "Z" pattern letter parses.
     * Text without such a designator is returned unchanged.
     */
    private String convertW3cTimeZoneToRfc822(String source) {
        int length = source.length();
        if (source.endsWith("Z")) {
            return source.substring(0, length-1) + "+0000";
        }
        // Guard the index: inputs shorter than three characters (e.g. "")
        // must fall through and fail as a normal parse error, not an
        // index-out-of-bounds exception.
        if (length >= 3 && source.charAt(length-3) == ':') {
            return source.substring(0, length-3) + source.substring(length - 2);
        }
        return source;
    }
}

View File

@ -0,0 +1,76 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
* Generates a regular old sitemap (USE THIS CLASS FIRST). To configure options, use {@link #builder(URL, File)}
* @author Dan Fabulich
*
*/
public class WebSitemapGenerator extends SitemapGenerator<WebSitemapUrl,WebSitemapGenerator> {

    /** Package-level constructor used by the public constructors and by the reflective builder. */
    WebSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }

    /**Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public WebSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public WebSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /** Configures a builder so you can specify sitemap generator options
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<WebSitemapGenerator> builder(URL baseUrl, File baseDir) {
        SitemapGeneratorBuilder<WebSitemapGenerator> webBuilder =
                new SitemapGeneratorBuilder<WebSitemapGenerator>(baseUrl, baseDir, WebSitemapGenerator.class);
        return webBuilder;
    }

    /** Configures a builder so you can specify sitemap generator options
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public static SitemapGeneratorBuilder<WebSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        return builder(new URL(baseUrl), baseDir);
    }

    /** Renders plain web sitemap URLs; contributes no extra XML namespaces. */
    private static class Renderer extends AbstractSitemapUrlRenderer<WebSitemapUrl> implements ISitemapUrlRenderer<WebSitemapUrl> {

        public String getXmlNamespaces() {
            return null;
        }

        public Class<WebSitemapUrl> getUrlClass() {
            return WebSitemapUrl.class;
        }

        /** Delegates to the four-argument superclass render, passing null as the last argument. */
        public void render(WebSitemapUrl url, OutputStreamWriter out, W3CDateFormat dateFormat) throws IOException {
            super.render(url, out, dateFormat, null);
        }
    }
}

View File

@ -0,0 +1,69 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
/**
* Encapsulates a single URL to be inserted into a Web sitemap (as opposed to a Geo sitemap, a Mobile sitemap, a Video sitemap, etc which are Google specific).
* Specifying a lastMod, changeFreq, or priority is optional; you specify those by using an Options object.
*
* @see Options
* @author Dan Fabulich
*
*/
public class WebSitemapUrl implements ISitemapUrl {

    private final URL url;
    private final Date lastMod;
    private final ChangeFreq changeFreq;
    private final Double priority;

    /** Encapsulates a single simple URL */
    public WebSitemapUrl(URL url) {
        this.url = url;
        // No options were given, so every optional attribute stays unset.
        this.lastMod = null;
        this.changeFreq = null;
        this.priority = null;
    }

    /**
     * Encapsulates a single simple URL
     *
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public WebSitemapUrl(String url) throws MalformedURLException {
        this(new URL(url));
    }

    /** Copies url, lastMod, changeFreq and priority from any options object. */
    WebSitemapUrl(AbstractSitemapUrlOptions<?,?> options) {
        this.url = options.url;
        this.lastMod = options.lastMod;
        this.changeFreq = options.changeFreq;
        this.priority = options.priority;
    }

    /** Creates an URL with configured options */
    public WebSitemapUrl(Options options) {
        // The cast selects the package-level constructor above.
        this((AbstractSitemapUrlOptions<?,?>)options);
    }

    /** Retrieves the url */
    public URL getUrl() {
        return url;
    }

    /** Retrieves the {@link Options#lastMod(Date)} */
    public Date getLastMod() {
        return lastMod;
    }

    /** Retrieves the {@link Options#changeFreq(ChangeFreq)} */
    public ChangeFreq getChangeFreq() {
        return changeFreq;
    }

    /** Retrieves the {@link Options#priority(Double)} */
    public Double getPriority() {
        return priority;
    }

    /** Options to configure web sitemap URLs */
    public static class Options extends AbstractSitemapUrlOptions<WebSitemapUrl, Options> {

        /** Configure this URL */
        public Options(URL url) {
            super(url, WebSitemapUrl.class);
        }

        /**
         * Configure this URL
         *
         * @throws MalformedURLException if {@code url} cannot be parsed as a URL
         */
        public Options(String url)throws MalformedURLException {
            this(new URL(url));
        }
    }
}

View File

@ -0,0 +1,105 @@
<html><head><title>How to use SitemapGen4j</title></head>
<body>
<h1>How to use SitemapGen4j</h1>
SitemapGen4j is a tool to generate XML sitemaps in Java.
<h2>What's an XML sitemap?</h2>
Quoting from <a href="http://sitemaps.org/index.php">sitemaps.org</a>:
<blockquote><p>Sitemaps are an easy way for webmasters to inform search engines about pages on their sites that are available for crawling. In its simplest form, a Sitemap is an XML file that lists URLs for a site along with additional metadata about each URL (when it was last updated, how often it usually changes, and how important it is, relative to other URLs in the site) so that search engines can more intelligently crawl the site.</p>
<p>Web crawlers usually discover pages from links within the site and from other sites. Sitemaps supplement this data to allow crawlers that support Sitemaps to pick up all URLs in the Sitemap and learn about those URLs using the associated metadata. Using the Sitemap protocol does not guarantee that web pages are included in search engines, but provides hints for web crawlers to do a better job of crawling your site.</p>
<p>Sitemap 0.90 is offered under the terms of the Attribution-ShareAlike Creative Commons License and has wide adoption, including support from Google, Yahoo!, and Microsoft.</p>
</blockquote>
<h2>Getting started</h2>
<p>The easiest way to get started is to just use the WebSitemapGenerator class, like this:
<blockquote><code>WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", new File("."));<br>
wsg.addUrl("http://www.example.com/index.html"); // repeat multiple times<br>
wsg.write();</code></blockquote>
<h2>Configuring options</h2>
But there are a lot of nifty options available for URLs and for the generator as a whole. To configure the generator, use a builder:
<blockquote><code>WebSitemapGenerator wsg = <b>WebSitemapGenerator.builder</b>("http://www.example.com", new File("."))<br>
&nbsp;&nbsp;<b>.gzip(true).build()</b>;<br>
wsg.addUrl("http://www.example.com/index.html"); // repeat multiple times<br>
wsg.write();</code></blockquote>
To configure the URLs, construct a real WebSitemapUrl with WebSitemapUrl.Options.
<blockquote><code>WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", new File("."));<br>
WebSitemapUrl url = <b>new WebSitemapUrl.Options</b>("http://www.example.com/index.html")<br>
&nbsp;&nbsp;<b>.lastMod(new Date()).priority(1.0).changeFreq(ChangeFreq.HOURLY).build()</b>;<br>
wsg.addUrl(url); // repeat multiple times<br>
wsg.write();</code></blockquote>
<h2>Configuring the date format</h2>
One important configuration option for the sitemap generator is the date format. W3C allows you to use up to six
different date patterns in sitemaps; if you don't specify one, we'll try to guess which one you want, and we'll use
the default timezone of the local machine, which might not be what you prefer.
<blockquote><code><b>W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY);<br>
dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));</b><br>
WebSitemapGenerator wsg = <b>WebSitemapGenerator.builder</b>("http://www.example.com", new File("."))<br>
&nbsp;&nbsp;<b>.dateFormat(dateFormat).build()</b>;<br>
wsg.addUrl("http://www.example.com/index.html"); // repeat multiple times<br>
wsg.write();</code></blockquote>
<h2>Lots of URLs: a sitemap index file</h2>
One sitemap can contain a maximum of 50,000 URLs. (Some sitemaps, like Google News sitemaps, can contain only 1,000 URLs.)
If you need to put more URLs than that in a sitemap, you'll have to use a sitemap index file. Fortunately,
WebSitemapGenerator can manage the whole thing for you.
<blockquote><code>WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", new File("."));<br>
for (int i = 0; i &lt; 100000; i++) wsg.addUrl("http://www.example.com/index"+i+".html");<br>
wsg.write();<br>
<b>wsg.writeSitemapsWithIndex();</b></code></blockquote>
<p>That will generate two sitemaps, sitemap1.xml and sitemap2.xml, and then generate a sitemap_index.xml file describing the two.</p>
<p>It's also possible to carefully organize your sub-sitemaps. For example, it's recommended to group URLs with the same changeFreq together
(have one sitemap for changeFreq "daily" and another for changeFreq "yearly"), so you can modify the lastMod of the daily
sitemap without modifying the lastMod of the yearly sitemap. To do that, just construct your sitemaps one at a time using
the WebSitemapGenerator, then use the SitemapIndexGenerator to create a single index for all of them.</p>
<blockquote><code>SitemapIndexGenerator sig = new SitemapIndexGenerator("http://www.example.com", new File("sitemap_index.xml"));<br>
for (int i = 0; i &lt; 5; i++) sig.addUrl("http://www.example.com/sitemap"+i+".html", new Date(i));<br>
sig.write();<br>
</code></blockquote>
<h2>Validate your sitemaps</h2>
SitemapGen4j can also validate your sitemaps. (If you used SitemapGen4j to make the sitemaps, you shouldn't need to
do this unless there's a bug in our code.) It's easy to configure the WebSitemapGenerator to automatically validate
your sitemaps right after you write them (but this does slow things down, naturally).
<blockquote><code>WebSitemapGenerator wsg = <b>WebSitemapGenerator.builder</b>("http://www.example.com", new File("."))<br>
&nbsp;&nbsp;<b>.autoValidate(true).build()</b>;<br>
wsg.addUrl("http://www.example.com/index.html"); // repeat multiple times<br>
wsg.write();</code></blockquote>
You can also use the SitemapValidator directly to validate sitemaps. It has two methods: validateWebSitemap(File f)
and validateSitemapIndex(File f).
<h2>Google-specific sitemaps</h2>
<p>Google can understand a wide variety of custom sitemap formats that they made up, including Mobile sitemaps, Geo
sitemaps, Code sitemaps (for Google Code search), Google News sitemaps, and Video sitemaps. SitemapGen4j can
generate any/all of these different types of sitemaps.</p>
<p>To generate a special type of sitemap, just use GoogleMobileSitemapGenerator, GoogleGeoSitemapGenerator,
GoogleCodeSitemapGenerator, GoogleNewsSitemapGenerator, or GoogleVideoSitemapGenerator
instead of WebSitemapGenerator.</p>
<p>You can't mix-and-match regular URLs with Google-specific sitemaps, so you'll also have to use a
GoogleMobileSitemapUrl, GoogleGeoSitemapUrl, GoogleCodeSitemapUrl, GoogleNewsSitemapUrl, or GoogleVideoSitemapUrl
instead of a WebSitemapUrl. Each of them has unique configurable options not available to regular web URLs.</p>
</body>
</html>

View File

@ -0,0 +1,74 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<xsd:annotation>
<xsd:documentation>
XML Schema for Sitemap index files.
Last Modifed 2006-07-25
</xsd:documentation>
</xsd:annotation>
<xsd:element name="sitemapindex">
<xsd:annotation>
<xsd:documentation>
Container for a set of up to 1,000 sitemap URLs.
This is the root element of the XML file.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element ref="sitemap" maxOccurs="1000"/>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="sitemap">
<xsd:annotation>
<xsd:documentation>
Container for the data needed to describe a sitemap.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:all>
<xsd:element ref="loc"/>
<xsd:element ref="lastmod" minOccurs="0"/>
</xsd:all>
</xsd:complexType>
</xsd:element>
<xsd:element name="loc">
<xsd:annotation>
<xsd:documentation>
REQUIRED: The location URI of a sitemap.
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:anyURI">
<xsd:minLength value="12"/>
<xsd:maxLength value="2048"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="lastmod">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: The date the sitemap was last modified. The date must conform
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
Example: 2005-05-10
Lastmod may also contain a timestamp.
Example: 2005-05-10T17:33:30+08:00
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:minLength value="10"/>
<xsd:maxLength value="25"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
</xsd:schema>

View File

@ -0,0 +1,121 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<xsd:annotation>
<xsd:documentation>
XML Schema for Sitemap files.
Last Modified 2006-07-25
</xsd:documentation>
</xsd:annotation>
<xsd:element name="urlset">
<xsd:annotation>
<xsd:documentation>
Container for a set of up to 50,000 document elements.
This is the root element of the XML file.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element ref="url" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="url">
<xsd:annotation>
<xsd:documentation>
Container for the data needed to describe a document to crawl.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:all>
<xsd:element ref="loc"/>
<xsd:element ref="lastmod" minOccurs="0"/>
<xsd:element ref="changefreq" minOccurs="0"/>
<xsd:element ref="priority" minOccurs="0"/>
</xsd:all>
</xsd:complexType>
</xsd:element>
<xsd:element name="loc">
<xsd:annotation>
<xsd:documentation>
REQUIRED: The location URI of a document.
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:anyURI">
<xsd:minLength value="12"/>
<xsd:maxLength value="2048"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="lastmod">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: The date the document was last modified. The date must conform
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
Example: 2005-05-10
Lastmod may also contain a timestamp.
Example: 2005-05-10T17:33:30+08:00
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:minLength value="10"/>
<xsd:maxLength value="25"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="changefreq">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: Indicates how frequently the content at a particular URL is
likely to change. The value "always" should be used to describe
documents that change each time they are accessed. The value "never"
should be used to describe archived URLs. Please note that web
crawlers may not necessarily crawl pages marked "always" more often.
Consider this element as a friendly suggestion and not a command.
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:enumeration value="always"/>
<xsd:enumeration value="hourly"/>
<xsd:enumeration value="daily"/>
<xsd:enumeration value="weekly"/>
<xsd:enumeration value="monthly"/>
<xsd:enumeration value="yearly"/>
<xsd:enumeration value="never"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="priority">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: The priority of a particular URL relative to other pages
on the same site. The value for this element is a number between
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
The default priority of a page is 0.5. Priority is used to select
between pages on your site. Setting a priority of 1.0 for all URLs
will not help you, as the relative priority of pages on your site
is what will be considered.
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:decimal">
<xsd:minInclusive value="0.0"/>
<xsd:maxInclusive value="1.0"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
</xsd:schema>

View File

@ -0,0 +1,123 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.util.Date;
import java.util.List;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.ChangeFreq;
import com.redfin.sitemapgenerator.GoogleCodeSitemapGenerator;
import com.redfin.sitemapgenerator.GoogleCodeSitemapUrl;
import com.redfin.sitemapgenerator.W3CDateFormat;
import com.redfin.sitemapgenerator.GoogleCodeSitemapUrl.FileType;
import com.redfin.sitemapgenerator.GoogleCodeSitemapUrl.License;
import com.redfin.sitemapgenerator.GoogleCodeSitemapUrl.Options;
/**
 * Checks the XML that {@link GoogleCodeSitemapGenerator} writes for
 * {@link GoogleCodeSitemapUrl} entries, and that invalid option combinations are rejected.
 */
public class GoogleCodeSitemapUrlTest extends TestCase {

    /** Scratch directory that receives the generated sitemap files; recreated for every test. */
    File dir;

    /** Generator under test; created by each test method. */
    GoogleCodeSitemapGenerator wsg;

    /**
     * Creates a fresh, empty scratch directory by reserving a unique temp-file name and
     * replacing that file with a directory.
     *
     * @throws IllegalStateException if the directory could not be created
     */
    public void setUp() throws Exception {
        dir = File.createTempFile(GoogleCodeSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        // Fail fast here rather than letting every test fail later with an unrelated I/O error.
        if (!dir.mkdir()) {
            throw new IllegalStateException("Could not create temporary directory " + dir);
        }
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the scratch directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when the path is not a readable directory; guard against it
        // so that cleanup never fails with a NullPointerException.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** A URL with only the mandatory file type produces a minimal codesearch element. */
    public void testSimpleUrl() throws Exception {
        wsg = new GoogleCodeSitemapGenerator("http://www.example.com", dir);
        GoogleCodeSitemapUrl url = new GoogleCodeSitemapUrl("http://www.example.com/Foo.java", FileType.JAVA);
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" " +
            "xmlns:codesearch=\"http://www.google.com/codesearch/schemas/sitemap/1.0\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/Foo.java</loc>\n" +
            " <codesearch:codesearch>\n" +
            " <codesearch:filetype>java</codesearch:filetype>\n" +
            " </codesearch:codesearch>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Standard sitemap options and codesearch options are all written, in a fixed order. */
    public void testOptions() throws Exception {
        // Pin the time zone so that new Date(0) always renders as 1970-01-01.
        W3CDateFormat dateFormat = new W3CDateFormat();
        dateFormat.setTimeZone(W3CDateFormat.ZULU);
        wsg = GoogleCodeSitemapGenerator.builder("http://www.example.com", dir)
            .dateFormat(dateFormat).build();
        GoogleCodeSitemapUrl url = new Options("http://www.example.com/foo/Foo.java", FileType.JAVA)
            .changeFreq(ChangeFreq.HOURLY).lastMod(new Date(0)).priority(0.5)
            .license(License.GPL).fileName("Foo.java").packageUrl("http://www.example.com/foo/")
            .build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:codesearch=\"http://www.google.com/codesearch/schemas/sitemap/1.0\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/foo/Foo.java</loc>\n" +
            " <lastmod>1970-01-01</lastmod>\n" +
            " <changefreq>hourly</changefreq>\n" +
            " <priority>0.5</priority>\n" +
            " <codesearch:codesearch>\n" +
            " <codesearch:filetype>java</codesearch:filetype>\n" +
            " <codesearch:license>gpl</codesearch:license>\n" +
            " <codesearch:filename>Foo.java</codesearch:filename>\n" +
            " <codesearch:packageurl>http://www.example.com/foo/</codesearch:packageurl>\n" +
            " </codesearch:codesearch>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** An archive URL may additionally carry a packagemap reference. */
    public void testPackageOptions() throws Exception {
        wsg = new GoogleCodeSitemapGenerator("http://www.example.com", dir);
        GoogleCodeSitemapUrl url = new Options("http://www.example.com/foo/Foo.zip", FileType.ARCHIVE)
            .license(License.GPL).fileName("Foo.java").packageUrl("http://www.example.com/foo/")
            .packageMap("packagemap.xml").build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:codesearch=\"http://www.google.com/codesearch/schemas/sitemap/1.0\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/foo/Foo.zip</loc>\n" +
            " <codesearch:codesearch>\n" +
            " <codesearch:filetype>archive</codesearch:filetype>\n" +
            " <codesearch:license>gpl</codesearch:license>\n" +
            " <codesearch:filename>Foo.java</codesearch:filename>\n" +
            " <codesearch:packageurl>http://www.example.com/foo/</codesearch:packageurl>\n" +
            " <codesearch:packagemap>packagemap.xml</codesearch:packagemap>\n" +
            " </codesearch:codesearch>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Setting a packagemap on a non-archive file type must be rejected. */
    public void testPackageMapNonArchive() throws Exception {
        Options options = new Options("http://www.example.com/foo/Foo.java", FileType.JAVA);
        try {
            options.packageMap("packagemap.xml");
            fail("I was allowed to set packagemap on non-archive");
        } catch (RuntimeException e) {
            // expected: packagemap is only accepted for archives
        }
    }

    /**
     * Writes the generator's output, asserts that exactly one file named
     * {@code sitemap.xml} was produced, and returns that file's contents.
     */
    private String writeSingleSiteMap(GoogleCodeSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -0,0 +1,58 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.util.List;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.GoogleGeoSitemapGenerator;
import com.redfin.sitemapgenerator.GoogleGeoSitemapUrl;
import com.redfin.sitemapgenerator.GoogleGeoSitemapUrl.Format;
/**
 * Checks the XML that {@link GoogleGeoSitemapGenerator} writes for a
 * {@link GoogleGeoSitemapUrl} entry.
 */
public class GoogleGeoSitemapUrlTest extends TestCase {

    /** Scratch directory that receives the generated sitemap files; recreated for every test. */
    File dir;

    /** Generator under test; created by each test method. */
    GoogleGeoSitemapGenerator wsg;

    /**
     * Creates a fresh, empty scratch directory by reserving a unique temp-file name and
     * replacing that file with a directory.
     *
     * @throws IllegalStateException if the directory could not be created
     */
    public void setUp() throws Exception {
        dir = File.createTempFile(GoogleGeoSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        // Fail fast here rather than letting every test fail later with an unrelated I/O error.
        if (!dir.mkdir()) {
            throw new IllegalStateException("Could not create temporary directory " + dir);
        }
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the scratch directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when the path is not a readable directory; guard against it
        // so that cleanup never fails with a NullPointerException.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** A KML URL is written with a geo element containing only the format. */
    public void testSimpleUrl() throws Exception {
        wsg = new GoogleGeoSitemapGenerator("http://www.example.com", dir);
        GoogleGeoSitemapUrl url = new GoogleGeoSitemapUrl("http://www.example.com/index.html", Format.KML);
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" " +
            "xmlns:geo=\"http://www.google.com/geo/schemas/sitemap/1.0\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <geo:geo>\n" +
            " <geo:format>kml</geo:format>\n" +
            " </geo:geo>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /**
     * Writes the generator's output, asserts that exactly one file named
     * {@code sitemap.xml} was produced, and returns that file's contents.
     */
    private String writeSingleSiteMap(GoogleGeoSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -0,0 +1,55 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.util.List;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.GoogleMobileSitemapGenerator;
import com.redfin.sitemapgenerator.GoogleMobileSitemapUrl;
/**
 * Checks the XML that {@link GoogleMobileSitemapGenerator} writes for a
 * {@link GoogleMobileSitemapUrl} entry.
 */
public class GoogleMobileSitemapUrlTest extends TestCase {

    /** Scratch directory that receives the generated sitemap files; recreated for every test. */
    File dir;

    /** Generator under test; created by each test method. */
    GoogleMobileSitemapGenerator wsg;

    /**
     * Creates a fresh, empty scratch directory by reserving a unique temp-file name and
     * replacing that file with a directory.
     *
     * @throws IllegalStateException if the directory could not be created
     */
    public void setUp() throws Exception {
        dir = File.createTempFile(GoogleMobileSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        // Fail fast here rather than letting every test fail later with an unrelated I/O error.
        if (!dir.mkdir()) {
            throw new IllegalStateException("Could not create temporary directory " + dir);
        }
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the scratch directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when the path is not a readable directory; guard against it
        // so that cleanup never fails with a NullPointerException.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** A mobile URL is written with an empty mobile marker element. */
    public void testSimpleUrl() throws Exception {
        wsg = new GoogleMobileSitemapGenerator("http://www.example.com", dir);
        GoogleMobileSitemapUrl url = new GoogleMobileSitemapUrl("http://www.example.com/index.html");
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" " +
            "xmlns:mobile=\"http://www.google.com/schemas/sitemap-mobile/1.0\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <mobile:mobile/>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /**
     * Writes the generator's output, asserts that exactly one file named
     * {@code sitemap.xml} was produced, and returns that file's contents.
     */
    private String writeSingleSiteMap(GoogleMobileSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -0,0 +1,85 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.util.Date;
import java.util.List;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.GoogleNewsSitemapGenerator;
import com.redfin.sitemapgenerator.GoogleNewsSitemapUrl;
import com.redfin.sitemapgenerator.W3CDateFormat;
import com.redfin.sitemapgenerator.W3CDateFormat.Pattern;
/**
 * Checks the XML that {@link GoogleNewsSitemapGenerator} writes for
 * {@link GoogleNewsSitemapUrl} entries.
 */
public class GoogleNewsSitemapUrlTest extends TestCase {

    /** Scratch directory that receives the generated sitemap files; recreated for every test. */
    File dir;

    /** Generator under test; created by each test method. */
    GoogleNewsSitemapGenerator wsg;

    /**
     * Creates a fresh, empty scratch directory by reserving a unique temp-file name and
     * replacing that file with a directory.
     *
     * @throws IllegalStateException if the directory could not be created
     */
    public void setUp() throws Exception {
        dir = File.createTempFile(GoogleNewsSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        // Fail fast here rather than letting every test fail later with an unrelated I/O error.
        if (!dir.mkdir()) {
            throw new IllegalStateException("Could not create temporary directory " + dir);
        }
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the scratch directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when the path is not a readable directory; guard against it
        // so that cleanup never fails with a NullPointerException.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** A news URL with only a publication date is written with second precision in UTC. */
    public void testSimpleUrl() throws Exception {
        // Pin pattern and time zone so that new Date(0) renders as 1970-01-01T00:00:00Z.
        W3CDateFormat dateFormat = new W3CDateFormat(Pattern.SECOND);
        dateFormat.setTimeZone(W3CDateFormat.ZULU);
        wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir)
            .dateFormat(dateFormat).build();
        GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl("http://www.example.com/index.html", new Date(0));
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <news:news>\n" +
            " <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n" +
            " </news:news>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Keywords are written as a single comma-and-space separated list. */
    public void testKeywords() throws Exception {
        W3CDateFormat dateFormat = new W3CDateFormat(Pattern.SECOND);
        dateFormat.setTimeZone(W3CDateFormat.ZULU);
        wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir)
            .dateFormat(dateFormat).build();
        GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0))
            .keywords("Klaatu", "Barrata", "Nicto")
            .build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <news:news>\n" +
            " <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n" +
            " <news:keywords>Klaatu, Barrata, Nicto</news:keywords>\n" +
            " </news:news>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /**
     * Writes the generator's output, asserts that exactly one file named
     * {@code sitemap.xml} was produced, and returns that file's contents.
     */
    private String writeSingleSiteMap(GoogleNewsSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -0,0 +1,174 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.List;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.GoogleVideoSitemapGenerator;
import com.redfin.sitemapgenerator.GoogleVideoSitemapUrl;
import com.redfin.sitemapgenerator.W3CDateFormat;
import com.redfin.sitemapgenerator.GoogleVideoSitemapUrl.Options;
/**
 * Checks the XML that {@link GoogleVideoSitemapGenerator} writes for
 * {@link GoogleVideoSitemapUrl} entries, and that out-of-range option values are rejected.
 */
public class GoogleVideoSitemapUrlTest extends TestCase {

    private static final URL LANDING_URL = newURL("http://www.example.com/index.html");
    private static final URL CONTENT_URL = newURL("http://www.example.com/index.flv");

    /** Scratch directory that receives the generated sitemap files; recreated for every test. */
    File dir;

    /** Generator under test; created by each test method. */
    GoogleVideoSitemapGenerator wsg;

    /**
     * Builds a {@link URL} for a fixture constant without forcing a checked exception
     * onto the static initializer.
     *
     * @param url the URL text to parse
     * @return the parsed URL, never {@code null}
     * @throws IllegalArgumentException if {@code url} is malformed
     */
    private static URL newURL(String url) {
        try {
            return new URL(url);
        } catch (MalformedURLException e) {
            // Surface a broken fixture immediately instead of handing null to the tests.
            throw new IllegalArgumentException("Malformed test URL: " + url, e);
        }
    }

    /**
     * Creates a fresh, empty scratch directory by reserving a unique temp-file name and
     * replacing that file with a directory.
     *
     * @throws IllegalStateException if the directory could not be created
     */
    public void setUp() throws Exception {
        dir = File.createTempFile(GoogleVideoSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        // Fail fast here rather than letting every test fail later with an unrelated I/O error.
        if (!dir.mkdir()) {
            throw new IllegalStateException("Could not create temporary directory " + dir);
        }
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the scratch directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when the path is not a readable directory; guard against it
        // so that cleanup never fails with a NullPointerException.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** A video URL with only landing and content locations produces a minimal video element. */
    public void testSimpleUrl() throws Exception {
        wsg = new GoogleVideoSitemapGenerator("http://www.example.com", dir);
        GoogleVideoSitemapUrl url = new GoogleVideoSitemapUrl(LANDING_URL, CONTENT_URL);
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:video=\"http://www.google.com/schemas/sitemap-video/1.1\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <video:video>\n" +
            " <video:content_loc>http://www.example.com/index.flv</video:content_loc>\n" +
            " </video:video>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Every video option is written, in a fixed order, with booleans rendered as Yes/No. */
    public void testOptions() throws Exception {
        // Pin the time zone so that new Date(0) always renders as 1970-01-01.
        W3CDateFormat dateFormat = new W3CDateFormat();
        dateFormat.setTimeZone(W3CDateFormat.ZULU);
        wsg = GoogleVideoSitemapGenerator.builder("http://www.example.com", dir)
            .dateFormat(dateFormat).build();
        GoogleVideoSitemapUrl url = new Options(LANDING_URL, CONTENT_URL)
            .playerUrl(new URL("http://www.example.com/index.swf"), true)
            .thumbnailUrl(new URL("http://www.example.com/thumbnail.jpg"))
            .title("This is a video!").description("A great video about dinosaurs")
            .rating(5.0).viewCount(500000).publicationDate(new Date(0)).tags("dinosaurs", "example", "awesome")
            .category("example").familyFriendly(false).durationInSeconds(60*30)
            .build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:video=\"http://www.google.com/schemas/sitemap-video/1.1\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <video:video>\n" +
            " <video:content_loc>http://www.example.com/index.flv</video:content_loc>\n" +
            " <video:player_loc allow_embed=\"Yes\">http://www.example.com/index.swf</video:player_loc>\n" +
            " <video:thumbnail_loc>http://www.example.com/thumbnail.jpg</video:thumbnail_loc>\n" +
            " <video:title>This is a video!</video:title>\n" +
            " <video:description>A great video about dinosaurs</video:description>\n" +
            " <video:rating>5.0</video:rating>\n" +
            " <video:view_count>500000</video:view_count>\n" +
            " <video:publication_date>1970-01-01</video:publication_date>\n" +
            " <video:tag>dinosaurs</video:tag>\n" +
            " <video:tag>example</video:tag>\n" +
            " <video:tag>awesome</video:tag>\n" +
            " <video:category>example</video:category>\n" +
            " <video:family_friendly>No</video:family_friendly>\n" +
            " <video:duration>1800</video:duration>\n" +
            " </video:video>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** A title longer than 100 characters must be rejected. */
    public void testLongTitle() {
        try {
            new Options(LANDING_URL, CONTENT_URL).title("Unfortunately, this title is far longer than 100 characters" +
                "by virtue of having a great deal to say but not much content.");
            fail("Long title inappropriately allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** A 2049-character description must be rejected. */
    public void testLongDescription() {
        StringBuilder sb = new StringBuilder(2049);
        for (int i = 0; i < 2049; i++) {
            sb.append('x');
        }
        try {
            new Options(LANDING_URL, CONTENT_URL).description(sb.toString());
            fail("Long description inappropriately allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Ratings below zero or above five must be rejected. */
    public void testWrongRating() {
        Options o = new Options(LANDING_URL, CONTENT_URL);
        try {
            o.rating(-1.0);
            fail("Negative rating allowed");
        } catch (RuntimeException e) {
            // expected
        }
        try {
            o.rating(10.0);
            fail(">5 rating allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Supplying 33 tags, one more than the 32 this test treats as the maximum, must be rejected. */
    public void testTooManyTags() {
        int maxTags = 32;
        String[] tags = new String[maxTags+1];
        for (int i = 0; i < maxTags+1; i++) {
            tags[i] = "tag" + i;
        }
        try {
            new Options(LANDING_URL, CONTENT_URL).tags(tags).build();
            fail("Too many tags allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** A 257-character category must be rejected. */
    public void testLongCategory() {
        StringBuilder sb = new StringBuilder(257);
        for (int i = 0; i < 257; i++) {
            sb.append('x');
        }
        try {
            new Options(LANDING_URL, CONTENT_URL).category(sb.toString());
            fail("Long category inappropriately allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Negative durations and durations above the allowed maximum must be rejected. */
    public void testWrongDuration() {
        Options o = new Options(LANDING_URL, CONTENT_URL);
        try {
            o.durationInSeconds(-1);
            fail("Negative duration allowed");
        } catch (RuntimeException e) {
            // expected
        }
        try {
            o.durationInSeconds(Integer.MAX_VALUE);
            fail(">8hr duration allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /**
     * Writes the generator's output, asserts that exactly one file named
     * {@code sitemap.xml} was produced, and returns that file's contents.
     */
    private String writeSingleSiteMap(GoogleVideoSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -0,0 +1,300 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.List;
import java.util.zip.GZIPInputStream;
import junit.framework.TestCase;
/**
 * Checks the XML that {@link WebSitemapGenerator} writes, including how URLs are split
 * across several sitemap files once the configured per-file maximum is reached, gzip
 * output, and rejection of invalid usage.
 */
public class SitemapGeneratorTest extends TestCase {

    /** Expected content of the overflow sitemap holding only the "just-one-more" URL. */
    private static final String SITEMAP_PLUS_ONE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
        " <url>\n" +
        " <loc>http://www.example.com/just-one-more</loc>\n" +
        " </url>\n" +
        "</urlset>";

    /** Expected content of a sitemap holding URLs 0 through 9. */
    private static final String SITEMAP1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
        " <url>\n" +
        " <loc>http://www.example.com/0</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/1</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/2</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/3</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/4</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/5</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/6</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/7</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/8</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/9</loc>\n" +
        " </url>\n" +
        "</urlset>";

    /** Expected content of a sitemap holding URLs 10 through 19. */
    private static final String SITEMAP2 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
        " <url>\n" +
        " <loc>http://www.example.com/10</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/11</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/12</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/13</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/14</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/15</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/16</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/17</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/18</loc>\n" +
        " </url>\n" +
        " <url>\n" +
        " <loc>http://www.example.com/19</loc>\n" +
        " </url>\n" +
        "</urlset>";

    /** Scratch directory that receives the generated sitemap files; recreated for every test. */
    File dir;

    /** Generator under test; created by each test method. */
    WebSitemapGenerator wsg;

    /**
     * Creates a fresh, empty scratch directory by reserving a unique temp-file name and
     * replacing that file with a directory.
     *
     * @throws IllegalStateException if the directory could not be created
     */
    public void setUp() throws Exception {
        dir = File.createTempFile(SitemapGeneratorTest.class.getSimpleName(), "");
        dir.delete();
        // Fail fast here rather than letting every test fail later with an unrelated I/O error.
        if (!dir.mkdir()) {
            throw new IllegalStateException("Could not create temporary directory " + dir);
        }
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the scratch directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when the path is not a readable directory; guard against it
        // so that cleanup never fails with a NullPointerException.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** A single URL added as a string produces a one-entry sitemap. */
    public void testSimpleUrl() throws Exception {
        wsg = new WebSitemapGenerator("http://www.example.com", dir);
        wsg.addUrl("http://www.example.com/index.html");
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Two URLs added in one call are written in the order given. */
    public void testTwoUrl() throws Exception {
        wsg = new WebSitemapGenerator("http://www.example.com", dir);
        wsg.addUrls("http://www.example.com/index.html", "http://www.example.com/index2.html");
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " </url>\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index2.html</loc>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** lastmod, changefreq and priority are all written when set. */
    public void testAllUrlOptions() throws Exception {
        // Pin the time zone so that new Date(0) always renders as 1970-01-01.
        W3CDateFormat df = new W3CDateFormat();
        df.setTimeZone(W3CDateFormat.ZULU);
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir).dateFormat(df).autoValidate(true).build();
        WebSitemapUrl url = new WebSitemapUrl.Options("http://www.example.com/index.html")
            .changeFreq(ChangeFreq.DAILY).lastMod(new Date(0)).priority(1.0).build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n" +
            " <url>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <lastmod>1970-01-01</lastmod>\n" +
            " <changefreq>daily</changefreq>\n" +
            " <priority>1.0</priority>\n" +
            " </url>\n" +
            "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** A URL on a different host than the generator's base URL must be rejected. */
    public void testBadUrl() throws Exception {
        wsg = new WebSitemapGenerator("http://www.example.com", dir);
        try {
            wsg.addUrl("http://example.com/index.html");
            fail("wrong domain allowed to be added");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Calling write() a second time on the same generator must be rejected. */
    public void testDoubleWrite() throws Exception {
        testSimpleUrl();
        try {
            wsg.write();
            fail("Double-write is not allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Calling write() before any URL was added must be rejected. */
    public void testEmptyWrite() throws Exception {
        try {
            wsg = new WebSitemapGenerator("http://www.example.com", dir);
            wsg.write();
            fail("Empty write is not allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** With multiple sitemaps disallowed, adding one URL beyond the maximum must be rejected. */
    public void testTooManyUrls() throws Exception {
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir).allowMultipleSitemaps(false).build();
        for (int i = 0; i < SitemapGenerator.MAX_URLS_PER_SITEMAP; i++) {
            wsg.addUrl("http://www.example.com/"+i);
        }
        try {
            wsg.addUrl("http://www.example.com/just-one-more");
            fail("too many URLs allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /**
     * With maxUrls(10), the eleventh URL overflows into a second file; the first file is
     * expected to exist on disk already before write() is called.
     */
    public void testMaxUrlsPlusOne() throws Exception {
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir).autoValidate(true).maxUrls(10).build();
        for (int i = 0; i < 9; i++) {
            wsg.addUrl("http://www.example.com/"+i);
        }
        wsg.addUrl("http://www.example.com/9");
        wsg.addUrl("http://www.example.com/just-one-more");
        // Read sitemap1.xml before write(): the overflow must already have flushed it.
        String actual = TestUtil.slurpFileAndDelete(new File(dir, "sitemap1.xml"));
        assertEquals("sitemap1 didn't match", SITEMAP1, actual);
        List<File> files = wsg.write();
        assertEquals(2, files.size());
        assertEquals("First sitemap was misnamed", "sitemap1.xml", files.get(0).getName());
        assertEquals("Second sitemap was misnamed", "sitemap2.xml", files.get(1).getName());
        actual = TestUtil.slurpFileAndDelete(files.get(1));
        assertEquals("sitemap2 didn't match", SITEMAP_PLUS_ONE, actual);
    }

    /** Exactly maxUrls URLs still fit in a single, un-numbered sitemap.xml. */
    public void testMaxUrls() throws Exception {
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir).autoValidate(true).maxUrls(10).build();
        for (int i = 0; i < 9; i++) {
            wsg.addUrl("http://www.example.com/"+i);
        }
        wsg.addUrl("http://www.example.com/9");
        String actual = writeSingleSiteMap(wsg);
        assertEquals("sitemap didn't match", SITEMAP1, actual);
    }

    /** Twice maxUrls URLs are split into exactly two numbered sitemaps. */
    public void testMaxUrlsTimesTwo() throws Exception {
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir).autoValidate(true).maxUrls(10).build();
        for (int i = 0; i < 19; i++) {
            wsg.addUrl("http://www.example.com/"+i);
        }
        wsg.addUrl("http://www.example.com/19");
        List<File> files = wsg.write();
        assertEquals(2, files.size());
        assertEquals("First sitemap was misnamed", "sitemap1.xml", files.get(0).getName());
        assertEquals("Second sitemap was misnamed", "sitemap2.xml", files.get(1).getName());
        String actual = TestUtil.slurpFileAndDelete(files.get(0));
        assertEquals("sitemap1 didn't match", SITEMAP1, actual);
        actual = TestUtil.slurpFileAndDelete(files.get(1));
        assertEquals("sitemap2 didn't match", SITEMAP2, actual);
    }

    /** Twice maxUrls plus one URL are split into three numbered sitemaps. */
    public void testMaxUrlsTimesTwoPlusOne() throws Exception {
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir).autoValidate(true).maxUrls(10).build();
        for (int i = 0; i < 19; i++) {
            wsg.addUrl("http://www.example.com/"+i);
        }
        wsg.addUrl("http://www.example.com/19");
        wsg.addUrl("http://www.example.com/just-one-more");
        List<File> files = wsg.write();
        assertEquals(3, files.size());
        assertEquals("First sitemap was misnamed", "sitemap1.xml", files.get(0).getName());
        assertEquals("Second sitemap was misnamed", "sitemap2.xml", files.get(1).getName());
        assertEquals("Third sitemap was misnamed", "sitemap3.xml", files.get(2).getName());
        String expected = SITEMAP1;
        String actual = TestUtil.slurpFileAndDelete(files.get(0));
        assertEquals("sitemap1 didn't match", expected, actual);
        expected = SITEMAP2;
        actual = TestUtil.slurpFileAndDelete(files.get(1));
        assertEquals("sitemap2 didn't match", expected, actual);
        expected = SITEMAP_PLUS_ONE;
        actual = TestUtil.slurpFileAndDelete(files.get(2));
        assertEquals("sitemap3 didn't match", expected, actual);
    }

    /** With gzip enabled the output is named sitemap.xml.gz and decompresses to the plain XML. */
    public void testGzip() throws Exception {
        wsg = WebSitemapGenerator.builder("http://www.example.com", dir)
            .gzip(true).build();
        for (int i = 0; i < 9; i++) {
            wsg.addUrl("http://www.example.com/"+i);
        }
        wsg.addUrl("http://www.example.com/9");
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml.gz", files.get(0).getName());
        File file = files.get(0);
        file.deleteOnExit();
        StringBuilder sb = new StringBuilder();
        try {
            FileInputStream fileStream = new FileInputStream(file);
            try {
                GZIPInputStream gzipStream = new GZIPInputStream(fileStream);
                try {
                    // Decode with the charset the XML declaration names, not the platform default.
                    InputStreamReader reader = new InputStreamReader(gzipStream, "UTF-8");
                    int c;
                    while ((c = reader.read()) != -1) {
                        sb.append((char)c);
                    }
                } finally {
                    gzipStream.close();
                }
            } finally {
                // Release the file handle before delete(); an open handle can block deletion.
                fileStream.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        file.delete();
        String actual = sb.toString();
        assertEquals("sitemap didn't match", SITEMAP1, actual);
    }

    /**
     * Writes the generator's output, asserts that exactly one file named
     * {@code sitemap.xml} was produced, and returns that file's contents.
     */
    private String writeSingleSiteMap(WebSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -0,0 +1,127 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.MalformedURLException;
import java.util.Date;
import junit.framework.TestCase;
/**
 * Checks the sitemap index XML that {@link SitemapIndexGenerator} writes, and that
 * empty or over-full indexes are rejected.
 */
public class SitemapIndexGeneratorTest extends TestCase {

    /** Expected index listing sitemap1.xml through sitemap10.xml, each last modified at the epoch. */
    private static final String INDEX = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap1.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap2.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap3.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap4.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap5.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap6.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap7.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap8.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap9.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        " <sitemap>\n" +
        " <loc>http://www.example.com/sitemap10.xml</loc>\n" +
        " <lastmod>1970-01-01</lastmod>\n" +
        " </sitemap>\n" +
        "</sitemapindex>";

    /** Base URL shared by every test; sitemap locations are built by appending to it. */
    private static final String EXAMPLE = "http://www.example.com/";

    /** Date format shared by the tests; its time zone is set in {@link #setUp()}. */
    private static final W3CDateFormat ZULU = new W3CDateFormat();

    /** Temporary file the index is written to; recreated for every test. */
    File outFile;

    /** Generator under test; created by each test method. */
    SitemapIndexGenerator sig;

    /** Pins the shared date format's time zone and reserves a fresh output file. */
    public void setUp() throws Exception {
        ZULU.setTimeZone(W3CDateFormat.ZULU);
        // Prefix the temp file with this class's own name so stray files are attributable.
        outFile = File.createTempFile(SitemapIndexGeneratorTest.class.getSimpleName(), ".xml");
        outFile.deleteOnExit();
    }

    /** Deletes the output file and drops the references held by the fixture. */
    public void tearDown() {
        sig = null;
        outFile.delete();
        outFile = null;
    }

    /** With maxUrls(10), adding an eleventh URL must be rejected. */
    public void testTooManyUrls() throws Exception {
        sig = new SitemapIndexGenerator.Options(EXAMPLE, outFile).maxUrls(10).autoValidate(true).build();
        for (int i = 0; i < 9; i++) {
            sig.addUrl(EXAMPLE+i);
        }
        sig.addUrl(EXAMPLE+"9");
        try {
            sig.addUrl("http://www.example.com/just-one-more");
            fail("too many URLs allowed");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Calling write() before any URL was added must be rejected. */
    public void testNoUrls() throws Exception {
        sig = new SitemapIndexGenerator(EXAMPLE, outFile);
        try {
            sig.write();
            fail("Allowed write with no URLs");
        } catch (RuntimeException e) {
            // expected
        }
    }

    /** Exactly maxUrls entries are accepted and written with the default last-modified date. */
    public void testMaxUrls() throws Exception {
        sig = new SitemapIndexGenerator.Options(EXAMPLE, outFile).autoValidate(true)
            .maxUrls(10).defaultLastMod(new Date(0)).dateFormat(ZULU).build();
        for (int i = 1; i <= 9; i++) {
            sig.addUrl(EXAMPLE+"sitemap"+i+".xml");
        }
        sig.addUrl(EXAMPLE+"sitemap10.xml");
        sig.write();
        String actual = TestUtil.slurpFileAndDelete(outFile);
        assertEquals(INDEX, actual);
    }

    /** A single entry with an explicit last-modified date produces a one-entry index. */
    public void testOneUrl() throws Exception {
        sig = new SitemapIndexGenerator.Options(EXAMPLE, outFile).dateFormat(ZULU).autoValidate(true).build();
        SitemapIndexUrl url = new SitemapIndexUrl(EXAMPLE+"index.html", new Date(0));
        sig.addUrl(url);
        sig.write();
        String actual = TestUtil.slurpFileAndDelete(outFile);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" +
            " <sitemap>\n" +
            " <loc>http://www.example.com/index.html</loc>\n" +
            " <lastmod>1970-01-01</lastmod>\n" +
            " </sitemap>\n" +
            "</sitemapindex>";
        assertEquals(expected, actual);
    }

    /** addUrls(prefix, suffix, count) yields the same index as adding sitemap1..10 by hand. */
    public void testAddByPrefix() throws MalformedURLException {
        sig = new SitemapIndexGenerator.Options(EXAMPLE, outFile).autoValidate(true)
            .defaultLastMod(new Date(0)).dateFormat(ZULU).build();
        sig.addUrls("sitemap", ".xml", 10);
        sig.write();
        String actual = TestUtil.slurpFileAndDelete(outFile);
        assertEquals(INDEX, actual);
    }
}

View File

@ -0,0 +1,41 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Helpers used by the tests to read classpath resources and generated files
 * into strings.
 */
public class TestUtil {

    /**
     * Reads a classpath resource completely into a string.
     *
     * @param clazz class whose {@code getResourceAsStream} is used to resolve {@code path}
     * @param path resource path, as accepted by {@code Class.getResourceAsStream}
     * @return the full contents of the resource
     * @throws RuntimeException if the resource is not found, or wrapping the
     *         {@link IOException} if reading or closing it fails
     */
    public static String getResourceAsString(Class<?> clazz, String path) {
        InputStream stream = clazz.getResourceAsStream(path);
        if (stream == null) throw new RuntimeException("resource path not found: " + path);
        try {
            // NOTE(review): decodes with the platform default charset, as before;
            // confirm whether the resources should be read explicitly as UTF-8.
            return readAllAndClose(new InputStreamReader(stream));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads a file completely into a string and then deletes it.
     *
     * The file is also registered with {@code deleteOnExit} as a fallback, and
     * the delete is attempted even when reading fails.
     *
     * @param file file to read and remove
     * @return the full contents of the file
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *         cannot be opened, read or closed
     */
    public static String slurpFileAndDelete(File file) {
        file.deleteOnExit();
        try {
            // NOTE(review): FileReader decodes with the platform default charset,
            // as before; confirm whether UTF-8 should be forced here.
            return readAllAndClose(new FileReader(file));
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            // The reader is already closed at this point; deleting a file that is
            // still open fails on platforms that lock open files.
            file.delete();
        }
    }

    /** Drains {@code reader} into a string, always closing it afterwards. */
    private static String readAllAndClose(InputStreamReader reader) throws IOException {
        try {
            StringBuilder sb = new StringBuilder();
            char[] buffer = new char[4096];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, count);
            }
            return sb.toString();
        } finally {
            reader.close();
        }
    }
}

View File

@ -0,0 +1,102 @@
package com.redfin.sitemapgenerator;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.AUTO;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.DAY;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.MILLISECOND;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.MINUTE;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.MONTH;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.SECOND;
import static com.redfin.sitemapgenerator.W3CDateFormat.Pattern.YEAR;
import java.text.ParseException;
import java.util.Date;
import java.util.TimeZone;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.W3CDateFormat.Pattern;
/**
 * Tests formatting and parsing with {@link W3CDateFormat} for each
 * {@link Pattern}, in the ZULU time zone and in a zone with a UTC offset.
 */
public class W3CDateFormatTest extends TestCase {

    /** Formats the epoch with every pattern in the ZULU zone. */
    public void testFormatEpoch() {
        Date epoch = new Date(0);
        verifyPatternFormat(epoch, MILLISECOND, "1970-01-01T00:00:00.000Z");
        verifyPatternFormat(epoch, SECOND, "1970-01-01T00:00:00Z");
        verifyPatternFormat(epoch, MINUTE, "1970-01-01T00:00Z");
        verifyPatternFormat(epoch, DAY, "1970-01-01");
        verifyPatternFormat(epoch, MONTH, "1970-01");
        verifyPatternFormat(epoch, YEAR, "1970");
        verifyPatternFormat(epoch, AUTO, "1970-01-01");
    }

    /**
     * Checks the AUTO output for instants offset from the epoch by 0 ms, 1 ms,
     * 1 second, 1 minute and 1 day.
     */
    public void testAutoFormat() {
        Date date = new Date(0);
        verifyPatternFormat(date, AUTO, "1970-01-01");
        date = new Date(1);
        verifyPatternFormat(date, AUTO, "1970-01-01T00:00:00.001Z");
        date = new Date(1000);
        verifyPatternFormat(date, AUTO, "1970-01-01T00:00:01Z");
        date = new Date(60000);
        verifyPatternFormat(date, AUTO, "1970-01-01T00:01Z");
        date = new Date(60000 * 60 * 24);
        verifyPatternFormat(date, AUTO, "1970-01-02");
    }

    /** Formats the epoch in the "PST" zone and expects a -08:00 offset suffix. */
    public void testFormatTimeZone() {
        Date epoch = new Date(0);
        TimeZone tz = TimeZone.getTimeZone("PST");
        verifyPatternFormat(epoch, MILLISECOND, "1969-12-31T16:00:00.000-08:00", tz);
        verifyPatternFormat(epoch, AUTO, "1969-12-31T16:00-08:00", tz);
    }

    /** Parses each pattern's rendering of the epoch back to the epoch. */
    public void testParseEpoch() {
        Date date = new Date(0);
        verifyPatternParse("1970-01-01T00:00:00.000Z", MILLISECOND, date);
        verifyPatternParse("1970-01-01T00:00:00Z", SECOND, date);
        verifyPatternParse("1970-01-01T00:00Z", MINUTE, date);
        verifyPatternParse("1970-01-01", DAY, date);
        verifyPatternParse("1970-01", MONTH, date);
        verifyPatternParse("1970", YEAR, date);
    }

    /** Parses every rendering of the epoch with the AUTO pattern. */
    public void testAutoParse() {
        Date date = new Date(0);
        verifyPatternParse("1970-01-01T00:00:00.000Z", AUTO, date);
        verifyPatternParse("1970-01-01T00:00:00Z", AUTO, date);
        verifyPatternParse("1970-01-01T00:00Z", AUTO, date);
        verifyPatternParse("1970-01-01", AUTO, date);
        verifyPatternParse("1970-01", AUTO, date);
        verifyPatternParse("1970", AUTO, date);
    }

    /** Parses a timestamp carrying an explicit -08:00 offset back to the epoch. */
    public void testParseTimeZone() {
        Date epoch = new Date(0);
        verifyPatternParse("1969-12-31T16:00:00.000-08:00", MILLISECOND, epoch);
        verifyPatternParse("1969-12-31T16:00:00.000-08:00", AUTO, epoch);
    }

    /** Asserts the formatted output of {@code date} under {@code pattern} in the ZULU zone. */
    private void verifyPatternFormat(Date date, Pattern pattern, String expected) {
        verifyPatternFormat(date, pattern, expected, W3CDateFormat.ZULU);
    }

    /**
     * Asserts that formatting {@code date} with a new {@link W3CDateFormat} for
     * {@code pattern}, set to time zone {@code tz}, yields {@code expected}.
     */
    private void verifyPatternFormat(Date date, Pattern pattern, String expected, TimeZone tz) {
        W3CDateFormat format = new W3CDateFormat(pattern);
        format.setTimeZone(tz);
        assertEquals(date.toString() + " " + pattern, expected, format.format(date));
    }

    /** Asserts the parsed value of {@code source} under {@code pattern} in the ZULU zone. */
    private void verifyPatternParse(String source, Pattern pattern, Date expected) {
        verifyPatternParse(source, pattern, expected, W3CDateFormat.ZULU);
    }

    /**
     * Asserts that parsing {@code source} with a new {@link W3CDateFormat} for
     * {@code pattern}, set to time zone {@code tz}, yields {@code expected}.
     * A {@link ParseException} fails the assertion, and its description is
     * included in the failure message.
     */
    private void verifyPatternParse(String source, Pattern pattern, Date expected, TimeZone tz) {
        W3CDateFormat format = new W3CDateFormat(pattern);
        format.setTimeZone(tz);
        Date actual = null;
        String parseError = "";
        try {
            actual = format.parse(source);
        } catch (ParseException e) {
            // Keep the cause visible: without it the failure reads only "but was: null".
            parseError = " (parse failed: " + e + ")";
        }
        assertEquals(source + " " + pattern + parseError, expected, actual);
    }
}