Compare commits

...

61 Commits
v1.0.2 ... main

Author SHA1 Message Date
c621e05154 修改版本为 1.0.1-SNAPSHOT 2022-12-15 00:59:01 -05:00
8eeeccb213 更新软件的包和使用的 JDK 版本 2022-12-15 00:44:03 -05:00
dd08af8f9f 先不进行签名 2022-12-14 23:25:28 -05:00
871f58e1d5 Update pom file and by use JDK 11 to build 2022-12-14 23:23:54 -05:00
Marc Trölitzsch
619bcfce92
add LICENSE (#51)
Adds an Apache 2.0 license file to the repository. Closes #38
2021-06-23 10:29:30 -07:00
Matthias Kurz
c6bf718b6e
Prepare next snaphost version 2019-01-27 15:13:23 +01:00
Matthias Kurz
7cb6a9b216
Release 1.1.2 2019-01-27 15:12:04 +01:00
Matthias Kurz
bfce1911fa
Merge pull request #46 from mkurz/sitemapindexasstring
Add support to write sitemaps index as string
2019-01-27 15:04:10 +01:00
Matthias Kurz
bc5515d678
Add support to write sitemaps index as string 2019-01-27 15:02:38 +01:00
Matthias Kurz
6a605cf6ce
Merge pull request #45 from ThorKarlsson/GoogleImageExtension
Google image extension
2019-01-27 14:34:07 +01:00
Matthias Kurz
954b0d3c42
Remove last modified in tests, causes problems (timezones) 2019-01-27 14:16:34 +01:00
Matthias Kurz
edcec3bd5c
Change comments 2019-01-27 14:09:36 +01:00
Matthias Kurz
7bbc640e6e
Renamed GoogleExtension* classes/files to GoogleImage* 2019-01-26 23:02:02 +01:00
Matthias Kurz
3d11a1a9ef
Removed TODO's They are done already 2019-01-26 23:00:11 +01:00
Matthias Kurz
984d3ee661
Spaces 2019-01-26 22:51:35 +01:00
thorkarlsson
0ea636d005 Remove old version of image test 2018-10-30 14:12:30 +07:00
thorkarlsson
e9cabcf104 Add Google extended sitemap url with support to add multiple image tags to a single URL entry 2018-10-30 14:04:45 +07:00
Matthias Kurz
47cc5e01d8
Prepare next snaphost version 2018-07-17 12:19:55 +02:00
Matthias Kurz
2b7b645afd
Release 1.1.1 2018-07-17 12:19:13 +02:00
Matthias Kurz
7553eba073
Merge pull request #44 from spekr/master
Close streams in finally clause
2018-07-17 12:15:48 +02:00
Robert van der Spek
12c38f6135 Wraps codeblocks that could throw IOException in try-catch.
Catches IOException and throws RuntimeException instead. This is in line with code-practice.
This way the upgrade won't break any code.
2018-07-17 10:33:39 +02:00
Robert van der Spek
3c852e4474 Replaces spaced indents with tab indents. 2018-07-17 10:20:39 +02:00
Robert van der Spek
ab87227014 Adds flush() to ensure the file contents are available in the following procedure. 2018-07-16 13:25:07 +02:00
Robert van der Spek
4e84a7cfe8 Closes streams in finally clause whenever they are opened. 2018-07-16 12:45:37 +02:00
Matthias Kurz
168ac4f200
Prepare next snaphost version 2018-07-05 16:46:49 +02:00
Matthias Kurz
0baf7374fa
Release 1.1.0 2018-07-05 16:38:09 +02:00
Matthias Kurz
11962f83d7
Make sure resources get closed 2018-07-05 16:16:20 +02:00
Matthias Kurz
15345fc0e4
writeSitemapsWithIndex(...) returns the created file. Fixes #39 2018-07-05 16:08:26 +02:00
Matthias Kurz
a3395794ae
Cleanup 2018-07-05 15:17:34 +02:00
Matthias Kurz
b63ffc081e
Merge pull request #33 from sergiovm/master
added new generator for google link extension
2018-07-05 15:05:53 +02:00
Matthias Kurz
d4bc63c582
Merge pull request #28 from gregorko/master
Added ability to specify a custom sitemap index file
2018-07-05 14:49:16 +02:00
twb
1ceac7f3f0
Extend google news 2018-07-05 14:20:52 +02:00
Matthias Kurz
d52043d184
Merge pull request #37 from ramsrib/master
Changed the scope of the ISitemapUrl interface to public.
2018-07-04 16:15:24 +02:00
Sergio Vico
7fe063dbf9 Fixed weird random behabiour mixing alternate options and omitting some of them 2018-02-06 18:11:20 +01:00
Sergio Vico
e97b8b991a Fixed weird random behabiour mixing alternate options and omitting some of them 2018-02-06 17:39:40 +01:00
Sergio Vico
504ede31dd extended GoolgeLinkGenerator to support generic map to define link attributes 2018-01-22 16:10:09 +01:00
ramsrib
51a24c917a Changed the scope of the ISitemapUrl interface to public. 2017-08-13 14:21:03 -07:00
Sergio Vico
c1dba547dc Reversed pom to original. 2017-03-30 12:20:22 +02:00
Sergio Vico
0eccebb6c2 added new generator for google link extension 2017-03-29 17:33:46 +02:00
Matthias Kurz
deb1862436 Merge pull request #29 from andrewsmedina/master
fix typo in readme
2017-01-25 21:08:14 +01:00
Andrews Medina
50aef5b842 fix typo in readme
using < instead &lt; in java code
2017-01-25 17:06:15 -03:00
Gregor Koukkoullis
78a9936ebc ability to specify sitemap index file 2017-01-03 21:20:09 +01:00
Matthias Kurz
756d578275 Prepare next snaphost version 2016-08-03 21:16:46 +02:00
Matthias Kurz
9a3869963f Release 1.0.6 2016-08-03 21:15:01 +02:00
Matthias Kurz
ae631d8a44 Merge pull request #21 from mantacode/gitignore-update
Updating gitignore to exclude files for IntelliJ IDEA, etc.
2016-08-03 19:28:09 +02:00
Joseph Beard
e10e166b39 Write empty sitemap and index files (#23)
* Allow SitemapGenerators to write an empty sitemap.

* Allow SitemapIndexGenerators to write an empty sitemap index.
2016-08-03 08:04:52 -07:00
Matthias Kurz
6e9832beaf 1.0.5 released, preparing for 1.0.6 2016-08-03 09:15:45 +02:00
Kamil
ea0594afa1 fixes issue #25 (#26) 2016-07-20 16:06:24 -07:00
Joseph Beard
d96d24d610 Updating gitignore to exclude files for IntelliJ IDEA, etc. 2015-12-08 09:43:52 -05:00
Dan Fabulich
c3dee10152 1.0.4 released, preparing for 1.0.5 2015-09-16 20:59:49 -07:00
Ankita Nellimarla
be330d6ec3 adding support for custom suffices for sitemap generation 2015-09-08 23:34:43 -07:00
Matthias Kurz
e0c7147bfe Prepare for next SNAPSHOT release 2015-06-13 15:14:50 +02:00
Matthias Kurz
36a054bd71 Bump to version 1.0.3 2015-06-13 13:10:00 +02:00
dfabulich
a46ea9ffe5 Merge pull request #10 from skycao/master
Added write sitemap to string functionality
2015-06-12 20:35:44 -07:00
Sky Cao
54138163ce added write to string functionality for sitemap generators 2015-06-12 17:43:23 -07:00
Navtej Sadhal
5ea973b161 Changing url-check so that it allows different schemes but still requires the same domain
(cherry picked from commit d4ff78918dd9317a5686b9675cdade3cb6c2007e)
Signed-off-by: Dan Fabulich <dan.fabulich@redfin.com>
2015-06-08 16:54:52 -07:00
dfabulich
95b23f2eb0 Merge pull request #8 from jiwhiz/update-xsd
update siteindex.xsd and sitemap.xsd

based on http://www.sitemaps.org/schemas/sitemap/siteindex.xsd
http://www.sitemaps.org/schemas/sitemap/sitemap.xsd
2015-06-08 16:31:02 -07:00
Yuan Ji
2ba23162ca update siteindex.xsd and sitemap.xsd 2015-05-10 21:06:19 -06:00
dfabulich
c24db4203b Merge pull request #6 from hoteia/master
URL using non ASCII characters [utf-8 support] #5
2015-03-24 08:21:09 -07:00
denis
895cac17a6 URL using non ASCII characters [utf-8 support] #5
https://github.com/dfabulich/sitemapgen4j/issues/5#issuecomment-82243209
2015-03-18 19:35:29 +01:00
Matthias Kurz
c2d956cd06 Prepare for next SNAPSHOT release 2014-12-13 01:11:47 +01:00
39 changed files with 2803 additions and 1112 deletions

130
.gitignore vendored
View File

@ -1,4 +1,130 @@
target
# Created by https://www.gitignore.io/api/java,maven,eclipse,m2e,intellij
### Java ###
*.class
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
### Maven ###
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
### Eclipse ###
*.pydevproject
.metadata
.gradle
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
# Eclipse Core
.project
# External tool builders
.externalToolBuilders/
# Locally stored "Eclipse launch configurations"
*.launch
# CDT-specific
.cproject
# JDT-specific (Eclipse Java Development Tools)
.classpath
# Java annotation processor (APT)
.factorypath
# PDT-specific
.buildpath
# sbteclipse plugin
.target
# TeXlipse plugin
.texlipse
# STS (Spring Tool Suite)
.springBeans
### m2e ###
.classpath
.project
.settings
### Intellij ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
*.iml
## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:
# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries
# .idea/shelf
# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml
# Gradle:
# .idea/gradle.xml
# .idea/libraries
# Mongo Explorer plugin:
# .idea/mongoSettings.xml
## File-based project format:
*.ipr
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

191
LICENSE Normal file
View File

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2009 Dan Fabulich
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

119
README.md
View File

@ -67,7 +67,7 @@ One sitemap can contain a maximum of 50,000 URLs. (Some sitemaps, like Google N
```java
WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
for (int i = 0; i &lt; 60000; i++) wsg.addUrl("http://www.example.com/doc"+i+".html");
for (int i = 0; i < 60000; i++) wsg.addUrl("http://www.example.com/doc"+i+".html");
wsg.write();
wsg.writeSitemapsWithIndex(); // generate the sitemap_index.xml
@ -82,12 +82,12 @@ WebSitemapGenerator wsg;
// generate foo sitemap
wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.fileNamePrefix("foo").build();
for (int i = 0; i &lt; 5; i++) wsg.addUrl("http://www.example.com/foo"+i+".html");
for (int i = 0; i < 5; i++) wsg.addUrl("http://www.example.com/foo"+i+".html");
wsg.write();
// generate bar sitemap
wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.fileNamePrefix("bar").build();
for (int i = 0; i &lt; 5; i++) wsg.addUrl("http://www.example.com/bar"+i+".html");
for (int i = 0; i < 5; i++) wsg.addUrl("http://www.example.com/bar"+i+".html");
wsg.write();
// generate sitemap index for foo + bar
SitemapIndexGenerator sig = new SitemapIndexGenerator("http://www.example.com", myFile);
@ -120,3 +120,116 @@ Google can understand a wide variety of custom sitemap formats that they made up
To generate a special type of sitemap, just use GoogleMobileSitemapGenerator, GoogleGeoSitemapGenerator, GoogleCodeSitemapGenerator, GoogleNewsSitemapGenerator, or GoogleVideoSitemapGenerator instead of WebSitemapGenerator.
You can't mix-and-match regular URLs with Google-specific sitemaps, so you'll also have to use a GoogleMobileSitemapUrl, GoogleGeoSitemapUrl, GoogleCodeSitemapUrl, GoogleNewsSitemapUrl, or GoogleVideoSitemapUrl instead of a WebSitemapUrl. Each of them has unique configurable options not available to regular web URLs.
<html><head><title>How to use SitemapGen4j</title></head>
<body>
<h1>How to use SitemapGen4j</h1>
SitemapGen4j is a library to generate XML sitemaps in Java.
<h2>What's an XML sitemap?</h2>
Quoting from <a href="http://sitemaps.org/index.php">sitemaps.org</a>:
<blockquote><p>Sitemaps are an easy way for webmasters to inform search engines about pages on their sites that are available for crawling. In its simplest form, a Sitemap is an XML file that lists URLs for a site along with additional metadata about each URL (when it was last updated, how often it usually changes, and how important it is, relative to other URLs in the site) so that search engines can more intelligently crawl the site.</p>
<p>Web crawlers usually discover pages from links within the site and from other sites. Sitemaps supplement this data to allow crawlers that support Sitemaps to pick up all URLs in the Sitemap and learn about those URLs using the associated metadata. Using the Sitemap protocol does not guarantee that web pages are included in search engines, but provides hints for web crawlers to do a better job of crawling your site.</p>
<p>Sitemap 0.90 is offered under the terms of the Attribution-ShareAlike Creative Commons License and has wide adoption, including support from Google, Yahoo!, and Microsoft.</p>
</blockquote>
<h2>Getting started</h2>
<p>The easiest way to get started is to just use the WebSitemapGenerator class, like this:</p>
<pre name="code" class="java">WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
wsg.addUrl("http://www.example.com/index.html"); // repeat multiple times
wsg.write();</pre>
<h2>Configuring options</h2>
But there are a lot of nifty options available for URLs and for the generator as a whole. To configure the generator, use a builder:
<pre name="code" class="java">WebSitemapGenerator wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.gzip(true).build(); // enable gzipped output
wsg.addUrl("http://www.example.com/index.html");
wsg.write();</pre>
To configure the URLs, construct a WebSitemapUrl with WebSitemapUrl.Options.
<pre name="code" class="java">WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
WebSitemapUrl url = new WebSitemapUrl.Options("http://www.example.com/index.html")
.lastMod(new Date()).priority(1.0).changeFreq(ChangeFreq.HOURLY).build();
// this will configure the URL with lastmod=now, priority=1.0, changefreq=hourly
wsg.addUrl(url);
wsg.write();</pre>
<h2>Configuring the date format</h2>
One important configuration option for the sitemap generator is the date format. The <a href="http://www.w3.org/TR/NOTE-datetime">W3C datetime standard</a> allows you to choose the precision of your datetime (anything from just specifying the year like "1997" to specifying the fraction of the second like "1997-07-16T19:20:30.45+01:00"); if you don't specify one, we'll try to guess which one you want, and we'll use the default timezone of the local machine, which might not be what you prefer.
<pre name="code" class="java">
// Use DAY pattern (2009-02-07), Greenwich Mean Time timezone
W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY);
dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
WebSitemapGenerator wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.dateFormat(dateFormat).build(); // actually use the configured dateFormat
wsg.addUrl("http://www.example.com/index.html");
wsg.write();</pre>
<h2>Lots of URLs: a sitemap index file</h2>
One sitemap can contain a maximum of 50,000 URLs. (Some sitemaps, like Google News sitemaps, can contain only 1,000 URLs.) If you need to put more URLs than that in a sitemap, you'll have to use a sitemap index file. Fortunately, WebSitemapGenerator can manage the whole thing for you.
<pre name="code" class="java">WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
for (int i = 0; i &lt; 60000; i++) wsg.addUrl("http://www.example.com/doc"+i+".html");
wsg.write();
wsg.writeSitemapsWithIndex(); // generate the sitemap_index.xml
</pre>
<p>That will generate two sitemaps for 60K URLs: sitemap1.xml (with 50K urls) and sitemap2.xml (with the remaining 10K), and then generate a sitemap_index.xml file describing the two.</p>
<p>It's also possible to carefully organize your sub-sitemaps. For example, it's recommended to group URLs with the same changeFreq together (have one sitemap for changeFreq "daily" and another for changeFreq "yearly"), so you can modify the lastMod of the daily sitemap without modifying the lastMod of the yearly sitemap. To do that, just construct your sitemaps one at a time using the WebSitemapGenerator, then use the SitemapIndexGenerator to create a single index for all of them.</p>
<pre name="code" class="java">WebSitemapGenerator wsg;
// generate foo sitemap
wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.fileNamePrefix("foo").build();
for (int i = 0; i &lt; 5; i++) wsg.addUrl("http://www.example.com/foo"+i+".html");
wsg.write();
// generate bar sitemap
wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.fileNamePrefix("bar").build();
for (int i = 0; i &lt; 5; i++) wsg.addUrl("http://www.example.com/bar"+i+".html");
wsg.write();
// generate sitemap index for foo + bar
SitemapIndexGenerator sig = new SitemapIndexGenerator("http://www.example.com", myFile);
sig.addUrl("http://www.example.com/foo.xml");
sig.addUrl("http://www.example.com/bar.xml");
sig.write();</pre>
<p>You could also use the SitemapIndexGenerator to incorporate sitemaps generated by other tools. For example, you might use Google's official Python sitemap generator to generate some sitemaps, and use WebSitemapGenerator to generate some sitemaps, and use SitemapIndexGenerator to make an index of all of them.</p>
<h2>Validate your sitemaps</h2>
<p>SitemapGen4j can also validate your sitemaps using the official XML Schema Definition (XSD). If you used SitemapGen4j to make the sitemaps, you shouldn't need to do this unless there's a bug in our code. But you can use it to validate sitemaps generated by other tools, and it provides an extra level of safety.</p>
<p>It's easy to configure the WebSitemapGenerator to automatically validate your sitemaps right after you write them (but this does slow things down, naturally).</p>
<pre name="code" class="java">WebSitemapGenerator wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.autoValidate(true).build(); // validate the sitemap after writing
wsg.addUrl("http://www.example.com/index.html");
wsg.write();</pre>
<p>You can also use the SitemapValidator directly to manage sitemaps. It has two methods: validateWebSitemap(File f) and validateSitemapIndex(File f).</p>
<h2>Google-specific sitemaps</h2>
<p>Google can understand a wide variety of custom sitemap formats that they made up, including Mobile sitemaps, Geo sitemaps, Code sitemaps (for Google Code search), Google News sitemaps, and Video sitemaps. SitemapGen4j can generate any/all of these different types of sitemaps.</p>
<p>To generate a special type of sitemap, just use GoogleMobileSitemapGenerator, GoogleGeoSitemapGenerator, GoogleCodeSitemapGenerator, GoogleNewsSitemapGenerator, or GoogleVideoSitemapGenerator instead of WebSitemapGenerator.</p>
<p>You can't mix-and-match regular URLs with Google-specific sitemaps, so you'll also have to use a GoogleMobileSitemapUrl, GoogleGeoSitemapUrl, GoogleCodeSitemapUrl, GoogleNewsSitemapUrl, or GoogleVideoSitemapUrl instead of a WebSitemapUrl. Each of them has unique configurable options not available to regular web URLs.</p>
</body>
</html>

236
pom.xml
View File

@ -1,126 +1,114 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.github.dfabulich</groupId>
<artifactId>sitemapgen4j</artifactId>
<packaging>jar</packaging>
<version>1.0.2</version>
<name>SitemapGen4J</name>
<url>https://github.com/dfabulich/sitemapgen4j/</url>
<description>SitemapGen4j is an XML sitemap generator written in Java.</description>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:git:git://github.com:dfabulich/sitemapgen4j.git</connection>
<developerConnection>scm:git:git@github.com:dfabulich/sitemapgen4j.git</developerConnection>
<url>https://github.com/dfabulich/sitemapgen4j/</url>
</scm>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<developers>
<developer>
<id>dfabulich</id>
<name>Dan Fabulich</name>
<email>dan@fabulich.com</email>
<organization>Redfin</organization>
<organizationUrl>http://www.redfin.com/</organizationUrl>
<timezone>-8</timezone>
</developer>
</developers>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
</repository>
</distributionManagement>
<build>
<defaultGoal>install</defaultGoal>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.5</source>
<target>1.5</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.5.1</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<additionalparam>-Xdoclint:none</additionalparam>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.3</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>false</autoReleaseAfterClose>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.ossez</groupId>
<artifactId>sitemap-j</artifactId>
<packaging>jar</packaging>
<version>1.0.1-SNAPSHOT</version>
<name>SitemapJ</name>
<url>https://github.com/honeymoose/sitemap-j</url>
<description>SitemapJ is an XML sitemap generator written in Java.</description>
<scm>
<connection>scm:git:git://github.com:dfabulich/sitemapgen4j.git</connection>
<developerConnection>scm:git:git@github.com:dfabulich/sitemapgen4j.git</developerConnection>
<url>https://github.com/dfabulich/sitemapgen4j/</url>
</scm>
<properties>
<java.version>11</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<developers>
<developer>
<name>YuCheng Hu</name>
<id>honeymoose</id>
<email>huyuchengus@gmail.com</email>
<timezone>-5</timezone>
<organization>Open Source</organization>
<roles>
<role>Sr. Java Developer</role>
</roles>
</developer>
</developers>
<licenses>
<license>
<name>The MIT license</name>
<url>https://opensource.org/licenses/mit-license.php</url>
<distribution>repo</distribution>
</license>
</licenses>
<distributionManagement>
<repository>
<id>ossez-repo</id>
<url>https://repo.ossez.com/repository/maven-releases/</url>
</repository>
<snapshotRepository>
<id>ossez-repo</id>
<url>https://repo.ossez.com/repository/maven-snapshots/</url>
</snapshotRepository>
</distributionManagement>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<defaultGoal>install</defaultGoal>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<fork>true</fork>
<compilerReuseStrategy>alwaysNew</compilerReuseStrategy>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.5.1</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<id>create-javadoc-jar</id>
<goals>
<goal>javadoc</goal>
<goal>jar</goal>
</goals>
<phase>package</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -7,19 +7,24 @@ import java.net.URL;
// It makes sense, I swear! http://madbean.com/2004/mb2004-3/
abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGeneratorOptions<THIS>> {
File baseDir;
String baseUrl;
URL baseUrl;
String fileNamePrefix = "sitemap";
boolean allowEmptySitemap = false;
boolean allowMultipleSitemaps = true;
String suffixStringPattern; // this will store some type of string pattern suitable per needs.
W3CDateFormat dateFormat;
int maxUrls = SitemapGenerator.MAX_URLS_PER_SITEMAP;
boolean autoValidate = false;
boolean gzip = false;
public AbstractSitemapGeneratorOptions(URL baseUrl, File baseDir) {
if (baseDir == null) throw new NullPointerException("baseDir may not be null");
if (baseUrl == null) throw new NullPointerException("baseUrl may not be null");
this.baseDir = baseDir;
this.baseUrl = baseUrl.toString();
this.baseUrl = baseUrl;
}
public AbstractSitemapGeneratorOptions(URL baseUrl) {
this(baseUrl, null);
}
/** The prefix of the name of the sitemaps we'll create; by default this is "sitemap" */
@ -28,6 +33,23 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
this.fileNamePrefix = fileNamePrefix;
return getThis();
}
public THIS suffixStringPattern(String pattern) {
this.suffixStringPattern = pattern;
return getThis();
}
/**
* Permit writing a sitemap that contains no URLs.
*
* @param allowEmpty {@code true} if an empty sitemap is permissible
* @return this instance, for chaining
*/
public THIS allowEmptySitemap(boolean allowEmpty) {
this.allowEmptySitemap = allowEmpty;
return getThis();
}
/** When more than the maximum number of URLs are passed in, should we split into multiple sitemaps automatically, or just throw an exception? */
public THIS allowMultipleSitemaps(boolean allowMultipleSitemaps) {
this.allowMultipleSitemaps = allowMultipleSitemaps;
@ -62,8 +84,9 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
this.gzip = gzip;
return getThis();
}
@SuppressWarnings("unchecked")
THIS getThis() {
return (THIS)this;
}
}
}

View File

@ -1,33 +1,31 @@
package com.redfin.sitemapgenerator;
import java.io.IOException;
import java.io.OutputStreamWriter;
abstract class AbstractSitemapUrlRenderer<T extends WebSitemapUrl> implements ISitemapUrlRenderer<T> {
public void render(WebSitemapUrl url, OutputStreamWriter out, W3CDateFormat dateFormat, String additionalData)
throws IOException {
out.write(" <url>\n");
out.write(" <loc>");
out.write(url.getUrl().toString());
out.write("</loc>\n");
public void render(WebSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat, String additionalData) {
sb.append(" <url>\n");
sb.append(" <loc>");
sb.append(UrlUtils.escapeXml(url.getUrl().toString()));
sb.append("</loc>\n");
if (url.getLastMod() != null) {
out.write(" <lastmod>");
out.write(dateFormat.format(url.getLastMod()));
out.write("</lastmod>\n");
sb.append(" <lastmod>");
sb.append(dateFormat.format(url.getLastMod()));
sb.append("</lastmod>\n");
}
if (url.getChangeFreq() != null) {
out.write(" <changefreq>");
out.write(url.getChangeFreq().toString());
out.write("</changefreq>\n");
sb.append(" <changefreq>");
sb.append(url.getChangeFreq().toString());
sb.append("</changefreq>\n");
}
if (url.getPriority() != null) {
out.write(" <priority>");
out.write(url.getPriority().toString());
out.write("</priority>\n");
sb.append(" <priority>");
sb.append(url.getPriority().toString());
sb.append("</priority>\n");
}
if (additionalData != null) out.write(additionalData);
out.write(" </url>\n");
if (additionalData != null) {
sb.append(additionalData);
}
sb.append(" </url>\n");
}
public void renderTag(StringBuilder sb, String namespace, String tagName, Object value) {
@ -37,7 +35,7 @@ abstract class AbstractSitemapUrlRenderer<T extends WebSitemapUrl> implements IS
sb.append(':');
sb.append(tagName);
sb.append('>');
sb.append(value);
sb.append(UrlUtils.escapeXml(value.toString()));
sb.append("</");
sb.append(namespace);
sb.append(':');
@ -45,4 +43,10 @@ abstract class AbstractSitemapUrlRenderer<T extends WebSitemapUrl> implements IS
sb.append(">\n");
}
public void renderSubTag(StringBuilder sb, String namespace, String tagName, Object value) {
if (value == null) return;
sb.append(" ");
renderTag(sb, namespace, tagName, value);
}
}

View File

@ -1,87 +1,114 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
/**
* Builds a code sitemap for Google Code Search. To configure options, use {@link #builder(URL, File)}
*
* @author Dan Fabulich
* @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75224">Creating Code Search Sitemaps</a>
*/
public class GoogleCodeSitemapGenerator extends SitemapGenerator<GoogleCodeSitemapUrl,GoogleCodeSitemapGenerator> {
GoogleCodeSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
super(options, new Renderer());
}
public class GoogleCodeSitemapGenerator extends SitemapGenerator<GoogleCodeSitemapUrl, GoogleCodeSitemapGenerator> {
/** Configures the generator with a base URL and directory to write the sitemap files.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
* @throws MalformedURLException
*/
public GoogleCodeSitemapGenerator(String baseUrl, File baseDir)
throws MalformedURLException {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
GoogleCodeSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
super(options, new Renderer());
}
/**Configures the generator with a base URL and directory to write the sitemap files.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
*/
public GoogleCodeSitemapGenerator(URL baseUrl, File baseDir) {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
/** Configures a builder so you can specify sitemap generator options
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
* @return a builder; call .build() on it to make a sitemap generator
*/
public static SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> builder(URL baseUrl, File baseDir) {
return new SitemapGeneratorBuilder<GoogleCodeSitemapGenerator>(baseUrl, baseDir, GoogleCodeSitemapGenerator.class);
}
/** Configures a builder so you can specify sitemap generator options
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
* @return a builder; call .build() on it to make a sitemap generator
* @throws MalformedURLException
*/
public static SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
return new SitemapGeneratorBuilder<GoogleCodeSitemapGenerator>(baseUrl, baseDir, GoogleCodeSitemapGenerator.class);
}
/**
* Configures the generator with a base URL and directory to write the sitemap files.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
* @throws MalformedURLException Exception
*/
public GoogleCodeSitemapGenerator(String baseUrl, File baseDir)
throws MalformedURLException {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
private static class Renderer extends AbstractSitemapUrlRenderer<GoogleCodeSitemapUrl> implements ISitemapUrlRenderer<GoogleCodeSitemapUrl> {
/**
* Configures the generator with a base URL and directory to write the sitemap files.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
*/
public GoogleCodeSitemapGenerator(URL baseUrl, File baseDir) {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
public Class<GoogleCodeSitemapUrl> getUrlClass() {
return GoogleCodeSitemapUrl.class;
}
public void render(GoogleCodeSitemapUrl url, OutputStreamWriter out,
W3CDateFormat dateFormat) throws IOException {
StringBuilder sb = new StringBuilder();
sb.append(" <codesearch:codesearch>\n");
renderTag(sb, "codesearch", "filetype", url.getFileType());
renderTag(sb, "codesearch", "license", url.getLicense());
renderTag(sb, "codesearch", "filename", url.getFileName());
renderTag(sb, "codesearch", "packageurl", url.getPackageUrl());
renderTag(sb, "codesearch", "packagemap", url.getPackageMap());
sb.append(" </codesearch:codesearch>\n");
super.render(url, out, dateFormat, sb.toString());
}
public String getXmlNamespaces() {
return "xmlns:codesearch=\"http://www.google.com/codesearch/schemas/sitemap/1.0\"";
}
}
/**
* Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl
* @throws MalformedURLException Exception
*/
public GoogleCodeSitemapGenerator(String baseUrl) throws MalformedURLException {
this(new SitemapGeneratorOptions(new URL(baseUrl)));
}
/**
* Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleCodeSitemapGenerator(URL baseUrl) {
this(new SitemapGeneratorOptions(baseUrl));
}
/**
* Configures a builder so you can specify sitemap generator options
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
* @return a builder; call .build() on it to make a sitemap generator
*/
public static SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> builder(URL baseUrl, File baseDir) {
return new SitemapGeneratorBuilder<GoogleCodeSitemapGenerator>(baseUrl, baseDir, GoogleCodeSitemapGenerator.class);
}
/**
* Configures a builder so you can specify sitemap generator options
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
* @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
* @return a builder; call .build() on it to make a sitemap generator
* @throws MalformedURLException
*/
public static SitemapGeneratorBuilder<GoogleCodeSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
return new SitemapGeneratorBuilder<GoogleCodeSitemapGenerator>(baseUrl, baseDir, GoogleCodeSitemapGenerator.class);
}
private static class Renderer extends AbstractSitemapUrlRenderer<GoogleCodeSitemapUrl> implements ISitemapUrlRenderer<GoogleCodeSitemapUrl> {
public Class<GoogleCodeSitemapUrl> getUrlClass() {
return GoogleCodeSitemapUrl.class;
}
public String getXmlNamespaces() {
return "xmlns:codesearch=\"http://www.google.com/codesearch/schemas/sitemap/1.0\"";
}
public void render(GoogleCodeSitemapUrl url, StringBuilder sb,
W3CDateFormat dateFormat) {
StringBuilder tagSb = new StringBuilder();
tagSb.append(" <codesearch:codesearch>\n");
renderTag(tagSb, "codesearch", "filetype", url.getFileType());
renderTag(tagSb, "codesearch", "license", url.getLicense());
renderTag(tagSb, "codesearch", "filename", url.getFileName());
renderTag(tagSb, "codesearch", "packageurl", url.getPackageUrl());
renderTag(tagSb, "codesearch", "packagemap", url.getPackageMap());
tagSb.append(" </codesearch:codesearch>\n");
super.render(url, sb, dateFormat, tagSb.toString());
}
}
}

View File

@ -18,7 +18,7 @@ public class GoogleCodeSitemapUrl extends WebSitemapUrl {
*/
public enum FileType {
/** A special value meaning that the URL is a compressed archive containing code.
* @see @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75259">Supported archive suffixes</a>
* @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=75259">Supported archive suffixes</a>
*/
ARCHIVE("Archive"),
ADA("Ada"),

View File

@ -1,8 +1,6 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
@ -57,6 +55,27 @@ public class GoogleGeoSitemapGenerator extends SitemapGenerator<GoogleGeoSitemap
public GoogleGeoSitemapGenerator(URL baseUrl, File baseDir) {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
/**Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleGeoSitemapGenerator(String baseUrl) throws MalformedURLException {
this(new SitemapGeneratorOptions(new URL(baseUrl)));
}
/**Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleGeoSitemapGenerator(URL baseUrl) {
this(new SitemapGeneratorOptions(baseUrl));
}
private static class Renderer extends AbstractSitemapUrlRenderer<GoogleGeoSitemapUrl> implements ISitemapUrlRenderer<GoogleGeoSitemapUrl> {
@ -64,19 +83,17 @@ public class GoogleGeoSitemapGenerator extends SitemapGenerator<GoogleGeoSitemap
return GoogleGeoSitemapUrl.class;
}
public void render(GoogleGeoSitemapUrl url, OutputStreamWriter out,
W3CDateFormat dateFormat) throws IOException {
StringBuilder sb = new StringBuilder();
sb.append(" <geo:geo>\n");
sb.append(" <geo:format>"+url.getFormat()+"</geo:format>\n");
sb.append(" </geo:geo>\n");
super.render(url, out, dateFormat, sb.toString());
}
public String getXmlNamespaces() {
return "xmlns:geo=\"http://www.google.com/geo/schemas/sitemap/1.0\"";
}
public void render(GoogleGeoSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
StringBuilder tagSb = new StringBuilder();
tagSb.append(" <geo:geo>\n");
tagSb.append(" <geo:format>"+url.getFormat()+"</geo:format>\n");
tagSb.append(" </geo:geo>\n");
super.render(url, sb, dateFormat, tagSb.toString());
}
}
}

View File

@ -0,0 +1,103 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Generates sitemaps whose URL entries carry Google Image search tags. For anything beyond the
 * default options, start from {@link #builder(URL, File)}.
 *
 * @see <a href="https://support.google.com/webmasters/answer/183668">Manage your sitemaps</a>
 */
public class GoogleImageSitemapGenerator extends SitemapGenerator<GoogleImageSitemapUrl, GoogleImageSitemapGenerator> {

    /**
     * Creates a generator from a prepared options object, using the image-aware renderer.
     *
     * @param options the generator options
     */
    GoogleImageSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }

    /**
     * Creates a generator that writes sitemap files into a directory.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public GoogleImageSitemapGenerator(String baseUrl, File baseDir) throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Creates a generator that writes sitemap files into a directory.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleImageSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Creates a generator with a base URL and no directory. Such a generator is meant for
     * obtaining XML-formatted sitemap strings rather than for writing files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public GoogleImageSitemapGenerator(String baseUrl) throws MalformedURLException {
        this(new SitemapGeneratorOptions(new URL(baseUrl)));
    }

    /**
     * Creates a generator with a base URL and no directory. Such a generator is meant for
     * obtaining XML-formatted sitemap strings rather than for writing files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     */
    public GoogleImageSitemapGenerator(URL baseUrl) {
        this(new SitemapGeneratorOptions(baseUrl));
    }

    /**
     * Starts a builder through which generator options can be specified.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleImageSitemapGenerator> builder(URL baseUrl, File baseDir) {
        return new SitemapGeneratorBuilder<GoogleImageSitemapGenerator>(baseUrl, baseDir, GoogleImageSitemapGenerator.class);
    }

    /**
     * Starts a builder through which generator options can be specified.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public static SitemapGeneratorBuilder<GoogleImageSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        return new SitemapGeneratorBuilder<GoogleImageSitemapGenerator>(baseUrl, baseDir, GoogleImageSitemapGenerator.class);
    }

    /** Renders a {@link GoogleImageSitemapUrl} together with one image element per attached image. */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleImageSitemapUrl> implements ISitemapUrlRenderer<GoogleImageSitemapUrl> {

        public Class<GoogleImageSitemapUrl> getUrlClass() {
            return GoogleImageSitemapUrl.class;
        }

        public String getXmlNamespaces() {
            return "xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\"";
        }

        public void render(GoogleImageSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
            // Collect the image elements first; the base renderer places them inside the <url> entry.
            StringBuilder imageXml = new StringBuilder();
            for (Image current : url.getImages()) {
                appendImage(imageXml, current);
            }
            super.render(url, sb, dateFormat, imageXml.toString());
        }

        /** Appends a single image element, with its child tags, to {@code out}. */
        private void appendImage(StringBuilder out, Image image) {
            out.append(" <image:image>\n");
            renderTag(out, "image", "loc", image.getUrl());
            renderTag(out, "image", "caption", image.getCaption());
            renderTag(out, "image", "title", image.getTitle());
            renderTag(out, "image", "geo_location", image.getGeoLocation());
            renderTag(out, "image", "license", image.getLicense());
            out.append(" </image:image>\n");
        }
    }
}

View File

@ -0,0 +1,74 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * One configurable Google Image Search URL. To configure, use {@link Options}.
 *
 * @see Options
 * @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=183668">Creating Image Sitemaps</a>
 */
public class GoogleImageSitemapUrl extends WebSitemapUrl {

    /** Largest number of image tags a single URL may carry. */
    private static final int MAX_IMAGES = 1000;

    /** Message used whenever the image limit would be exceeded. */
    private static final String TOO_MANY_IMAGES = "A URL cannot have more than 1000 image tags";

    private final List<Image> images;

    /**
     * Creates a URL entry without images.
     *
     * @param url the page URL
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public GoogleImageSitemapUrl(String url) throws MalformedURLException {
        this(new Options(url));
    }

    /**
     * Creates a URL entry without images.
     *
     * @param url the page URL
     */
    public GoogleImageSitemapUrl(URL url) {
        this(new Options(url));
    }

    /**
     * Creates a URL entry from the given options, taking over the images configured on them.
     *
     * @param options the configured options
     */
    public GoogleImageSitemapUrl(Options options) {
        super(options);
        this.images = options.images;
    }

    /**
     * Adds one image to this URL.
     *
     * @param image the image to add
     * @throws RuntimeException if this URL already holds 1000 images; the list is left unchanged
     */
    public void addImage(Image image) {
        // Check before mutating so a rejected call does not leave 1001 entries behind.
        if (this.images.size() >= MAX_IMAGES) {
            throw new RuntimeException(TOO_MANY_IMAGES);
        }
        this.images.add(image);
    }

    /** Options to configure Google Image URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleImageSitemapUrl, GoogleImageSitemapUrl.Options> {
        private List<Image> images;

        /**
         * Specifies the url.
         *
         * @param url the page URL
         */
        public Options(URL url) {
            super(url, GoogleImageSitemapUrl.class);
            images = new ArrayList<Image>();
        }

        /**
         * Specifies the url.
         *
         * @param url the page URL
         * @throws MalformedURLException if {@code url} cannot be parsed as a URL
         */
        public Options(String url) throws MalformedURLException {
            super(url, GoogleImageSitemapUrl.class);
            images = new ArrayList<Image>();
        }

        /**
         * Replaces the configured images with a copy of the given list.
         *
         * @param images the images to attach; {@code null} is treated as "no images"
         * @return this instance, for chaining
         * @throws RuntimeException if more than 1000 images are given
         */
        public Options images(List<Image> images) {
            if (images != null && images.size() > MAX_IMAGES) {
                throw new RuntimeException(TOO_MANY_IMAGES);
            }
            // Always keep a private, growable list: callers may pass null, an immutable list or a
            // fixed-size Arrays.asList view, all of which would break getImages()/addImage() later.
            this.images = (images == null) ? new ArrayList<Image>() : new ArrayList<Image>(images);
            return this;
        }

        /**
         * Replaces the configured images with the given ones.
         *
         * @param images the images to attach; a {@code null} array is treated as "no images"
         * @return this instance, for chaining
         * @throws RuntimeException if more than 1000 images are given
         */
        public Options images(Image... images) {
            if (images == null) {
                return images((List<Image>) null);
            }
            if (images.length > MAX_IMAGES) {
                throw new RuntimeException(TOO_MANY_IMAGES);
            }
            return images(Arrays.asList(images));
        }
    }

    /**
     * Retrieves the list of images.
     *
     * @return the images attached to this URL; this is the internal list, not a copy
     */
    public List<Image> getImages() {
        return this.images;
    }
}

View File

@ -0,0 +1,134 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.*;
import java.util.Map;
import java.util.Map.Entry;
/**
 * Builds a Google Link Sitemap (to indicate alternate language pages).
 *
 * @author Sergio Vico
 * @see <a href="https://support.google.com/webmasters/answer/2620865">Creating alternate language pages Sitemaps</a>
 * @see <a href="https://developers.google.com/search/mobile-sites/mobile-seo/separate-urls?hl=en">Mobile SEO configurations | Separate URLs </a>
 */
public class GoogleLinkSitemapGenerator extends SitemapGenerator<GoogleLinkSitemapUrl, GoogleLinkSitemapGenerator> {

    /** Renders a {@link GoogleLinkSitemapUrl} together with one {@code xhtml:link} element per alternate. */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleLinkSitemapUrl>
            implements ISitemapUrlRenderer<GoogleLinkSitemapUrl> {

        public Class<GoogleLinkSitemapUrl> getUrlClass() {
            return GoogleLinkSitemapUrl.class;
        }

        public String getXmlNamespaces() {
            return "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\"";
        }

        public void render(final GoogleLinkSitemapUrl url, final StringBuilder sb, final W3CDateFormat dateFormat) {
            final StringBuilder tagSb = new StringBuilder();
            for (final Entry<URI, Map<String, String>> entry : url.getAlternates().entrySet()) {
                tagSb.append(" <xhtml:link\n");
                tagSb.append(" rel=\"alternate\"\n");
                for (final Entry<String, String> innerEntry : entry.getValue().entrySet()) {
                    // Attribute values are caller-supplied text; escape them like the href below so
                    // characters such as & or " cannot produce malformed XML. String.valueOf keeps
                    // the previous rendering ("null") for a null value.
                    tagSb.append(" ")
                            .append(innerEntry.getKey())
                            .append("=\"")
                            .append(UrlUtils.escapeXml(String.valueOf(innerEntry.getValue())))
                            .append("\"\n");
                }
                tagSb.append(" href=\"" + UrlUtils.escapeXml(entry.getKey().toString()) + "\"\n");
                tagSb.append(" />\n");
            }
            super.render(url, sb, dateFormat, tagSb.toString());
        }
    }

    /**
     * Configures a builder so you can specify sitemap generator options
     *
     * @param baseUrl
     *            All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir
     *            Sitemap files will be generated in this directory as either "sitemap.xml" or
     *            "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException
     *             if {@code baseUrl} cannot be parsed as a URL
     */
    public static SitemapGeneratorBuilder<GoogleLinkSitemapGenerator> builder(final String baseUrl, final File baseDir)
            throws MalformedURLException {
        return new SitemapGeneratorBuilder<GoogleLinkSitemapGenerator>(baseUrl, baseDir,
                GoogleLinkSitemapGenerator.class);
    }

    /**
     * Configures a builder so you can specify sitemap generator options
     *
     * @param baseUrl
     *            All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir
     *            Sitemap files will be generated in this directory as either "sitemap.xml" or
     *            "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleLinkSitemapGenerator> builder(final URL baseUrl, final File baseDir) {
        return new SitemapGeneratorBuilder<GoogleLinkSitemapGenerator>(baseUrl, baseDir,
                GoogleLinkSitemapGenerator.class);
    }

    /**
     * Configures the generator with a base URL and a null directory. The object constructed is not
     * intended to be used to write to files. Rather, it is intended to be used to obtain
     * XML-formatted strings that represent sitemaps.
     *
     * @param baseUrl
     *            All URLs in the generated sitemap(s) should appear under this base URL
     * @throws MalformedURLException
     *             if {@code baseUrl} cannot be parsed as a URL
     */
    public GoogleLinkSitemapGenerator(final String baseUrl) throws MalformedURLException {
        this(new SitemapGeneratorOptions(new URL(baseUrl)));
    }

    /**
     * Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl
     *            All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir
     *            Sitemap files will be generated in this directory as either "sitemap.xml" or
     *            "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException
     *             if {@code baseUrl} cannot be parsed as a URL
     */
    public GoogleLinkSitemapGenerator(final String baseUrl, final File baseDir) throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Configures the generator with a base URL and a null directory. The object constructed is not
     * intended to be used to write to files. Rather, it is intended to be used to obtain
     * XML-formatted strings that represent sitemaps.
     *
     * @param baseUrl
     *            All URLs in the generated sitemap(s) should appear under this base URL
     */
    public GoogleLinkSitemapGenerator(final URL baseUrl) {
        this(new SitemapGeneratorOptions(baseUrl));
    }

    /**
     * Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl
     *            All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir
     *            Sitemap files will be generated in this directory as either "sitemap.xml" or
     *            "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleLinkSitemapGenerator(final URL baseUrl, final File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Creates a generator from a prepared options object, using the link-aware renderer.
     *
     * @param options
     *            the generator options
     */
    GoogleLinkSitemapGenerator(final AbstractSitemapGeneratorOptions<?> options) {
        super(options, new Renderer());
    }
}

View File

@ -0,0 +1,102 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * One configurable Google Link URL. To configure, use {@link Options}
 *
 * @author Sergio Vico
 * @see Options
 * @see <a href="https://support.google.com/webmasters/answer/2620865">Creating alternate language pages Sitemaps</a>
 * @see <a href="https://developers.google.com/search/mobile-sites/mobile-seo/separate-urls?hl=en">Mobile SEO configurations | Separate URLs </a>
 */
public class GoogleLinkSitemapUrl extends WebSitemapUrl {

    /** Options to configure URLs with alternates */
    public static class Options extends AbstractSitemapUrlOptions<GoogleLinkSitemapUrl, Options> {
        private final Map<URI, Map<String, String>> alternates;

        /**
         * Converts String hrefs into {@link URI} keys, keeping the iteration order of the input.
         * The attribute maps are passed through as-is; the constructor copies them.
         */
        private static Map<URI, Map<String, String>> convertAlternates(final Map<String, Map<String, String>> alternates)
                throws URISyntaxException {
            if (alternates == null) throw new NullPointerException("alternates may not be null");
            final Map<URI, Map<String, String>> converted = new LinkedHashMap<URI, Map<String, String>>();
            for (final Map.Entry<String, Map<String, String>> entry : alternates.entrySet()) {
                converted.put(new URI(entry.getKey()), entry.getValue());
            }
            return converted;
        }

        /**
         * Copies both the outer map and every attribute map, so that changes the caller makes to
         * its own maps afterwards do not alter the configured alternates.
         */
        private static Map<URI, Map<String, String>> copyAlternates(final Map<URI, Map<String, String>> alternates) {
            if (alternates == null) throw new NullPointerException("alternates may not be null");
            final Map<URI, Map<String, String>> copy = new LinkedHashMap<URI, Map<String, String>>();
            for (final Map.Entry<URI, Map<String, String>> entry : alternates.entrySet()) {
                if (entry.getValue() == null) {
                    throw new NullPointerException("attributes of alternate " + entry.getKey() + " may not be null");
                }
                copy.put(entry.getKey(), new LinkedHashMap<String, String>(entry.getValue()));
            }
            return copy;
        }

        /**
         * Options constructor with the alternates configurations
         *
         * @param url Base URL into which we will be adding alternates
         * @param alternates Map&lt;String, Map&lt;String, String&gt;&gt; where the key is the href and
         *                   the value is a generic Map&lt;String, String&gt; holding the attributes of
         *                   the link (e.g. hreflang, media, ...)
         * @throws URISyntaxException if one of the hrefs is not a valid URI
         * @throws MalformedURLException if {@code url} cannot be parsed as a URL
         */
        public Options(final String url, final Map<String, Map<String, String>> alternates) throws URISyntaxException, MalformedURLException {
            this(new URL(url), convertAlternates(alternates));
        }

        /**
         * Options constructor with the alternates configurations
         *
         * @param url Base URL into which we will be adding alternates
         * @param alternates Map&lt;URI, Map&lt;String, String&gt;&gt; where the key is the href and
         *                   the value is a generic Map&lt;String, String&gt; holding the attributes of
         *                   the link (e.g. hreflang, media, ...)
         */
        public Options(final URL url, final Map<URI, Map<String, String>> alternates) {
            super(url, GoogleLinkSitemapUrl.class);
            this.alternates = copyAlternates(alternates);
        }
    }

    private final Map<URI, Map<String, String>> alternates;

    /**
     * Constructor specifying the URL and the alternates configurations with Options object
     *
     * @param options Configuration object to initialize the GoogleLinkSitemapUrl with.
     * @see Options#Options(java.lang.String, java.util.Map)
     * @see Options#Options(java.net.URL, java.util.Map)
     */
    public GoogleLinkSitemapUrl(final Options options) {
        super(options);
        alternates = options.alternates;
    }

    /**
     * Constructor specifying the URL as a String and the alternates configurations
     *
     * @param url Base URL into which we will be adding alternates
     * @param alternates Map&lt;String, Map&lt;String, String&gt;&gt; where the key is the href and
     *                   the value is a generic Map&lt;String, String&gt; holding the attributes of
     *                   the link (e.g. hreflang, media, ...)
     * @throws URISyntaxException if one of the hrefs is not a valid URI
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public GoogleLinkSitemapUrl(final String url, final Map<String, Map<String, String>> alternates) throws URISyntaxException, MalformedURLException {
        this(new Options(url, alternates));
    }

    /**
     * Constructor specifying the URL as a URL and the alternates configurations
     *
     * @param url Base URL into which we will be adding alternates
     * @param alternates Map&lt;URI, Map&lt;String, String&gt;&gt; where the key is the href and
     *                   the value is a generic Map&lt;String, String&gt; holding the attributes of
     *                   the link (e.g. hreflang, media, ...)
     */
    public GoogleLinkSitemapUrl(final URL url, final Map<URI, Map<String, String>> alternates) {
        this(new Options(url, alternates));
    }

    /**
     * Retrieves the alternates of this URL.
     *
     * @return the map from alternate href to link attributes; this is the internal map, not a copy
     */
    public Map<URI, Map<String, String>> getAlternates() {
        return this.alternates;
    }
}

View File

@ -1,8 +1,6 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
@ -56,6 +54,26 @@ public class GoogleMobileSitemapGenerator extends SitemapGenerator<GoogleMobileS
public GoogleMobileSitemapGenerator(URL baseUrl, File baseDir) {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
/**Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleMobileSitemapGenerator(String baseUrl) throws MalformedURLException {
this(new SitemapGeneratorOptions(new URL(baseUrl)));
}
/**Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleMobileSitemapGenerator(URL baseUrl) {
this(new SitemapGeneratorOptions(baseUrl));
}
private static class Renderer extends AbstractSitemapUrlRenderer<GoogleMobileSitemapUrl> implements ISitemapUrlRenderer<GoogleMobileSitemapUrl> {
@ -63,16 +81,14 @@ public class GoogleMobileSitemapGenerator extends SitemapGenerator<GoogleMobileS
return GoogleMobileSitemapUrl.class;
}
public void render(GoogleMobileSitemapUrl url, OutputStreamWriter out,
W3CDateFormat dateFormat) throws IOException {
String additionalData = " <mobile:mobile/>\n";
super.render(url, out, dateFormat, additionalData);
}
public String getXmlNamespaces() {
return "xmlns:mobile=\"http://www.google.com/schemas/sitemap-mobile/1.0\"";
}
/**
 * Renders one mobile URL entry by delegating to the superclass renderer with the
 * empty {@code mobile:mobile} marker element as the extra content.
 *
 * @param url the mobile URL to render
 * @param sb the builder passed through to the superclass render call
 * @param dateFormat the date format passed through to the superclass render call
 */
public void render(GoogleMobileSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
    super.render(url, sb, dateFormat, " <mobile:mobile/>\n");
}
}
}

View File

@ -18,8 +18,11 @@ public class GoogleMobileSitemapUrl extends WebSitemapUrl {
public Options(String url) throws MalformedURLException {
this(new URL(url));
}
/** Specifies the url */
/**
* Specifies the url
* @param url
*/
public Options(URL url) {
super(url, GoogleMobileSitemapUrl.class);
}

View File

@ -0,0 +1,28 @@
package com.redfin.sitemapgenerator;
/**
 * Mutable holder for the two properties that describe a news publication:
 * its name and its language.
 */
public class GoogleNewsPublication {

    /** Name of the publication. */
    private String name;

    /** Language of the publication. */
    private String language;

    /**
     * Creates a publication descriptor. Neither argument is validated here.
     *
     * @param name the publication name
     * @param language the publication language
     */
    public GoogleNewsPublication(String name, String language) {
        setName(name);
        setLanguage(language);
    }

    /** @return the publication name as last set */
    public final String getName() {
        return this.name;
    }

    /** @param name the new publication name */
    public final void setName(String name) {
        this.name = name;
    }

    /** @return the publication language as last set */
    public final String getLanguage() {
        return this.language;
    }

    /** @param language the new publication language */
    public final void setLanguage(String language) {
        this.language = language;
    }
}

View File

@ -1,8 +1,6 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
@ -69,26 +67,50 @@ public class GoogleNewsSitemapGenerator extends SitemapGenerator<GoogleNewsSitem
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
/**
 * Configures the generator with a base URL and a null directory. The object constructed
 * is not intended to be used to write to files. Rather, it is intended to be used to obtain
 * XML-formatted strings that represent sitemaps.
 *
 * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
 * @throws MalformedURLException if {@code baseUrl} cannot be parsed into a {@link URL}
 */
public GoogleNewsSitemapGenerator(String baseUrl) throws MalformedURLException {
this(new SitemapGeneratorOptions(new URL(baseUrl)));
}
/**Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleNewsSitemapGenerator(URL baseUrl) {
this(new SitemapGeneratorOptions(baseUrl));
}
private static class Renderer extends AbstractSitemapUrlRenderer<GoogleNewsSitemapUrl> implements ISitemapUrlRenderer<GoogleNewsSitemapUrl> {
public Class<GoogleNewsSitemapUrl> getUrlClass() {
return GoogleNewsSitemapUrl.class;
}
public void render(GoogleNewsSitemapUrl url, OutputStreamWriter out,
W3CDateFormat dateFormat) throws IOException {
StringBuilder sb = new StringBuilder();
sb.append(" <news:news>\n");
renderTag(sb, "news", "publication_date", dateFormat.format(url.getPublicationDate()));
renderTag(sb, "news", "keywords", url.getKeywords());
sb.append(" </news:news>\n");
super.render(url, out, dateFormat, sb.toString());
}
public String getXmlNamespaces() {
return "xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\"";
}
/**
 * Renders one news URL entry. The {@code news:news} fragment (publication name and
 * language, genres, publication date, title, keywords) is assembled in a scratch
 * buffer and then handed to the superclass renderer together with the URL.
 *
 * @param url the news URL whose properties are written
 * @param sb the builder passed through to the superclass render call
 * @param dateFormat used to format the publication date
 */
public void render(GoogleNewsSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
    final String prefix = "news";
    final GoogleNewsPublication publication = url.getPublication();
    final StringBuilder newsXml = new StringBuilder();

    // Opening tags of the news element and its nested publication element.
    newsXml.append(" <news:news>\n").append(" <news:publication>\n");
    renderSubTag(newsXml, prefix, "name", publication.getName());
    renderSubTag(newsXml, prefix, "language", publication.getLanguage());
    newsXml.append(" </news:publication>\n");

    // Remaining properties, in the same order as before.
    renderTag(newsXml, prefix, "genres", url.getGenres());
    renderTag(newsXml, prefix, "publication_date", dateFormat.format(url.getPublicationDate()));
    renderTag(newsXml, prefix, "title", url.getTitle());
    renderTag(newsXml, prefix, "keywords", url.getKeywords());
    newsXml.append(" </news:news>\n");

    super.render(url, sb, dateFormat, newsXml.toString());
}
}

View File

@ -15,22 +15,42 @@ public class GoogleNewsSitemapUrl extends WebSitemapUrl {
private final Date publicationDate;
private final String keywords;
private final String genres;
private final String title;
private final GoogleNewsPublication publication;
/** Options to configure Google News URLs */
public static class Options extends AbstractSitemapUrlOptions<GoogleNewsSitemapUrl, Options> {
private Date publicationDate;
private String keywords;
private String genres;
private String title;
private GoogleNewsPublication publication;
/** Specifies an URL and publication date (which is mandatory for Google News) */
public Options(String url, Date publicationDate) throws MalformedURLException {
this(new URL(url), publicationDate);
public Options(String url, Date publicationDate, String title, GoogleNewsPublication publication) throws MalformedURLException {
this(new URL(url), publicationDate, title, publication);
}
public Options(String url, Date publicationDate, String title, String name, String language) throws MalformedURLException {
this(new URL(url), publicationDate, title, new GoogleNewsPublication(name, language));
}
public Options(URL url, Date publicationDate, String title, String name, String language) {
this(url, publicationDate, title, new GoogleNewsPublication(name, language));
}
/** Specifies an URL and publication date (which is mandatory for Google News) */
public Options(URL url, Date publicationDate) {
/**
 * Specifies a URL together with the publication date, title and publication.
 * All three, as well as the publication's name and language, must be non-null.
 *
 * @param url the URL of the news article
 * @param publicationDate the publication date
 * @param title the article title
 * @param publication the publication, with name and language set
 * @throws NullPointerException if any of the required values is null
 */
public Options(URL url, Date publicationDate, String title, GoogleNewsPublication publication) {
    super(url, GoogleNewsSitemapUrl.class);

    // Validate everything first; any failure aborts construction.
    if (publicationDate == null) {
        throw new NullPointerException("publicationDate must not be null");
    }
    if (title == null) {
        throw new NullPointerException("title must not be null");
    }
    if (publication == null) {
        throw new NullPointerException("publication must not be null");
    }
    if (publication.getName() == null) {
        throw new NullPointerException("publication name must not be null");
    }
    if (publication.getLanguage() == null) {
        throw new NullPointerException("publication language must not be null");
    }

    this.publicationDate = publicationDate;
    this.title = title;
    this.publication = publication;
}
/** Specifies a list of comma-delimited keywords */
@ -41,18 +61,32 @@ public class GoogleNewsSitemapUrl extends WebSitemapUrl {
/** Specifies a list of comma-delimited keywords */
public Options keywords(Iterable<String> keywords) {
this.keywords = getListAsCommaSeparatedString(keywords);
return this;
}
/**
 * Specifies the genres as a single string, stored exactly as given.
 *
 * @param genres the genres value
 * @return this options object, for chaining
 */
public Options genres(String genres) {
this.genres = genres;
return this;
}
/**
 * Specifies the genres as a sequence of values; the values are combined into one
 * string by {@code getListAsCommaSeparatedString}.
 *
 * @param genres the individual genre values
 * @return this options object, for chaining
 */
public Options genres(Iterable<String> genres) {
this.genres = getListAsCommaSeparatedString(genres);
return this;
}
private String getListAsCommaSeparatedString(Iterable<String> values) {
StringBuilder sb = new StringBuilder();
boolean first = true;
for (String keyword : keywords) {
for (String value : values) {
if (first) {
first = false;
} else {
sb.append(", ");
}
sb.append(keyword);
sb.append(value);
}
this.keywords = sb.toString();
return this;
return sb.toString();
}
/** Specifies a list of comma-delimited keywords */
@ -60,16 +94,30 @@ public class GoogleNewsSitemapUrl extends WebSitemapUrl {
return keywords(Arrays.asList(keywords));
}
/**
 * Specifies the genres as individual arguments; wraps them with
 * {@code Arrays.asList} and delegates to another {@code genres} overload.
 *
 * @param genres the individual genre values
 * @return this options object, for chaining
 */
public Options genres(String... genres) {
return genres(Arrays.asList(genres));
}
}
/** Specifies an URL and publication date (which is mandatory for Google News) */
public GoogleNewsSitemapUrl(URL url, Date publicationDate) {
this(new Options(url, publicationDate));
/** Specifies an URL and publication date, title and publication (which are mandatory for Google News) */
public GoogleNewsSitemapUrl(URL url, Date publicationDate, String title, String name, String language) {
this(new Options(url, publicationDate, title, name, language));
}
/** Specifies an URL and publication date (which is mandatory for Google News) */
public GoogleNewsSitemapUrl(String url, Date publicationDate) throws MalformedURLException {
this(new Options(url, publicationDate));
/** Specifies an URL and publication date, title and publication (which are mandatory for Google News) */
public GoogleNewsSitemapUrl(URL url, Date publicationDate, String title, GoogleNewsPublication publication) {
this(new Options(url, publicationDate, title, publication));
}
/** Specifies an URL and publication date, title and publication (which are mandatory for Google News) */
public GoogleNewsSitemapUrl(String url, Date publicationDate, String title, String name, String language) throws MalformedURLException {
this(new Options(url, publicationDate, title, name, language));
}
/** Specifies an URL and publication date, title and publication (which are mandatory for Google News) */
public GoogleNewsSitemapUrl(String url, Date publicationDate, String title, GoogleNewsPublication publication) throws MalformedURLException {
this(new Options(url, publicationDate, title, publication));
}
/** Configures an URL with options */
@ -77,6 +125,9 @@ public class GoogleNewsSitemapUrl extends WebSitemapUrl {
super(options);
publicationDate = options.publicationDate;
keywords = options.keywords;
genres = options.genres;
title = options.title;
publication = options.publication;
}
/** Retrieves the publication date */
@ -89,7 +140,26 @@ public class GoogleNewsSitemapUrl extends WebSitemapUrl {
return keywords;
}
/**
* Retrieves the Genres
*/
public String getGenres() {
return genres;
}
/**
* Retrieves the title
*/
public String getTitle() {
return title;
}
/**
* Retrieves the publication with name and language
*/
public GoogleNewsPublication getPublication() {
return publication;
}
}

View File

@ -1,8 +1,6 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
@ -56,49 +54,67 @@ public class GoogleVideoSitemapGenerator extends SitemapGenerator<GoogleVideoSit
public GoogleVideoSitemapGenerator(URL baseUrl, File baseDir) {
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
/**
 * Configures the generator with a base URL and a null directory. The object constructed
 * is not intended to be used to write to files. Rather, it is intended to be used to obtain
 * XML-formatted strings that represent sitemaps.
 *
 * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
 * @throws MalformedURLException if {@code baseUrl} cannot be parsed into a {@link URL}
 */
public GoogleVideoSitemapGenerator(String baseUrl) throws MalformedURLException {
this(new SitemapGeneratorOptions(new URL(baseUrl)));
}
/**Configures the generator with a base URL and a null directory. The object constructed
* is not intended to be used to write to files. Rather, it is intended to be used to obtain
* XML-formatted strings that represent sitemaps.
*
* @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
*/
public GoogleVideoSitemapGenerator(URL baseUrl) {
this(new SitemapGeneratorOptions(baseUrl));
}
private static class Renderer extends AbstractSitemapUrlRenderer<GoogleVideoSitemapUrl> implements ISitemapUrlRenderer<GoogleVideoSitemapUrl> {
public Class<GoogleVideoSitemapUrl> getUrlClass() {
return GoogleVideoSitemapUrl.class;
}
public void render(GoogleVideoSitemapUrl url, OutputStreamWriter out,
W3CDateFormat dateFormat) throws IOException {
StringBuilder sb = new StringBuilder();
sb.append(" <video:video>\n");
renderTag(sb, "video", "content_loc", url.getContentUrl());
if (url.getPlayerUrl() != null) {
sb.append(" <video:player_loc allow_embed=\"");
sb.append(url.getAllowEmbed());
sb.append("\">");
sb.append(url.getPlayerUrl());
sb.append("</video:player_loc>\n");
}
renderTag(sb, "video", "thumbnail_loc", url.getThumbnailUrl());
renderTag(sb, "video", "title", url.getTitle());
renderTag(sb, "video", "description", url.getDescription());
renderTag(sb, "video", "rating", url.getRating());
renderTag(sb, "video", "view_count", url.getViewCount());
if (url.getPublicationDate() != null) {
renderTag(sb, "video", "publication_date", dateFormat.format(url.getPublicationDate()));
}
if (url.getTags() != null) {
for (String tag : url.getTags()) {
renderTag(sb, "video", "tag", tag);
}
}
renderTag(sb, "video", "category", url.getCategory());
renderTag(sb, "video", "family_friendly", url.getFamilyFriendly());
renderTag(sb, "video", "duration", url.getDurationInSeconds());
sb.append(" </video:video>\n");
super.render(url, out, dateFormat, sb.toString());
}
public String getXmlNamespaces() {
return "xmlns:video=\"http://www.google.com/schemas/sitemap-video/1.1\"";
}
/**
 * Renders one video URL entry. The {@code video:video} fragment is assembled in a
 * scratch buffer and then handed to the superclass renderer as its fourth argument.
 *
 * @param url the video URL whose properties are written
 * @param sb the builder passed through to the superclass render call
 * @param dateFormat used to format the publication date, when one is set
 */
public void render(GoogleVideoSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
StringBuilder tagSb = new StringBuilder();
tagSb.append(" <video:video>\n");
// NOTE(review): several of the values passed to renderTag below can be null
// (e.g. only one of content/player URL is mandatory); presumably renderTag
// skips null values -- confirm against AbstractSitemapUrlRenderer.
renderTag(tagSb, "video", "content_loc", url.getContentUrl());
// player_loc is written by hand because it carries the allow_embed attribute.
// NOTE(review): the player URL is appended verbatim here, whereas every other
// value goes through renderTag -- confirm whether renderTag escapes XML and
// whether this element needs the same treatment.
if (url.getPlayerUrl() != null) {
tagSb.append(" <video:player_loc allow_embed=\"");
tagSb.append(url.getAllowEmbed());
tagSb.append("\">");
tagSb.append(url.getPlayerUrl());
tagSb.append("</video:player_loc>\n");
}
renderTag(tagSb, "video", "thumbnail_loc", url.getThumbnailUrl());
renderTag(tagSb, "video", "title", url.getTitle());
renderTag(tagSb, "video", "description", url.getDescription());
renderTag(tagSb, "video", "rating", url.getRating());
renderTag(tagSb, "video", "view_count", url.getViewCount());
// The date is only formatted when one has been set.
if (url.getPublicationDate() != null) {
renderTag(tagSb, "video", "publication_date", dateFormat.format(url.getPublicationDate()));
}
// One video:tag element per tag.
if (url.getTags() != null) {
for (String tag : url.getTags()) {
renderTag(tagSb, "video", "tag", tag);
}
}
renderTag(tagSb, "video", "category", url.getCategory());
renderTag(tagSb, "video", "family_friendly", url.getFamilyFriendly());
renderTag(tagSb, "video", "duration", url.getDurationInSeconds());
tagSb.append(" </video:video>\n");
super.render(url, sb, dateFormat, tagSb.toString());
}
}

View File

@ -5,347 +5,396 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
/** One configurable Google Video Search URL. To configure, use {@link Options}
*
/**
* One configurable Google Video Search URL. To configure, use {@link Options}
*
* @author Dan Fabulich
* @see Options
* @see <a href="http://www.google.com/support/webmasters/bin/answer.py?answer=80472">Creating Video Sitemaps</a>
*/
public class GoogleVideoSitemapUrl extends WebSitemapUrl {
private final URL playerUrl;
private final URL contentUrl;
private final URL thumbnailUrl;
private final String title;
private final String description;
private final Double rating;
private final Integer viewCount;
private final Date publicationDate;
private final ArrayList<String> tags;
private final String category;
// TODO can there be multiple categories?
// "Usually a video will belong to a single category."
// http://www.google.com/support/webmasters/bin/answer.py?answer=80472
private final String familyFriendly;
private final Integer durationInSeconds;
private final String allowEmbed;
/** Options to configure Google Video URLs */
public static class Options extends AbstractSitemapUrlOptions<GoogleVideoSitemapUrl, Options> {
private URL playerUrl;
private URL contentUrl;
private URL thumbnailUrl;
private String title;
private String description;
private Double rating;
private Integer viewCount;
private Date publicationDate;
private ArrayList<String> tags;
private String category;
// TODO can there be multiple categories?
// "Usually a video will belong to a single category."
// http://www.google.com/support/webmasters/bin/answer.py?answer=80472
private Boolean familyFriendly;
private Integer durationInSeconds;
private Boolean allowEmbed;
/** Specifies a landing page URL, together with a "player" (e.g. SWF)
*
* @param url the landing page URL
* @param playerUrl the URL of the "player" (e.g. SWF file)
* @param allowEmbed when specifying a player, you must specify whether embedding is allowed
*/
public Options(URL url, URL playerUrl, boolean allowEmbed) {
super(url, GoogleVideoSitemapUrl.class);
this.playerUrl = playerUrl;
this.allowEmbed = allowEmbed;
}
/** Specifies a landing page URL, together with the URL of the underlying video (e.g. FLV)
*
* @param url the landing page URL
* @param contentUrl the URL of the underlying video (e.g. FLV)
*/
public Options(URL url, URL contentUrl) {
super(url, GoogleVideoSitemapUrl.class);
this.contentUrl = contentUrl;
}
/** Specifies a player URL (e.g. SWF)
*
* @param playerUrl the URL of the "player" (e.g. SWF file)
* @param allowEmbed when specifying a player, you must specify whether embedding is allowed
*/
public Options playerUrl(URL playerUrl, boolean allowEmbed) {
this.playerUrl = playerUrl;
this.allowEmbed = allowEmbed;
return this;
}
/** Specifies the URL of the underlying video (e.g FLV) */
public Options contentUrl(URL contentUrl) {
this.contentUrl = contentUrl;
return this;
}
/**
* A URL pointing to the URL for the video thumbnail image file. This
* allows you to suggest the thumbnail you want displayed in search
* results. If you provide a {@link #contentUrl(URL)}, Google will attempt
* to generate a set of representative thumbnail images from your actual
* video content. However, we strongly recommend that you provide a
* thumbnail URL to increase the likelihood of your video being included
* in the video index.
*/
public Options thumbnailUrl(URL thumbnailUrl) {
this.thumbnailUrl = thumbnailUrl;
return this;
}
/** The title of the video. Limited to 100 characters. */
public Options title(String title) {
if (title != null) {
if (title.length() > 100) {
throw new RuntimeException("Video title is limited to 100 characters: " + title);
}
}
this.title = title;
return this;
}
/** The description of the video. Descriptions longer than 2048 characters will be truncated. */
public Options description(String description) {
if (description != null) {
if (description.length() > 2048) {
throw new RuntimeException("Truncate video descriptions to 2048 characters: " + description);
}
}
this.description = description;
return this;
}
/** The rating of the video. The value must be number in the range 0.0-5.0. */
public Options rating(Double rating) {
if (rating != null) {
if (rating < 0 || rating > 5.0) {
throw new RuntimeException("Rating must be between 0.0 and 5.0:" + rating);
}
}
this.rating = rating;
return this;
}
/** The number of times the video has been viewed */
public Options viewCount(int viewCount) {
this.viewCount = viewCount;
return this;
}
/** The date the video was first published, in {@link W3CDateFormat}. */
public Options publicationDate(Date publicationDate) {
this.publicationDate = publicationDate;
return this;
}
/**
* Tag associated with the video; tags are generally very short
* descriptions of key concepts associated with a video or piece of
* content. A single video could have several tags, although it might
* belong to only one category. For example, a video about grilling food
* may belong in the Grilling category, but could be tagged "steak",
* "meat", "summer", and "outdoor". Create a new <video:tag> element for
* each tag associated with a video. A maximum of 32 tags is permitted.
*/
public Options tags(ArrayList<String> tags) {
this.tags = tags;
return this;
}
/**
* Tag associated with the video; tags are generally very short
* descriptions of key concepts associated with a video or piece of
* content. A single video could have several tags, although it might
* belong to only one category. For example, a video about grilling food
* may belong in the Grilling category, but could be tagged "steak",
* "meat", "summer", and "outdoor". Create a new <video:tag> element for
* each tag associated with a video. A maximum of 32 tags is permitted.
*/
public Options tags(Iterable<String> tags) {
this.tags = new ArrayList<String>();
for (String tag : tags) {
this.tags.add(tag);
}
return this;
}
/**
* Tag associated with the video; tags are generally very short
* descriptions of key concepts associated with a video or piece of
* content. A single video could have several tags, although it might
* belong to only one category. For example, a video about grilling food
* may belong in the Grilling category, but could be tagged "steak",
* "meat", "summer", and "outdoor". Create a new <video:tag> element for
* each tag associated with a video. A maximum of 32 tags is permitted.
*/
public Options tags(String... tags) {
return tags(Arrays.asList(tags));
}
/**
 * The video's category; for example, <code>cooking</code>. The value
 * should be a string no longer than 256 characters. In general,
 * categories are broad groupings of content by subject. Usually a video
 * will belong to a single category. For example, a site about cooking
 * could have categories for Broiling, Baking, and Grilling
 *
 * @param category the category; may be null
 * @return this options object, for chaining
 * @throws RuntimeException if {@code category} is longer than 256 characters
 */
public Options category(String category) {
    if (category != null && category.length() > 256) {
        // Report the offending category; the message used to echo the unrelated title field.
        throw new RuntimeException("Video category is limited to 256 characters: " + category);
    }
    this.category = category;
    return this;
}
/** Whether the video is suitable for viewing by children */
public Options familyFriendly(boolean familyFriendly) {
this.familyFriendly = familyFriendly;
return this;
}
/** The duration of the video in seconds; value must be between 0 and 28800 (8 hours). */
public Options durationInSeconds(int durationInSeconds) {
if (durationInSeconds < 0 || durationInSeconds > 28800) {
throw new RuntimeException("Duration must be between 0 and 28800 (8 hours):" + durationInSeconds);
}
this.durationInSeconds = durationInSeconds;
return this;
}
}
private final URL playerUrl;
private final URL contentUrl;
private final URL thumbnailUrl;
private final String title;
private final String description;
private final Double rating;
private final Integer viewCount;
private final Date publicationDate;
private final ArrayList<String> tags;
private final String category;
// TODO can there be multiple categories?
// "Usually a video will belong to a single category."
// http://www.google.com/support/webmasters/bin/answer.py?answer=80472
private final String familyFriendly;
private final Integer durationInSeconds;
private final String allowEmbed;
/** Specifies a landing page URL, together with a "player" (e.g. SWF)
*
* @param url the landing page URL
* @param playerUrl the URL of the "player" (e.g. SWF file)
* @param allowEmbed when specifying a player, you must specify whether embedding is allowed
*/
public GoogleVideoSitemapUrl(URL url, URL playerUrl, boolean allowEmbed) {
this(new Options(url, playerUrl, allowEmbed));
}
/** Specifies a landing page URL, together with the URL of the underlying video (e.g. FLV)
*
* @param url the landing page URL
* @param contentUrl the URL of the underlying video (e.g. FLV)
*/
public GoogleVideoSitemapUrl(URL url, URL contentUrl) {
this(new Options(url, contentUrl));
}
/** Configures the url with options */
public GoogleVideoSitemapUrl(Options options) {
super(options);
contentUrl = options.contentUrl;
playerUrl = options.playerUrl;
if (playerUrl == null && contentUrl == null) {
throw new RuntimeException("You must specify either contentUrl or playerUrl or both; neither were specified");
}
allowEmbed = convertBooleanToYesOrNo(options.allowEmbed);
if (playerUrl != null && allowEmbed == null) {
throw new RuntimeException("allowEmbed must be specified if playerUrl is specified");
}
category = options.category;
description = options.description;
durationInSeconds = options.durationInSeconds;
familyFriendly = convertBooleanToYesOrNo(options.familyFriendly);
publicationDate = options.publicationDate;
rating = options.rating;
tags = options.tags;
if (tags != null && tags.size() > 32) {
throw new RuntimeException("A maximum of 32 tags is permitted");
}
thumbnailUrl = options.thumbnailUrl;
title = options.title;
viewCount = options.viewCount;
}
private static String convertBooleanToYesOrNo(Boolean value) {
if (value == null) return null;
return value ? "Yes" : "No";
}
/** Retrieves the {@link Options#playerUrl}*/
public URL getPlayerUrl() {
return playerUrl;
}
/** Retrieves the {@link Options#contentUrl}*/
public URL getContentUrl() {
return contentUrl;
}
/**
* Options to configure Google Video URLs
*/
public static class Options extends AbstractSitemapUrlOptions<GoogleVideoSitemapUrl, Options> {
private URL playerUrl;
private URL contentUrl;
private URL thumbnailUrl;
private String title;
private String description;
private Double rating;
private Integer viewCount;
private Date publicationDate;
private ArrayList<String> tags;
private String category;
// TODO can there be multiple categories?
// "Usually a video will belong to a single category."
// http://www.google.com/support/webmasters/bin/answer.py?answer=80472
private Boolean familyFriendly;
private Integer durationInSeconds;
private Boolean allowEmbed;
/** Retrieves the {@link Options#thumbnailUrl}*/
public URL getThumbnailUrl() {
return thumbnailUrl;
}
/**
* Specifies a landing page URL, together with a "player" (e.g. SWF)
*
* @param url the landing page URL
* @param playerUrl the URL of the "player" (e.g. SWF file)
* @param allowEmbed when specifying a player, you must specify whether embedding is allowed
*/
public Options(URL url, URL playerUrl, boolean allowEmbed) {
super(url, GoogleVideoSitemapUrl.class);
this.playerUrl = playerUrl;
this.allowEmbed = allowEmbed;
}
/** Retrieves the {@link Options#title}*/
public String getTitle() {
return title;
}
/**
* Specifies a landing page URL, together with the URL of the underlying video (e.g. FLV)
*
* @param url the landing page URL
* @param contentUrl the URL of the underlying video (e.g. FLV)
*/
public Options(URL url, URL contentUrl) {
super(url, GoogleVideoSitemapUrl.class);
this.contentUrl = contentUrl;
}
/** Retrieves the {@link Options#description}*/
public String getDescription() {
return description;
}
/**
* Specifies a player URL (e.g. SWF)
*
* @param playerUrl the URL of the "player" (e.g. SWF file)
* @param allowEmbed when specifying a player, you must specify whether embedding is allowed
*/
public Options playerUrl(URL playerUrl, boolean allowEmbed) {
this.playerUrl = playerUrl;
this.allowEmbed = allowEmbed;
return this;
}
/** Retrieves the {@link Options#rating}*/
public Double getRating() {
return rating;
}
/**
* Specifies the URL of the underlying video (e.g FLV)
*/
public Options contentUrl(URL contentUrl) {
this.contentUrl = contentUrl;
return this;
}
/** Retrieves the {@link Options#viewCount}*/
public Integer getViewCount() {
return viewCount;
}
/**
* A URL pointing to the URL for the video thumbnail image file. This
* allows you to suggest the thumbnail you want displayed in search
* results. If you provide a {@link #contentUrl(URL)}, Google will attempt
* to generate a set of representative thumbnail images from your actual
* video content. However, we strongly recommend that you provide a
* thumbnail URL to increase the likelihood of your video being included
* in the video index.
*/
public Options thumbnailUrl(URL thumbnailUrl) {
this.thumbnailUrl = thumbnailUrl;
return this;
}
/** Retrieves the {@link Options#publicationDate}*/
public Date getPublicationDate() {
return publicationDate;
}
/**
 * Sets the title of the video, which may be at most 100 characters long.
 *
 * @param title the title; may be null
 * @return this options object, for chaining
 * @throws RuntimeException if {@code title} is longer than 100 characters
 */
public Options title(String title) {
    final boolean tooLong = title != null && title.length() > 100;
    if (tooLong) {
        throw new RuntimeException("Video title is limited to 100 characters: " + title);
    }
    this.title = title;
    return this;
}
/** Retrieves the {@link Options#tags}*/
public ArrayList<String> getTags() {
return tags;
}
/**
 * Sets the description of the video. The description may be at most 2048
 * characters long; longer values are rejected with an exception rather than
 * truncated, so callers must shorten them beforehand.
 *
 * @param description the description; may be null
 * @return this options object, for chaining
 * @throws RuntimeException if {@code description} is longer than 2048 characters
 */
public Options description(String description) {
    final boolean tooLong = description != null && description.length() > 2048;
    if (tooLong) {
        throw new RuntimeException("Truncate video descriptions to 2048 characters: " + description);
    }
    this.description = description;
    return this;
}
/** Retrieves the {@link Options#category}*/
public String getCategory() {
return category;
}
/**
 * The rating of the video. The value must be a number in the range 0.0-5.0.
 *
 * @param rating the rating; may be null
 * @return this options object, for chaining
 * @throws RuntimeException if {@code rating} is not within 0.0-5.0 (this includes NaN)
 */
public Options rating(Double rating) {
    // Negated range test: every comparison with NaN is false, so the former
    // "rating < 0 || rating > 5.0" check silently accepted NaN.
    if (rating != null && !(rating >= 0.0 && rating <= 5.0)) {
        throw new RuntimeException("Rating must be between 0.0 and 5.0:" + rating);
    }
    this.rating = rating;
    return this;
}
/** Retrieves whether the video is {@link Options#familyFriendly}*/
public String getFamilyFriendly() {
return familyFriendly;
}
/**
* The number of times the video has been viewed
*/
public Options viewCount(int viewCount) {
this.viewCount = viewCount;
return this;
}
/** Retrieves the {@link Options#durationInSeconds}*/
public Integer getDurationInSeconds() {
return durationInSeconds;
}
/**
* The date the video was first published, in {@link W3CDateFormat}.
*/
public Options publicationDate(Date publicationDate) {
this.publicationDate = publicationDate;
return this;
}
/** Retrieves whether embedding is allowed */
public String getAllowEmbed() {
return allowEmbed;
}
/**
* Tag associated with the video; tags are generally very short
* descriptions of key concepts associated with a video or piece of
* content. A single video could have several tags, although it might
* belong to only one category. For example, a video about grilling food
* may belong in the Grilling category, but could be tagged "steak",
* "meat", "summer", and "outdoor". Create a new &lt;video:tag&gt; element for
* each tag associated with a video. A maximum of 32 tags is permitted.
*/
public Options tags(ArrayList<String> tags) {
this.tags = tags;
return this;
}
/**
* Tag associated with the video; tags are generally very short
* descriptions of key concepts associated with a video or piece of
* content. A single video could have several tags, although it might
* belong to only one category. For example, a video about grilling food
* may belong in the Grilling category, but could be tagged "steak",
* "meat", "summer", and "outdoor". Create a new &lt;video:tag&gt; element for
* each tag associated with a video. A maximum of 32 tags is permitted.
*/
public Options tags(Iterable<String> tags) {
this.tags = new ArrayList<String>();
for (String tag : tags) {
this.tags.add(tag);
}
return this;
}
/**
* Tag associated with the video; tags are generally very short
* descriptions of key concepts associated with a video or piece of
* content. A single video could have several tags, although it might
* belong to only one category. For example, a video about grilling food
* may belong in the Grilling category, but could be tagged "steak",
* "meat", "summer", and "outdoor". Create a new &lt;video:tag&gt; element for
* each tag associated with a video. A maximum of 32 tags is permitted.
*/
public Options tags(String... tags) {
return tags(Arrays.asList(tags));
}
/**
 * The video's category; for example, <code>cooking</code>. The value
 * should be a string no longer than 256 characters. In general,
 * categories are broad groupings of content by subject. Usually a video
 * will belong to a single category. For example, a site about cooking
 * could have categories for Broiling, Baking, and Grilling
 *
 * @param category the category; may be null
 * @return this options object, for chaining
 * @throws RuntimeException if {@code category} is longer than 256 characters
 */
public Options category(String category) {
    if (category != null && category.length() > 256) {
        // Report the offending category; the message used to echo the unrelated title field.
        throw new RuntimeException("Video category is limited to 256 characters: " + category);
    }
    this.category = category;
    return this;
}
/**
* Whether the video is suitable for viewing by children
*/
public Options familyFriendly(boolean familyFriendly) {
this.familyFriendly = familyFriendly;
return this;
}
/**
 * Sets the duration of the video in seconds. The value must lie between
 * 0 and 28800 (8 hours), both inclusive.
 *
 * @param durationInSeconds the duration in seconds
 * @return this options object, for chaining
 * @throws RuntimeException if the duration is outside the allowed range
 */
public Options durationInSeconds(int durationInSeconds) {
    final boolean withinRange = durationInSeconds >= 0 && durationInSeconds <= 28800;
    if (!withinRange) {
        throw new RuntimeException("Duration must be between 0 and 28800 (8 hours):" + durationInSeconds);
    }
    this.durationInSeconds = durationInSeconds;
    return this;
}
}
/**
* Specifies a landing page URL, together with a "player" (e.g. SWF)
*
* @param url the landing page URL
* @param playerUrl the URL of the "player" (e.g. SWF file)
* @param allowEmbed when specifying a player, you must specify whether embedding is allowed
*/
public GoogleVideoSitemapUrl(URL url, URL playerUrl, boolean allowEmbed) {
this(new Options(url, playerUrl, allowEmbed));
}
/**
* Specifies a landing page URL, together with the URL of the underlying video (e.g. FLV).
* Shorthand for constructing from {@code new Options(url, contentUrl)}.
*
* @param url the landing page URL
* @param contentUrl the URL of the underlying video (e.g. FLV)
*/
public GoogleVideoSitemapUrl(URL url, URL contentUrl) {
this(new Options(url, contentUrl));
}
/**
 * Builds the video URL from a fully populated {@link Options} instance.
 *
 * @param options the configured options; at least one of its content URL and player URL must be set,
 *                a player URL requires an explicit allow-embed choice, and at most 32 tags are accepted
 * @throws RuntimeException if any of those constraints is violated
 */
public GoogleVideoSitemapUrl(Options options) {
    super(options);

    // Where the video lives: a raw content URL, a player URL, or both.
    contentUrl = options.contentUrl;
    playerUrl = options.playerUrl;
    if (contentUrl == null && playerUrl == null) {
        throw new RuntimeException("You must specify either contentUrl or playerUrl or both; neither were specified");
    }

    // A player needs an explicit embedding decision.
    allowEmbed = convertBooleanToYesOrNo(options.allowEmbed);
    if (allowEmbed == null && playerUrl != null) {
        throw new RuntimeException("allowEmbed must be specified if playerUrl is specified");
    }

    tags = options.tags;
    if (tags != null && tags.size() > 32) {
        throw new RuntimeException("A maximum of 32 tags is permitted");
    }

    // Remaining descriptive metadata is copied over unchanged.
    title = options.title;
    description = options.description;
    thumbnailUrl = options.thumbnailUrl;
    category = options.category;
    durationInSeconds = options.durationInSeconds;
    familyFriendly = convertBooleanToYesOrNo(options.familyFriendly);
    publicationDate = options.publicationDate;
    rating = options.rating;
    viewCount = options.viewCount;
}
// Maps a tri-state flag to its sitemap text form: null stays null, true -> "Yes", false -> "No".
private static String convertBooleanToYesOrNo(Boolean value) {
    if (value == null) {
        return null;
    }
    if (value.booleanValue()) {
        return "Yes";
    }
    return "No";
}
/**
* Retrieves the {@link Options#playerUrl}
*
* @return the player URL copied from the options at construction; may be {@code null} when a content URL was supplied instead
*/
public URL getPlayerUrl() {
return playerUrl;
}
/**
* Retrieves the {@link Options#contentUrl}
*
* @return the content URL copied from the options at construction; may be {@code null} when a player URL was supplied instead
*/
public URL getContentUrl() {
return contentUrl;
}
/**
* Retrieves the {@link Options#thumbnailUrl}
*
* @return the thumbnail URL copied from the options at construction
*/
public URL getThumbnailUrl() {
return thumbnailUrl;
}
/**
* Retrieves the {@link Options#title}
*
* @return the title copied from the options at construction
*/
public String getTitle() {
return title;
}
/**
* Retrieves the {@link Options#description}
*
* @return the description copied from the options at construction
*/
public String getDescription() {
return description;
}
/**
* Retrieves the {@link Options#rating}
*
* @return the rating copied from the options at construction
*/
public Double getRating() {
return rating;
}
/**
* Retrieves the {@link Options#viewCount}
*
* @return the view count copied from the options at construction
*/
public Integer getViewCount() {
return viewCount;
}
/**
* Retrieves the {@link Options#publicationDate}
*
* @return the publication date copied from the options at construction
*/
public Date getPublicationDate() {
return publicationDate;
}
/**
* Retrieves the {@link Options#tags}
*
* @return the tag list taken from the options (the constructor rejects more than 32 entries);
*         may be {@code null}. The stored list itself is returned, not a copy.
*/
public ArrayList<String> getTags() {
return tags;
}
/**
* Retrieves the {@link Options#category}
*
* @return the category copied from the options at construction
*/
public String getCategory() {
return category;
}
/**
* Retrieves whether the video is {@link Options#familyFriendly}
*
* @return {@code "Yes"} or {@code "No"} as produced by the constructor's boolean conversion,
*         or {@code null} if that conversion received {@code null}
*/
public String getFamilyFriendly() {
return familyFriendly;
}
/**
* Retrieves the {@link Options#durationInSeconds}
*
* @return the duration in seconds copied from the options at construction
*/
public Integer getDurationInSeconds() {
return durationInSeconds;
}
/**
* Retrieves whether embedding is allowed
*
* @return {@code "Yes"} or {@code "No"}; {@code null} if no choice was made in the options
*         (the constructor only permits that when no player URL is set)
*/
public String getAllowEmbed() {
return allowEmbed;
}
}

View File

@ -3,7 +3,7 @@ package com.redfin.sitemapgenerator;
import java.net.URL;
import java.util.Date;
interface ISitemapUrl {
public interface ISitemapUrl {
public abstract Date getLastMod();

View File

@ -1,11 +1,8 @@
package com.redfin.sitemapgenerator;
import java.io.IOException;
import java.io.OutputStreamWriter;
/**
* Renders one kind of sitemap URL as XML on behalf of a sitemap generator.
*
* @param <T> the sitemap URL type this renderer handles
*/
interface ISitemapUrlRenderer<T extends ISitemapUrl> {
/** Returns the URL class handled by this renderer; the generator instantiates it reflectively from a String or URL. */
public Class<T> getUrlClass();
/** Returns extra XML namespace declarations to place inside the opening urlset tag, or {@code null} if there are none. */
public String getXmlNamespaces();
/** Writes the XML for a single URL to the given writer, formatting dates with {@code dateFormat}. */
public void render(T url, OutputStreamWriter out, W3CDateFormat dateFormat) throws IOException;
/** Appends the XML for a single URL to the given builder, formatting dates with {@code dateFormat}. */
public void render(T url, StringBuilder sb, W3CDateFormat dateFormat);
}

View File

@ -0,0 +1,100 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Represents a single image and its properties for use in extended sitemaps.
 * All fields are final. Every property other than the URL is optional and is
 * {@code null} when not supplied.
 *
 * @see <a href="https://support.google.com/webmasters/answer/178636">Image sitemaps</a>
 */
public class Image {
    private final URL url;
    private final String title;
    private final String caption;
    private final String geoLocation;
    private final URL license;

    /**
     * Creates an image that only has a URL.
     *
     * @param url the image URL as text
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public Image(String url) throws MalformedURLException {
        this(new URL(url));
    }

    /**
     * Creates an image that only has a URL; all other properties are {@code null}.
     *
     * @param url the image URL
     */
    public Image(URL url) {
        this(url, null, null, null, (URL) null);
    }

    /**
     * Creates a fully described image whose license is given as text.
     *
     * @param url the image URL
     * @param title the image title, may be {@code null}
     * @param caption the image caption, may be {@code null}
     * @param geoLocation the geographic location of the image, may be {@code null}
     * @param license the license URL as text; {@code null} means "no license"
     * @throws MalformedURLException if a non-null {@code license} cannot be parsed
     */
    public Image(URL url, String title, String caption, String geoLocation, String license) throws MalformedURLException {
        this(url, title, caption, geoLocation, toUrlOrNull(license));
    }

    /**
     * Creates a fully described image.
     *
     * @param url the image URL
     * @param title the image title, may be {@code null}
     * @param caption the image caption, may be {@code null}
     * @param geoLocation the geographic location of the image, may be {@code null}
     * @param license the license URL, may be {@code null}
     */
    public Image(URL url, String title, String caption, String geoLocation, URL license) {
        this.url = url;
        this.title = title;
        this.caption = caption;
        this.geoLocation = geoLocation;
        this.license = license;
    }

    /** Retrieves URL of image */
    public URL getUrl() { return url; }

    /** Retrieves title of image */
    public String getTitle() { return title; }

    /** Retrieves caption of image */
    public String getCaption() { return caption; }

    /** Retrieves geolocation string of image */
    public String getGeoLocation() { return geoLocation; }

    /** Retrieves license URL of image */
    public URL getLicense() { return license; }

    // The license is optional, so a missing textual license maps to a missing URL instead of a parse failure.
    private static URL toUrlOrNull(String spec) throws MalformedURLException {
        return spec == null ? null : new URL(spec);
    }

    /** Fluent builder for {@link Image}; only the image URL is required up front. */
    public static class ImageBuilder {
        private final URL url;
        private String title;
        private String caption;
        private String geoLocation;
        private URL license;

        /**
         * @param url the image URL as text
         * @throws MalformedURLException if {@code url} cannot be parsed
         */
        public ImageBuilder(String url) throws MalformedURLException {
            this(new URL(url));
        }

        /** @param url the image URL */
        public ImageBuilder(URL url) {
            this.url = url;
        }

        /** Sets the image title. */
        public ImageBuilder title(String title) {
            this.title = title;
            return this;
        }

        /** Sets the image caption. */
        public ImageBuilder caption(String caption) {
            this.caption = caption;
            return this;
        }

        /** Sets the geographic location of the image. */
        public ImageBuilder geoLocation(String geoLocation) {
            this.geoLocation = geoLocation;
            return this;
        }

        /**
         * Sets the license from its textual URL; {@code null} clears it.
         *
         * @throws MalformedURLException if a non-null {@code license} cannot be parsed
         */
        public ImageBuilder license(String license) throws MalformedURLException {
            return license(toUrlOrNull(license));
        }

        /** Sets the license URL. */
        public ImageBuilder license(URL license) {
            this.license = license;
            return this;
        }

        /** Creates the image from the values collected so far. */
        public Image build() {
            return new Image(url, title, caption, geoLocation, license);
        }
    }
}

View File

@ -1,226 +1,317 @@
package com.redfin.sitemapgenerator;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;
import org.xml.sax.SAXException;
abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGenerator<U, THIS>> {
/**
* 50000 URLs per sitemap maximum
*/
public static final int MAX_URLS_PER_SITEMAP = 50000;
// Every added URL is checked against this base URL; it is also handed to the sitemap index generator.
private final URL baseUrl;
// Output directory for sitemap files; the file-writing path rejects a null value.
private final File baseDir;
// Output file name is prefix [+ sitemap number] + suffix.
private final String fileNamePrefix;
private final String fileNameSuffix;
// When true, write() may produce a sitemap that contains no URLs.
private final boolean allowEmptySitemap;
// When false, adding more than maxUrls URLs is an error instead of starting another sitemap.
private final boolean allowMultipleSitemaps;
// URLs collected since the last sitemap file was written.
private final ArrayList<U> urls = new ArrayList<U>();
private final W3CDateFormat dateFormat;
// Maximum number of URLs placed in one sitemap.
private final int maxUrls;
// When true, each written file is validated right after it is written.
private final boolean autoValidate;
// When true, files are gzip-compressed and named *.xml.gz.
private final boolean gzip;
private final ISitemapUrlRenderer<U> renderer;
// Number appended to the file name prefix; stays 0 while only a single sitemap exists.
private int mapCount = 0;
// Set once write() has completed; no URLs may be added afterwards.
private boolean finished = false;
// Every file this generator has written (or started to write).
private final ArrayList<File> outFiles = new ArrayList<File>();
/**
 * Copies the generator configuration out of the options object.
 *
 * @param options  the configured options; a missing date format falls back to a default {@link W3CDateFormat}
 * @param renderer the renderer that turns each URL into XML
 */
public SitemapGenerator(AbstractSitemapGeneratorOptions<?> options, ISitemapUrlRenderer<U> renderer) {
    this.renderer = renderer;
    this.baseDir = options.baseDir;
    this.baseUrl = options.baseUrl;
    this.fileNamePrefix = options.fileNamePrefix;
    this.dateFormat = (options.dateFormat != null) ? options.dateFormat : new W3CDateFormat();
    this.allowEmptySitemap = options.allowEmptySitemap;
    this.allowMultipleSitemaps = options.allowMultipleSitemaps;
    this.maxUrls = options.maxUrls;
    this.autoValidate = options.autoValidate;
    this.gzip = options.gzip;

    // The suffix is an optional caller-supplied pattern followed by the extension.
    String extension = this.gzip ? ".xml.gz" : ".xml";
    boolean hasPattern = options.suffixStringPattern != null && !options.suffixStringPattern.isEmpty();
    this.fileNameSuffix = hasPattern ? options.suffixStringPattern + extension : extension;
}
/**
 * Adds a single URL to this sitemap.
 * When the current sitemap already holds the maximum number of URLs, it is written out first
 * (if a base directory is configured), or an exception is thrown if multiple sitemaps are not allowed.
 *
 * @param url the URL to add to this sitemap
 * @return this
 */
public THIS addUrl(U url) {
    if (finished) {
        throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
    }
    UrlUtils.checkUrl(url.getUrl(), baseUrl);

    boolean currentSitemapFull = urls.size() == maxUrls;
    if (currentSitemapFull) {
        if (!allowMultipleSitemaps) {
            throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false. Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
        }
        if (baseDir != null) {
            // The first file to overflow becomes sitemap number 1.
            if (mapCount == 0) {
                mapCount = 1;
            }
            try {
                writeSiteMap();
            } catch (IOException ex) {
                throw new RuntimeException("Closing of stream failed.", ex);
            }
            mapCount++;
            urls.clear();
        }
    }

    urls.add(url);
    return getThis();
}
/**
 * Adds every URL of the given sequence, one by one, through {@link #addUrl(ISitemapUrl)};
 * the size limit is therefore enforced per URL.
 *
 * @param urls the URLs to add to this sitemap
 * @return this
 */
public THIS addUrls(Iterable<? extends U> urls) {
    for (U each : urls) {
        addUrl(each);
    }
    return getThis();
}
/**
 * Adds every given URL object, one by one, through {@link #addUrl(ISitemapUrl)};
 * the size limit is therefore enforced per URL.
 *
 * @param urls the URLs to add to this sitemap
 * @return this
 */
public THIS addUrls(U... urls) {
    for (int i = 0; i < urls.length; i++) {
        addUrl(urls[i]);
    }
    return getThis();
}
/**
 * Adds every given URL string, one by one, through {@link #addUrl(String)};
 * the size limit is therefore enforced per URL.
 *
 * @param urls the URLs to add to this sitemap
 * @return this
 */
public THIS addUrls(String... urls) {
    for (int i = 0; i < urls.length; i++) {
        addUrl(urls[i]);
    }
    return getThis();
}
/**
 * Adds one URL, given as text, to this sitemap.
 * The URL object is created reflectively through the String constructor of the renderer's URL class
 * and then added exactly like a URL object passed to {@link #addUrl(ISitemapUrl)}.
 *
 * @param url the URL to add to this sitemap
 * @return this
 * @throws RuntimeException if the URL object cannot be constructed (the failure is attached as cause),
 *         or if adding it is rejected
 */
public THIS addUrl(String url) {
    U sitemapUrl;
    try {
        sitemapUrl = renderer.getUrlClass().getConstructor(String.class).newInstance(url);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Outside the try block so that the generator's own exceptions reach the caller with their message intact.
    return addUrl(sitemapUrl);
}
/**
 * Adds every given {@link URL}, one by one, through {@link #addUrl(URL)};
 * the size limit is therefore enforced per URL.
 *
 * @param urls the URLs to add to this sitemap
 * @return this
 */
public THIS addUrls(URL... urls) {
    for (int i = 0; i < urls.length; i++) {
        addUrl(urls[i]);
    }
    return getThis();
}
/**
 * Adds one {@link URL} to this sitemap.
 * The sitemap URL object is created reflectively through the URL constructor of the renderer's URL class
 * and then added exactly like a URL object passed to {@link #addUrl(ISitemapUrl)}.
 *
 * @param url the URL to add to this sitemap
 * @return this
 * @throws RuntimeException if the URL object cannot be constructed (the failure is attached as cause),
 *         or if adding it is rejected
 */
public THIS addUrl(URL url) {
    U sitemapUrl;
    try {
        sitemapUrl = renderer.getUrlClass().getConstructor(URL.class).newInstance(url);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Outside the try block so that the generator's own exceptions reach the caller with their message intact.
    return addUrl(sitemapUrl);
}
/**
* Returns this generator cast to its self type, so the fluent methods can return {@code THIS}.
* The cast is unchecked; it relies on subclasses naming themselves as the THIS type parameter.
*/
@SuppressWarnings("unchecked")
THIS getThis() {
return (THIS) this;
}
/**
 * Writes the URLs that have not been written yet and marks the generator as finished.
 * May be called only once, which keeps the sitemap count accurate for {@link #writeSitemapsWithIndex()}.
 *
 * @return a list of files we wrote out to disk
 */
public List<File> write() {
    if (finished) {
        throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
    }
    boolean nothingEverAdded = urls.isEmpty() && mapCount == 0;
    if (nothingEverAdded && !allowEmptySitemap) {
        throw new RuntimeException("No URLs added, sitemap would be empty; you must add some URLs with addUrls");
    }
    try {
        writeSiteMap();
    } catch (IOException ex) {
        throw new RuntimeException("Closing of streams has failed at some point.", ex);
    }
    finished = true;
    return outFiles;
}
/**
 * Renders the collected URLs as XML strings instead of files.
 * The URLs are cut into consecutive slices of at most {@code maxUrls} entries
 * and each slice becomes one complete sitemap document.
 *
 * @return a list of XML-formatted strings
 */
public List<String> writeAsStrings() {
    List<String> sitemaps = new ArrayList<String>();
    int start = 0;
    while (start < urls.size()) {
        int end = start + maxUrls;
        if (end > urls.size()) {
            end = urls.size();
        }
        StringBuilder xml = new StringBuilder();
        writeSiteMapAsString(xml, urls.subList(start, end));
        sitemaps.add(xml.toString());
        start += maxUrls;
    }
    return sitemaps;
}
// Appends one complete sitemap document (XML declaration, urlset element, one entry per URL) to sb.
private void writeSiteMapAsString(StringBuilder sb, List<U> urls) {
    sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
      .append("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" ");
    // Renderer-specific namespaces, if any, go inside the opening tag.
    if (renderer.getXmlNamespaces() != null) {
        sb.append(renderer.getXmlNamespaces()).append(' ');
    }
    sb.append(">\n");
    for (U entry : urls) {
        renderer.render(entry, sb, dateFormat);
    }
    sb.append("</urlset>");
}
/**
* After you've called {@link #write()}, call this to generate a sitemap index of all sitemaps you generated.
* The sitemap index is written to {baseDir}/sitemap_index.xml
*
* @return the sitemap index file that was written
*/
public File writeSitemapsWithIndex() {
return writeSitemapsWithIndex(new File(baseDir, "sitemap_index.xml"));
}
/**
* After you've called {@link #write()}, call this to generate a sitemap index of all sitemaps you generated.
* The index is returned as a string; the index generator is prepared without an output file.
*
* @return the sitemap index as a string
*/
public String writeSitemapsWithIndexAsString() {
return prepareSitemapIndexGenerator(null).writeAsString();
}
/**
* After you've called {@link #write()}, call this to generate a sitemap index of all sitemaps you generated.
*
* @param outFile the destination file of the sitemap index.
* @return {@code outFile}, after the index has been written to it
*/
public File writeSitemapsWithIndex(File outFile) {
prepareSitemapIndexGenerator(outFile).write();
return outFile;
}
// Builds an index generator that lists every sitemap of this generator; only valid once write() has completed.
private SitemapIndexGenerator prepareSitemapIndexGenerator(File outFile) {
    if (!finished) {
        throw new RuntimeException("Sitemaps not generated yet; call write() first");
    }
    SitemapIndexGenerator indexGenerator = new SitemapIndexGenerator.Options(baseUrl, outFile)
            .dateFormat(dateFormat)
            .autoValidate(autoValidate)
            .build();
    indexGenerator.addUrls(fileNamePrefix, fileNameSuffix, mapCount);
    return indexGenerator;
}
// Writes the pending URLs to the next sitemap file in baseDir (gzip-compressed if configured),
// optionally validates the result, and always closes the writer.
private void writeSiteMap() throws IOException {
    if (baseDir == null) {
        throw new NullPointerException("To write to files, baseDir must not be null");
    }
    // An empty sitemap is only written as the very first file, and only when explicitly allowed.
    boolean emptyNotWanted = mapCount > 0 || !allowEmptySitemap;
    if (emptyNotWanted && urls.isEmpty()) {
        return;
    }

    String numberedPrefix = (mapCount > 0) ? this.fileNamePrefix + mapCount : this.fileNamePrefix;
    File outFile = new File(baseDir, numberedPrefix + fileNameSuffix);
    outFiles.add(outFile);

    OutputStreamWriter out = null;
    try {
        if (gzip) {
            GZIPOutputStream compressed = new GZIPOutputStream(new FileOutputStream(outFile));
            out = new OutputStreamWriter(compressed, Charset.forName("UTF-8").newEncoder());
        } else {
            FileOutputStream plain = new FileOutputStream(outFile);
            out = new OutputStreamWriter(plain, Charset.forName("UTF-8").newEncoder());
        }
        writeSiteMap(out);
        // Push the content to disk before the validator reads the file.
        out.flush();
        if (autoValidate) {
            SitemapValidator.validateWebSitemap(outFile);
        }
    } catch (IOException e) {
        throw new RuntimeException("Problem writing sitemap file " + outFile, e);
    } catch (SAXException e) {
        throw new RuntimeException("Sitemap file failed to validate (bug?)", e);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
// Renders all pending URLs into one XML document in memory and writes it to the given writer.
private void writeSiteMap(OutputStreamWriter out) throws IOException {
    StringBuilder document = new StringBuilder();
    writeSiteMapAsString(document, urls);
    String xml = document.toString();
    out.write(xml);
}
/**
 * Base class of the sitemap generators. Collects URLs, writes them to sitemap files of at most
 * {@code maxUrls} entries each (optionally gzip-compressed) in {@code baseDir}, and can produce
 * a sitemap index for the files it wrote.
 *
 * @param <U>    the sitemap URL type handled by this generator
 * @param <THIS> the concrete generator type, returned by the fluent methods
 */
abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGenerator<U,THIS>> {
    /** 50000 URLs per sitemap maximum */
    public static final int MAX_URLS_PER_SITEMAP = 50000;

    private final String baseUrl;
    private final File baseDir;
    private final String fileNamePrefix;
    private final String fileNameSuffix;
    private final boolean allowMultipleSitemaps;
    private final ArrayList<U> urls = new ArrayList<U>();
    private final W3CDateFormat dateFormat;
    private final int maxUrls;
    private final boolean autoValidate;
    private final boolean gzip;
    private final ISitemapUrlRenderer<U> renderer;
    private int mapCount = 0;
    private boolean finished = false;
    private final ArrayList<File> outFiles = new ArrayList<File>();

    /**
     * Copies the generator configuration out of the options object.
     *
     * @param options  the configured options; a missing date format falls back to a default {@link W3CDateFormat}
     * @param renderer the renderer that turns each URL into XML
     */
    public SitemapGenerator(AbstractSitemapGeneratorOptions<?> options, ISitemapUrlRenderer<U> renderer) {
        baseDir = options.baseDir;
        baseUrl = options.baseUrl;
        fileNamePrefix = options.fileNamePrefix;
        W3CDateFormat dateFormat = options.dateFormat;
        if (dateFormat == null) dateFormat = new W3CDateFormat();
        this.dateFormat = dateFormat;
        allowMultipleSitemaps = options.allowMultipleSitemaps;
        maxUrls = options.maxUrls;
        autoValidate = options.autoValidate;
        gzip = options.gzip;
        this.renderer = renderer;
        fileNameSuffix = gzip ? ".xml.gz" : ".xml";
    }

    /** Add one URL of the appropriate type to this sitemap.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or else write out one sitemap immediately.
     * @param url the URL to add to this sitemap
     * @return this
     */
    public THIS addUrl(U url) {
        if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
        UrlUtils.checkUrl(url.getUrl().toString(), baseUrl);
        if (urls.size() == maxUrls) {
            if (!allowMultipleSitemaps) throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false.  Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
            // The first file to overflow becomes sitemap number 1.
            if (mapCount == 0) mapCount++;
            writeSiteMap();
            mapCount++;
            urls.clear();
        }
        urls.add(url);
        return getThis();
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     */
    public THIS addUrls(Iterable<? extends U> urls) {
        for (U url : urls) addUrl(url);
        return getThis();
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     */
    public THIS addUrls(U... urls) {
        for (U url : urls) addUrl(url);
        return getThis();
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     * @throws MalformedURLException
     */
    public THIS addUrls(String... urls) throws MalformedURLException {
        for (String url : urls) addUrl(url);
        return getThis();
    }

    /** Add one URL of the appropriate type to this sitemap.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or else write out one sitemap immediately.
     * @param url the URL to add to this sitemap
     * @return this
     * @throws MalformedURLException
     */
    public THIS addUrl(String url) throws MalformedURLException {
        U sitemapUrl;
        try {
            sitemapUrl = renderer.getUrlClass().getConstructor(String.class).newInstance(url);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return addUrl(sitemapUrl);
    }

    /** Add multiple URLs of the appropriate type to this sitemap, one at a time.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param urls the URLs to add to this sitemap
     * @return this
     */
    public THIS addUrls(URL... urls) {
        for (URL url : urls) addUrl(url);
        return getThis();
    }

    /** Add one URL of the appropriate type to this sitemap.
     * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
     * or write out one sitemap immediately.
     * @param url the URL to add to this sitemap
     * @return this
     */
    public THIS addUrl(URL url) {
        U sitemapUrl;
        try {
            sitemapUrl = renderer.getUrlClass().getConstructor(URL.class).newInstance(url);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return addUrl(sitemapUrl);
    }

    /** Returns this generator cast to its self type, for the fluent methods. */
    @SuppressWarnings("unchecked")
    THIS getThis() {
        return (THIS)this;
    }

    /** Write out remaining URLs; this method can only be called once.  This is necessary so we can keep an accurate count for {@link #writeSitemapsWithIndex()}.
     *
     * @return a list of files we wrote out to disk
     */
    public List<File> write() {
        if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
        if (urls.size() == 0 && mapCount == 0) throw new RuntimeException("No URLs added, sitemap would be empty; you must add some URLs with addUrls");
        writeSiteMap();
        finished = true;
        return outFiles;
    }

    /** After you've called {@link #write()}, call this to generate a sitemap index of all sitemaps you generated.
     * The index is written to sitemap_index.xml in the base directory.
     */
    public void writeSitemapsWithIndex() {
        if (!finished) throw new RuntimeException("Sitemaps not generated yet; call write() first");
        File outFile = new File(baseDir, "sitemap_index.xml");
        SitemapIndexGenerator sig;
        try {
            sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();
        } catch (MalformedURLException e) {
            throw new RuntimeException("bug", e);
        }
        sig.addUrls(fileNamePrefix, fileNameSuffix, mapCount).write();
    }

    // Writes the pending URLs to the next sitemap file; does nothing when there are no pending URLs.
    private void writeSiteMap() {
        if (urls.size() == 0) return;
        String fileNamePrefix;
        if (mapCount > 0) {
            fileNamePrefix = this.fileNamePrefix + mapCount;
        } else {
            fileNamePrefix = this.fileNamePrefix;
        }
        File outFile = new File(baseDir, fileNamePrefix+fileNameSuffix);
        outFiles.add(outFile);
        try {
            OutputStreamWriter out = openWriter(outFile);
            try {
                writeSiteMap(out);
            } finally {
                // Close even when rendering fails, so the file handle is never leaked.
                out.close();
            }
            // Validate only after the writer is closed, i.e. once the file is complete on disk.
            if (autoValidate) SitemapValidator.validateWebSitemap(outFile);
        } catch (IOException e) {
            throw new RuntimeException("Problem writing sitemap file " + outFile, e);
        } catch (SAXException e) {
            throw new RuntimeException("Sitemap file failed to validate (bug?)", e);
        }
    }

    // Opens a UTF-8 writer on outFile (matching the encoding named in the XML declaration), gzip-wrapped if configured.
    private OutputStreamWriter openWriter(File outFile) throws IOException {
        FileOutputStream fileStream = new FileOutputStream(outFile);
        try {
            if (gzip) {
                return new OutputStreamWriter(new GZIPOutputStream(fileStream), "UTF-8");
            }
            return new OutputStreamWriter(fileStream, "UTF-8");
        } catch (IOException e) {
            // Wrapping failed: release the file stream that was already opened.
            fileStream.close();
            throw e;
        }
    }

    // Writes one complete sitemap document for the pending URLs; the caller owns and closes the writer.
    private void writeSiteMap(OutputStreamWriter out) throws IOException {
        out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        out.write("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" ");
        if (renderer.getXmlNamespaces() != null) {
            out.write(renderer.getXmlNamespaces());
            out.write(' ');
        }
        out.write(">\n");
        for (U url : urls) {
            renderer.render(url, out, dateFormat);
        }
        out.write("</urlset>");
        out.flush();
    }
}

View File

@ -14,5 +14,13 @@ class SitemapGeneratorOptions extends
public SitemapGeneratorOptions(String baseUrl, File baseDir) throws MalformedURLException {
// Parse the textual base URL (throws MalformedURLException if it is not a valid URL) and delegate to the (URL, File) constructor.
this(new URL(baseUrl), baseDir);
}
/**
* Creates options that carry only a base URL; no output directory is passed to the superclass.
*
* @param baseUrl the base URL
*/
public SitemapGeneratorOptions(URL baseUrl) {
super(baseUrl);
}
/**
* Creates options that carry only a base URL given as text; no output directory is passed to the superclass.
*
* @param baseUrl the base URL as text
* @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
*/
public SitemapGeneratorOptions(String baseUrl) throws MalformedURLException {
super(new URL(baseUrl));
}
}

View File

@ -1,5 +1,7 @@
package com.redfin.sitemapgenerator;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
@ -9,17 +11,15 @@ import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import org.xml.sax.SAXException;
/**
* Builds a sitemap index, which points only to other sitemaps.
* @author Dan Fabulich
*
*/
public class SitemapIndexGenerator {
private final URL baseUrl;
private final String baseUrlString;
private final URL baseUrl;
private final File outFile;
private final boolean allowEmptyIndex;
private final ArrayList<SitemapIndexUrl> urls = new ArrayList<SitemapIndexUrl>();
private final int maxUrls;
private final W3CDateFormat dateFormat;
@ -33,6 +33,7 @@ public class SitemapIndexGenerator {
private URL baseUrl;
private File outFile;
private W3CDateFormat dateFormat = null;
private boolean allowEmptyIndex = false;
private int maxUrls = MAX_SITEMAPS_PER_INDEX;
private Date defaultLastMod = new Date();
private boolean autoValidate = false;
@ -60,6 +61,18 @@ public class SitemapIndexGenerator {
this.dateFormat = dateFormat;
return this;
}
/**
* Permit writing an index that contains no URLs.
* The default is {@code false}, in which case writing an index without URLs fails with a RuntimeException.
*
* @param allowEmptyIndex {@code true} if an empty index is permissible
* @return this instance, for chaining
*/
public Options allowEmptyIndex(boolean allowEmptyIndex) {
this.allowEmptyIndex = allowEmptyIndex;
return this;
}
/**
* The maximum number of sitemaps to allow per sitemap index; the default is the
* maximum allowed (1,000), but you can decrease it if you wish (for testing)
@ -115,9 +128,9 @@ public class SitemapIndexGenerator {
}
private SitemapIndexGenerator(Options options) {
this.baseUrl = options.baseUrl;
this.baseUrlString = baseUrl.toString();
this.baseUrl = options.baseUrl;
this.outFile = options.outFile;
this.allowEmptyIndex = options.allowEmptyIndex;
this.maxUrls = options.maxUrls;
W3CDateFormat dateFormat = options.dateFormat;
if (dateFormat == null) dateFormat = new W3CDateFormat();
@ -128,7 +141,7 @@ public class SitemapIndexGenerator {
/** Adds a single sitemap to the index */
public SitemapIndexGenerator addUrl(SitemapIndexUrl url) {
UrlUtils.checkUrl(url.url.toString(), baseUrlString);
UrlUtils.checkUrl(url.url, baseUrl);
if (urls.size() >= maxUrls) {
throw new RuntimeException("More than " + maxUrls + " urls");
}
@ -208,40 +221,66 @@ public class SitemapIndexGenerator {
/** Writes out the sitemap index */
public void write() {
if (urls.size() == 0) throw new RuntimeException("No URLs added, sitemap index would be empty; you must add some URLs with addUrls");
try {
// TODO gzip? is that legal for a sitemap index?
FileWriter out = new FileWriter(outFile);
writeSiteMap(out);
if (autoValidate) SitemapValidator.validateSitemapIndex(outFile);
write(new FileWriter(outFile));
} catch (IOException e) {
throw new RuntimeException("Problem writing sitemap index file " + outFile, e);
} catch (SAXException e) {
throw new RuntimeException("Problem validating sitemap index file (bug?)", e);
}
}
/**
 * Writes the sitemap index to the given writer, optionally validates the output file,
 * and always closes the writer, including when the index is rejected for being empty.
 *
 * @param out the writer to write to; it is closed by this method
 * @throws RuntimeException if the index is empty and empty indexes are not allowed,
 *         if validation fails, or if writing or closing fails
 */
private void write(OutputStreamWriter out) {
    try {
        try {
            // Checked inside the try block so the finally clause closes the already-opened writer.
            if (!allowEmptyIndex && urls.isEmpty()) {
                throw new RuntimeException("No URLs added, sitemap index would be empty; you must add some URLs with addUrls");
            }
            writeSiteMap(out);
            // Push the content to disk before the validator reads the file.
            out.flush();
            if (autoValidate) SitemapValidator.validateSitemapIndex(outFile);
        } catch (SAXException e) {
            throw new RuntimeException("Problem validating sitemap index file (bug?)", e);
        } finally {
            if (out != null) {
                out.close();
            }
        }
    } catch (IOException ex) {
        throw new RuntimeException("Closing of stream has failed.", ex);
    }
}
private void writeSiteMap(OutputStreamWriter out) throws IOException {
out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
out.write("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n");
/**
 * Renders the sitemap index into memory instead of a file.
 *
 * @return the sitemap index as an XML string
 */
public String writeAsString() {
    StringBuilder xml = new StringBuilder();
    writeAsString(xml);
    String rendered = xml.toString();
    return rendered;
}
private void writeAsString(StringBuilder sb) {
sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
sb.append("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n");
for (SitemapIndexUrl url : urls) {
out.write(" <sitemap>\n");
out.write(" <loc>");
out.write(url.url.toString());
out.write("</loc>\n");
sb.append(" <sitemap>\n");
sb.append(" <loc>");
sb.append(UrlUtils.escapeXml(url.url.toString()));
sb.append("</loc>\n");
Date lastMod = url.lastMod;
if (lastMod == null) lastMod = defaultLastMod;
if (lastMod != null) {
out.write(" <lastmod>");
out.write(dateFormat.format(lastMod));
out.write("</lastmod>\n");
sb.append(" <lastmod>");
sb.append(dateFormat.format(lastMod));
sb.append("</lastmod>\n");
}
out.write(" </sitemap>\n");
sb.append(" </sitemap>\n");
}
out.write("</sitemapindex>");
out.close();
sb.append("</sitemapindex>");
}
// Renders the whole index in memory, then writes it to the given writer in a single call.
private void writeSiteMap(OutputStreamWriter out) throws IOException {
    StringBuilder document = new StringBuilder();
    writeAsString(document);
    String xml = document.toString();
    out.write(xml);
}
}

View File

@ -1,9 +1,7 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.XMLConstants;
import javax.xml.transform.sax.SAXSource;
@ -11,9 +9,10 @@ import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
/** Validates sitemaps and sitemap indexes
*
@ -39,21 +38,30 @@ public class SitemapValidator {
private synchronized static void lazyLoad() {
if (sitemapSchema != null) return;
SchemaFactory factory =
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
try {
InputStream stream = SitemapValidator.class.getResourceAsStream("sitemap.xsd");
if (stream == null) throw new RuntimeException("BUG Couldn't load sitemap.xsd");
StreamSource source = new StreamSource(stream);
sitemapSchema = factory.newSchema(source);
stream = SitemapValidator.class.getResourceAsStream("siteindex.xsd");
if (stream == null) throw new RuntimeException("BUG Couldn't load siteindex.xsd");
source = new StreamSource(stream);
sitemapIndexSchema = factory.newSchema(source);
} catch (SAXException e) {
sitemapSchema = lazyLoad(factory, "sitemap.xsd");
sitemapIndexSchema = lazyLoad(factory, "siteindex.xsd");
} catch (Exception e) {
throw new RuntimeException("BUG", e);
}
}
// Loads one XSD from the class path (resolved relative to SitemapValidator) and compiles it into a Schema;
// the resource stream is closed whether or not compilation succeeds.
private synchronized static Schema lazyLoad(SchemaFactory factory, String resource) throws IOException, SAXException {
    InputStream xsd = SitemapValidator.class.getResourceAsStream(resource);
    if (xsd == null) {
        throw new RuntimeException("BUG Couldn't load " + resource);
    }
    try {
        return factory.newSchema(new StreamSource(xsd));
    } finally {
        xsd.close();
    }
}
/** Validates an ordinary web sitemap file (NOT a Google-specific sitemap) */
public static void validateWebSitemap(File sitemap) throws SAXException {
@ -68,14 +76,24 @@ public class SitemapValidator {
}
private static void validateXml(File sitemap, Schema schema) throws SAXException {
Validator validator = schema.newValidator();
try {
FileReader reader = new FileReader(sitemap);
SAXSource source = new SAXSource(new InputSource(reader));
validator.validate(source);
} catch (IOException e) {
throw new RuntimeException(e);
Validator validator = schema.newValidator();
FileReader reader = null;
try {
reader = new FileReader(sitemap);
SAXSource source = new SAXSource(new InputSource(reader));
validator.validate(source);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
if(reader != null) {
reader.close();
}
}
} catch (IOException ex) {
throw new RuntimeException("Unable to close stream.", ex);
}
}
}

View File

@ -1,13 +1,42 @@
package com.redfin.sitemapgenerator;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
class UrlUtils {
// Replacement table: each XML-reserved character mapped to its predefined entity.
private static Map<String,String> ENTITIES = new HashMap<String,String>();
static {
ENTITIES.put("&", "&amp;");
ENTITIES.put("'", "&apos;");
ENTITIES.put("\"", "&quot;");
ENTITIES.put(">", "&gt;");
ENTITIES.put("<", "&lt;");
}
// Matches any single character that has an entry in ENTITIES; group 1 is the matched character.
private static Pattern PATTERN = Pattern.compile("(&|'|\"|>|<)");
static void checkUrl(String url, String baseUrl) {
// Returns a copy of the input in which each of & ' " > < is replaced by its XML entity.
static String escapeXml(String string){
    StringBuffer escaped = new StringBuffer();
    Matcher reserved = PATTERN.matcher(string);
    for (boolean found = reserved.find(); found; found = reserved.find()) {
        String entity = ENTITIES.get(reserved.group(1));
        reserved.appendReplacement(escaped, entity);
    }
    // Copy whatever follows the last reserved character (or the whole input if there was none).
    reserved.appendTail(escaped);
    return escaped.toString();
}
static void checkUrl(URL url, URL baseUrl) {
// Is there a better test to use here?
if (!url.startsWith(baseUrl)) {
throw new RuntimeException("Url " + url + " doesn't start with base URL " + baseUrl);
if (baseUrl.getHost() == null) {
throw new RuntimeException("base URL is null");
}
if (!baseUrl.getHost().equalsIgnoreCase(url.getHost())) {
throw new RuntimeException("Domain of URL " + url + " doesn't match base URL " + baseUrl);
}
}

View File

@ -32,7 +32,7 @@ import java.util.TimeZone;
* <li>MILLISECOND: YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
* </ol>
*
* Note that W3C timezone designators (TZD) are either the letter "Z" (for GMT) or a pattern like "+00:30" or "-08:00". This is unlike
* <p>Note that W3C timezone designators (TZD) are either the letter "Z" (for GMT) or a pattern like "+00:30" or "-08:00". This is unlike
* RFC 822 timezones generated by SimpleDateFormat, which omit the ":" like this: "+0030" or "-0800".</p>
*
* <p>This class allows you to either specify which format pattern to use, or (by default) to

View File

@ -1,8 +1,6 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
@ -57,6 +55,25 @@ public class WebSitemapGenerator extends SitemapGenerator<WebSitemapUrl,WebSitem
this(new SitemapGeneratorOptions(baseUrl, baseDir));
}
/**
 * Configures the generator with a base URL only; no base directory is passed to the
 * {@link SitemapGeneratorOptions}. The object constructed
 * is not intended to be used to write to files. Rather, it is intended to be used to obtain
 * XML-formatted strings that represent sitemaps.
 *
 * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL;
 *        the string is converted with {@code new URL(baseUrl)}
 * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a {@link URL}
 */
public WebSitemapGenerator(String baseUrl) throws MalformedURLException {
this(new SitemapGeneratorOptions(new URL(baseUrl)));
}
/**
 * Configures the generator with an already-parsed base URL only; no base directory is
 * passed to the {@link SitemapGeneratorOptions}. The object constructed
 * is not intended to be used to write to files. Rather, it is intended to be used to obtain
 * XML-formatted strings that represent sitemaps.
 *
 * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
 */
public WebSitemapGenerator(URL baseUrl) {
this(new SitemapGeneratorOptions(baseUrl));
}
private static class Renderer extends AbstractSitemapUrlRenderer<WebSitemapUrl> implements ISitemapUrlRenderer<WebSitemapUrl> {
@ -64,13 +81,13 @@ public class WebSitemapGenerator extends SitemapGenerator<WebSitemapUrl,WebSitem
return WebSitemapUrl.class;
}
public void render(WebSitemapUrl url, OutputStreamWriter out, W3CDateFormat dateFormat) throws IOException {
super.render(url, out, dateFormat, null);
}
public String getXmlNamespaces() {
return null;
}
public void render(WebSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
super.render(url, sb, dateFormat, null);
}
}
}

View File

@ -1,111 +0,0 @@
<html><head><title>How to use SitemapGen4j</title></head>
<body>
<h1>How to use SitemapGen4j</h1>
SitemapGen4j is a library to generate XML sitemaps in Java.
<h2>What's an XML sitemap?</h2>
Quoting from <a href="http://sitemaps.org/index.php">sitemaps.org</a>:
<blockquote><p>Sitemaps are an easy way for webmasters to inform search engines about pages on their sites that are available for crawling. In its simplest form, a Sitemap is an XML file that lists URLs for a site along with additional metadata about each URL (when it was last updated, how often it usually changes, and how important it is, relative to other URLs in the site) so that search engines can more intelligently crawl the site.</p>
<p>Web crawlers usually discover pages from links within the site and from other sites. Sitemaps supplement this data to allow crawlers that support Sitemaps to pick up all URLs in the Sitemap and learn about those URLs using the associated metadata. Using the Sitemap protocol does not guarantee that web pages are included in search engines, but provides hints for web crawlers to do a better job of crawling your site.</p>
<p>Sitemap 0.90 is offered under the terms of the Attribution-ShareAlike Creative Commons License and has wide adoption, including support from Google, Yahoo!, and Microsoft.</p>
</blockquote>
<h2>Getting started</h2>
<p>The easiest way to get started is to just use the WebSitemapGenerator class, like this:
<pre name="code" class="java">WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
wsg.addUrl("http://www.example.com/index.html"); // repeat multiple times
wsg.write();</pre>
<h2>Configuring options</h2>
But there are a lot of nifty options available for URLs and for the generator as a whole. To configure the generator, use a builder:
<pre name="code" class="java">WebSitemapGenerator wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.gzip(true).build(); // enable gzipped output
wsg.addUrl("http://www.example.com/index.html");
wsg.write();</pre>
To configure the URLs, construct a WebSitemapUrl with WebSitemapUrl.Options.
<pre name="code" class="java">WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
WebSitemapUrl url = new WebSitemapUrl.Options("http://www.example.com/index.html")
.lastMod(new Date()).priority(1.0).changeFreq(ChangeFreq.HOURLY).build();
// this will configure the URL with lastmod=now, priority=1.0, changefreq=hourly
wsg.addUrl(url);
wsg.write();</pre>
<h2>Configuring the date format</h2>
One important configuration option for the sitemap generator is the date format. The <a href="http://www.w3.org/TR/NOTE-datetime">W3C datetime standard</a> allows you to choose the precision of your datetime (anything from just specifying the year like "1997" to specifying the fraction of the second like "1997-07-16T19:20:30.45+01:00"); if you don't specify one, we'll try to guess which one you want, and we'll use the default timezone of the local machine, which might not be what you prefer.
<pre name="code" class="java">
// Use DAY pattern (2009-02-07), Greenwich Mean Time timezone
W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY);
dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
WebSitemapGenerator wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.dateFormat(dateFormat).build(); // actually use the configured dateFormat
wsg.addUrl("http://www.example.com/index.html");
wsg.write();</pre>
<h2>Lots of URLs: a sitemap index file</h2>
One sitemap can contain a maximum of 50,000 URLs. (Some sitemaps, like Google News sitemaps, can contain only 1,000 URLs.) If you need to put more URLs than that in a sitemap, you'll have to use a sitemap index file. Fortunately, WebSitemapGenerator can manage the whole thing for you.
<pre name="code" class="java">WebSitemapGenerator wsg = new WebSitemapGenerator("http://www.example.com", myDir);
for (int i = 0; i &lt; 60000; i++) wsg.addUrl("http://www.example.com/doc"+i+".html");
wsg.write();
wsg.writeSitemapsWithIndex(); // generate the sitemap_index.xml
</pre>
<p>That will generate two sitemaps for 60K URLs: sitemap1.xml (with 50K urls) and sitemap2.xml (with the remaining 10K), and then generate a sitemap_index.xml file describing the two.</p>
<p>It's also possible to carefully organize your sub-sitemaps. For example, it's recommended to group URLs with the same changeFreq together (have one sitemap for changeFreq "daily" and another for changeFreq "yearly"), so you can modify the lastMod of the daily sitemap without modifying the lastMod of the yearly sitemap. To do that, just construct your sitemaps one at a time using the WebSitemapGenerator, then use the SitemapIndexGenerator to create a single index for all of them.</p>
<pre name="code" class="java">WebSitemapGenerator wsg;
// generate foo sitemap
wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.fileNamePrefix("foo").build();
for (int i = 0; i &lt; 5; i++) wsg.addUrl("http://www.example.com/foo"+i+".html");
wsg.write();
// generate bar sitemap
wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.fileNamePrefix("bar").build();
for (int i = 0; i &lt; 5; i++) wsg.addUrl("http://www.example.com/bar"+i+".html");
wsg.write();
// generate sitemap index for foo + bar
SitemapIndexGenerator sig = new SitemapIndexGenerator("http://www.example.com", myFile);
sig.addUrl("http://www.example.com/foo.xml");
sig.addUrl("http://www.example.com/bar.xml");
sig.write();</pre>
<p>You could also use the SitemapIndexGenerator to incorporate sitemaps generated by other tools. For example, you might use Google's official Python sitemap generator to generate some sitemaps, and use WebSitemapGenerator to generate some sitemaps, and use SitemapIndexGenerator to make an index of all of them.</p>
<h2>Validate your sitemaps</h2>
<p>SitemapGen4j can also validate your sitemaps using the official XML Schema Definition (XSD). If you used SitemapGen4j to make the sitemaps, you shouldn't need to do this unless there's a bug in our code. But you can use it to validate sitemaps generated by other tools, and it provides an extra level of safety.</p>
<p>It's easy to configure the WebSitemapGenerator to automatically validate your sitemaps right after you write them (but this does slow things down, naturally).</p>
<pre name="code" class="java">WebSitemapGenerator wsg = WebSitemapGenerator.builder("http://www.example.com", myDir)
.autoValidate(true).build(); // validate the sitemap after writing
wsg.addUrl("http://www.example.com/index.html");
wsg.write();</pre>
<p>You can also use the SitemapValidator directly to manage sitemaps. It has two methods: validateWebSitemap(File f) and validateSitemapIndex(File f).</p>
<h2>Google-specific sitemaps</h2>
<p>Google can understand a wide variety of custom sitemap formats that they made up, including Mobile sitemaps, Geo sitemaps, Code sitemaps (for Google Code search), Google News sitemaps, and Video sitemaps. SitemapGen4j can generate any/all of these different types of sitemaps.</p>
<p>To generate a special type of sitemap, just use GoogleMobileSitemapGenerator, GoogleGeoSitemapGenerator, GoogleCodeSitemapGenerator, GoogleNewsSitemapGenerator, or GoogleVideoSitemapGenerator instead of WebSitemapGenerator.</p>
<p>You can't mix-and-match regular URLs with Google-specific sitemaps, so you'll also have to use a GoogleMobileSitemapUrl, GoogleGeoSitemapUrl, GoogleCodeSitemapUrl, GoogleNewsSitemapUrl, or GoogleVideoSitemapUrl instead of a WebSitemapUrl. Each of them has unique configurable options not available to regular web URLs.</p>
</body>
</html>

View File

@ -1,74 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
elementFormDefault="qualified">
<xsd:annotation>
<xsd:documentation>
XML Schema for Sitemap index files.
Last Modifed 2006-07-25
Last Modifed 2009-04-08
</xsd:documentation>
</xsd:annotation>
<xsd:element name="sitemapindex">
<xsd:annotation>
<xsd:documentation>
Container for a set of up to 1,000 sitemap URLs.
Container for a set of up to 50,000 sitemap URLs.
This is the root element of the XML file.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element ref="sitemap" maxOccurs="1000"/>
<xsd:element name="sitemap" type="tSitemap" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="sitemap">
<xsd:complexType name="tSitemap">
<xsd:annotation>
<xsd:documentation>
Container for the data needed to describe a sitemap.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:all>
<xsd:element ref="loc"/>
<xsd:element ref="lastmod" minOccurs="0"/>
</xsd:all>
</xsd:complexType>
</xsd:element>
<xsd:all>
<xsd:element name="loc" type="tLocSitemap"/>
<xsd:element name="lastmod" type="tLastmodSitemap" minOccurs="0"/>
</xsd:all>
</xsd:complexType>
<xsd:element name="loc">
<xsd:simpleType name="tLocSitemap">
<xsd:annotation>
<xsd:documentation>
REQUIRED: The location URI of a sitemap.
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:anyURI">
<xsd:minLength value="12"/>
<xsd:maxLength value="2048"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:restriction base="xsd:anyURI">
<xsd:minLength value="12"/>
<xsd:maxLength value="2048"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:element name="lastmod">
<xsd:simpleType name="tLastmodSitemap">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: The date the sitemap was last modified. The date must conform
OPTIONAL: The date the document was last modified. The date must conform
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
Example: 2005-05-10
Lastmod may also contain a timestamp.
Example: 2005-05-10T17:33:30+08:00
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:minLength value="10"/>
<xsd:maxLength value="25"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:union>
<xsd:simpleType>
<xsd:restriction base="xsd:date"/>
</xsd:simpleType>
<xsd:simpleType>
<xsd:restriction base="xsd:dateTime"/>
</xsd:simpleType>
</xsd:union>
</xsd:simpleType>
</xsd:schema>

View File

@ -1,12 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
elementFormDefault="qualified">
<xsd:annotation>
<xsd:documentation>
XML Schema for Sitemap files.
Last Modifed 2006-07-25
Last Modifed 2008-03-26
</xsd:documentation>
</xsd:annotation>
@ -19,43 +19,40 @@
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element ref="url" maxOccurs="unbounded"/>
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="url">
<xsd:complexType name="tUrl">
<xsd:annotation>
<xsd:documentation>
Container for the data needed to describe a document to crawl.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:all>
<xsd:element ref="loc"/>
<xsd:element ref="lastmod" minOccurs="0"/>
<xsd:element ref="changefreq" minOccurs="0"/>
<xsd:element ref="priority" minOccurs="0"/>
</xsd:all>
</xsd:complexType>
</xsd:element>
<xsd:sequence>
<xsd:element name="loc" type="tLoc"/>
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
</xsd:sequence>
</xsd:complexType>
<xsd:element name="loc">
<xsd:simpleType name="tLoc">
<xsd:annotation>
<xsd:documentation>
REQUIRED: The location URI of a document.
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:anyURI">
<xsd:minLength value="12"/>
<xsd:maxLength value="2048"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:restriction base="xsd:anyURI">
<xsd:minLength value="12"/>
<xsd:maxLength value="2048"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:element name="lastmod">
<xsd:simpleType name="tLastmod">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: The date the document was last modified. The date must conform
@ -65,15 +62,17 @@
Example: 2005-05-10T17:33:30+08:00
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:minLength value="10"/>
<xsd:maxLength value="25"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:union>
<xsd:simpleType>
<xsd:restriction base="xsd:date"/>
</xsd:simpleType>
<xsd:simpleType>
<xsd:restriction base="xsd:dateTime"/>
</xsd:simpleType>
</xsd:union>
</xsd:simpleType>
<xsd:element name="changefreq">
<xsd:simpleType name="tChangeFreq">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: Indicates how frequently the content at a particular URL is
@ -84,20 +83,18 @@
Consider this element as a friendly suggestion and not a command.
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:enumeration value="always"/>
<xsd:enumeration value="hourly"/>
<xsd:enumeration value="daily"/>
<xsd:enumeration value="weekly"/>
<xsd:enumeration value="monthly"/>
<xsd:enumeration value="yearly"/>
<xsd:enumeration value="never"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:restriction base="xsd:string">
<xsd:enumeration value="always"/>
<xsd:enumeration value="hourly"/>
<xsd:enumeration value="daily"/>
<xsd:enumeration value="weekly"/>
<xsd:enumeration value="monthly"/>
<xsd:enumeration value="yearly"/>
<xsd:enumeration value="never"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:element name="priority">
<xsd:simpleType name="tPriority">
<xsd:annotation>
<xsd:documentation>
OPTIONAL: The priority of a particular URL relative to other pages
@ -109,13 +106,10 @@
is what will be considered.
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:decimal">
<xsd:minInclusive value="0.0"/>
<xsd:maxInclusive value="1.0"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:restriction base="xsd:decimal">
<xsd:minInclusive value="0.0"/>
<xsd:maxInclusive value="1.0"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:schema>

View File

@ -0,0 +1,165 @@
package com.redfin.sitemapgenerator;
import junit.framework.TestCase;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
public class GoogleImageSitemapUrlTest extends TestCase {
private static final URL LANDING_URL = newURL("http://www.example.com/index.html");
private static final URL CONTENT_URL = newURL("http://www.example.com/index.flv");
File dir;
GoogleImageSitemapGenerator wsg;
private static URL newURL(String url) {
try {
return new URL(url);
} catch (MalformedURLException e) {}
return null;
}
public void setUp() throws Exception {
dir = File.createTempFile(GoogleVideoSitemapUrlTest.class.getSimpleName(), "");
dir.delete();
dir.mkdir();
dir.deleteOnExit();
}
public void tearDown() {
wsg = null;
for (File file : dir.listFiles()) {
file.deleteOnExit();
file.delete();
}
dir.delete();
dir = null;
}
public void testSimpleUrl() throws Exception {
wsg = new GoogleImageSitemapGenerator("http://www.example.com", dir);
GoogleImageSitemapUrl url = new GoogleImageSitemapUrl(LANDING_URL);
url.addImage(new Image("http://cdn.example.com/image1.jpg"));
url.addImage(new Image("http://cdn.example.com/image2.jpg"));
wsg.addUrl(url);
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\" >\n" +
" <url>\n" +
" <loc>http://www.example.com/index.html</loc>\n" +
" <image:image>\n" +
" <image:loc>http://cdn.example.com/image1.jpg</image:loc>\n" +
" </image:image>\n" +
" <image:image>\n" +
" <image:loc>http://cdn.example.com/image2.jpg</image:loc>\n" +
" </image:image>\n" +
" </url>\n" +
"</urlset>";
String sitemap = writeSingleSiteMap(wsg);
assertEquals(expected, sitemap);
}
public void testBaseOptions() throws Exception {
wsg = new GoogleImageSitemapGenerator("http://www.example.com", dir);
GoogleImageSitemapUrl url = new GoogleImageSitemapUrl.Options(LANDING_URL)
.images(new Image("http://cdn.example.com/image1.jpg"), new Image("http://cdn.example.com/image2.jpg"))
.priority(0.5)
.changeFreq(ChangeFreq.WEEKLY)
.build();
wsg.addUrl(url);
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\" >\n" +
" <url>\n" +
" <loc>http://www.example.com/index.html</loc>\n" +
" <changefreq>weekly</changefreq>\n" +
" <priority>0.5</priority>\n" +
" <image:image>\n" +
" <image:loc>http://cdn.example.com/image1.jpg</image:loc>\n" +
" </image:image>\n" +
" <image:image>\n" +
" <image:loc>http://cdn.example.com/image2.jpg</image:loc>\n" +
" </image:image>\n" +
" </url>\n" +
"</urlset>";
String sitemap = writeSingleSiteMap(wsg);
assertEquals(expected, sitemap);
}
public void testImageOptions() throws Exception {
wsg = new GoogleImageSitemapGenerator("http://www.example.com", dir);
GoogleImageSitemapUrl url = new GoogleImageSitemapUrl.Options(LANDING_URL)
.images(new Image.ImageBuilder("http://cdn.example.com/image1.jpg")
.title("image1.jpg")
.caption("An image of the number 1")
.geoLocation("Pyongyang, North Korea")
.license("http://cdn.example.com/licenses/imagelicense.txt")
.build(),
new Image.ImageBuilder("http://cdn.example.com/image2.jpg")
.title("image2.jpg")
.caption("An image of the number 2")
.geoLocation("Pyongyang, North Korea")
.license("http://cdn.example.com/licenses/imagelicense.txt")
.build())
.priority(0.5)
.changeFreq(ChangeFreq.WEEKLY)
.build();
wsg.addUrl(url);
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\" >\n" +
" <url>\n" +
" <loc>http://www.example.com/index.html</loc>\n" +
" <changefreq>weekly</changefreq>\n" +
" <priority>0.5</priority>\n" +
" <image:image>\n" +
" <image:loc>http://cdn.example.com/image1.jpg</image:loc>\n" +
" <image:caption>An image of the number 1</image:caption>\n" +
" <image:title>image1.jpg</image:title>\n" +
" <image:geo_location>Pyongyang, North Korea</image:geo_location>\n" +
" <image:license>http://cdn.example.com/licenses/imagelicense.txt</image:license>\n" +
" </image:image>\n" +
" <image:image>\n" +
" <image:loc>http://cdn.example.com/image2.jpg</image:loc>\n" +
" <image:caption>An image of the number 2</image:caption>\n" +
" <image:title>image2.jpg</image:title>\n" +
" <image:geo_location>Pyongyang, North Korea</image:geo_location>\n" +
" <image:license>http://cdn.example.com/licenses/imagelicense.txt</image:license>\n" +
" </image:image>\n" +
" </url>\n" +
"</urlset>";
String sitemap = writeSingleSiteMap(wsg);
assertEquals(expected, sitemap);
}
public void testTooManyImages() throws Exception {
wsg = new GoogleImageSitemapGenerator("http://www.example.com", dir);
List<Image> images = new ArrayList<Image>();
for(int i = 0; i <= 1000; i++) {
images.add(new Image("http://cdn.example.com/image" + i + ".jpg"));
}
try {
GoogleImageSitemapUrl url = new GoogleImageSitemapUrl.Options(LANDING_URL)
.images(images)
.priority(0.5)
.changeFreq(ChangeFreq.WEEKLY)
.build();
fail("Too many images allowed");
} catch (RuntimeException r) {}
}
private String writeSingleSiteMap(GoogleImageSitemapGenerator wsg) {
List<File> files = wsg.write();
assertEquals("Too many files: " + files.toString(), 1, files.size());
assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
return TestUtil.slurpFileAndDelete(files.get(0));
}
}

View File

@ -0,0 +1,113 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.util.*;
import junit.framework.TestCase;
/**
 * Tests that {@link GoogleLinkSitemapGenerator} renders the alternate links of a
 * {@link GoogleLinkSitemapUrl} as {@code xhtml:link} elements in the expected sitemap XML.
 *
 * <p>Each test writes into a fresh temporary directory that is removed in {@link #tearDown()}.</p>
 */
public class GoogleLinkSitemapUrlTest extends TestCase {

    File dir;
    GoogleLinkSitemapGenerator wsg;

    /** Creates an empty temporary directory named after this test class. */
    @Override
    public void setUp() throws Exception {
        // createTempFile reserves a unique name; swap the file for a directory of the same name.
        dir = File.createTempFile(GoogleLinkSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        dir.mkdir();
        dir.deleteOnExit();
    }

    /** Deletes everything the test left in the temporary directory, then the directory itself. */
    @Override
    public void tearDown() {
        wsg = null;
        // listFiles() returns null when dir is not a readable directory; do not mask the
        // real test failure with a NullPointerException here.
        final File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (final File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** Alternates carrying an {@code hreflang} attribute are rendered in insertion order. */
    public void testSimpleUrlWithHrefLang() throws Exception {
        wsg = new GoogleLinkSitemapGenerator("http://www.example.com", dir);
        final Map<String, Map<String, String>> alternates = alternates("hreflang", "en-GB", "fr-FR", "es-ES");
        final GoogleLinkSitemapUrl url = new GoogleLinkSitemapUrl("http://www.example.com/index.html", alternates);
        wsg.addUrl(url);
        final String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" "
            + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" >\n"
            + " <url>\n"
            + " <loc>http://www.example.com/index.html</loc>\n"
            + " <xhtml:link\n"
            + " rel=\"alternate\"\n"
            + " hreflang=\"en-GB\"\n"
            + " href=\"http://www.example/en/index.html\"\n"
            + " />\n"
            + " <xhtml:link\n"
            + " rel=\"alternate\"\n"
            + " hreflang=\"fr-FR\"\n"
            + " href=\"http://www.example/fr/index.html\"\n"
            + " />\n"
            + " <xhtml:link\n"
            + " rel=\"alternate\"\n"
            + " hreflang=\"es-ES\"\n"
            + " href=\"http://www.example/es/index.html\"\n"
            + " />\n"
            + " </url>\n"
            + "</urlset>";
        final String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Alternates carrying a {@code media} attribute are rendered in insertion order. */
    public void testSimpleUrlWithMedia() throws Exception {
        wsg = new GoogleLinkSitemapGenerator("http://www.example.com", dir);
        final String smallScreen = "only screen and (max-width: 640px)";
        final Map<String, Map<String, String>> alternates = alternates("media", smallScreen, smallScreen, smallScreen);
        final GoogleLinkSitemapUrl url = new GoogleLinkSitemapUrl("http://www.example.com/index.html", alternates);
        wsg.addUrl(url);
        final String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" "
            + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" >\n"
            + " <url>\n"
            + " <loc>http://www.example.com/index.html</loc>\n"
            + " <xhtml:link\n"
            + " rel=\"alternate\"\n"
            + " media=\"only screen and (max-width: 640px)\"\n"
            + " href=\"http://www.example/en/index.html\"\n"
            + " />\n"
            + " <xhtml:link\n"
            + " rel=\"alternate\"\n"
            + " media=\"only screen and (max-width: 640px)\"\n"
            + " href=\"http://www.example/fr/index.html\"\n"
            + " />\n"
            + " <xhtml:link\n"
            + " rel=\"alternate\"\n"
            + " media=\"only screen and (max-width: 640px)\"\n"
            + " href=\"http://www.example/es/index.html\"\n"
            + " />\n"
            + " </url>\n"
            + "</urlset>";
        final String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /**
     * Builds the en/fr/es alternates map shared by the tests, in that insertion order,
     * giving each alternate a single attribute with the supplied value.
     */
    private static Map<String, Map<String, String>> alternates(final String attribute, final String en,
            final String fr, final String es) {
        // LinkedHashMap keeps insertion order, which the expected XML relies on.
        final Map<String, Map<String, String>> alternates = new LinkedHashMap<String, Map<String, String>>();
        alternates.put("http://www.example/en/index.html", Collections.singletonMap(attribute, en));
        alternates.put("http://www.example/fr/index.html", Collections.singletonMap(attribute, fr));
        alternates.put("http://www.example/es/index.html", Collections.singletonMap(attribute, es));
        return alternates;
    }

    /** Writes the sitemap, checks exactly one file named sitemap.xml was produced, and returns its content. */
    private String writeSingleSiteMap(final GoogleLinkSitemapGenerator wsg) {
        final List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}

View File

@ -6,9 +6,6 @@ import java.util.List;
import junit.framework.TestCase;
import com.redfin.sitemapgenerator.GoogleNewsSitemapGenerator;
import com.redfin.sitemapgenerator.GoogleNewsSitemapUrl;
import com.redfin.sitemapgenerator.W3CDateFormat;
import com.redfin.sitemapgenerator.W3CDateFormat.Pattern;
public class GoogleNewsSitemapUrlTest extends TestCase {
@ -38,14 +35,19 @@ public class GoogleNewsSitemapUrlTest extends TestCase {
dateFormat.setTimeZone(W3CDateFormat.ZULU);
wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir)
.dateFormat(dateFormat).build();
GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl("http://www.example.com/index.html", new Date(0));
GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl("http://www.example.com/index.html", new Date(0), "Example Title", "The Example Times", "en");
wsg.addUrl(url);
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\" >\n" +
" <url>\n" +
" <loc>http://www.example.com/index.html</loc>\n" +
" <news:news>\n" +
" <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n" +
" <news:publication>\n" +
" <news:name>The Example Times</news:name>\n" +
" <news:language>en</news:language>\n" +
" </news:publication>\n" +
" <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n" +
" <news:title>Example Title</news:title>\n" +
" </news:news>\n" +
" </url>\n" +
"</urlset>";
@ -58,7 +60,7 @@ public class GoogleNewsSitemapUrlTest extends TestCase {
dateFormat.setTimeZone(W3CDateFormat.ZULU);
wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir)
.dateFormat(dateFormat).build();
GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0))
GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0), "Example Title", "The Example Times", "en")
.keywords("Klaatu", "Barrata", "Nicto")
.build();
wsg.addUrl(url);
@ -67,7 +69,12 @@ public class GoogleNewsSitemapUrlTest extends TestCase {
" <url>\n" +
" <loc>http://www.example.com/index.html</loc>\n" +
" <news:news>\n" +
" <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n" +
" <news:publication>\n" +
" <news:name>The Example Times</news:name>\n" +
" <news:language>en</news:language>\n" +
" </news:publication>\n" +
" <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n" +
" <news:title>Example Title</news:title>\n" +
" <news:keywords>Klaatu, Barrata, Nicto</news:keywords>\n" +
" </news:news>\n" +
" </url>\n" +
@ -75,6 +82,34 @@ public class GoogleNewsSitemapUrlTest extends TestCase {
String sitemap = writeSingleSiteMap(wsg);
assertEquals(expected, sitemap);
}
/** A genre set through {@code Options.genres} is rendered as a {@code news:genres} element. */
public void testGenres() throws Exception {
    W3CDateFormat secondPrecision = new W3CDateFormat(Pattern.SECOND);
    secondPrecision.setTimeZone(W3CDateFormat.ZULU);
    wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir)
        .dateFormat(secondPrecision).build();
    GoogleNewsSitemapUrl newsUrl = new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0), "Example Title", "The Example Times", "en")
        .genres("persbericht")
        .build();
    wsg.addUrl(newsUrl);
    String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\" >\n"
        + " <url>\n"
        + " <loc>http://www.example.com/index.html</loc>\n"
        + " <news:news>\n"
        + " <news:publication>\n"
        + " <news:name>The Example Times</news:name>\n"
        + " <news:language>en</news:language>\n"
        + " </news:publication>\n"
        + " <news:genres>persbericht</news:genres>\n"
        + " <news:publication_date>1970-01-01T00:00:00Z</news:publication_date>\n"
        + " <news:title>Example Title</news:title>\n"
        + " </news:news>\n"
        + " </url>\n"
        + "</urlset>";
    assertEquals(expected, writeSingleSiteMap(wsg));
}
private String writeSingleSiteMap(GoogleNewsSitemapGenerator wsg) {
List<File> files = wsg.write();

View File

@ -4,6 +4,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.List;
import java.util.zip.GZIPInputStream;
@ -161,6 +162,21 @@ public class SitemapGeneratorTest extends TestCase {
fail("wrong domain allowed to be added");
} catch (RuntimeException e) {}
}
/** An https URL on the same host as an http base URL is accepted and written unchanged. */
public void testSameDomainDifferentSchemeOK() throws Exception {
    wsg = new WebSitemapGenerator("http://www.example.com", dir);
    wsg.addUrl("https://www.example.com/index.html");
    String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n"
        + " <url>\n"
        + " <loc>https://www.example.com/index.html</loc>\n"
        + " </url>\n"
        + "</urlset>";
    assertEquals(expected, writeSingleSiteMap(wsg));
}
public void testDoubleWrite() throws Exception {
testSimpleUrl();
@ -177,7 +193,23 @@ public class SitemapGeneratorTest extends TestCase {
fail("Empty write is not allowed");
} catch (RuntimeException e) {}
}
/**
 * Configures the suffix string pattern {@code "01"} and checks that the
 * first written sitemap file is named {@code sitemap01.xml}.
 */
public void testSuffixPresent() throws MalformedURLException {
    wsg = WebSitemapGenerator.builder("http://www.example.com", dir)
        .suffixStringPattern("01")
        .build();
    wsg.addUrl("http://www.example.com/url1");
    wsg.addUrl("http://www.example.com/url2");

    File firstSitemap = wsg.write().get(0);
    assertEquals("Sitemap has a suffix now", "sitemap01.xml", firstSitemap.getName());
}
/**
 * Configures an empty suffix string pattern and checks that the first
 * written sitemap file keeps the plain name {@code sitemap.xml}.
 *
 * <p>Despite the method name, the value passed is the empty string, not
 * {@code null}.
 */
public void testNullSuffixPassed() throws MalformedURLException {
    wsg = WebSitemapGenerator.builder("http://www.example.com", dir).suffixStringPattern("").build();
    wsg.addUrl("http://www.example.com/url1");
    wsg.addUrl("http://www.example.com/url2");
    List<File> files = wsg.write();
    // The failure message must describe this test's expectation (no suffix),
    // not the one of testSuffixPresent.
    assertEquals("Sitemap should not have a suffix", "sitemap.xml", files.get(0).getName());
}
public void testTooManyUrls() throws Exception {
wsg = WebSitemapGenerator.builder("http://www.example.com", dir).allowMultipleSitemaps(false).build();
for (int i = 0; i < SitemapGenerator.MAX_URLS_PER_SITEMAP; i++) {
@ -283,6 +315,7 @@ public class SitemapGeneratorTest extends TestCase {
while ((c = reader.read()) != -1) {
sb.append((char)c);
}
reader.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
@ -291,6 +324,49 @@ public class SitemapGeneratorTest extends TestCase {
assertEquals("sitemap didn't match", SITEMAP1, actual);
}
/**
 * Builds a generator with a {@code null} base directory and checks that
 * writing to files fails with a {@link NullPointerException} carrying the
 * expected message.
 */
public void testBaseDirIsNullThrowsNullPointerException() throws Exception {
    wsg = WebSitemapGenerator.builder("http://www.example.com", null).autoValidate(true).maxUrls(10).build();
    wsg.addUrl("http://www.example.com/index.html");
    try {
        wsg.write();
        // Reaching this line means no exception was raised at all.
        fail("Expected a NullPointerException because baseDir is null");
    } catch (NullPointerException e) {
        // JUnit's signature is (message, expected, actual); keep that order so a
        // failure report labels the two values correctly.
        assertEquals("Correct exception was not thrown",
            "To write to files, baseDir must not be null", e.getMessage());
    }
}
/**
 * Adds eleven URLs to a generator limited to ten URLs per sitemap (and built
 * without a base directory), then checks the first two strings returned by
 * {@code writeAsStrings()} against {@code SITEMAP1} and
 * {@code SITEMAP_PLUS_ONE}.
 */
public void testWriteAsStringsMoreThanOneString() throws Exception {
    wsg = WebSitemapGenerator.builder("http://www.example.com", null)
        .autoValidate(true)
        .maxUrls(10)
        .build();

    // URLs /0 through /9 fill the first sitemap.
    for (int index = 0; index <= 9; index++) {
        wsg.addUrl("http://www.example.com/" + index);
    }
    // One more URL pushes the output past the per-sitemap limit.
    wsg.addUrl("http://www.example.com/just-one-more");

    List<String> sitemaps = wsg.writeAsStrings();
    String first = sitemaps.get(0);
    assertEquals("First string didn't match", SITEMAP1, first);
    String second = sitemaps.get(1);
    assertEquals("Second string didn't match", SITEMAP_PLUS_ONE, second);
}
/**
 * With {@code allowEmptySitemap(true)} and no URLs added, the written
 * sitemap is expected to be a {@code urlset} element with no entries.
 */
public void testWriteEmptySitemap() throws Exception {
    final String emptyUrlset = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n"
        + "</urlset>";

    wsg = WebSitemapGenerator.builder("http://www.example.com", dir)
        .allowEmptySitemap(true)
        .build();

    assertEquals(emptyUrlset, writeSingleSiteMap(wsg));
}
/**
 * Adds exactly as many URLs as the configured maximum (10) while empty
 * sitemaps are allowed, and checks that the single sitemap obtained through
 * {@code writeSingleSiteMap} equals {@code SITEMAP1}.
 */
public void testMaxUrlsAllowingEmptyDoesNotWriteExtraSitemap() throws Exception {
    wsg = WebSitemapGenerator.builder("http://www.example.com", dir)
        .allowEmptySitemap(true)
        .maxUrls(10)
        .build();

    int added = 0;
    while (added < 10) {
        wsg.addUrl("http://www.example.com/" + added);
        added++;
    }

    assertEquals(SITEMAP1, writeSingleSiteMap(wsg));
}
private String writeSingleSiteMap(WebSitemapGenerator wsg) {
List<File> files = wsg.write();
assertEquals("Too many files: " + files.toString(), 1, files.size());

View File

@ -86,6 +86,17 @@ public class SitemapIndexGeneratorTest extends TestCase {
fail("Allowed write with no URLs");
} catch (RuntimeException e) {}
}
/**
 * With {@code allowEmptyIndex(true)} and no sitemap URLs added, both the
 * file written by {@code write()} and the result of {@code writeAsString()}
 * are expected to be a {@code sitemapindex} element with no entries.
 */
public void testNoUrlsEmptyIndexAllowed() throws Exception {
    final String emptyIndex = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + "<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"
        + "</sitemapindex>";

    sig = new SitemapIndexGenerator.Options(EXAMPLE, outFile)
        .allowEmptyIndex(true)
        .build();
    sig.write();

    // Check the file written to disk first, then the in-memory variant.
    assertEquals(emptyIndex, TestUtil.slurpFileAndDelete(outFile));
    assertEquals(emptyIndex, sig.writeAsString());
}
public void testMaxUrls() throws Exception {
sig = new SitemapIndexGenerator.Options(EXAMPLE, outFile).autoValidate(true)
@ -97,6 +108,7 @@ public class SitemapIndexGeneratorTest extends TestCase {
sig.write();
String actual = TestUtil.slurpFileAndDelete(outFile);
assertEquals(INDEX, actual);
assertEquals(INDEX, sig.writeAsString());
}
public void testOneUrl() throws Exception {
@ -113,6 +125,7 @@ public class SitemapIndexGeneratorTest extends TestCase {
" </sitemap>\n" +
"</sitemapindex>";
assertEquals(expected, actual);
assertEquals(expected, sig.writeAsString());
}
public void testAddByPrefix() throws MalformedURLException {
@ -122,6 +135,7 @@ public class SitemapIndexGeneratorTest extends TestCase {
sig.write();
String actual = TestUtil.slurpFileAndDelete(outFile);
assertEquals(INDEX, actual);
assertEquals(INDEX, sig.writeAsString());
}
}

View File

@ -32,6 +32,7 @@ public class TestUtil {
while ((c = reader.read()) != -1) {
sb.append((char)c);
}
reader.close();
} catch (IOException e) {
throw new RuntimeException(e);
}