From ea0594afa169e7cedb05643bcb03b7649afd8649 Mon Sep 17 00:00:00 2001 From: Kamil Date: Thu, 21 Jul 2016 01:06:24 +0200 Subject: [PATCH] fixes issue #25 (#26) --- .../AbstractSitemapUrlRenderer.java | 4 ++-- .../SitemapIndexGenerator.java | 2 +- .../com/redfin/sitemapgenerator/UrlUtils.java | 23 +++++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java b/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java index fd794f6..a8ec3b2 100644 --- a/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java +++ b/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java @@ -5,7 +5,7 @@ abstract class AbstractSitemapUrlRenderer implements IS public void render(WebSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat, String additionalData) { sb.append(" \n"); sb.append(" "); - sb.append(url.getUrl().toString()); + sb.append(UrlUtils.escapeXml(url.getUrl().toString())); sb.append("\n"); if (url.getLastMod() != null) { sb.append(" "); @@ -35,7 +35,7 @@ abstract class AbstractSitemapUrlRenderer implements IS sb.append(':'); sb.append(tagName); sb.append('>'); - sb.append(value); + sb.append(UrlUtils.escapeXml(value.toString())); sb.append("\n"); out.write(" "); - out.write(url.url.toString()); + out.write(UrlUtils.escapeXml(url.url.toString())); out.write("\n"); Date lastMod = url.lastMod; diff --git a/src/main/java/com/redfin/sitemapgenerator/UrlUtils.java b/src/main/java/com/redfin/sitemapgenerator/UrlUtils.java index b12b575..558b5b0 100644 --- a/src/main/java/com/redfin/sitemapgenerator/UrlUtils.java +++ b/src/main/java/com/redfin/sitemapgenerator/UrlUtils.java @@ -2,8 +2,31 @@ package com.redfin.sitemapgenerator; import java.net.URL; import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; class UrlUtils { + private static Map ENTITIES = new HashMap(); + static 
{ + ENTITIES.put("&", "&amp;"); + ENTITIES.put("'", "&apos;"); + ENTITIES.put("\"", "&quot;"); + ENTITIES.put(">", "&gt;"); + ENTITIES.put("<", "&lt;"); + } + private static Pattern PATTERN = Pattern.compile("(&|'|\"|>|<)"); + + static String escapeXml(String string){ + Matcher matcher = PATTERN.matcher(string); + StringBuffer sb = new StringBuffer(); + while(matcher.find()) { + matcher.appendReplacement(sb, ENTITIES.get(matcher.group(1))); + } + matcher.appendTail(sb); + + return sb.toString(); + } static void checkUrl(URL url, URL baseUrl) { // Is there a better test to use here?