From 687bb758895bd3dc93f23c93800cd4b8195ad7bf Mon Sep 17 00:00:00 2001 From: Steven Caswell Date: Sun, 26 Jun 2005 16:56:36 +0000 Subject: [PATCH] (35366) Implementation of escape/unescapeHtml methods with Writer (http://issues.apache.org/bugzilla/show_bug.cgi?id=35366) git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@201875 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/commons/lang/Entities.java | 259 +++++++++++++++++- .../commons/lang/StringEscapeUtils.java | 87 +++++- .../commons/lang/StringEscapeUtilsTest.java | 25 +- 3 files changed, 346 insertions(+), 25 deletions(-) diff --git a/src/java/org/apache/commons/lang/Entities.java b/src/java/org/apache/commons/lang/Entities.java index 375d82052..f4cf82e32 100644 --- a/src/java/org/apache/commons/lang/Entities.java +++ b/src/java/org/apache/commons/lang/Entities.java @@ -15,6 +15,8 @@ */ package org.apache.commons.lang; +import java.io.IOException; +import java.io.Writer; import java.util.HashMap; import java.util.Map; import java.util.TreeMap; @@ -219,7 +221,8 @@ class Entities { {"real", "8476"}, //blackletter capital R = real part symbol,U+211C ISOamso --> {"trade", "8482"}, //trade mark sign, U+2122 ISOnum --> {"alefsym", "8501"}, //alef symbol = first transfinite cardinal,U+2135 NEW --> -// +// // {"larr", "8592"}, //leftwards arrow, U+2190 ISOnum --> {"uarr", "8593"}, //upwards arrow, U+2191 ISOnum--> @@ -228,10 +231,14 @@ class Entities { {"harr", "8596"}, //left right arrow, U+2194 ISOamsa --> {"crarr", "8629"}, //downwards arrow with corner leftwards= carriage return, U+21B5 NEW --> {"lArr", "8656"}, //leftwards double arrow, U+21D0 ISOtech --> -// +// {"uArr", "8657"}, //upwards double arrow, U+21D1 ISOamsa --> {"rArr", "8658"}, //rightwards double arrow,U+21D2 ISOtech --> -// +// {"dArr", "8659"}, //downwards double arrow, U+21D3 ISOamsa --> {"hArr", "8660"}, //left right double arrow,U+21D4 ISOamsa --> // @@ -245,9 +252,11 @@ class Entities { {"ni", "8715"}, 
//contains as member, U+220B ISOtech --> // {"prod", "8719"}, //n-ary product = product sign,U+220F ISOamsb --> -// +// {"sum", "8721"}, //n-ary summation, U+2211 ISOamsb --> -// +// {"minus", "8722"}, //minus sign, U+2212 ISOtech --> {"lowast", "8727"}, //asterisk operator, U+2217 ISOtech --> {"radic", "8730"}, //square root = radical sign,U+221A ISOtech --> @@ -261,7 +270,8 @@ class Entities { {"int", "8747"}, //integral, U+222B ISOtech --> {"there4", "8756"}, //therefore, U+2234 ISOtech --> {"sim", "8764"}, //tilde operator = varies with = similar to,U+223C ISOtech --> -// +// {"cong", "8773"}, //approximately equal to, U+2245 ISOtech --> {"asymp", "8776"}, //almost equal to = asymptotic to,U+2248 ISOamsr --> {"ne", "8800"}, //not equal to, U+2260 ISOtech --> @@ -270,7 +280,10 @@ class Entities { {"ge", "8805"}, //greater-than or equal to,U+2265 ISOtech --> {"sub", "8834"}, //subset of, U+2282 ISOtech --> {"sup", "8835"}, //superset of, U+2283 ISOtech --> -// +// {"sube", "8838"}, //subset of or equal to, U+2286 ISOtech --> {"supe", "8839"}, //superset of or equal to,U+2287 ISOtech --> {"oplus", "8853"}, //circled plus = direct sum,U+2295 ISOamsb --> @@ -286,7 +299,8 @@ class Entities { {"lang", "9001"}, //left-pointing angle bracket = bra,U+2329 ISOtech --> // {"rang", "9002"}, //right-pointing angle bracket = ket,U+232A ISOtech --> -// +// // {"loz", "9674"}, //lozenge, U+25CA ISOpub --> // @@ -364,6 +378,11 @@ class Entities { fillWithHtml40Entities(HTML40); } + /** + *

Fills the specified entities instance with HTML 40 entities.

+ * + * @param entities the instance to be filled. + */ static void fillWithHtml40Entities(Entities entities) { entities.addEntities(BASIC_ARRAY); entities.addEntities(ISO8859_1_ARRAY); @@ -371,10 +390,28 @@ class Entities { } static interface EntityMap { + /** + *

Adds an entry to this entity map.

+ * + * @param name the entity name + * @param value the entity value + */ void add(String name, int value); + /** + *

Returns the name of the entity identified by the specified value.

+ * + * @param value the value to locate + * @return entity name associated with the specified value + */ String name(int value); + /** + *

Returns the value of the entity identified by the specified name.

+ * + * @param name the name to locate + * @return entity value associated with the specified name + */ int value(String name); } @@ -382,15 +419,24 @@ class Entities { private Map mapNameToValue = new HashMap(); private IntHashMap mapValueToName = new IntHashMap(); + /** + * {@inheritDoc} + */ public void add(String name, int value) { mapNameToValue.put(name, new Integer(value)); mapValueToName.put(value, name); } + /** + * {@inheritDoc} + */ public String name(int value) { return (String) mapValueToName.get(value); } + /** + * {@inheritDoc} + */ public int value(String name) { Object value = mapNameToValue.get(name); if (value == null) { @@ -405,15 +451,24 @@ class Entities { protected Map mapNameToValue; protected Map mapValueToName; + /** + * {@inheritDoc} + */ public void add(String name, int value) { mapNameToValue.put(name, new Integer(value)); mapValueToName.put(new Integer(value), name); } + /** + * {@inheritDoc} + */ public String name(int value) { return (String) mapValueToName.get(new Integer(value)); } + /** + * {@inheritDoc} + */ public int value(String name) { Object value = mapNameToValue.get(name); if (value == null) { @@ -424,6 +479,9 @@ class Entities { } static class HashEntityMap extends MapIntMap { + /** + * Constructs a new instance of HashEntityMap. + */ public HashEntityMap() { mapNameToValue = new HashMap(); mapValueToName = new HashMap(); @@ -431,6 +489,9 @@ class Entities { } static class TreeEntityMap extends MapIntMap { + /** + * Constructs a new instance of TreeEntityMap. + */ public TreeEntityMap() { mapNameToValue = new TreeMap(); mapValueToName = new TreeMap(); @@ -441,6 +502,9 @@ class Entities { private String[] lookupTable; private int LOOKUP_TABLE_SIZE = 256; + /** + * {@inheritDoc} + */ public String name(int value) { if (value < LOOKUP_TABLE_SIZE) { return lookupTable()[value]; @@ -448,6 +512,12 @@ class Entities { return super.name(value); } + /** + *

Returns the lookup table for this entity map. The lookup table is created if it has not been + * created previously.

+ * + * @return the lookup table + */ private String[] lookupTable() { if (lookupTable == null) { createLookupTable(); @@ -455,6 +525,9 @@ class Entities { return lookupTable; } + /** + *

Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.

+ */ private void createLookupTable() { lookupTable = new String[LOOKUP_TABLE_SIZE]; for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i) { @@ -469,17 +542,29 @@ class Entities { protected String[] names; protected int[] values; + /** + * Constructs a new instance of ArrayEntityMap. + */ public ArrayEntityMap() { names = new String[growBy]; values = new int[growBy]; } + /** + * Constructs a new instance of ArrayEntityMap + * specifying the size by which the array should grow. + * + * @param growBy array will be initialized to and will grow by this amount + */ public ArrayEntityMap(int growBy) { this.growBy = growBy; names = new String[growBy]; values = new int[growBy]; } + /** + * {@inheritDoc} + */ public void add(String name, int value) { ensureCapacity(size + 1); names[size] = name; @@ -487,6 +572,11 @@ class Entities { size++; } + /** + * Verifies the capacity of the entity array, adjusting the size if necessary. + * + * @param capacity size the array should be + */ protected void ensureCapacity(int capacity) { if (capacity > names.length) { int newSize = Math.max(capacity, size + growBy); @@ -499,6 +589,9 @@ class Entities { } } + /** + * {@inheritDoc} + */ public String name(int value) { for (int i = 0; i < size; ++i) { if (values[i] == value) { @@ -508,6 +601,9 @@ class Entities { return null; } + /** + * {@inheritDoc} + */ public int value(String name) { for (int i = 0; i < size; ++i) { if (names[i].equals(name)) { @@ -520,14 +616,30 @@ class Entities { static class BinaryEntityMap extends ArrayEntityMap { + /** + * Constructs a new instance of BinaryEntityMap. + */ public BinaryEntityMap() { + ; // empty constructor } + /** + * Constructs a new instance of ArrayEntityMap + * specifying the size by which the underlying array should grow. 
+ * + * @param growBy array will be initialized to and will grow by this amount + */ public BinaryEntityMap(int growBy) { super(growBy); } - // based on code in java.util.Arrays + /** + * Performs a binary search of the entity array for the specified key. + * This method is based on code in {@link java.util.Arrays}. + * + * @param key the key to be found + * @return the index of the entity array matching the specified key + */ private int binarySearch(int key) { int low = 0; int high = size - 1; @@ -547,6 +659,9 @@ class Entities { return -(low + 1); // key not found. } + /** + * {@inheritDoc} + */ public void add(String name, int value) { ensureCapacity(size + 1); int insertAt = binarySearch(value); @@ -561,6 +676,9 @@ class Entities { size++; } + /** + * {@inheritDoc} + */ public String name(int value) { int index = binarySearch(value); if (index < 0) { @@ -573,21 +691,43 @@ class Entities { // package scoped for testing EntityMap map = new Entities.LookupEntityMap(); + /** + *

Adds entities to this entity.

+ * + * @param entityArray array of entities to be added + */ public void addEntities(String[][] entityArray) { for (int i = 0; i < entityArray.length; ++i) { addEntity(entityArray[i][0], Integer.parseInt(entityArray[i][1])); } } + /** + *

Add an entity to this entity.

+ * + * @param name name of the entity + * @param value value of the entity + */ public void addEntity(String name, int value) { map.add(name, value); } + /** + *

Returns the name of the entity identified by the specified value.

+ * + * @param value the value to locate + * @return entity name associated with the specified value + */ public String entityName(int value) { return map.name(value); } - + /** + *

Returns the value of the entity identified by the specified name.

+ * + * @param name the name to locate + * @return entity value associated with the specified name + */ public int entityValue(String name) { return map.value(name); } @@ -626,6 +766,40 @@ class Entities { return buf.toString(); } + /** + *

Escapes the characters in the String passed and writes the result + * to the Writer passed.

+ * + * @param writer The Writer to write the results of the escaping to. + * Assumed to be a non-null value. + * @param str The String to escape. Assumed to be a non-null value. + * @throws IOException when Writer passed throws the exception from + * calls to the {@link Writer#write(int)} methods. + * + * @see #escape(String) + * @see Writer + */ + public void escape(Writer writer, String str) throws IOException { + int len = str.length(); + for (int i = 0; i < len; i++) { + char c = str.charAt(i); + String entityName = this.entityName(c); + if (entityName == null) { + if (c > 0x7F) { + writer.write("&#"); + writer.write(Integer.toString(c, 10)); + writer.write(';'); + } else { + writer.write(c); + } + } else { + writer.write('&'); + writer.write(entityName); + writer.write(';'); + } + } + } + /** *

Unescapes the entities in a String.

* @@ -683,4 +857,69 @@ class Entities { return buf.toString(); } + /** + *

Unescapes the escaped entities in the String passed and + * writes the result to the Writer passed.

+ * + * @param writer The Writer to write the results to; assumed to be non-null. + * @param string The String to write the results to; assumed to be non-null. + * @throws IOException when Writer passed throws the exception from + * calls to the {@link Writer#write(int)} methods. + * + * @see #escape(String) + * @see Writer + */ + public void unescape(Writer writer, String string) throws IOException { + int len = string.length(); + if (len == 0) { + return; + } + for (int i = 0; i < len; i++) { + char c = string.charAt(i); + if (c == '&') { + int nextIdx = i+1; + int semiColonIdx = string.indexOf(';', nextIdx); + if (semiColonIdx == -1) { + writer.write(c); + continue; + } + String entityContent = string.substring(nextIdx, semiColonIdx); + int entityValue = -1; + int entityContentLen = entityContent.length(); + if (entityContentLen > 0) { + if (entityContent.charAt(0) == '#') { //escaped value content is an integer (decimal or hexidecimal) + if (entityContentLen > 1) { + char isHexChar = entityContent.charAt(1); + try { + switch (isHexChar) { + case 'X' : + case 'x' : { + entityValue = Integer.parseInt(entityContent.substring(2), 16); + } + default : { + entityValue = Integer.parseInt(entityContent.substring(1), 10); + } + } + } catch (NumberFormatException e) { + } + } + } else { //escaped value content is an entity name + entityValue = this.entityValue(entityContent); + } + } + + if (entityValue == -1) { + writer.write('&'); + writer.write(entityContent); + writer.write(';'); + } else { + writer.write(entityValue); + } + i = semiColonIdx; //move index up to the semi-colon + } else { + writer.write(c); + } + } + } + } diff --git a/src/java/org/apache/commons/lang/StringEscapeUtils.java b/src/java/org/apache/commons/lang/StringEscapeUtils.java index 95e6ef14b..19a5d71e7 100644 --- a/src/java/org/apache/commons/lang/StringEscapeUtils.java +++ b/src/java/org/apache/commons/lang/StringEscapeUtils.java @@ -414,19 +414,66 @@ public class StringEscapeUtils { * @return a 
new escaped String, null if null string input * * @see #unescapeHtml(String) - * @see
ISO Entities - * @see
HTML 3.2 Character Entities for ISO Latin-1 - * @see
HTML 4.0 Character entity references - * @see
HTML 4.01 Character References - * @see
HTML 4.01 Code positions + * @see ISO Entities + * @see HTML 3.2 Character Entities for ISO Latin-1 + * @see HTML 4.0 Character entity references + * @see HTML 4.01 Character References + * @see HTML 4.01 Code positions **/ public static String escapeHtml(String str) { if (str == null) { return null; } - //todo: add a version that takes a Writer - //todo: rewrite underlying method to use a Writer instead of a StringBuffer - return Entities.HTML40.escape(str); + + try { + StringPrintWriter writer = new StringPrintWriter ((int)(str.length() * 1.5)); + escapeHtml(writer, str); + return writer.getString(); + } catch (IOException e) { + //assert false; + //should be impossible + e.printStackTrace(); + return null; + } + } + + /** + *

Escapes the characters in a String using HTML entities and writes + * them to a Writer.

+ * + *

+ * For example: + *

+ * "bread" & "butter" + *

becomes:

+ * &quot;bread&quot; &amp; &quot;butter&quot;. + * + *

Supports all known HTML 4.0 entities, including funky accents.

+ * + * @param writer The Writer to write the result to. This must not be null. + * @param string The String to escape. This may be null. + * + * @throws IOException when Writer passed throws the exception from + * calls to the {@link Writer#write(int)} methods. + * + * @see #escapeHtml(String) + * @see #unescapeHtml(String) + * @see ISO Entities + * @see HTML 3.2 Character Entities for ISO Latin-1 + * @see HTML 4.0 Character entity references + * @see HTML 4.01 Character References + * @see HTML 4.01 Code positions + */ + public static void escapeHtml(Writer writer, String string) throws IOException { + if (writer == null ) { + throw new IllegalArgumentException ("The Writer must not be null."); + } + + if (string == null) { + return; + } + + Entities.HTML40.escape(writer, string); } /** @@ -449,7 +496,29 @@ public class StringEscapeUtils { if (str == null) { return null; } - return Entities.HTML40.unescape(str); + + try { + StringPrintWriter writer = new StringPrintWriter ((int)(str.length() * 1.5)); + unescapeHtml(writer, str); + return writer.getString(); + } catch (IOException e) { + //assert false; + //should be impossible + e.printStackTrace(); + return null; + } + } + + public static void unescapeHtml(Writer writer, String string) throws IOException { + if (writer == null ) { + throw new IllegalArgumentException ("The Writer must not be null."); + } + + if (string == null) { + return; + } + + Entities.HTML40.unescape(writer, string); } /** diff --git a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java index 02f8b4438..c9d3c8496 100644 --- a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java +++ b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java @@ -210,17 +210,30 @@ public class StringEscapeUtilsTest extends TestCase { String expected = htmlEscapes[i][1]; String original = htmlEscapes[i][2]; assertEquals(message, expected, StringEscapeUtils.escapeHtml(original)); - 
// todo: add test for (and implement) Writer-based version, something like this: -// StringPrintWriter sw = new StringPrintWriter(); -// StringEscapeUtils.escapeHtml(sw, original); -// assertEquals(expected, sw.getString()); + StringPrintWriter sw = new StringPrintWriter(); + try { + StringEscapeUtils.escapeHtml(sw, original); + } catch (IOException e) { + } + String actual = original == null ? null : sw.getString(); + assertEquals(message, expected, actual); } } public void testUnescapeHtml() { for (int i = 0; i < htmlEscapes.length; ++i) { - assertEquals(htmlEscapes[i][0], htmlEscapes[i][2], StringEscapeUtils.unescapeHtml(htmlEscapes[i][1])); - // todo: add test for (and implement) Writer-based version + String message = htmlEscapes[i][0]; + String expected = htmlEscapes[i][2]; + String original = htmlEscapes[i][1]; + assertEquals(message, expected, StringEscapeUtils.unescapeHtml(original)); + + StringPrintWriter sw = new StringPrintWriter(); + try { + StringEscapeUtils.unescapeHtml(sw, original); + } catch (IOException e) { + } + String actual = original == null ? null : sw.getString(); + assertEquals(message, expected, actual); } // \u00E7 is a cedilla (c with wiggle under) // note that the test string must be 7-bit-clean (unicode escaped) or else it will compile incorrectly