[LANG-102] [lang] Refactor Entities methods.

http://issues.apache.org/jira/browse/LANG-102.
Refactored escape and unescape methods to remove code duplication.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@491695 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2007-01-01 23:24:27 +00:00
parent a1dcdfa2a6
commit 0e07a3b099
1 changed files with 440 additions and 392 deletions

View File

@ -14,16 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.lang;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
* <p>Provides HTML and XML entity utilities.</p>
* <p>
* Provides HTML and XML entity utilities.
* </p>
*
* @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
* @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
@ -38,20 +42,17 @@ import java.util.TreeMap;
*/
class Entities {
private static final String[][] BASIC_ARRAY = {
{"quot", "34"}, // " - double-quote
private static final String[][] BASIC_ARRAY = {{"quot", "34"}, // " - double-quote
{"amp", "38"}, // & - ampersand
{"lt", "60"}, // < - less-than
{"gt", "62"}, // > - greater-than
};
private static final String[][] APOS_ARRAY = {
{"apos", "39"}, // XML apostrophe
private static final String[][] APOS_ARRAY = {{"apos", "39"}, // XML apostrophe
};
// package scoped for testing
static final String[][] ISO8859_1_ARRAY = {
{"nbsp", "160"}, // non-breaking space
static final String[][] ISO8859_1_ARRAY = {{"nbsp", "160"}, // non-breaking space
{"iexcl", "161"}, // inverted exclamation mark
{"cent", "162"}, // cent sign
{"pound", "163"}, // pound sign
@ -298,7 +299,8 @@ class Entities {
{"lfloor", "8970"}, // left floor = apl downstile,U+230A ISOamsc -->
{"rfloor", "8971"}, // right floor, U+230B ISOamsc -->
{"lang", "9001"}, // left-pointing angle bracket = bra,U+2329 ISOtech -->
// <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' -->
// <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation
// mark' -->
{"rang", "9002"}, // right-pointing angle bracket = ket,U+232A ISOtech -->
// <!-- rang is NOT the same character as U+003E 'greater than' or U+203A
// 'single right-pointing angle quotation mark' -->
@ -348,17 +350,23 @@ class Entities {
};
/**
* <p>The set of entities supported by standard XML.</p>
* <p>
* The set of entities supported by standard XML.
* </p>
*/
public static final Entities XML;
/**
* <p>The set of entities supported by HTML 3.2.</p>
* <p>
* The set of entities supported by HTML 3.2.
* </p>
*/
public static final Entities HTML32;
/**
* <p>The set of entities supported by HTML 4.0.</p>
* <p>
* The set of entities supported by HTML 4.0.
* </p>
*/
public static final Entities HTML40;
@ -380,9 +388,12 @@ class Entities {
}
/**
* <p>Fills the specified entities instance with HTML 40 entities.</p>
* <p>
* Fills the specified entities instance with HTML 40 entities.
* </p>
*
* @param entities the instance to be filled.
* @param entities
* the instance to be filled.
*/
static void fillWithHtml40Entities(Entities entities) {
entities.addEntities(BASIC_ARRAY);
@ -392,25 +403,35 @@ class Entities {
static interface EntityMap {
/**
* <p>Add an entry to this entity map.</p>
* <p>
* Add an entry to this entity map.
* </p>
*
* @param name the entity name
* @param value the entity value
* @param name
* the entity name
* @param value
* the entity value
*/
void add(String name, int value);
/**
* <p>Returns the name of the entity identified by the specified value.</p>
* <p>
* Returns the name of the entity identified by the specified value.
* </p>
*
* @param value the value to locate
* @param value
* the value to locate
* @return entity name associated with the specified value
*/
String name(int value);
/**
* <p>Returns the value of the entity identified by the specified name.</p>
* <p>
* Returns the value of the entity identified by the specified name.
* </p>
*
* @param name the name to locate
* @param name
* the name to locate
* @return entity value associated with the specified name
*/
int value(String name);
@ -418,6 +439,7 @@ class Entities {
static class PrimitiveEntityMap implements EntityMap {
private Map mapNameToValue = new HashMap();
private IntHashMap mapValueToName = new IntHashMap();
/**
@ -447,9 +469,9 @@ class Entities {
}
}
static abstract class MapIntMap implements Entities.EntityMap {
protected Map mapNameToValue;
protected Map mapValueToName;
/**
@ -501,6 +523,7 @@ class Entities {
static class LookupEntityMap extends PrimitiveEntityMap {
private String[] lookupTable;
private int LOOKUP_TABLE_SIZE = 256;
/**
@ -514,8 +537,9 @@ class Entities {
}
/**
* <p>Returns the lookup table for this entity map. The lookup table is created if it has not been
* previously.</p>
* <p>
* Returns the lookup table for this entity map. The lookup table is created if it has not been created previously.
* </p>
*
* @return the lookup table
*/
@ -527,7 +551,9 @@ class Entities {
}
/**
* <p>Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.</p>
* <p>
* Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.
* </p>
*/
private void createLookupTable() {
lookupTable = new String[LOOKUP_TABLE_SIZE];
@ -539,8 +565,11 @@ class Entities {
static class ArrayEntityMap implements EntityMap {
protected int growBy = 100;
protected int size = 0;
protected String[] names;
protected int[] values;
/**
@ -552,10 +581,11 @@ class Entities {
}
/**
* Constructs a new instance of <code>ArrayEntityMap</code>
* specifying the size by which the array should grow.
* Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the array should
* grow.
*
* @param growBy array will be initialized to and will grow by this amount
* @param growBy
* array will be initialized to and will grow by this amount
*/
public ArrayEntityMap(int growBy) {
this.growBy = growBy;
@ -576,7 +606,8 @@ class Entities {
/**
* Verifies the capacity of the entity array, adjusting the size if necessary.
*
* @param capacity size the array should be
* @param capacity
* size the array should be
*/
protected void ensureCapacity(int capacity) {
if (capacity > names.length) {
@ -625,20 +656,22 @@ class Entities {
}
/**
* Constructs a new instance of <code>ArrayEntityMap</code>
* specifying the size by which the underlying array should grow.
* Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the underlying array
* should grow.
*
* @param growBy array will be initialized to and will grow by this amount
* @param growBy
* array will be initialized to and will grow by this amount
*/
public BinaryEntityMap(int growBy) {
super(growBy);
}
/**
* Performs a binary search of the entity array for the specified key.
* This method is based on code in {@link java.util.Arrays}.
* Performs a binary search of the entity array for the specified key. This method is based on code in
* {@link java.util.Arrays}.
*
* @param key the key to be found
* @param key
* the key to be found
* @return the index of the entity array matching the specified key
*/
private int binarySearch(int key) {
@ -693,9 +726,12 @@ class Entities {
EntityMap map = new Entities.LookupEntityMap();
/**
* <p>Adds entities to this entity.</p>
* <p>
* Adds entities to this entity.
* </p>
*
* @param entityArray array of entities to be added
* @param entityArray
* array of entities to be added
*/
public void addEntities(String[][] entityArray) {
for (int i = 0; i < entityArray.length; ++i) {
@ -704,19 +740,26 @@ class Entities {
}
/**
* <p>Add an entity to this entity.</p>
* <p>
* Add an entity to this entity.
* </p>
*
* @param name name of the entity
* @param value vale of the entity
* @param name
* name of the entity
* @param value
* value of the entity
*/
public void addEntity(String name, int value) {
map.add(name, value);
}
/**
* <p>Returns the name of the entity identified by the specified value.</p>
* <p>
* Returns the name of the entity identified by the specified value.
* </p>
*
* @param value the value to locate
* @param value
* the value to locate
* @return entity name associated with the specified value
*/
public String entityName(int value) {
@ -724,9 +767,12 @@ class Entities {
}
/**
* <p>Returns the value of the entity identified by the specified name.</p>
* <p>
* Returns the value of the entity identified by the specified name.
* </p>
*
* @param name the name to locate
* @param name
* the name to locate
* @return entity value associated with the specified name
*/
public int entityValue(String name) {
@ -734,48 +780,38 @@ class Entities {
}
/**
* <p>Escapes the characters in a <code>String</code>.</p>
* <p>
* Escapes the characters in a <code>String</code>.
* </p>
*
* <p>For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
* escape(&quot;\u00A1&quot;) will return &quot;&amp;foo;&quot;</p>
* <p>
* For example, if you have called addEntity(&quot;foo&quot;, 0xA1), escape(&quot;\u00A1&quot;) will return
* &quot;&amp;foo;&quot;
* </p>
*
* @param str The <code>String</code> to escape.
* @param str
* The <code>String</code> to escape.
* @return A new escaped <code>String</code>.
*/
public String escape(String str) {
//todo: rewrite to use a Writer
StringBuffer buf = new StringBuffer(str.length() * 2);
int i;
for (i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
String entityName = this.entityName(ch);
if (entityName == null) {
if (ch > 0x7F) {
int intValue = ch;
buf.append("&#");
buf.append(intValue);
buf.append(';');
} else {
buf.append(ch);
}
} else {
buf.append('&');
buf.append(entityName);
buf.append(';');
}
}
return buf.toString();
StringWriter stringWriter = newStringWriter(str);
this.escape(stringWriter, str);
return stringWriter.toString();
}
/**
* <p>Escapes the characters in the <code>String</code> passed and writes the result
* to the <code>Writer</code> passed. </p>
* <p>
* Escapes the characters in the <code>String</code> passed and writes the result to the <code>Writer</code>
* passed.
* </p>
*
* @param writer The <code>Writer</code> to write the results of the escaping to.
* Assumed to be a non-null value.
* @param str The <code>String</code> to escape. Assumed to be a non-null value.
* @throws IOException when <code>Writer</code> passed throws the exception from
* calls to the {@link Writer#write(int)} methods.
* @param writer
* The <code>Writer</code> to write the results of the escaping to. Assumed to be a non-null value.
* @param str
* The <code>String</code> to escape. Assumed to be a non-null value.
* @throws IOException
* when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)}
* methods.
*
* @see #escape(String)
* @see Writer
@ -802,84 +838,95 @@ class Entities {
}
/**
* <p>Unescapes the entities in a <code>String</code>.</p>
* <p>
* Escapes the characters in the <code>String</code> passed and writes the result to the <code>StringWriter</code>
* passed.
* </p>
*
* <p>For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
* unescape(&quot;&amp;foo;&quot;) will return &quot;\u00A1&quot;</p>
* @param writer
* The <code>StringWriter</code> to write the results of the escaping to. Assumed to be a non-null
* value.
* @param str
* The <code>String</code> to escape. Assumed to be a non-null value.
*
* @param str The <code>String</code> to escape.
* @return A new escaped <code>String</code>.
* @see #escape(String)
* @see Writer
* @since 2.3
*/
public String unescape(String str) {
int firstAmp = str.indexOf('&');
if (firstAmp < 0) {
return str;
}
StringBuffer buf = new StringBuffer(str.length());
buf.append(str.substring(0, firstAmp));
for (int i = firstAmp; i < str.length(); ++i) {
char ch = str.charAt(i);
if (ch == '&') {
int semi = str.indexOf(';', i + 1);
if (semi == -1) {
buf.append(ch);
continue;
}
int amph = str.indexOf('&', i + 1);
if( amph != -1 && amph < semi ) {
// Then the text looks like &...&...;
buf.append(ch);
continue;
}
String entityName = str.substring(i + 1, semi);
int entityValue;
if (entityName.length() == 0) {
entityValue = -1;
} else if (entityName.charAt(0) == '#') {
if (entityName.length() == 1) {
entityValue = -1;
} else {
char charAt1 = entityName.charAt(1);
public void escape(StringWriter writer, String str) {
try {
if (charAt1 == 'x' || charAt1=='X') {
entityValue = Integer.valueOf(entityName.substring(2), 16).intValue();
} else {
entityValue = Integer.parseInt(entityName.substring(1));
this.escape((Writer) writer, str);
} catch (IOException e) {
// This should never happen because ALL the StringWriter methods called by #escape(Writer, String) do not
// throw IOExceptions.
throw new UnhandledException(e);
}
if (entityValue > 0xFFFF) {
entityValue = -1;
}
} catch (NumberFormatException ex) {
entityValue = -1;
}
}
} else {
entityValue = this.entityValue(entityName);
}
if (entityValue == -1) {
buf.append('&');
buf.append(entityName);
buf.append(';');
} else {
buf.append((char) (entityValue));
}
i = semi;
} else {
buf.append(ch);
}
}
return buf.toString();
}
/**
* <p>Unescapes the escaped entities in the <code>String</code> passed and
* writes the result to the <code>Writer</code> passed.</p>
* <p>
* Unescapes the entities in a <code>String</code>.
* </p>
*
* @param writer The <code>Writer</code> to write the results to; assumed to be non-null.
* @param string The <code>String</code> to write the results to; assumed to be non-null.
* @throws IOException when <code>Writer</code> passed throws the exception from
* calls to the {@link Writer#write(int)} methods.
* <p>
* For example, if you have called addEntity(&quot;foo&quot;, 0xA1), unescape(&quot;&amp;foo;&quot;) will return
* &quot;\u00A1&quot;
* </p>
*
* @param str
* The <code>String</code> to unescape.
* @return A new unescaped <code>String</code>.
*/
public String unescape(String str) {
    // Collect the output in an in-memory writer that is pre-sized from the input length,
    // then delegate the actual work to the StringWriter overload.
    final StringWriter buffer = newStringWriter(str);
    unescape(buffer, str);
    return buffer.toString();
}
private StringWriter newStringWriter(String str) {
    // Reserve about 10% of headroom beyond the source length so that the writer's
    // internal buffer is less likely to need to grow while output is appended.
    final int sourceLength = str.length();
    final int initialSize = (int) (sourceLength + (sourceLength * 0.1));
    return new StringWriter(initialSize);
}
/**
* <p>
* Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
* <code>StringWriter</code> passed.
* </p>
*
* @param writer
* The <code>StringWriter</code> to write the results to; assumed to be non-null.
* @param string
* The <code>String</code> to unescape; assumed to be non-null.
*
* @see #escape(String)
* @see Writer
* @since 2.3
*/
public void unescape(StringWriter writer, String string) {
    // Widen the static type so that overload resolution selects unescape(Writer, String).
    final Writer target = writer;
    try {
        unescape(target, string);
    } catch (IOException ioe) {
        // Not expected in practice: the StringWriter methods used by #unescape(Writer, String)
        // do not throw IOException. Rethrow unchecked so callers need no checked-exception handling.
        throw new UnhandledException(ioe);
    }
}
/**
* <p>
* Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
* <code>Writer</code> passed.
* </p>
*
* @param writer
* The <code>Writer</code> to write the results to; assumed to be non-null.
* @param string
* The <code>String</code> to unescape; assumed to be non-null.
* @throws IOException
* when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)}
* methods.
*
* @see #escape(String)
* @see Writer
@ -912,7 +959,8 @@ class Entities {
int entityValue = -1;
int entityContentLen = entityContent.length();
if (entityContentLen > 0) {
if (entityContent.charAt(0) == '#') { //escaped value content is an integer (decimal or hexidecimal)
if (entityContent.charAt(0) == '#') { // escaped value content is an integer (decimal or
// hexadecimal)
if (entityContentLen > 1) {
char isHexChar = entityContent.charAt(1);
try {