[LANG-102] [lang] Refactor Entities methods.
http://issues.apache.org/jira/browse/LANG-102. Refactored escape and unescape methods to remove code duplication. git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@491695 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a1dcdfa2a6
commit
0e07a3b099
|
@ -14,16 +14,20 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.lang;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* <p>Provides HTML and XML entity utilities.</p>
|
||||
* <p>
|
||||
* Provides HTML and XML entity utilities.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
|
||||
* @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
|
||||
|
@ -38,20 +42,17 @@ import java.util.TreeMap;
|
|||
*/
|
||||
class Entities {
|
||||
|
||||
private static final String[][] BASIC_ARRAY = {
|
||||
{"quot", "34"}, // " - double-quote
|
||||
private static final String[][] BASIC_ARRAY = {{"quot", "34"}, // " - double-quote
|
||||
{"amp", "38"}, // & - ampersand
|
||||
{"lt", "60"}, // < - less-than
|
||||
{"gt", "62"}, // > - greater-than
|
||||
};
|
||||
|
||||
private static final String[][] APOS_ARRAY = {
|
||||
{"apos", "39"}, // XML apostrophe
|
||||
private static final String[][] APOS_ARRAY = {{"apos", "39"}, // XML apostrophe
|
||||
};
|
||||
|
||||
// package scoped for testing
|
||||
static final String[][] ISO8859_1_ARRAY = {
|
||||
{"nbsp", "160"}, // non-breaking space
|
||||
static final String[][] ISO8859_1_ARRAY = {{"nbsp", "160"}, // non-breaking space
|
||||
{"iexcl", "161"}, // inverted exclamation mark
|
||||
{"cent", "162"}, // cent sign
|
||||
{"pound", "163"}, // pound sign
|
||||
|
@ -298,7 +299,8 @@ class Entities {
|
|||
{"lfloor", "8970"}, // left floor = apl downstile,U+230A ISOamsc -->
|
||||
{"rfloor", "8971"}, // right floor, U+230B ISOamsc -->
|
||||
{"lang", "9001"}, // left-pointing angle bracket = bra,U+2329 ISOtech -->
|
||||
// <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' -->
|
||||
// <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation
|
||||
// mark' -->
|
||||
{"rang", "9002"}, // right-pointing angle bracket = ket,U+232A ISOtech -->
|
||||
// <!-- rang is NOT the same character as U+003E 'greater than' or U+203A
|
||||
// 'single right-pointing angle quotation mark' -->
|
||||
|
@ -348,17 +350,23 @@ class Entities {
|
|||
};
|
||||
|
||||
/**
|
||||
* <p>The set of entities supported by standard XML.</p>
|
||||
* <p>
|
||||
* The set of entities supported by standard XML.
|
||||
* </p>
|
||||
*/
|
||||
public static final Entities XML;
|
||||
|
||||
/**
|
||||
* <p>The set of entities supported by HTML 3.2.</p>
|
||||
* <p>
|
||||
* The set of entities supported by HTML 3.2.
|
||||
* </p>
|
||||
*/
|
||||
public static final Entities HTML32;
|
||||
|
||||
/**
|
||||
* <p>The set of entities supported by HTML 4.0.</p>
|
||||
* <p>
|
||||
* The set of entities supported by HTML 4.0.
|
||||
* </p>
|
||||
*/
|
||||
public static final Entities HTML40;
|
||||
|
||||
|
@ -380,9 +388,12 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Fills the specified entities instance with HTML 40 entities.</p>
|
||||
* <p>
|
||||
* Fills the specified entities instance with HTML 40 entities.
|
||||
* </p>
|
||||
*
|
||||
* @param entities the instance to be filled.
|
||||
* @param entities
|
||||
* the instance to be filled.
|
||||
*/
|
||||
static void fillWithHtml40Entities(Entities entities) {
|
||||
entities.addEntities(BASIC_ARRAY);
|
||||
|
@ -392,25 +403,35 @@ class Entities {
|
|||
|
||||
static interface EntityMap {
|
||||
/**
|
||||
* <p>Add an entry to this entity map.</p>
|
||||
* <p>
|
||||
* Add an entry to this entity map.
|
||||
* </p>
|
||||
*
|
||||
* @param name the entity name
|
||||
* @param value the entity value
|
||||
* @param name
|
||||
* the entity name
|
||||
* @param value
|
||||
* the entity value
|
||||
*/
|
||||
void add(String name, int value);
|
||||
|
||||
/**
|
||||
* <p>Returns the name of the entity identified by the specified value.</p>
|
||||
* <p>
|
||||
* Returns the name of the entity identified by the specified value.
|
||||
* </p>
|
||||
*
|
||||
* @param value the value to locate
|
||||
* @param value
|
||||
* the value to locate
|
||||
* @return entity name associated with the specified value
|
||||
*/
|
||||
String name(int value);
|
||||
|
||||
/**
|
||||
* <p>Returns the value of the entity identified by the specified name.</p>
|
||||
* <p>
|
||||
* Returns the value of the entity identified by the specified name.
|
||||
* </p>
|
||||
*
|
||||
* @param name the name to locate
|
||||
* @param name
|
||||
* the name to locate
|
||||
* @return entity value associated with the specified name
|
||||
*/
|
||||
int value(String name);
|
||||
|
@ -418,6 +439,7 @@ class Entities {
|
|||
|
||||
static class PrimitiveEntityMap implements EntityMap {
|
||||
private Map mapNameToValue = new HashMap();
|
||||
|
||||
private IntHashMap mapValueToName = new IntHashMap();
|
||||
|
||||
/**
|
||||
|
@ -447,9 +469,9 @@ class Entities {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static abstract class MapIntMap implements Entities.EntityMap {
|
||||
protected Map mapNameToValue;
|
||||
|
||||
protected Map mapValueToName;
|
||||
|
||||
/**
|
||||
|
@ -501,6 +523,7 @@ class Entities {
|
|||
|
||||
static class LookupEntityMap extends PrimitiveEntityMap {
|
||||
private String[] lookupTable;
|
||||
|
||||
private int LOOKUP_TABLE_SIZE = 256;
|
||||
|
||||
/**
|
||||
|
@ -514,8 +537,9 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Returns the lookup table for this entity map. The lookup table is created if it has not been
|
||||
* previously.</p>
|
||||
* <p>
|
||||
* Returns the lookup table for this entity map. The lookup table is created if it has not been created previously.
|
||||
* </p>
|
||||
*
|
||||
* @return the lookup table
|
||||
*/
|
||||
|
@ -527,7 +551,9 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.</p>
|
||||
* <p>
|
||||
* Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity names.
|
||||
* </p>
|
||||
*/
|
||||
private void createLookupTable() {
|
||||
lookupTable = new String[LOOKUP_TABLE_SIZE];
|
||||
|
@ -539,8 +565,11 @@ class Entities {
|
|||
|
||||
static class ArrayEntityMap implements EntityMap {
|
||||
protected int growBy = 100;
|
||||
|
||||
protected int size = 0;
|
||||
|
||||
protected String[] names;
|
||||
|
||||
protected int[] values;
|
||||
|
||||
/**
|
||||
|
@ -552,10 +581,11 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* Constructs a new instance of <code>ArrayEntityMap</code>
|
||||
* specifying the size by which the array should grow.
|
||||
* Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the array should
|
||||
* grow.
|
||||
*
|
||||
* @param growBy array will be initialized to and will grow by this amount
|
||||
* @param growBy
|
||||
* array will be initialized to and will grow by this amount
|
||||
*/
|
||||
public ArrayEntityMap(int growBy) {
|
||||
this.growBy = growBy;
|
||||
|
@ -576,7 +606,8 @@ class Entities {
|
|||
/**
|
||||
* Verifies the capacity of the entity array, adjusting the size if necessary.
|
||||
*
|
||||
* @param capacity size the array should be
|
||||
* @param capacity
|
||||
* size the array should be
|
||||
*/
|
||||
protected void ensureCapacity(int capacity) {
|
||||
if (capacity > names.length) {
|
||||
|
@ -625,20 +656,22 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* Constructs a new instance of <code>ArrayEntityMap</code>
|
||||
* specifying the size by which the underlying array should grow.
|
||||
* Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the underlying array
|
||||
* should grow.
|
||||
*
|
||||
* @param growBy array will be initialized to and will grow by this amount
|
||||
* @param growBy
|
||||
* array will be initialized to and will grow by this amount
|
||||
*/
|
||||
public BinaryEntityMap(int growBy) {
|
||||
super(growBy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a binary search of the entity array for the specified key.
|
||||
* This method is based on code in {@link java.util.Arrays}.
|
||||
* Performs a binary search of the entity array for the specified key. This method is based on code in
|
||||
* {@link java.util.Arrays}.
|
||||
*
|
||||
* @param key the key to be found
|
||||
* @param key
|
||||
* the key to be found
|
||||
* @return the index of the entity array matching the specified key
|
||||
*/
|
||||
private int binarySearch(int key) {
|
||||
|
@ -693,9 +726,12 @@ class Entities {
|
|||
EntityMap map = new Entities.LookupEntityMap();
|
||||
|
||||
/**
|
||||
* <p>Adds entities to this entity.</p>
|
||||
* <p>
|
||||
* Adds entities to this entity.
|
||||
* </p>
|
||||
*
|
||||
* @param entityArray array of entities to be added
|
||||
* @param entityArray
|
||||
* array of entities to be added
|
||||
*/
|
||||
public void addEntities(String[][] entityArray) {
|
||||
for (int i = 0; i < entityArray.length; ++i) {
|
||||
|
@ -704,19 +740,26 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Add an entity to this entity.</p>
|
||||
* <p>
|
||||
* Add an entity to this entity.
|
||||
* </p>
|
||||
*
|
||||
* @param name name of the entity
|
||||
* @param value value of the entity
|
||||
* @param name
|
||||
* name of the entity
|
||||
* @param value
|
||||
* value of the entity
|
||||
*/
|
||||
public void addEntity(String name, int value) {
|
||||
map.add(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Returns the name of the entity identified by the specified value.</p>
|
||||
* <p>
|
||||
* Returns the name of the entity identified by the specified value.
|
||||
* </p>
|
||||
*
|
||||
* @param value the value to locate
|
||||
* @param value
|
||||
* the value to locate
|
||||
* @return entity name associated with the specified value
|
||||
*/
|
||||
public String entityName(int value) {
|
||||
|
@ -724,9 +767,12 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Returns the value of the entity identified by the specified name.</p>
|
||||
* <p>
|
||||
* Returns the value of the entity identified by the specified name.
|
||||
* </p>
|
||||
*
|
||||
* @param name the name to locate
|
||||
* @param name
|
||||
* the name to locate
|
||||
* @return entity value associated with the specified name
|
||||
*/
|
||||
public int entityValue(String name) {
|
||||
|
@ -734,48 +780,38 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Escapes the characters in a <code>String</code>.</p>
|
||||
* <p>
|
||||
* Escapes the characters in a <code>String</code>.
|
||||
* </p>
|
||||
*
|
||||
* <p>For example, if you have called addEntity("foo", 0xA1),
|
||||
* escape("\u00A1") will return "&foo;"</p>
|
||||
* <p>
|
||||
* For example, if you have called addEntity("foo", 0xA1), escape("\u00A1") will return
|
||||
* "&foo;"
|
||||
* </p>
|
||||
*
|
||||
* @param str The <code>String</code> to escape.
|
||||
* @param str
|
||||
* The <code>String</code> to escape.
|
||||
* @return A new escaped <code>String</code>.
|
||||
*/
|
||||
public String escape(String str) {
|
||||
//todo: rewrite to use a Writer
|
||||
StringBuffer buf = new StringBuffer(str.length() * 2);
|
||||
int i;
|
||||
for (i = 0; i < str.length(); ++i) {
|
||||
char ch = str.charAt(i);
|
||||
String entityName = this.entityName(ch);
|
||||
if (entityName == null) {
|
||||
if (ch > 0x7F) {
|
||||
int intValue = ch;
|
||||
buf.append("&#");
|
||||
buf.append(intValue);
|
||||
buf.append(';');
|
||||
} else {
|
||||
buf.append(ch);
|
||||
}
|
||||
} else {
|
||||
buf.append('&');
|
||||
buf.append(entityName);
|
||||
buf.append(';');
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
StringWriter stringWriter = newStringWriter(str);
|
||||
this.escape(stringWriter, str);
|
||||
return stringWriter.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Escapes the characters in the <code>String</code> passed and writes the result
|
||||
* to the <code>Writer</code> passed. </p>
|
||||
* <p>
|
||||
* Escapes the characters in the <code>String</code> passed and writes the result to the <code>Writer</code>
|
||||
* passed.
|
||||
* </p>
|
||||
*
|
||||
* @param writer The <code>Writer</code> to write the results of the escaping to.
|
||||
* Assumed to be a non-null value.
|
||||
* @param str The <code>String</code> to escape. Assumed to be a non-null value.
|
||||
* @throws IOException when <code>Writer</code> passed throws the exception from
|
||||
* calls to the {@link Writer#write(int)} methods.
|
||||
* @param writer
|
||||
* The <code>Writer</code> to write the results of the escaping to. Assumed to be a non-null value.
|
||||
* @param str
|
||||
* The <code>String</code> to escape. Assumed to be a non-null value.
|
||||
* @throws IOException
|
||||
* when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)}
|
||||
* methods.
|
||||
*
|
||||
* @see #escape(String)
|
||||
* @see Writer
|
||||
|
@ -802,84 +838,95 @@ class Entities {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Unescapes the entities in a <code>String</code>.</p>
|
||||
* <p>
|
||||
* Escapes the characters in the <code>String</code> passed and writes the result to the <code>StringWriter</code>
|
||||
* passed.
|
||||
* </p>
|
||||
*
|
||||
* <p>For example, if you have called addEntity("foo", 0xA1),
|
||||
* unescape("&foo;") will return "\u00A1"</p>
|
||||
* @param writer
|
||||
* The <code>StringWriter</code> to write the results of the escaping to. Assumed to be a non-null
|
||||
* value.
|
||||
* @param str
|
||||
* The <code>String</code> to escape. Assumed to be a non-null value.
|
||||
*
|
||||
* @param str The <code>String</code> to escape.
|
||||
* @return A new escaped <code>String</code>.
|
||||
* @see #escape(String)
|
||||
* @see Writer
|
||||
* @since 2.3
|
||||
*/
|
||||
public String unescape(String str) {
|
||||
int firstAmp = str.indexOf('&');
|
||||
if (firstAmp < 0) {
|
||||
return str;
|
||||
}
|
||||
|
||||
StringBuffer buf = new StringBuffer(str.length());
|
||||
buf.append(str.substring(0, firstAmp));
|
||||
for (int i = firstAmp; i < str.length(); ++i) {
|
||||
char ch = str.charAt(i);
|
||||
if (ch == '&') {
|
||||
int semi = str.indexOf(';', i + 1);
|
||||
if (semi == -1) {
|
||||
buf.append(ch);
|
||||
continue;
|
||||
}
|
||||
int amph = str.indexOf('&', i + 1);
|
||||
if( amph != -1 && amph < semi ) {
|
||||
// Then the text looks like &...&...;
|
||||
buf.append(ch);
|
||||
continue;
|
||||
}
|
||||
String entityName = str.substring(i + 1, semi);
|
||||
int entityValue;
|
||||
if (entityName.length() == 0) {
|
||||
entityValue = -1;
|
||||
} else if (entityName.charAt(0) == '#') {
|
||||
if (entityName.length() == 1) {
|
||||
entityValue = -1;
|
||||
} else {
|
||||
char charAt1 = entityName.charAt(1);
|
||||
public void escape(StringWriter writer, String str) {
|
||||
try {
|
||||
if (charAt1 == 'x' || charAt1=='X') {
|
||||
entityValue = Integer.valueOf(entityName.substring(2), 16).intValue();
|
||||
} else {
|
||||
entityValue = Integer.parseInt(entityName.substring(1));
|
||||
this.escape((Writer) writer, str);
|
||||
} catch (IOException e) {
|
||||
// This should never happen because ALL the StringWriter methods called by #escape(Writer, String) do not
|
||||
// throw IOExceptions.
|
||||
throw new UnhandledException(e);
|
||||
}
|
||||
if (entityValue > 0xFFFF) {
|
||||
entityValue = -1;
|
||||
}
|
||||
} catch (NumberFormatException ex) {
|
||||
entityValue = -1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
entityValue = this.entityValue(entityName);
|
||||
}
|
||||
if (entityValue == -1) {
|
||||
buf.append('&');
|
||||
buf.append(entityName);
|
||||
buf.append(';');
|
||||
} else {
|
||||
buf.append((char) (entityValue));
|
||||
}
|
||||
i = semi;
|
||||
} else {
|
||||
buf.append(ch);
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Unescapes the escaped entities in the <code>String</code> passed and
|
||||
* writes the result to the <code>Writer</code> passed.</p>
|
||||
* <p>
|
||||
* Unescapes the entities in a <code>String</code>.
|
||||
* </p>
|
||||
*
|
||||
* @param writer The <code>Writer</code> to write the results to; assumed to be non-null.
|
||||
* @param string The <code>String</code> to unescape; assumed to be non-null.
|
||||
* @throws IOException when <code>Writer</code> passed throws the exception from
|
||||
* calls to the {@link Writer#write(int)} methods.
|
||||
* <p>
|
||||
* For example, if you have called addEntity("foo", 0xA1), unescape("&foo;") will return
|
||||
* "\u00A1"
|
||||
* </p>
|
||||
*
|
||||
* @param str
|
||||
* The <code>String</code> to unescape.
|
||||
* @return A new unescaped <code>String</code>.
|
||||
*/
|
||||
public String unescape(String str) {
|
||||
// Make the StringWriter larger than the source String to avoid growing the writer.
|
||||
StringWriter stringWriter = newStringWriter(str);
|
||||
this.unescape(stringWriter, str);
|
||||
return stringWriter.toString();
|
||||
}
|
||||
|
||||
private StringWriter newStringWriter(String str) {
|
||||
// Make the StringWriter 10% larger than the source String to avoid growing the writer
|
||||
return new StringWriter((int) (str.length() + (str.length() * 0.1)));
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
|
||||
* <code>StringWriter</code> passed.
|
||||
* </p>
|
||||
*
|
||||
* @param writer
|
||||
* The <code>StringWriter</code> to write the results to; assumed to be non-null.
|
||||
* @param string
|
||||
* The <code>String</code> to unescape; assumed to be non-null.
|
||||
*
|
||||
* @see #escape(String)
|
||||
* @see Writer
|
||||
* @since 2.3
|
||||
*/
|
||||
public void unescape(StringWriter writer, String string) {
|
||||
try {
|
||||
this.unescape((Writer) writer, string);
|
||||
} catch (IOException e) {
|
||||
// This should never happen because ALL the StringWriter methods called by #unescape(Writer, String) do not
|
||||
// throw IOExceptions.
|
||||
throw new UnhandledException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
|
||||
* <code>Writer</code> passed.
|
||||
* </p>
|
||||
*
|
||||
* @param writer
|
||||
* The <code>Writer</code> to write the results to; assumed to be non-null.
|
||||
* @param string
|
||||
* The <code>String</code> to unescape; assumed to be non-null.
|
||||
* @throws IOException
|
||||
* when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)}
|
||||
* methods.
|
||||
*
|
||||
* @see #escape(String)
|
||||
* @see Writer
|
||||
|
@ -912,7 +959,8 @@ class Entities {
|
|||
int entityValue = -1;
|
||||
int entityContentLen = entityContent.length();
|
||||
if (entityContentLen > 0) {
|
||||
if (entityContent.charAt(0) == '#') { //escaped value content is an integer (decimal or hexidecimal)
|
||||
if (entityContent.charAt(0) == '#') { // escaped value content is an integer (decimal or
|
||||
// hexadecimal)
|
||||
if (entityContentLen > 1) {
|
||||
char isHexChar = entityContent.charAt(1);
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue