362742 improved UTF8 exception reason

This commit is contained in:
Greg Wilkins 2011-11-28 14:00:35 +11:00
parent 11a79f7dee
commit c229a55ad9
1 changed files with 30 additions and 23 deletions

View File

@ -134,7 +134,7 @@ public abstract class Utf8Appendable
protected void appendByte(byte b) throws IOException
{
if (b > 0 && isUtf8SequenceComplete())
if (b > 0 && _state == UTF8_ACCEPT)
{
_appendable.append((char)(b & 0xFF));
}
@ -142,41 +142,48 @@ public abstract class Utf8Appendable
{
int i = b & 0xFF;
int type = BYTE_TABLE[i];
_codep = isUtf8SequenceComplete() ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
_state = TRANS_TABLE[_state + type];
_codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
int next = TRANS_TABLE[_state + type];
if (isUtf8SequenceComplete())
switch(next)
{
if (_codep < Character.MIN_HIGH_SURROGATE)
{
_appendable.append((char)_codep);
}
else
{
for (char c : Character.toChars(_codep))
_appendable.append(c);
}
}
else if (_state == UTF8_REJECT)
{
_codep=0;
_state = UTF8_ACCEPT;
_appendable.append(REPLACEMENT);
throw new NotUtf8Exception();
case UTF8_ACCEPT:
_state=next;
if (_codep < Character.MIN_HIGH_SURROGATE)
{
_appendable.append((char)_codep);
}
else
{
for (char c : Character.toChars(_codep))
_appendable.append(c);
}
break;
case UTF8_REJECT:
String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
_codep=0;
_state = UTF8_ACCEPT;
_appendable.append(REPLACEMENT);
throw new NotUtf8Exception(reason);
default:
_state=next;
}
}
}
protected boolean isUtf8SequenceComplete()
public boolean isUtf8SequenceComplete()
{
return _state == UTF8_ACCEPT;
}
public static class NotUtf8Exception extends IllegalArgumentException
{
public NotUtf8Exception()
public NotUtf8Exception(String reason)
{
super("Not valid UTF8!");
super("Not valid UTF8! "+reason);
}
}
}