From c229a55ad9acd6d78a9a624ab3fccb666264dbf3 Mon Sep 17 00:00:00 2001
From: Greg Wilkins
Date: Mon, 28 Nov 2011 14:00:35 +1100
Subject: [PATCH] 362742 improved UTF8 exception reason

---
Review notes (commentary placed between the "---" marker and the diffstat;
not part of the commit message):

 * appendByte() now keeps the looked-up transition in a local ("next") and
   switches on it instead of assigning it to _state first, so the value of
   _state from before the transition is still available when the byte is
   rejected.
 * On UTF8_REJECT the exception message is built from the offending byte
   (rendered with TypeUtil.toHexString) and the pre-transition _state
   divided by 12; this is done before _codep/_state are reset.
 * isUtf8SequenceComplete() is widened from protected to public.
 * NotUtf8Exception now takes a reason string that is appended to the
   existing "Not valid UTF8!" prefix. The no-argument constructor is removed
   by this patch, so code that constructs the exception directly must pass a
   reason.

 .../eclipse/jetty/util/Utf8Appendable.java    | 53 +++++++++++--------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
index 306b6bff125..8fe840c1b88 100644
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
@@ -134,7 +134,7 @@ public abstract class Utf8Appendable
 
     protected void appendByte(byte b) throws IOException
     {
-        if (b > 0 && isUtf8SequenceComplete())
+        if (b > 0 && _state == UTF8_ACCEPT)
         {
             _appendable.append((char)(b & 0xFF));
         }
@@ -142,41 +142,48 @@ public abstract class Utf8Appendable
         {
             int i = b & 0xFF;
             int type = BYTE_TABLE[i];
-            _codep = isUtf8SequenceComplete() ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
-            _state = TRANS_TABLE[_state + type];
+            _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
+            int next = TRANS_TABLE[_state + type];
 
-            if (isUtf8SequenceComplete())
+            switch(next)
             {
-                if (_codep < Character.MIN_HIGH_SURROGATE)
-                {
-                    _appendable.append((char)_codep);
-                }
-                else
-                {
-                    for (char c : Character.toChars(_codep))
-                        _appendable.append(c);
-                }
-            }
-            else if (_state == UTF8_REJECT)
-            {
-                _codep=0;
-                _state = UTF8_ACCEPT;
-                _appendable.append(REPLACEMENT);
-                throw new NotUtf8Exception();
+                case UTF8_ACCEPT:
+                    _state=next;
+                    if (_codep < Character.MIN_HIGH_SURROGATE)
+                    {
+                        _appendable.append((char)_codep);
+                    }
+                    else
+                    {
+                        for (char c : Character.toChars(_codep))
+                            _appendable.append(c);
+                    }
+                    break;
+
+                case UTF8_REJECT:
+                    String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
+                    _codep=0;
+                    _state = UTF8_ACCEPT;
+                    _appendable.append(REPLACEMENT);
+                    throw new NotUtf8Exception(reason);
+
+                default:
+                    _state=next;
+
             }
         }
     }
 
-    protected boolean isUtf8SequenceComplete()
+    public boolean isUtf8SequenceComplete()
     {
         return _state == UTF8_ACCEPT;
     }
 
     public static class NotUtf8Exception extends IllegalArgumentException
     {
-        public NotUtf8Exception()
+        public NotUtf8Exception(String reason)
         {
-            super("Not valid UTF8!");
+            super("Not valid UTF8! "+reason);
         }
     }
 }