Fix for #358121 (Utf8Appendable refactored to use Bjoern Hoehrmann's decoder).

Signed-off-by: Simone Bordet <simone.bordet@gmail.com>
2011-09-21 11:47:38 +02:00 · 2011-09-21 11:47:38 +02:00 · b3e6ebf416
parent 6bebdceb3a
commit b3e6ebf416
5 changed files with 285 additions and 312 deletions
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
@ -1,180 +1,181 @@
+// ========================================================================
+// Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
+// ------------------------------------------------------------------------
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Eclipse Public License v1.0
+// and Apache License v2.0 which accompanies this distribution.
+// The Eclipse Public License is available at
+// http://www.eclipse.org/legal/epl-v10.html
+// The Apache License v2.0 is available at
+// http://www.opensource.org/licenses/apache2.0.php
+// You may elect to redistribute this code under either of these licenses.
+// ========================================================================
 package org.eclipse.jetty.util;

 import java.io.IOException;
-import java.util.IllegalFormatCodePointException;

+/* ------------------------------------------------------------ */
+/**
+ * Utf8 Appendable abstract base class
+ *
+ * This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, that are converted into characters.
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before a character is appended to the wrapped appendable.
+ *
+ * The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ *
+ * License information for Bjoern Hoehrmann's code:
+ *
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **/
 public abstract class Utf8Appendable
 {
    private final char REPLACEMENT = '\ufffd';
+    private static final int UTF8_ACCEPT = 0;
+    private static final int UTF8_REJECT = 12;
+
    protected final Appendable _appendable;
-    protected int _expectedContinuationBytes;
-    protected int _codePoint;
-    protected int _minCodePoint;
+    protected int _state = UTF8_ACCEPT;
+
+    private static final byte[] BYTE_TABLE =
+    {
+        // The first part of the table maps bytes to character classes, in order
+        // to reduce the size of the transition table and create bitmasks.
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+         8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
+    };
+
+    private static final byte[] TRANS_TABLE =
+    {
+        // The second part is a transition table that maps a combination
+        // of a state of the automaton and a character class to a state.
+         0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+        12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+        12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+        12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+        12,36,12,12,12,12,12,12,12,12,12,12
+    };
+
+    private int _codep;

    public Utf8Appendable(Appendable appendable)
    {
-        _appendable=appendable;
+        _appendable = appendable;
    }

    public abstract int length();
-    
+
+    protected void reset()
+    {
+        _state = UTF8_ACCEPT;
+    }
+
    public void append(byte b)
    {
        try
        {
            appendByte(b);
        }
-        catch(IOException e)
-        {
-            throw new RuntimeException(e);
-        }
-    }
-    
-    public void append(byte[] b,int offset, int length)
-    {
-        try
-        {
-            int end=offset+length;
-            for (int i=offset; i<end;i++)
-                appendByte(b[i]);
-        }
-        catch(IOException e)
+        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
    }

-    public boolean append(byte[] b,int offset, int length, int maxChars)
+    public void append(byte[] b, int offset, int length)
    {
        try
        {
-            int end=offset+length;
-            for (int i=offset; i<end;i++)
+            int end = offset + length;
+            for (int i = offset; i < end; i++)
+                appendByte(b[i]);
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public boolean append(byte[] b, int offset, int length, int maxChars)
+    {
+        try
+        {
+            int end = offset + length;
+            for (int i = offset; i < end; i++)
            {
-                if (length()>maxChars)
+                if (length() > maxChars)
                    return false;
                appendByte(b[i]);
            }
            return true;
        }
-        catch(IOException e)
+        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
    }
-    
+
    protected void appendByte(byte b) throws IOException
    {
-        // Check for invalid bytes
-        if (b==(byte)0xc0 || b==(byte)0xc1 || (int)b>=0xf5)
-        {
-            _appendable.append(REPLACEMENT);
-            _expectedContinuationBytes=0;
-            _codePoint=0;
-            throw new NotUtf8Exception();
-        }
-        
-        // Is it plain ASCII?
-        if (b>=0)
-        {
-            // Were we expecting a continuation byte?
-            if (_expectedContinuationBytes>0)
-            {
-                _appendable.append(REPLACEMENT);
-                _expectedContinuationBytes=0;
-                _codePoint=0;
-                throw new NotUtf8Exception();
-            }
-            else
-                _appendable.append((char)(0x7f&b));
-        }
-        // Else is this a start byte
-        else if (_expectedContinuationBytes==0)
-        {
-            if ((b & 0xe0) == 0xc0)
-            {
-                //110xxxxx
-                _expectedContinuationBytes=1;
-                _codePoint=b&0x1f;
-                _minCodePoint=0x80;
-            }
-            else if ((b & 0xf0) == 0xe0)
-            {
-                //1110xxxx
-                _expectedContinuationBytes=2;
-                _codePoint=b&0x0f;
-                _minCodePoint=0x800;
-            }
-            else if ((b & 0xf8) == 0xf0)
-            {
-                //11110xxx
-                _expectedContinuationBytes=3;
-                _codePoint=b&0x07;
-                _minCodePoint=0x10000;
-            }
-            else if ((b & 0xfc) == 0xf8)
-            {
-                //111110xx
-                _expectedContinuationBytes=4;
-                _codePoint=b&0x03;
-                _minCodePoint=0x200000;
-            }
-            else if ((b & 0xfe) == 0xfc) 
-            {
-                //1111110x
-                _expectedContinuationBytes=5;
-                _codePoint=b&0x01;
-                _minCodePoint=0x400000;
-            }
-            else
-            {
-                _appendable.append(REPLACEMENT);
-                _expectedContinuationBytes=0;
-                _codePoint=0;
-                throw new NotUtf8Exception();
-            }
-        }
-        // else is this a continuation character
-        else if ((b&0xc0)==0x80)
-        {
-            // 10xxxxxx
-            _codePoint=(_codePoint<<6)|(b&0x3f);
-            
-            // was that the last continuation?
-            if (--_expectedContinuationBytes==0)
-            {
-                // If this a valid unicode point?
-                if (_codePoint<_minCodePoint || (_codePoint>=0xD800 && _codePoint<=0xDFFF))
-                {
-                    _appendable.append(REPLACEMENT);
-                    _expectedContinuationBytes=0;
-                    _codePoint=0;
-                    throw new NotUtf8Exception();
-                }

-                _minCodePoint=0;
-                char[] chars = Character.toChars(_codePoint);
-                for (char c : chars)
-                    _appendable.append(c);
-            }
+        if (b > 0 && isUtf8SequenceComplete())
+        {
+            _appendable.append((char)(b & 0xFF));
        }
-        // Else this is not a continuation character
        else
        {
-            // ! 10xxxxxx
-            _appendable.append(REPLACEMENT);
-            _expectedContinuationBytes=0;
-            _codePoint=0;
-            throw new NotUtf8Exception();
+            int i = b & 0xFF;
+            int type = BYTE_TABLE[i];
+            _codep = isUtf8SequenceComplete() ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
+            _state = TRANS_TABLE[_state + type];
+
+            if (isUtf8SequenceComplete())
+            {
+                if (_codep < Character.MIN_HIGH_SURROGATE)
+                {
+                    _appendable.append((char)_codep);
+                }
+                else
+                {
+                    for (char c : Character.toChars(_codep))
+                        _appendable.append(c);
+                }
+            }
+            else if (_state == UTF8_REJECT)
+            {
+                _state = UTF8_ACCEPT;
+                _appendable.append(REPLACEMENT);
+                throw new NotUtf8Exception();
+            }
        }
    }

+    protected boolean isUtf8SequenceComplete()
+    {
+        return _state == UTF8_ACCEPT;
+    }

    public static class NotUtf8Exception extends IllegalArgumentException
    {
        public NotUtf8Exception()
        {
-            super("!UTF-8");
+            super("Not valid UTF8!");
        }
    }
-}
+}
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java
@ -4,71 +4,73 @@
 // All rights reserved. This program and the accompanying materials
 // are made available under the terms of the Eclipse Public License v1.0
 // and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at 
+// The Eclipse Public License is available at
 // http://www.eclipse.org/legal/epl-v10.html
 // The Apache License v2.0 is available at
 // http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses. 
+// You may elect to redistribute this code under either of these licenses.
 // ========================================================================

 package org.eclipse.jetty.util;

-import java.io.IOException;
-
 /* ------------------------------------------------------------ */
-/** UTF-8 StringBuffer.
+/**
+ * UTF-8 StringBuffer.
 *
- * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append 
+ * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
 * UTF-8 encoded bytes, that are converted into characters.
- * 
- * This class is stateful and up to 6  calls to {@link #append(byte)} may be needed before 
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before
 * state a character is appended to the string buffer.
- * 
+ *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
- * The UTF-8 code was inspired by http://javolution.org
- * 
- * This class is not synchronised and should probably be called Utf8StringBuilder
+ * The UTF-8 code was inspired by http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
 */
-public class Utf8StringBuffer extends Utf8Appendable 
+public class Utf8StringBuffer extends Utf8Appendable
 {
    final StringBuffer _buffer;
-    
+
    public Utf8StringBuffer()
    {
        super(new StringBuffer());
-        _buffer=(StringBuffer)_appendable;
+        _buffer = (StringBuffer)_appendable;
    }
-    
+
    public Utf8StringBuffer(int capacity)
    {
        super(new StringBuffer(capacity));
-        _buffer=(StringBuffer)_appendable;
+        _buffer = (StringBuffer)_appendable;
    }

+    @Override
    public int length()
    {
        return _buffer.length();
    }
-    
+
+    @Override
    public void reset()
    {
+        super.reset();
        _buffer.setLength(0);
-        _expectedContinuationBytes=0;
-        _codePoint=0;
    }
-    
+
    public StringBuffer getStringBuffer()
    {
-        if (_expectedContinuationBytes!=0)
-            throw new NotUtf8Exception();
+        checkState();
        return _buffer;
    }
-    
+
    @Override
    public String toString()
    {
-        if (_expectedContinuationBytes!=0)
-            throw new NotUtf8Exception();
+        checkState();
        return _buffer.toString();
    }
+
+    private void checkState()
+    {
+        if (!isUtf8SequenceComplete())
+            throw new IllegalArgumentException("Tried to read incomplete UTF8 decoded String");
+    }
 }
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java
@ -4,70 +4,74 @@
 // All rights reserved. This program and the accompanying materials
 // are made available under the terms of the Eclipse Public License v1.0
 // and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at 
+// The Eclipse Public License is available at
 // http://www.eclipse.org/legal/epl-v10.html
 // The Apache License v2.0 is available at
 // http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses. 
+// You may elect to redistribute this code under either of these licenses.
 // ========================================================================

 package org.eclipse.jetty.util;

-import java.io.IOException;

 /* ------------------------------------------------------------ */
 /** UTF-8 StringBuilder.
 *
- * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append 
+ * This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append
 * UTF-8 encoded bytes, that are converted into characters.
- * 
- * This class is stateful and up to 6  calls to {@link #append(byte)} may be needed before 
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before
 * state a character is appended to the string buffer.
- * 
+ *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
- * The UTF-8 code was inspired by http://javolution.org
- * 
+ * The UTF-8 code was inspired by http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ *
 */
-public class Utf8StringBuilder extends Utf8Appendable 
+public class Utf8StringBuilder extends Utf8Appendable
 {
    final StringBuilder _buffer;
-    
+
    public Utf8StringBuilder()
    {
        super(new StringBuilder());
        _buffer=(StringBuilder)_appendable;
    }
-    
+
    public Utf8StringBuilder(int capacity)
    {
        super(new StringBuilder(capacity));
        _buffer=(StringBuilder)_appendable;
    }
-    
+
+    @Override
    public int length()
    {
        return _buffer.length();
    }
-    
+
+    @Override
    public void reset()
    {
+        super.reset();
        _buffer.setLength(0);
-        _expectedContinuationBytes=0;
-        _codePoint=0;
    }
-    
+
    public StringBuilder getStringBuilder()
    {
-        if (_expectedContinuationBytes!=0)
-            throw new NotUtf8Exception();
+        checkState();
        return _buffer;
    }
-    
+
    @Override
    public String toString()
    {
-        if (_expectedContinuationBytes!=0)
-            throw new NotUtf8Exception();
+        checkState();
        return _buffer.toString();
    }
+
+    private void checkState()
+    {
+        if (!isUtf8SequenceComplete())
+            throw new IllegalArgumentException("Tried to read incomplete UTF8 decoded String");
+    }
 }
--- a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java
@ -4,92 +4,98 @@
 // All rights reserved. This program and the accompanying materials
 // are made available under the terms of the Eclipse Public License v1.0
 // and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at 
+// The Eclipse Public License is available at
 // http://www.eclipse.org/legal/epl-v10.html
 // The Apache License v2.0 is available at
 // http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses. 
+// You may elect to redistribute this code under either of these licenses.
 // ========================================================================

 package org.eclipse.jetty.util;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import java.io.UnsupportedEncodingException;

 import org.junit.Test;

+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;

 public class Utf8StringBufferTest
 {
-    public void testUtfStringBuffer()
-        throws Exception
+    @Test
+    public void testUtfStringBuffer() throws Exception
    {
-        String source="abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
+        String source = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
        byte[] bytes = source.getBytes(StringUtil.__UTF8);
        Utf8StringBuffer buffer = new Utf8StringBuffer();
-        for (int i=0;i<bytes.length;i++)
+        for (byte aByte : bytes)
+            buffer.append(aByte);
+        assertEquals(source,buffer.toString());
+        assertTrue(buffer.toString().endsWith("jetty"));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testUtf8WithMissingByte() throws Exception
+    {
+        String source = "abc\u10fb";
+        byte[] bytes = source.getBytes(StringUtil.__UTF8);
+        Utf8StringBuffer buffer = new Utf8StringBuffer();
+        for (int i = 0; i < bytes.length - 1; i++)
            buffer.append(bytes[i]);
-        assertEquals(source, buffer.toString());
-        assertTrue(buffer.toString().endsWith("jetty")); 
+        buffer.toString();
+    }
+
+    @Test(expected = Utf8Appendable.NotUtf8Exception.class)
+    public void testUtf8WithAdditionalByte() throws Exception
+    {
+        String source = "abcXX";
+        byte[] bytes = source.getBytes(StringUtil.__UTF8);
+        bytes[3] = (byte)0xc0;
+        bytes[4] = (byte)0x00;
+
+        Utf8StringBuffer buffer = new Utf8StringBuffer();
+        for (byte aByte : bytes)
+            buffer.append(aByte);
    }


    @Test
-    public void testShort()
-    throws Exception
+    public void testUTF32codes() throws Exception
    {
-        String source="abc\u10fb";
-        byte[] bytes = source.getBytes(StringUtil.__UTF8);
-        Utf8StringBuffer buffer = new Utf8StringBuffer();
-        for (int i=0;i<bytes.length-1;i++)
-            buffer.append(bytes[i]);
-        try
-        {
-            buffer.toString();
-            assertTrue(false);
-        }
-        catch(Utf8Appendable.NotUtf8Exception e)
-        {
-            assertTrue(true);
-        }
-    }
-    
-    @Test
-    public void testLong()
-    throws Exception
-    {
-        String source="abcXX";
-        byte[] bytes = source.getBytes(StringUtil.__UTF8);
-        bytes[3]=(byte)0xc0;
-        bytes[4]=(byte)0x00;
+        String source = "\uD842\uDF9F";
+        byte[] bytes = source.getBytes("UTF-8");

-        Utf8StringBuffer buffer = new Utf8StringBuffer();
-        try
-        {
-            for (int i=0;i<bytes.length;i++)
-                buffer.append(bytes[i]);
-                assertTrue(false);
-        }
-        catch(Utf8Appendable.NotUtf8Exception e)
-        {
-            assertTrue(e.toString().indexOf("!UTF-8")>=0);
-        }
-        assertEquals("abc\ufffd",buffer.toString());
-    }
-    
-    @Test 
-    public void testUTF32codes()
-    throws Exception
-    {
-        String source="\uD842\uDF9F";
-        byte[] bytes=source.getBytes("UTF-8");
-        
        String jvmcheck = new String(bytes,0,bytes.length,"UTF-8");
        assertEquals(source,jvmcheck);
-        
+
        Utf8StringBuffer buffer = new Utf8StringBuffer();
        buffer.append(bytes,0,bytes.length);
-        String result=buffer.toString();
+        String result = buffer.toString();
        assertEquals(source,result);
    }
+    @Test
+    public void testGermanUmlauts() throws Exception
+    {
+        byte[] bytes = new byte[6];
+        bytes[0] = (byte)0xC3;
+        bytes[1] = (byte)0xBC;
+        bytes[2] = (byte)0xC3;
+        bytes[3] = (byte)0xB6;
+        bytes[4] = (byte)0xC3;
+        bytes[5] = (byte)0xA4;
+
+        Utf8StringBuffer buffer = new Utf8StringBuffer();
+        for (int i = 0; i < bytes.length; i++)
+            buffer.append(bytes[i]);
+
+        assertEquals("\u00FC\u00F6\u00E4",buffer.toString());
+    }
+
+    @Test(expected = Utf8Appendable.NotUtf8Exception.class)
+    public void testInvalidUTF8() throws UnsupportedEncodingException
+    {
+        Utf8StringBuffer buffer = new Utf8StringBuffer();
+        buffer.append((byte)0xC2);
+        buffer.append((byte)0xC2);
+    }
 }
--- a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java
@ -4,142 +4,102 @@
 // All rights reserved. This program and the accompanying materials
 // are made available under the terms of the Eclipse Public License v1.0
 // and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at 
+// The Eclipse Public License is available at
 // http://www.eclipse.org/legal/epl-v10.html
 // The Apache License v2.0 is available at
 // http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses. 
+// You may elect to redistribute this code under either of these licenses.
 // ========================================================================

 package org.eclipse.jetty.util;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 import org.junit.Test;

+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;

 public class Utf8StringBuilderTest
 {
    @Test
-    public void testInvalid()
-        throws Exception
+    public void testInvalid() throws Exception
    {
-        String[] invalids = {
-                "c0af",
-                "EDA080",
-                "f08080af",
-                "f8808080af",
-                "e080af",
-                "F4908080",
-                "fbbfbfbfbf"
-        };
-        
+        String[] invalids =
+        { "c0af", "EDA080", "f08080af", "f8808080af", "e080af", "F4908080", "fbbfbfbfbf", "10FFFF" };
+
        for (String i : invalids)
        {
            byte[] bytes = TypeUtil.fromHexString(i);
-
-            /* Test what JVM does
-            try
-            {
-                String s = new String(bytes,0,bytes.length,"UTF-8");
-                System.err.println(i+": "+s);
-            }
-            catch(Exception e)
-            {
-                System.err.println(i+": "+e);
-            }
-            */
-            
            try
            {
                Utf8StringBuilder buffer = new Utf8StringBuilder();
                buffer.append(bytes,0,bytes.length);
-                
+
                assertEquals(i,"not expected",buffer.toString());
            }
-            catch(IllegalArgumentException e)
+            catch (Utf8Appendable.NotUtf8Exception e)
            {
                assertTrue(i,true);
            }
        }
    }
-    
+
    @Test
-    public void testUtfStringBuilder()
-        throws Exception
+    public void testUtfStringBuilder() throws Exception
    {
-        String source="abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
+        String source = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
        byte[] bytes = source.getBytes(StringUtil.__UTF8);
        Utf8StringBuilder buffer = new Utf8StringBuilder();
-        for (int i=0;i<bytes.length;i++)
-            buffer.append(bytes[i]);
-        assertEquals(source, buffer.toString());
-        assertTrue(buffer.toString().endsWith("jetty")); 
+        for (byte aByte : bytes)
+            buffer.append(aByte);
+        assertEquals(source,buffer.toString());
+        assertTrue(buffer.toString().endsWith("jetty"));
    }
-    
-    
-    
-    @Test
-    public void testShort()
-    throws Exception
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testShort() throws Exception
    {
-        String source="abc\u10fb";
+        String source = "abc\u10fb";
        byte[] bytes = source.getBytes(StringUtil.__UTF8);
        Utf8StringBuilder buffer = new Utf8StringBuilder();
-        for (int i=0;i<bytes.length-1;i++)
+        for (int i = 0; i < bytes.length - 1; i++)
            buffer.append(bytes[i]);
-        try
-        {
-            buffer.toString();
-            assertTrue(false);
-        }
-        catch(Utf8Appendable.NotUtf8Exception e)
-        {
-            assertTrue(e.toString().indexOf("!UTF-8")>=0);
-        }
+        buffer.toString();
    }
-    
+
    @Test
-    public void testLong()
-    throws Exception
+    public void testLong() throws Exception
    {
-        String source="abcXX";
+        String source = "abcXX";
        byte[] bytes = source.getBytes(StringUtil.__UTF8);
-        bytes[3]=(byte)0xc0;
-        bytes[4]=(byte)0x00;
+        bytes[3] = (byte)0xc0;
+        bytes[4] = (byte)0x00;

        Utf8StringBuilder buffer = new Utf8StringBuilder();
        try
        {
-            for (int i = 0; i < bytes.length; i++)
-                buffer.append(bytes[i]);
+            for (byte aByte : bytes)
+                buffer.append(aByte);
            assertTrue(false);
        }
-        catch(Utf8Appendable.NotUtf8Exception e)
+        catch (IllegalArgumentException e)
        {
            assertTrue(true);
        }
-        assertEquals("abc\ufffd", buffer.toString());
+        assertEquals("abc\ufffd",buffer.toString());
    }

-    
-    @Test 
-    public void testUTF32codes()
-    throws Exception
+    @Test
+    public void testUTF32codes() throws Exception
    {
-        String source="\uD842\uDF9F";
-        byte[] bytes=source.getBytes("UTF-8");
-        
-        // System.err.println(TypeUtil.toHexString(bytes));
+        String source = "\uD842\uDF9F";
+        byte[] bytes = source.getBytes("UTF-8");
+
        String jvmcheck = new String(bytes,0,bytes.length,"UTF-8");
        assertEquals(source,jvmcheck);
-        
+
        Utf8StringBuilder buffer = new Utf8StringBuilder();
        buffer.append(bytes,0,bytes.length);
-        String result=buffer.toString();
+        String result = buffer.toString();
        assertEquals(source,result);
    }
-    
-
 }