466618 - Partial WebSocket Text delivery does not like incomplete UTF8 sequences

+ Adding an alternative to Utf8StringBuilder that doesn't throw an exception on incomplete UTF8 sequences, specifically for partial message handling
2015-05-06 10:10:32 -07:00 · 2015-05-06 10:10:32 -07:00 · e801cf3374
parent dd7313ca82
commit e801cf3374
2 changed files with 149 additions and 0 deletions
--- a/jetty-websocket/websocket-common/src/main/java/org/eclipse/jetty/websocket/common/util/Utf8PartialBuilder.java
+++ b/jetty-websocket/websocket-common/src/main/java/org/eclipse/jetty/websocket/common/util/Utf8PartialBuilder.java
@ -0,0 +1,63 @@
+//
+//  ========================================================================
+//  Copyright (c) 1995-2015 Mort Bay Consulting Pty. Ltd.
+//  ------------------------------------------------------------------------
+//  All rights reserved. This program and the accompanying materials
+//  are made available under the terms of the Eclipse Public License v1.0
+//  and Apache License v2.0 which accompanies this distribution.
+//
+//      The Eclipse Public License is available at
+//      http://www.eclipse.org/legal/epl-v10.html
+//
+//      The Apache License v2.0 is available at
+//      http://www.opensource.org/licenses/apache2.0.php
+//
+//  You may elect to redistribute this code under either of these licenses.
+//  ========================================================================
+//
+
+package org.eclipse.jetty.websocket.common.util;
+
+import java.nio.ByteBuffer;
+
+import org.eclipse.jetty.util.Utf8Appendable;
+import org.eclipse.jetty.util.Utf8StringBuilder;
+
+/**
+ * Incremental UTF8 decoder intended for partial (fragmented) text delivery.
+ * <p>
+ * Comparable in purpose to {@link Utf8StringBuilder}, except that a buffer which ends in the middle of a multi-byte
+ * UTF8 sequence is not reported as an error.
+ * <p>
+ * Each call to {@link #toPartialString(ByteBuffer)} hands back only the text that was completed by that call; an
+ * incomplete trailing sequence is left for a subsequent call to finish.
+ */
+public class Utf8PartialBuilder
+{
+    /** Decoded characters that have not yet been returned to the caller. */
+    private final StringBuilder decoded;
+    /** UTF8 decoder that writes its output into {@link #decoded}. */
+    private final Utf8Appendable decoder;
+
+    /**
+     * Create a builder with an empty output buffer.
+     */
+    public Utf8PartialBuilder()
+    {
+        decoded = new StringBuilder();
+        decoder = new Utf8Appendable(decoded)
+        {
+            @Override
+            public int length()
+            {
+                // report the size of the backing builder owned by the enclosing instance
+                return Utf8PartialBuilder.this.decoded.length();
+            }
+        };
+    }
+
+    /**
+     * Feed more bytes to the decoder and collect the text produced so far.
+     *
+     * @param buf the bytes to decode, or {@code null} when there is nothing to add
+     * @return the characters accumulated since the previous call; the empty string when {@code buf} is {@code null}
+     */
+    public String toPartialString(ByteBuffer buf)
+    {
+        if (buf != null)
+        {
+            decoder.append(buf);
+            final String completed = decoded.toString();
+            // clear the output so the next call only reports newly decoded text
+            decoded.setLength(0);
+            return completed;
+        }
+
+        // no change, return empty
+        return "";
+    }
+}
--- a/jetty-websocket/websocket-common/src/test/java/org/eclipse/jetty/websocket/common/util/Utf8PartialBuilderTest.java
+++ b/jetty-websocket/websocket-common/src/test/java/org/eclipse/jetty/websocket/common/util/Utf8PartialBuilderTest.java
@ -0,0 +1,86 @@
+//
+//  ========================================================================
+//  Copyright (c) 1995-2015 Mort Bay Consulting Pty. Ltd.
+//  ------------------------------------------------------------------------
+//  All rights reserved. This program and the accompanying materials
+//  are made available under the terms of the Eclipse Public License v1.0
+//  and Apache License v2.0 which accompanies this distribution.
+//
+//      The Eclipse Public License is available at
+//      http://www.eclipse.org/legal/epl-v10.html
+//
+//      The Apache License v2.0 is available at
+//      http://www.opensource.org/licenses/apache2.0.php
+//
+//  You may elect to redistribute this code under either of these licenses.
+//  ========================================================================
+//
+
+package org.eclipse.jetty.websocket.common.util;
+
+import static org.hamcrest.Matchers.*;
+import static org.junit.Assert.*;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
+import org.eclipse.jetty.util.BufferUtil;
+import org.junit.Test;
+
+/**
+ * Exercises {@link Utf8PartialBuilder} with input that is delivered in more than one buffer.
+ */
+public class Utf8PartialBuilderTest
+{
+    /** First fragment (hex); ends after two bytes (EC 8E) of the three-byte encoding of U+C3A4. */
+    private static final String SPLIT_HEX_HEAD = "48656C6C6F2DEC8AB540EC8E9FEC8E";
+    /** Second fragment (hex); begins with the final byte (A4) of the sequence left open by the first fragment. */
+    private static final String SPLIT_HEX_TAIL = "A4EC8EBCEC8EA0EC8EA12D5554462D382121";
+
+    /**
+     * Wrap the bytes described by a hex string in a {@link ByteBuffer}.
+     */
+    private ByteBuffer toByteBuffer(String hexStr)
+    {
+        byte[] raw = Hex.asByteArray(hexStr);
+        return ByteBuffer.wrap(raw);
+    }
+
+    /**
+     * Both buffers end on a codepoint boundary, so each call returns exactly the text it was given.
+     */
+    @Test
+    public void testPartial_UnsplitCodepoint()
+    {
+        Utf8PartialBuilder builder = new Utf8PartialBuilder();
+
+        String seq1 = "Hello-\uC2B5@\uC39F\uC3A4";
+        String seq2 = "\uC3BC\uC3A0\uC3A1-UTF-8!!";
+
+        ByteBuffer firstBuf = BufferUtil.toBuffer(seq1,StandardCharsets.UTF_8);
+        String firstResult = builder.toPartialString(firstBuf);
+        ByteBuffer secondBuf = BufferUtil.toBuffer(seq2,StandardCharsets.UTF_8);
+        String secondResult = builder.toPartialString(secondBuf);
+
+        assertThat("Seq1",firstResult,is(seq1));
+        assertThat("Seq2",secondResult,is(seq2));
+    }
+
+    /**
+     * The first buffer stops inside a multi-byte sequence; the split character must only appear in the second result.
+     */
+    @Test
+    public void testPartial_SplitCodepoint()
+    {
+        Utf8PartialBuilder builder = new Utf8PartialBuilder();
+
+        String headResult = builder.toPartialString(toByteBuffer(SPLIT_HEX_HEAD));
+        String tailResult = builder.toPartialString(toByteBuffer(SPLIT_HEX_TAIL));
+
+        assertThat("Seq1",headResult,is("Hello-\uC2B5@\uC39F"));
+        assertThat("Seq2",tailResult,is("\uC3A4\uC3BC\uC3A0\uC3A1-UTF-8!!"));
+    }
+
+    /**
+     * Same as the split case, with an empty buffer supplied between the two fragments; the empty buffer must yield
+     * an empty string and must not disturb the pending sequence.
+     */
+    @Test
+    public void testPartial_SplitCodepoint_WithNoBuf()
+    {
+        Utf8PartialBuilder builder = new Utf8PartialBuilder();
+
+        String headResult = builder.toPartialString(toByteBuffer(SPLIT_HEX_HEAD));
+        String emptyResult = builder.toPartialString(BufferUtil.EMPTY_BUFFER);
+        String tailResult = builder.toPartialString(toByteBuffer(SPLIT_HEX_TAIL));
+
+        assertThat("Seq1",headResult,is("Hello-\uC2B5@\uC39F"));
+        assertThat("Seq2",emptyResult,is(""));
+        assertThat("Seq3",tailResult,is("\uC3A4\uC3BC\uC3A0\uC3A1-UTF-8!!"));
+    }
+}