466618 - Partial WebSocket Text delivery does not like incomplete UTF8 sequences

+ Adding alternative to Utf8StringBuilder that doesn't throw exception
  on incomplete UTF8 sequences, specifically for partial message
  handling
This commit is contained in:
Joakim Erdfelt 2015-05-06 10:10:32 -07:00
parent dd7313ca82
commit e801cf3374
2 changed files with 149 additions and 0 deletions

View File

@ -0,0 +1,63 @@
//
// ========================================================================
// Copyright (c) 1995-2015 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
//
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
//
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
//
package org.eclipse.jetty.websocket.common.util;
import java.nio.ByteBuffer;
import org.eclipse.jetty.util.Utf8Appendable;
import org.eclipse.jetty.util.Utf8StringBuilder;
/**
 * A UTF-8 decoder for text that arrives in pieces. Comparable in purpose to {@link Utf8StringBuilder}, but an
 * incomplete UTF-8 sequence at the end of a piece is tolerated rather than raised as an exception.
 * <p>
 * Every call to {@link #toPartialString(ByteBuffer)} hands back the characters completed since the previous call.
 * A trailing sequence that is not yet complete is left pending, to be finished by the bytes of a subsequent call.
 */
public class Utf8PartialBuilder
{
    /** Collects decoded characters; emptied each time a partial string is handed out. */
    private final StringBuilder decoded;
    /** UTF-8 appendable that writes its output into {@link #decoded}. */
    private final Utf8Appendable decoder;

    /**
     * Creates a builder with an empty character buffer and a decoder bound to that buffer.
     */
    public Utf8PartialBuilder()
    {
        // Declared final so the anonymous subclass below can capture it.
        final StringBuilder sink = new StringBuilder();
        this.decoded = sink;
        this.decoder = new Utf8Appendable(sink)
        {
            @Override
            public int length()
            {
                return sink.length();
            }
        };
    }

    /**
     * Feeds the given bytes to the decoder and returns the characters now available.
     *
     * @param buf the next chunk of UTF-8 bytes; {@code null} is accepted and treated as "nothing to add"
     * @return the characters accumulated in the internal buffer after appending {@code buf}, or the empty string
     *         when {@code buf} is {@code null}
     */
    public String toPartialString(ByteBuffer buf)
    {
        if (buf == null)
        {
            // nothing was supplied, so there is nothing new to report
            return "";
        }

        decoder.append(buf);

        // Drain the buffer so the next call only returns newly decoded characters.
        String text = decoded.toString();
        decoded.setLength(0);
        return text;
    }
}

View File

@ -0,0 +1,86 @@
//
// ========================================================================
// Copyright (c) 1995-2015 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
//
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
//
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
//
package org.eclipse.jetty.websocket.common.util;
import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.eclipse.jetty.util.BufferUtil;
import org.junit.Test;
/**
 * Exercises {@link Utf8PartialBuilder} with UTF-8 text that is delivered in more than one chunk.
 */
public class Utf8PartialBuilderTest
{
    /**
     * Converts a hex string into bytes using {@code Hex.asByteArray} and wraps them in a buffer.
     */
    private ByteBuffer toByteBuffer(String hexStr)
    {
        byte[] raw = Hex.asByteArray(hexStr);
        return ByteBuffer.wrap(raw);
    }

    /**
     * Two chunks that each end on a character boundary must come back unchanged.
     */
    @Test
    public void testPartial_UnsplitCodepoint()
    {
        Utf8PartialBuilder builder = new Utf8PartialBuilder();

        String firstText = "Hello-\uC2B5@\uC39F\uC3A4";
        String secondText = "\uC3BC\uC3A0\uC3A1-UTF-8!!";

        String firstResult = builder.toPartialString(BufferUtil.toBuffer(firstText,StandardCharsets.UTF_8));
        String secondResult = builder.toPartialString(BufferUtil.toBuffer(secondText,StandardCharsets.UTF_8));

        assertThat("Seq1",firstResult,is(firstText));
        assertThat("Seq2",secondResult,is(secondText));
    }

    /**
     * The first chunk ends with the bytes EC 8E and the second begins with A4, so the character U+C3A4 is
     * expected at the start of the second result rather than at the end of the first.
     */
    @Test
    public void testPartial_SplitCodepoint()
    {
        Utf8PartialBuilder builder = new Utf8PartialBuilder();

        String firstHex = "48656C6C6F2DEC8AB540EC8E9FEC8E";
        String secondHex = "A4EC8EBCEC8EA0EC8EA12D5554462D382121";

        String firstResult = builder.toPartialString(toByteBuffer(firstHex));
        String secondResult = builder.toPartialString(toByteBuffer(secondHex));

        assertThat("Seq1",firstResult,is("Hello-\uC2B5@\uC39F"));
        assertThat("Seq2",secondResult,is("\uC3A4\uC3BC\uC3A0\uC3A1-UTF-8!!"));
    }

    /**
     * Same split as above, with an empty buffer fed in between the two halves. The empty call is expected to
     * yield an empty string and the final call is still expected to start with U+C3A4.
     */
    @Test
    public void testPartial_SplitCodepoint_WithNoBuf()
    {
        Utf8PartialBuilder builder = new Utf8PartialBuilder();

        String firstHex = "48656C6C6F2DEC8AB540EC8E9FEC8E";
        String secondHex = "A4EC8EBCEC8EA0EC8EA12D5554462D382121";

        String firstResult = builder.toPartialString(toByteBuffer(firstHex));
        String emptyResult = builder.toPartialString(BufferUtil.EMPTY_BUFFER);
        String lastResult = builder.toPartialString(toByteBuffer(secondHex));

        assertThat("Seq1",firstResult,is("Hello-\uC2B5@\uC39F"));
        assertThat("Seq2",emptyResult,is(""));
        assertThat("Seq3",lastResult,is("\uC3A4\uC3BC\uC3A0\uC3A1-UTF-8!!"));
    }
}