Fix for #358121 (Utf8Appendable refactored to use Bjoern Hoehrmann's decoder).
Signed-off-by: Simone Bordet <simone.bordet@gmail.com>
This commit is contained in:
parent
6bebdceb3a
commit
b3e6ebf416
|
@ -1,180 +1,181 @@
|
|||
// ========================================================================
|
||||
// Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
|
||||
// ------------------------------------------------------------------------
|
||||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.IllegalFormatCodePointException;
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/**
|
||||
* Utf8 Appendable abstract base class
|
||||
*
|
||||
* This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, that are converted into characters.
|
||||
*
|
||||
* This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before a character is appended to the string buffer.
|
||||
*
|
||||
* The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by
|
||||
* http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
||||
*
|
||||
* License information for Bjoern Hoehrmann's code:
|
||||
*
|
||||
* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
**/
|
||||
public abstract class Utf8Appendable
|
||||
{
|
||||
private final char REPLACEMENT = '\ufffd';
|
||||
private static final int UTF8_ACCEPT = 0;
|
||||
private static final int UTF8_REJECT = 12;
|
||||
|
||||
protected final Appendable _appendable;
|
||||
protected int _expectedContinuationBytes;
|
||||
protected int _codePoint;
|
||||
protected int _minCodePoint;
|
||||
protected int _state = UTF8_ACCEPT;
|
||||
|
||||
private static final byte[] BYTE_TABLE =
|
||||
{
|
||||
// The first part of the table maps bytes to character classes
|
||||
// to reduce the size of the transition table and create bitmasks.
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
|
||||
};
|
||||
|
||||
private static final byte[] TRANS_TABLE =
|
||||
{
|
||||
// The second part is a transition table that maps a combination
|
||||
// of a state of the automaton and a character class to a state.
|
||||
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
||||
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
||||
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
||||
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
||||
12,36,12,12,12,12,12,12,12,12,12,12
|
||||
};
|
||||
|
||||
private int _codep;
|
||||
|
||||
public Utf8Appendable(Appendable appendable)
|
||||
{
|
||||
_appendable=appendable;
|
||||
_appendable = appendable;
|
||||
}
|
||||
|
||||
public abstract int length();
|
||||
|
||||
|
||||
protected void reset()
|
||||
{
|
||||
_state = UTF8_ACCEPT;
|
||||
}
|
||||
|
||||
public void append(byte b)
|
||||
{
|
||||
try
|
||||
{
|
||||
appendByte(b);
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void append(byte[] b,int offset, int length)
|
||||
{
|
||||
try
|
||||
{
|
||||
int end=offset+length;
|
||||
for (int i=offset; i<end;i++)
|
||||
appendByte(b[i]);
|
||||
}
|
||||
catch(IOException e)
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean append(byte[] b,int offset, int length, int maxChars)
|
||||
public void append(byte[] b, int offset, int length)
|
||||
{
|
||||
try
|
||||
{
|
||||
int end=offset+length;
|
||||
for (int i=offset; i<end;i++)
|
||||
int end = offset + length;
|
||||
for (int i = offset; i < end; i++)
|
||||
appendByte(b[i]);
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean append(byte[] b, int offset, int length, int maxChars)
|
||||
{
|
||||
try
|
||||
{
|
||||
int end = offset + length;
|
||||
for (int i = offset; i < end; i++)
|
||||
{
|
||||
if (length()>maxChars)
|
||||
if (length() > maxChars)
|
||||
return false;
|
||||
appendByte(b[i]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
catch(IOException e)
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void appendByte(byte b) throws IOException
|
||||
{
|
||||
// Check for invalid bytes
|
||||
if (b==(byte)0xc0 || b==(byte)0xc1 || (int)b>=0xf5)
|
||||
{
|
||||
_appendable.append(REPLACEMENT);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
throw new NotUtf8Exception();
|
||||
}
|
||||
|
||||
// Is it plain ASCII?
|
||||
if (b>=0)
|
||||
{
|
||||
// Were we expecting a continuation byte?
|
||||
if (_expectedContinuationBytes>0)
|
||||
{
|
||||
_appendable.append(REPLACEMENT);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
throw new NotUtf8Exception();
|
||||
}
|
||||
else
|
||||
_appendable.append((char)(0x7f&b));
|
||||
}
|
||||
// Else is this a start byte
|
||||
else if (_expectedContinuationBytes==0)
|
||||
{
|
||||
if ((b & 0xe0) == 0xc0)
|
||||
{
|
||||
//110xxxxx
|
||||
_expectedContinuationBytes=1;
|
||||
_codePoint=b&0x1f;
|
||||
_minCodePoint=0x80;
|
||||
}
|
||||
else if ((b & 0xf0) == 0xe0)
|
||||
{
|
||||
//1110xxxx
|
||||
_expectedContinuationBytes=2;
|
||||
_codePoint=b&0x0f;
|
||||
_minCodePoint=0x800;
|
||||
}
|
||||
else if ((b & 0xf8) == 0xf0)
|
||||
{
|
||||
//11110xxx
|
||||
_expectedContinuationBytes=3;
|
||||
_codePoint=b&0x07;
|
||||
_minCodePoint=0x10000;
|
||||
}
|
||||
else if ((b & 0xfc) == 0xf8)
|
||||
{
|
||||
//111110xx
|
||||
_expectedContinuationBytes=4;
|
||||
_codePoint=b&0x03;
|
||||
_minCodePoint=0x200000;
|
||||
}
|
||||
else if ((b & 0xfe) == 0xfc)
|
||||
{
|
||||
//1111110x
|
||||
_expectedContinuationBytes=5;
|
||||
_codePoint=b&0x01;
|
||||
_minCodePoint=0x400000;
|
||||
}
|
||||
else
|
||||
{
|
||||
_appendable.append(REPLACEMENT);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
throw new NotUtf8Exception();
|
||||
}
|
||||
}
|
||||
// else is this a continuation character
|
||||
else if ((b&0xc0)==0x80)
|
||||
{
|
||||
// 10xxxxxx
|
||||
_codePoint=(_codePoint<<6)|(b&0x3f);
|
||||
|
||||
// was that the last continuation?
|
||||
if (--_expectedContinuationBytes==0)
|
||||
{
|
||||
// If this a valid unicode point?
|
||||
if (_codePoint<_minCodePoint || (_codePoint>=0xD800 && _codePoint<=0xDFFF))
|
||||
{
|
||||
_appendable.append(REPLACEMENT);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
throw new NotUtf8Exception();
|
||||
}
|
||||
|
||||
_minCodePoint=0;
|
||||
char[] chars = Character.toChars(_codePoint);
|
||||
for (char c : chars)
|
||||
_appendable.append(c);
|
||||
}
|
||||
if (b > 0 && isUtf8SequenceComplete())
|
||||
{
|
||||
_appendable.append((char)(b & 0xFF));
|
||||
}
|
||||
// Else this is not a continuation character
|
||||
else
|
||||
{
|
||||
// ! 10xxxxxx
|
||||
_appendable.append(REPLACEMENT);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
throw new NotUtf8Exception();
|
||||
int i = b & 0xFF;
|
||||
int type = BYTE_TABLE[i];
|
||||
_codep = isUtf8SequenceComplete() ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
|
||||
_state = TRANS_TABLE[_state + type];
|
||||
|
||||
if (isUtf8SequenceComplete())
|
||||
{
|
||||
if (_codep < Character.MIN_HIGH_SURROGATE)
|
||||
{
|
||||
_appendable.append((char)_codep);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (char c : Character.toChars(_codep))
|
||||
_appendable.append(c);
|
||||
}
|
||||
}
|
||||
else if (_state == UTF8_REJECT)
|
||||
{
|
||||
_state = UTF8_ACCEPT;
|
||||
_appendable.append(REPLACEMENT);
|
||||
throw new NotUtf8Exception();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean isUtf8SequenceComplete()
|
||||
{
|
||||
return _state == UTF8_ACCEPT;
|
||||
}
|
||||
|
||||
public static class NotUtf8Exception extends IllegalArgumentException
|
||||
{
|
||||
public NotUtf8Exception()
|
||||
{
|
||||
super("!UTF-8");
|
||||
super("Not valid UTF8!");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,71 +4,73 @@
|
|||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
// The Eclipse Public License is available at
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/** UTF-8 StringBuffer.
|
||||
/**
|
||||
* UTF-8 StringBuffer.
|
||||
*
|
||||
* This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
|
||||
* This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
|
||||
* UTF-8 encoded bytes, that are converted into characters.
|
||||
*
|
||||
* This class is stateful and up to 6 calls to {@link #append(byte)} may be needed before
|
||||
*
|
||||
* This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before
|
||||
* a character is appended to the string buffer.
|
||||
*
|
||||
*
|
||||
* The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
|
||||
* The UTF-8 code was inspired by http://javolution.org
|
||||
*
|
||||
* This class is not synchronised and should probably be called Utf8StringBuilder
|
||||
* The UTF-8 code was inspired by http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
||||
*/
|
||||
public class Utf8StringBuffer extends Utf8Appendable
|
||||
public class Utf8StringBuffer extends Utf8Appendable
|
||||
{
|
||||
final StringBuffer _buffer;
|
||||
|
||||
|
||||
public Utf8StringBuffer()
|
||||
{
|
||||
super(new StringBuffer());
|
||||
_buffer=(StringBuffer)_appendable;
|
||||
_buffer = (StringBuffer)_appendable;
|
||||
}
|
||||
|
||||
|
||||
public Utf8StringBuffer(int capacity)
|
||||
{
|
||||
super(new StringBuffer(capacity));
|
||||
_buffer=(StringBuffer)_appendable;
|
||||
_buffer = (StringBuffer)_appendable;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length()
|
||||
{
|
||||
return _buffer.length();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void reset()
|
||||
{
|
||||
super.reset();
|
||||
_buffer.setLength(0);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
}
|
||||
|
||||
|
||||
public StringBuffer getStringBuffer()
|
||||
{
|
||||
if (_expectedContinuationBytes!=0)
|
||||
throw new NotUtf8Exception();
|
||||
checkState();
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
if (_expectedContinuationBytes!=0)
|
||||
throw new NotUtf8Exception();
|
||||
checkState();
|
||||
return _buffer.toString();
|
||||
}
|
||||
|
||||
private void checkState()
|
||||
{
|
||||
if (!isUtf8SequenceComplete())
|
||||
throw new IllegalArgumentException("Tried to read incomplete UTF8 decoded String");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,70 +4,74 @@
|
|||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
// The Eclipse Public License is available at
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
/** UTF-8 StringBuilder.
|
||||
*
|
||||
* This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
|
||||
* This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append
|
||||
* UTF-8 encoded bytes, that are converted into characters.
|
||||
*
|
||||
* This class is stateful and up to 6 calls to {@link #append(byte)} may be needed before
|
||||
*
|
||||
* This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before
|
||||
* a character is appended to the string buffer.
|
||||
*
|
||||
*
|
||||
* The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
|
||||
* The UTF-8 code was inspired by http://javolution.org
|
||||
*
|
||||
* The UTF-8 code was inspired by http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
||||
*
|
||||
*/
|
||||
public class Utf8StringBuilder extends Utf8Appendable
|
||||
public class Utf8StringBuilder extends Utf8Appendable
|
||||
{
|
||||
final StringBuilder _buffer;
|
||||
|
||||
|
||||
public Utf8StringBuilder()
|
||||
{
|
||||
super(new StringBuilder());
|
||||
_buffer=(StringBuilder)_appendable;
|
||||
}
|
||||
|
||||
|
||||
public Utf8StringBuilder(int capacity)
|
||||
{
|
||||
super(new StringBuilder(capacity));
|
||||
_buffer=(StringBuilder)_appendable;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int length()
|
||||
{
|
||||
return _buffer.length();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void reset()
|
||||
{
|
||||
super.reset();
|
||||
_buffer.setLength(0);
|
||||
_expectedContinuationBytes=0;
|
||||
_codePoint=0;
|
||||
}
|
||||
|
||||
|
||||
public StringBuilder getStringBuilder()
|
||||
{
|
||||
if (_expectedContinuationBytes!=0)
|
||||
throw new NotUtf8Exception();
|
||||
checkState();
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
if (_expectedContinuationBytes!=0)
|
||||
throw new NotUtf8Exception();
|
||||
checkState();
|
||||
return _buffer.toString();
|
||||
}
|
||||
|
||||
private void checkState()
|
||||
{
|
||||
if (!isUtf8SequenceComplete())
|
||||
throw new IllegalArgumentException("Tried to read incomplete UTF8 decoded String");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,92 +4,98 @@
|
|||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
// The Eclipse Public License is available at
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class Utf8StringBufferTest
|
||||
{
|
||||
public void testUtfStringBuffer()
|
||||
throws Exception
|
||||
@Test
|
||||
public void testUtfStringBuffer() throws Exception
|
||||
{
|
||||
String source="abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
|
||||
String source = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
for (int i=0;i<bytes.length;i++)
|
||||
for (byte aByte : bytes)
|
||||
buffer.append(aByte);
|
||||
assertEquals(source,buffer.toString());
|
||||
assertTrue(buffer.toString().endsWith("jetty"));
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testUtf8WithMissingByte() throws Exception
|
||||
{
|
||||
String source = "abc\u10fb";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
for (int i = 0; i < bytes.length - 1; i++)
|
||||
buffer.append(bytes[i]);
|
||||
assertEquals(source, buffer.toString());
|
||||
assertTrue(buffer.toString().endsWith("jetty"));
|
||||
buffer.toString();
|
||||
}
|
||||
|
||||
@Test(expected = Utf8Appendable.NotUtf8Exception.class)
|
||||
public void testUtf8WithAdditionalByte() throws Exception
|
||||
{
|
||||
String source = "abcXX";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
bytes[3] = (byte)0xc0;
|
||||
bytes[4] = (byte)0x00;
|
||||
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
for (byte aByte : bytes)
|
||||
buffer.append(aByte);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testShort()
|
||||
throws Exception
|
||||
public void testUTF32codes() throws Exception
|
||||
{
|
||||
String source="abc\u10fb";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
for (int i=0;i<bytes.length-1;i++)
|
||||
buffer.append(bytes[i]);
|
||||
try
|
||||
{
|
||||
buffer.toString();
|
||||
assertTrue(false);
|
||||
}
|
||||
catch(Utf8Appendable.NotUtf8Exception e)
|
||||
{
|
||||
assertTrue(true);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLong()
|
||||
throws Exception
|
||||
{
|
||||
String source="abcXX";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
bytes[3]=(byte)0xc0;
|
||||
bytes[4]=(byte)0x00;
|
||||
String source = "\uD842\uDF9F";
|
||||
byte[] bytes = source.getBytes("UTF-8");
|
||||
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
try
|
||||
{
|
||||
for (int i=0;i<bytes.length;i++)
|
||||
buffer.append(bytes[i]);
|
||||
assertTrue(false);
|
||||
}
|
||||
catch(Utf8Appendable.NotUtf8Exception e)
|
||||
{
|
||||
assertTrue(e.toString().indexOf("!UTF-8")>=0);
|
||||
}
|
||||
assertEquals("abc\ufffd",buffer.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUTF32codes()
|
||||
throws Exception
|
||||
{
|
||||
String source="\uD842\uDF9F";
|
||||
byte[] bytes=source.getBytes("UTF-8");
|
||||
|
||||
String jvmcheck = new String(bytes,0,bytes.length,"UTF-8");
|
||||
assertEquals(source,jvmcheck);
|
||||
|
||||
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
buffer.append(bytes,0,bytes.length);
|
||||
String result=buffer.toString();
|
||||
String result = buffer.toString();
|
||||
assertEquals(source,result);
|
||||
}
|
||||
@Test
|
||||
public void testGermanUmlauts() throws Exception
|
||||
{
|
||||
byte[] bytes = new byte[6];
|
||||
bytes[0] = (byte)0xC3;
|
||||
bytes[1] = (byte)0xBC;
|
||||
bytes[2] = (byte)0xC3;
|
||||
bytes[3] = (byte)0xB6;
|
||||
bytes[4] = (byte)0xC3;
|
||||
bytes[5] = (byte)0xA4;
|
||||
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
for (int i = 0; i < bytes.length; i++)
|
||||
buffer.append(bytes[i]);
|
||||
|
||||
assertEquals("\u00FC\u00F6\u00E4",buffer.toString());
|
||||
}
|
||||
|
||||
@Test(expected = Utf8Appendable.NotUtf8Exception.class)
|
||||
public void testInvalidUTF8() throws UnsupportedEncodingException
|
||||
{
|
||||
Utf8StringBuffer buffer = new Utf8StringBuffer();
|
||||
buffer.append((byte)0xC2);
|
||||
buffer.append((byte)0xC2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,142 +4,102 @@
|
|||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
// The Eclipse Public License is available at
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class Utf8StringBuilderTest
|
||||
{
|
||||
@Test
|
||||
public void testInvalid()
|
||||
throws Exception
|
||||
public void testInvalid() throws Exception
|
||||
{
|
||||
String[] invalids = {
|
||||
"c0af",
|
||||
"EDA080",
|
||||
"f08080af",
|
||||
"f8808080af",
|
||||
"e080af",
|
||||
"F4908080",
|
||||
"fbbfbfbfbf"
|
||||
};
|
||||
|
||||
String[] invalids =
|
||||
{ "c0af", "EDA080", "f08080af", "f8808080af", "e080af", "F4908080", "fbbfbfbfbf", "10FFFF" };
|
||||
|
||||
for (String i : invalids)
|
||||
{
|
||||
byte[] bytes = TypeUtil.fromHexString(i);
|
||||
|
||||
/* Test what JVM does
|
||||
try
|
||||
{
|
||||
String s = new String(bytes,0,bytes.length,"UTF-8");
|
||||
System.err.println(i+": "+s);
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
System.err.println(i+": "+e);
|
||||
}
|
||||
*/
|
||||
|
||||
try
|
||||
{
|
||||
Utf8StringBuilder buffer = new Utf8StringBuilder();
|
||||
buffer.append(bytes,0,bytes.length);
|
||||
|
||||
|
||||
assertEquals(i,"not expected",buffer.toString());
|
||||
}
|
||||
catch(IllegalArgumentException e)
|
||||
catch (Utf8Appendable.NotUtf8Exception e)
|
||||
{
|
||||
assertTrue(i,true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUtfStringBuilder()
|
||||
throws Exception
|
||||
public void testUtfStringBuilder() throws Exception
|
||||
{
|
||||
String source="abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
|
||||
String source = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
Utf8StringBuilder buffer = new Utf8StringBuilder();
|
||||
for (int i=0;i<bytes.length;i++)
|
||||
buffer.append(bytes[i]);
|
||||
assertEquals(source, buffer.toString());
|
||||
assertTrue(buffer.toString().endsWith("jetty"));
|
||||
for (byte aByte : bytes)
|
||||
buffer.append(aByte);
|
||||
assertEquals(source,buffer.toString());
|
||||
assertTrue(buffer.toString().endsWith("jetty"));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testShort()
|
||||
throws Exception
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testShort() throws Exception
|
||||
{
|
||||
String source="abc\u10fb";
|
||||
String source = "abc\u10fb";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
Utf8StringBuilder buffer = new Utf8StringBuilder();
|
||||
for (int i=0;i<bytes.length-1;i++)
|
||||
for (int i = 0; i < bytes.length - 1; i++)
|
||||
buffer.append(bytes[i]);
|
||||
try
|
||||
{
|
||||
buffer.toString();
|
||||
assertTrue(false);
|
||||
}
|
||||
catch(Utf8Appendable.NotUtf8Exception e)
|
||||
{
|
||||
assertTrue(e.toString().indexOf("!UTF-8")>=0);
|
||||
}
|
||||
buffer.toString();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testLong()
|
||||
throws Exception
|
||||
public void testLong() throws Exception
|
||||
{
|
||||
String source="abcXX";
|
||||
String source = "abcXX";
|
||||
byte[] bytes = source.getBytes(StringUtil.__UTF8);
|
||||
bytes[3]=(byte)0xc0;
|
||||
bytes[4]=(byte)0x00;
|
||||
bytes[3] = (byte)0xc0;
|
||||
bytes[4] = (byte)0x00;
|
||||
|
||||
Utf8StringBuilder buffer = new Utf8StringBuilder();
|
||||
try
|
||||
{
|
||||
for (int i = 0; i < bytes.length; i++)
|
||||
buffer.append(bytes[i]);
|
||||
for (byte aByte : bytes)
|
||||
buffer.append(aByte);
|
||||
assertTrue(false);
|
||||
}
|
||||
catch(Utf8Appendable.NotUtf8Exception e)
|
||||
catch (IllegalArgumentException e)
|
||||
{
|
||||
assertTrue(true);
|
||||
}
|
||||
assertEquals("abc\ufffd", buffer.toString());
|
||||
assertEquals("abc\ufffd",buffer.toString());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUTF32codes()
|
||||
throws Exception
|
||||
@Test
|
||||
public void testUTF32codes() throws Exception
|
||||
{
|
||||
String source="\uD842\uDF9F";
|
||||
byte[] bytes=source.getBytes("UTF-8");
|
||||
|
||||
// System.err.println(TypeUtil.toHexString(bytes));
|
||||
String source = "\uD842\uDF9F";
|
||||
byte[] bytes = source.getBytes("UTF-8");
|
||||
|
||||
String jvmcheck = new String(bytes,0,bytes.length,"UTF-8");
|
||||
assertEquals(source,jvmcheck);
|
||||
|
||||
|
||||
Utf8StringBuilder buffer = new Utf8StringBuilder();
|
||||
buffer.append(bytes,0,bytes.length);
|
||||
String result=buffer.toString();
|
||||
String result = buffer.toString();
|
||||
assertEquals(source,result);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue