397110 Accept %uXXXX encodings in URIs

This commit is contained in:
Greg Wilkins 2012-12-22 12:00:54 +11:00
parent 5e4711fdb0
commit 810ff3802f
6 changed files with 250 additions and 101 deletions

View File

@ -558,8 +558,7 @@ public class HttpURI
return null;
int length = _param-_path;
byte[] bytes=null;
int n=0;
boolean decoding=false;
for (int i=_path;i<_param;i++)
{
@ -567,35 +566,49 @@ public class HttpURI
if (b=='%')
{
if (!decoding)
{
_utf8b.reset();
_utf8b.append(_raw,_path,i-_path);
decoding=true;
}
if ((i+2)>=_param)
throw new IllegalArgumentException("Bad % encoding: "+this);
b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
i+=2;
}
else if (bytes==null)
{
n++;
if (_raw[i+1]=='u')
{
if ((i+5)>=_param)
throw new IllegalArgumentException("Bad %u encoding: "+this);
try
{
String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
_utf8b.getStringBuilder().append(unicode);
i+=5;
}
catch(Exception e)
{
throw new RuntimeException(e);
}
}
else
{
b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
_utf8b.append(b);
i+=2;
}
continue;
}
if (bytes==null)
else if (decoding)
{
bytes=new byte[length];
System.arraycopy(_raw,_path,bytes,0,n);
_utf8b.append(b);
}
bytes[n++]=b;
}
if (bytes==null)
if (!decoding)
return toUtf8String(_path,length);
_utf8b.reset();
_utf8b.append(bytes,0,n);
return _utf8b.toString();
}
public String getDecodedPath(String encoding)
{
if (_path==_param)
@ -611,10 +624,39 @@ public class HttpURI
if (b=='%')
{
if (bytes==null)
{
bytes=new byte[length];
System.arraycopy(_raw,_path,bytes,0,n);
}
if ((i+2)>=_param)
throw new IllegalArgumentException("Bad % encoding: "+this);
b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
i+=2;
if (_raw[i+1]=='u')
{
if ((i+5)>=_param)
throw new IllegalArgumentException("Bad %u encoding: "+this);
try
{
String unicode = new String(Character.toChars(TypeUtil.parseInt(_raw,i+2,4,16)));
byte[] encoded = unicode.getBytes(encoding);
System.arraycopy(encoded,0,bytes,n,encoded.length);
n+=encoded.length;
i+=5;
}
catch(Exception e)
{
throw new RuntimeException(e);
}
}
else
{
b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
bytes[n++]=b;
i+=2;
}
continue;
}
else if (bytes==null)
{
@ -622,12 +664,6 @@ public class HttpURI
continue;
}
if (bytes==null)
{
bytes=new byte[length];
System.arraycopy(_raw,_path,bytes,0,n);
}
bytes[n++]=b;
}

View File

@ -195,7 +195,8 @@ public class HttpURITest
/* 1*/ {"/path/%69nfo","/path/info", "UTF-8"},
/* 2*/ {"http://host/path/%69nfo","/path/info", "UTF-8"},
/* 3*/ {"http://host/path/%69nf%c2%a4","/path/inf\u00a4", "UTF-8"},
/* 4*/ {"http://host/path/%E5", "/path/\u00e5", "ISO-8859-1"}
/* 4*/ {"http://host/path/%E5", "/path/\u00e5", "ISO-8859-1"},
/* 5*/ {"/foo/%u30ED/bar%3Fabc%3D123%26xyz%3D456","/foo/\u30ed/bar?abc=123&xyz=456","UTF-8"}
};
@Test
@ -207,11 +208,12 @@ public class HttpURITest
{
uri.parse(encoding_tests[t][0]);
assertEquals(""+t,encoding_tests[t][1],uri.getDecodedPath(encoding_tests[t][2]));
if ("UTF-8".equalsIgnoreCase(encoding_tests[t][2]))
assertEquals(""+t,encoding_tests[t][1],uri.getDecodedPath());
}
}
@Test
public void testNoPercentEncodingOfQueryUsingNonUTF8() throws Exception
{
@ -314,7 +316,7 @@ public class HttpURITest
@Test
public void testUnicodeErrors() throws UnsupportedEncodingException
{
String uri="http://server/path?invalid=data%u2021here";
String uri="http://server/path?invalid=data%uXXXXhere%u000";
try
{
URLDecoder.decode(uri,"UTF-8");

View File

@ -278,13 +278,7 @@ public class TypeUtil
{
char c=s.charAt(offset+i);
int digit=c-'0';
if (digit<0 || digit>=base || digit>=10)
{
digit=10+c-'A';
if (digit<10 || digit>=base)
digit=10+c-'a';
}
int digit=convertHexDigit((int)c);
if (digit<0 || digit>=base)
throw new NumberFormatException(s.substring(offset,offset+length));
value=value*base+digit;
@ -358,15 +352,28 @@ public class TypeUtil
/* ------------------------------------------------------------ */
/**
* @param b An ASCII encoded character 0-9 a-f A-F
* @param c An ASCII encoded character 0-9 a-f A-F
* @return The byte value of the character 0-16.
*/
public static byte convertHexDigit( byte b )
public static byte convertHexDigit( byte c )
{
if ((b >= '0') && (b <= '9')) return (byte)(b - '0');
if ((b >= 'a') && (b <= 'f')) return (byte)(b - 'a' + 10);
if ((b >= 'A') && (b <= 'F')) return (byte)(b - 'A' + 10);
throw new IllegalArgumentException("!hex:"+Integer.toHexString(0xff&b));
byte b = (byte)((c & 0x1f) + ((c >> 6) * 0x19) - 0x10);
if (b<0 || b>15)
throw new IllegalArgumentException("!hex "+c);
return b;
}
/* ------------------------------------------------------------ */
/**
 * Convert a single ASCII hexadecimal digit, given as an int code, to its
 * numeric value.
 * <p>
 * Only the characters 0-9, a-f and A-F are accepted. The range checks are
 * deliberate: the branch-free form
 * {@code (c & 0x1f) + ((c >> 6) * 0x19) - 0x10} also maps '@', '`',
 * ':' through '?' and the control codes 0x10-0x1f into 0-15, so a malformed
 * escape such as "%4@" would be decoded instead of rejected.
 *
 * @param c An ASCII encoded character 0-9 a-f A-F
 * @return The value of the character 0-15.
 * @throws NumberFormatException if c is not an ASCII hexadecimal digit
 *         (including negative values such as an end-of-stream -1)
 */
public static int convertHexDigit( int c )
{
if (c>='0' && c<='9')
return c-'0';
if (c>='a' && c<='f')
return c-'a'+10;
if (c>='A' && c<='F')
return c-'A'+10;
throw new NumberFormatException("!hex "+c);
}
/* ------------------------------------------------------------ */

View File

@ -18,6 +18,8 @@
package org.eclipse.jetty.util;
import static org.eclipse.jetty.util.TypeUtil.convertHexDigit;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@ -328,7 +330,16 @@ public class UrlEncoded extends MultiMap implements Cloneable
case '%':
if (i+2<end)
buffer.append((byte)((TypeUtil.convertHexDigit(raw[++i])<<4) + TypeUtil.convertHexDigit(raw[++i])));
{
if ('u'==raw[i+1])
{
i++;
if (i+4<end)
buffer.getStringBuilder().append(Character.toChars((convertHexDigit(raw[++i])<<12) +(convertHexDigit(raw[++i])<<8) + (convertHexDigit(raw[++i])<<4) +convertHexDigit(raw[++i])));
}
else
buffer.append((byte)((convertHexDigit(raw[++i])<<4) + convertHexDigit(raw[++i])));
}
break;
default:
@ -411,12 +422,29 @@ public class UrlEncoded extends MultiMap implements Cloneable
break;
case '%':
int dh=in.read();
int dl=in.read();
if (dh<0||dl<0)
break;
buffer.append((char)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
int code0=in.read();
if ('u'==code0)
{
int code1=in.read();
if (code1>=0)
{
int code2=in.read();
if (code2>=0)
{
int code3=in.read();
if (code3>=0)
buffer.append(Character.toChars((convertHexDigit(code0)<<12)+(convertHexDigit(code1)<<8)+(convertHexDigit(code2)<<4)+convertHexDigit(code3)));
}
}
}
else if (code0>=0)
{
int code1=in.read();
if (code1>=0)
buffer.append((char)((convertHexDigit(code0)<<4)+convertHexDigit(code1)));
}
break;
default:
buffer.append((char)b);
break;
@ -495,12 +523,29 @@ public class UrlEncoded extends MultiMap implements Cloneable
break;
case '%':
int dh=in.read();
int dl=in.read();
if (dh<0||dl<0)
break;
buffer.append((byte)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
int code0=in.read();
if ('u'==code0)
{
int code1=in.read();
if (code1>=0)
{
int code2=in.read();
if (code2>=0)
{
int code3=in.read();
if (code3>=0)
buffer.getStringBuilder().append(Character.toChars((convertHexDigit(code0)<<12)+(convertHexDigit(code1)<<8)+(convertHexDigit(code2)<<4)+convertHexDigit(code3)));
}
}
}
else if (code0>=0)
{
int code1=in.read();
if (code1>=0)
buffer.append((byte)((convertHexDigit(code0)<<4)+convertHexDigit(code1)));
}
break;
default:
buffer.append((byte)b);
break;
@ -576,8 +621,6 @@ public class UrlEncoded extends MultiMap implements Cloneable
String value = null;
int c;
int digit=0;
int digits=0;
int totalLength = 0;
ByteArrayOutputStream2 output = new ByteArrayOutputStream2();
@ -619,21 +662,31 @@ public class UrlEncoded extends MultiMap implements Cloneable
output.write(' ');
break;
case '%':
digits=2;
int code0=in.read();
if ('u'==code0)
{
int code1=in.read();
if (code1>=0)
{
int code2=in.read();
if (code2>=0)
{
int code3=in.read();
if (code3>=0)
output.write(new String(Character.toChars((convertHexDigit(code0)<<12)+(convertHexDigit(code1)<<8)+(convertHexDigit(code2)<<4)+convertHexDigit(code3))).getBytes(charset));
}
}
}
else if (code0>=0)
{
int code1=in.read();
if (code1>=0)
output.write((convertHexDigit(code0)<<4)+convertHexDigit(code1));
}
break;
default:
if (digits==2)
{
digit=TypeUtil.convertHexDigit((byte)c);
digits=1;
}
else if (digits==1)
{
output.write((digit<<4) + TypeUtil.convertHexDigit((byte)c));
digits=0;
}
else
output.write(c);
output.write(c);
break;
}
@ -688,24 +741,45 @@ public class UrlEncoded extends MultiMap implements Cloneable
buffer.getStringBuffer().append(' ');
}
else if (c=='%' && (i+2)<length)
else if (c=='%')
{
if (buffer==null)
{
buffer=new Utf8StringBuffer(length);
buffer.getStringBuffer().append(encoded,offset,offset+i);
}
try
if ((i+2)<length)
{
byte b=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
buffer.append(b);
i+=2;
}
catch(NumberFormatException nfe)
{
buffer.getStringBuffer().append('%');
try
{
if ('u'==encoded.charAt(offset+i+1))
{
if((i+5)<length)
{
int o=offset+i+2;
i+=5;
String unicode = new String(Character.toChars(TypeUtil.parseInt(encoded,o,4,16)));
buffer.getStringBuffer().append(unicode);
}
else
i=length;
}
else
{
int o=offset+i+1;
i+=2;
byte b=(byte)TypeUtil.parseInt(encoded,o,2,16);
buffer.append(b);
}
}
catch(NumberFormatException nfe)
{
buffer.getStringBuffer().append(Utf8Appendable.REPLACEMENT);
}
}
else
i=length;
}
else if (buffer!=null)
buffer.getStringBuffer().append(c);
@ -749,7 +823,7 @@ public class UrlEncoded extends MultiMap implements Cloneable
buffer.append(' ');
}
else if (c=='%' && (i+2)<length)
else if (c=='%')
{
if (buffer==null)
{
@ -767,15 +841,27 @@ public class UrlEncoded extends MultiMap implements Cloneable
{
try
{
ba[n]=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
n++;
i+=3;
if ('u'==encoded.charAt(offset+i+1))
{
int o=offset+i+2;
i+=6;
String unicode = new String(Character.toChars(TypeUtil.parseInt(encoded,o,4,16)));
byte[] reencoded = unicode.getBytes(charset);
System.arraycopy(reencoded,0,ba,n,reencoded.length);
n+=reencoded.length;
}
else
{
int o=offset+i+1;
i+=3;
ba[n]=(byte)TypeUtil.parseInt(encoded,o,2,16);
n++;
}
}
catch(NumberFormatException nfe)
{
LOG.ignore(nfe);
ba[n++] = (byte)'%';
i++;
ba[n++] = (byte)'?';
}
}
else

View File

@ -24,7 +24,24 @@ import org.junit.Test;
public class TypeUtilTest
{
@Test
public void convertHexDigitTest()
{
// Boundary hex digits in both cases, paired by index with their expected values.
final char[] digits={'0','9','a','A','f','F'};
final int[] expected={0,9,10,10,15,15};

// byte overload
for (int i=0;i<digits.length;i++)
{
byte asByte=(byte)digits[i];
Assert.assertEquals((byte)expected[i],TypeUtil.convertHexDigit(asByte));
}

// int overload
for (int i=0;i<digits.length;i++)
{
int asInt=(int)digits[i];
Assert.assertEquals(expected[i],TypeUtil.convertHexDigit(asInt));
}
}
@Test
public void testToHexInt() throws Exception
{

View File

@ -120,33 +120,34 @@ public class URLEncodedTest
assertEquals("encoded get", url_encoded.getString("Name8"),"xx, yy ,zz");
url_encoded.clear();
url_encoded.decode("Name11=xxVerdi+%C6+og+2zz", "ISO-8859-1");
url_encoded.decode("Name11=%u30EDxxVerdi+%C6+og+2zz", "ISO-8859-1");
assertEquals("encoded param size",1, url_encoded.size());
assertEquals("encoded get", url_encoded.getString("Name11"),"xxVerdi \u00c6 og 2zz");
assertEquals("encoded get", "?xxVerdi \u00c6 og 2zz",url_encoded.getString("Name11"));
url_encoded.clear();
url_encoded.decode("Name12=xxVerdi+%2F+og+2zz", "UTF-8");
url_encoded.decode("Name12=%u30EDxxVerdi+%2F+og+2zz", "UTF-8");
assertEquals("encoded param size",1, url_encoded.size());
assertEquals("encoded get", url_encoded.getString("Name12"),"xxVerdi / og 2zz");
assertEquals("encoded get", url_encoded.getString("Name12"),"\u30edxxVerdi / og 2zz");
url_encoded.clear();
url_encoded.decode("Name14=%GG%+%%+%", "ISO-8859-1");
url_encoded.decode("Name14=%uXXXXa%GGb%+%c%+%d", "ISO-8859-1");
assertEquals("encoded param size",1, url_encoded.size());
assertEquals("encoded get", url_encoded.getString("Name14"),"%GG% %% %");
assertEquals("encoded get","?a?b?c?d", url_encoded.getString("Name14"));
url_encoded.clear();
url_encoded.decode("Name14=%GG%+%%+%", "UTF-8");
url_encoded.decode("Name14=%uXXXX%GG%+%%+%", "UTF-8");
assertEquals("encoded param size",1, url_encoded.size());
assertEquals("encoded get", url_encoded.getString("Name14"),"%GG% %% %");
assertEquals("encoded get", url_encoded.getString("Name14"),"\ufffd\ufffd\ufffd\ufffd");
/* Not every jvm supports this encoding */
if (java.nio.charset.Charset.isSupported("SJIS"))
{
url_encoded.clear();
url_encoded.decode("Name9=%83e%83X%83g", "SJIS"); // "Test" in Japanese Katakana
url_encoded.decode("Name9=%u30ED%83e%83X%83g", "SJIS"); // "Test" in Japanese Katakana
assertEquals("encoded param size",1, url_encoded.size());
assertEquals("encoded get", "\u30c6\u30b9\u30c8", url_encoded.getString("Name9"));
assertEquals("encoded get", "\u30ed\u30c6\u30b9\u30c8", url_encoded.getString("Name9"));
}
else
assertTrue("Charset SJIS not supported by jvm", true);
@ -160,9 +161,9 @@ public class URLEncodedTest
UrlEncoded url_encoded = new UrlEncoded();
url_encoded.decode("Name15=xx%zz", "UTF-8");
assertEquals("encoded param size",1, url_encoded.size());
assertEquals("encoded get", "xx%zz", url_encoded.getString("Name15"));
assertEquals("encoded get", "xx\ufffd", url_encoded.getString("Name15"));
assertEquals("%u123",UrlEncoded.decodeString("%u123",0,5,"UTF-8"));
assertEquals("xxx",UrlEncoded.decodeString("xxx%u123",0,5,"UTF-8"));
}
@ -196,8 +197,8 @@ public class URLEncodedTest
if (java.nio.charset.Charset.isSupported("Shift_JIS"))
{
ByteArrayInputStream in2 = new ByteArrayInputStream ("name=%83e%83X%83g".getBytes());
MultiMap m2 = new MultiMap();
ByteArrayInputStream in2 = new ByteArrayInputStream ("name=%83e%83X%83g".getBytes(StringUtil.__ISO_8859_1));
MultiMap<String> m2 = new MultiMap<String>();
UrlEncoded.decodeTo(in2, m2, "Shift_JIS", -1,-1);
assertEquals("stream length",1,m2.size());
assertEquals("stream name","\u30c6\u30b9\u30c8",m2.getString("name"));