Issue #2532 Better handling of tokens in parser (#2533)

Issue #2532 Better handling of tokens in parser

Handle TCHAR and VCHAR from abnf separately

Signed-off-by: Greg Wilkins <gregw@webtide.com>
This commit is contained in:
Greg Wilkins 2018-05-17 09:50:49 +10:00 committed by GitHub
parent aa97518d0b
commit 715a637c2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 801 additions and 467 deletions

View File

@ -18,21 +18,174 @@
package org.eclipse.jetty.http;
import org.eclipse.jetty.util.TypeUtil;
/**
* HTTP constants
*/
public interface HttpTokens
public class HttpTokens
{
// Terminal symbols.
static final byte COLON= (byte)':';
static final byte TAB= 0x09;
static final byte LINE_FEED= 0x0A;
static final byte CARRIAGE_RETURN= 0x0D;
static final byte SPACE= 0x20;
static final byte[] CRLF = {CARRIAGE_RETURN,LINE_FEED};
static final byte SEMI_COLON= (byte)';';
public enum EndOfContent { UNKNOWN_CONTENT,NO_CONTENT,EOF_CONTENT,CONTENT_LENGTH,CHUNKED_CONTENT }
public enum Type
{
CNTL, // Control characters excluding LF, CR
HTAB, // Horizontal tab
LF, // Line feed
CR, // Carriage return
SPACE, // Space
COLON, // Colon character
DIGIT, // Digit
ALPHA, // Alpha
TCHAR, // token characters excluding COLON,DIGIT,ALPHA, which is equivalent to VCHAR excluding delimiters
VCHAR, // Visible characters excluding COLON,DIGIT,ALPHA
OTEXT // Obsolete text
}
public static class Token
{
private final Type _type;
private final byte _b;
private final char _c;
private final int _x;
private Token(byte b, Type type)
{
_type = type;
_b = b;
_c = (char)(0xff&b);
char lc = (_c>='A' & _c<='Z')?((char)(_c-'A'+'a')):_c;
_x = (_type==Type.DIGIT || _type==Type.ALPHA && lc>='a' && lc<='f')?TypeUtil.convertHexDigit(b):-1;
}
public Type getType()
{
return _type;
}
public byte getByte()
{
return _b;
}
public char getChar()
{
return _c;
}
public boolean isHexDigit()
{
return _x>=0;
}
public int getHexDigit()
{
return _x;
}
@Override
public String toString()
{
switch(_type)
{
case SPACE:
case COLON:
case ALPHA:
case DIGIT:
case TCHAR:
case VCHAR:
return _type+"='"+_c+"'";
case CR:
return "CR=\\r";
case LF:
return "LF=\\n";
default:
return String.format("%s=0x%x",_type,_b);
}
}
}
public final static Token[] TOKENS = new Token[256];
static
{
for (int b=0; b<256; b++)
{
// token = 1*tchar
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
// / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
// / DIGIT / ALPHA
// ; any VCHAR, except delimiters
// quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
// qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
// obs-text = %x80-FF
// comment = "(" *( ctext / quoted-pair / comment ) ")"
// ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
// quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
switch (b)
{
case LINE_FEED:
TOKENS[b] = new Token((byte)b,Type.LF);
break;
case CARRIAGE_RETURN:
TOKENS[b] = new Token((byte)b,Type.CR);
break;
case SPACE:
TOKENS[b] = new Token((byte)b,Type.SPACE);
break;
case TAB:
TOKENS[b] = new Token((byte)b,Type.HTAB);
break;
case COLON:
TOKENS[b] = new Token((byte)b,Type.COLON);
break;
case '!':
case '#':
case '$':
case '%':
case '&':
case '\'':
case '*':
case '+':
case '-':
case '.':
case '^':
case '_':
case '`':
case '|':
case '~':
TOKENS[b] = new Token((byte)b,Type.TCHAR);
break;
default:
if (b>=0x30 &&b<=0x39) // DIGIT
TOKENS[b] = new Token((byte)b,Type.DIGIT);
else if (b>=0x41 &&b<=0x5A) // ALPHA (uppercase)
TOKENS[b] = new Token((byte)b,Type.ALPHA);
else if (b>=0x61 &&b<=0x7A) // ALPHA (lowercase)
TOKENS[b] = new Token((byte)b,Type.ALPHA);
else if (b>=0x21 &&b<=0x7E) // Visible
TOKENS[b] = new Token((byte)b,Type.VCHAR);
else if (b>=0x80) // OBS
TOKENS[b] = new Token((byte)b,Type.OTEXT);
else
TOKENS[b] = new Token((byte)b,Type.CNTL);
}
}
}
}

View File

@ -40,13 +40,7 @@ import org.eclipse.jetty.util.log.Logger;
public class MultiPartParser
{
public static final Logger LOG = Log.getLogger(MultiPartParser.class);
private static final byte COLON = (byte)':';
private static final byte TAB = 0x09;
private static final byte LINE_FEED = 0x0A;
private static final byte CARRIAGE_RETURN = 0x0D;
private static final byte SPACE = 0x20;
// States
public enum FieldState
{
@ -134,41 +128,49 @@ public class MultiPartParser
}
/* ------------------------------------------------------------------------------- */
private byte getNextByte(ByteBuffer buffer)
private HttpTokens.Token next(ByteBuffer buffer)
{
byte ch = buffer.get();
HttpTokens.Token t = HttpTokens.TOKENS[0xff & ch];
HttpParser.CharState s = HttpParser.TOKEN_CHAR[0xff & ch];
switch (s)
if (DEBUG)
LOG.debug("token={}",t);
switch(t.getType())
{
case CNTL:
throw new IllegalCharacterException(_state,t,buffer);
case LF:
_cr = false;
return ch;
_cr=false;
break;
case CR:
if (_cr)
throw new BadMessageException("Bad EOL");
_cr = true;
if (buffer.hasRemaining())
return getNextByte(buffer);
// Can return 0 here to indicate the need for more characters,
// because a real 0 in the buffer would cause a BadMessage below
return 0;
case LEGAL:
_cr=true;
return null;
case ALPHA:
case DIGIT:
case TCHAR:
case VCHAR:
case HTAB:
case SPACE:
case OTEXT:
case COLON:
if (_cr)
throw new BadMessageException("Bad EOL");
break;
return ch;
case ILLEGAL:
default:
throw new IllegalCharacterException(_state, ch, buffer);
break;
}
}
return t;
}
/* ------------------------------------------------------------------------------- */
private void setString(String s)
@ -307,11 +309,11 @@ public class MultiPartParser
{
while (__delimiterStates.contains(_state) && hasNextByte(buffer))
{
byte b = getNextByte(buffer);
if (b == 0)
HttpTokens.Token t = next(buffer);
if (t == null)
return;
if (b == '\n')
if (t.getType()==HttpTokens.Type.LF)
{
setState(State.BODY_PART);
@ -325,14 +327,14 @@ public class MultiPartParser
switch (_state)
{
case DELIMITER:
if (b == '-')
if (t.getChar() == '-')
setState(State.DELIMITER_CLOSE);
else
setState(State.DELIMITER_PADDING);
continue;
case DELIMITER_CLOSE:
if (b == '-')
if (t.getChar() == '-')
{
setState(State.EPILOGUE);
return;
@ -356,11 +358,11 @@ public class MultiPartParser
while (_state == State.BODY_PART && hasNextByte(buffer))
{
// process each character
byte b = getNextByte(buffer);
if (b == 0)
HttpTokens.Token t = next(buffer);
if (t == null)
break;
if (b != LINE_FEED)
if (t.getType() != HttpTokens.Type.LF)
_totalHeaderLineLength++;
if (_totalHeaderLineLength > MAX_HEADER_LINE_LENGTH)
@ -369,10 +371,10 @@ public class MultiPartParser
switch (_fieldState)
{
case FIELD:
switch (b)
switch (t.getType())
{
case SPACE:
case TAB:
case HTAB:
{
// Folded field value!
@ -395,8 +397,7 @@ public class MultiPartParser
break;
}
case LINE_FEED:
{
case LF:
handleField();
setState(State.FIRST_OCTETS);
_partialBoundary = 2; // CRLF is option for empty parts
@ -407,24 +408,28 @@ public class MultiPartParser
if (_handler.headerComplete())
return true;
break;
}
default:
{
case ALPHA:
case DIGIT:
case TCHAR:
// process previous header
handleField();
// New header
setState(FieldState.IN_NAME);
_string.reset();
_string.append(b);
_string.append(t.getChar());
_length = 1;
}
break;
default:
throw new IllegalCharacterException(_state,t,buffer);
}
break;
case IN_NAME:
switch (b)
switch(t.getType())
{
case COLON:
_fieldName = takeString();
@ -437,7 +442,7 @@ public class MultiPartParser
setState(FieldState.AFTER_NAME);
break;
case LINE_FEED:
case LF:
{
if (LOG.isDebugEnabled())
LOG.debug("Line Feed in Name {}", this);
@ -446,16 +451,21 @@ public class MultiPartParser
setState(FieldState.FIELD);
break;
}
default:
_string.append(b);
case ALPHA:
case DIGIT:
case TCHAR:
_string.append(t.getChar());
_length = _string.length();
break;
default:
throw new IllegalCharacterException(_state,t,buffer);
}
break;
case AFTER_NAME:
switch (b)
switch(t.getType())
{
case COLON:
_fieldName = takeString();
@ -463,7 +473,7 @@ public class MultiPartParser
setState(FieldState.VALUE);
break;
case LINE_FEED:
case LF:
_fieldName = takeString();
_string.reset();
_fieldValue = "";
@ -474,14 +484,14 @@ public class MultiPartParser
break;
default:
throw new IllegalCharacterException(_state, b, buffer);
throw new IllegalCharacterException(_state, t, buffer);
}
break;
case VALUE:
switch (b)
switch(t.getType())
{
case LINE_FEED:
case LF:
_string.reset();
_fieldValue = "";
_length = -1;
@ -490,25 +500,34 @@ public class MultiPartParser
break;
case SPACE:
case TAB:
case HTAB:
break;
default:
_string.append(b);
case ALPHA:
case DIGIT:
case TCHAR:
case VCHAR:
case COLON:
case OTEXT:
_string.append(t.getByte());
_length = _string.length();
setState(FieldState.IN_VALUE);
break;
default:
throw new IllegalCharacterException(_state,t,buffer);
}
break;
case IN_VALUE:
switch (b)
switch(t.getType())
{
case SPACE:
_string.append(b);
case HTAB:
_string.append(' ');
break;
case LINE_FEED:
case LF:
if (_length > 0)
{
_fieldValue = takeString();
@ -517,12 +536,19 @@ public class MultiPartParser
}
setState(FieldState.FIELD);
break;
default:
_string.append(b);
if (b > SPACE || b < 0)
_length = _string.length();
case ALPHA:
case DIGIT:
case TCHAR:
case VCHAR:
case COLON:
case OTEXT:
_string.append(t.getByte());
_length=_string.length();
break;
default:
throw new IllegalCharacterException(_state,t,buffer);
}
break;
@ -694,16 +720,16 @@ public class MultiPartParser
{
}
}
/* ------------------------------------------------------------------------------- */
@SuppressWarnings("serial")
private static class IllegalCharacterException extends IllegalArgumentException
private static class IllegalCharacterException extends BadMessageException
{
private IllegalCharacterException(State state, byte ch, ByteBuffer buffer)
private IllegalCharacterException(State state,HttpTokens.Token token,ByteBuffer buffer)
{
super(String.format("Illegal character 0x%X", ch));
// Bug #460642 - don't reveal buffers to end user
LOG.warn(String.format("Illegal character 0x%X in state=%s for buffer %s", ch, state, BufferUtil.toDetailString(buffer)));
super(400,String.format("Illegal character %s",token));
if (LOG.isDebugEnabled())
LOG.debug(String.format("Illegal character %s in state=%s for buffer %s",token,state,BufferUtil.toDetailString(buffer)));
}
}
}

View File

@ -238,10 +238,9 @@ public class HttpParserTest
HttpParser.RequestHandler handler = new Handler();
HttpParser parser= new HttpParser(handler);
parseAll(parser,buffer);
Assert.assertEquals("Bad preamble", _bad);
Assert.assertEquals("Illegal character SPACE=' '", _bad);
}
@Test
public void testConnect() throws Exception
{
@ -472,7 +471,7 @@ public class HttpParserTest
Assert.assertEquals("HTTP/1.1", _methodOrVersion);
Assert.assertEquals("204", _uriOrStatus);
Assert.assertEquals("No Content", _versionOrReason);
Assert.assertThat(_bad, Matchers.containsString("Illegal character 0x20"));
Assert.assertThat(_bad, Matchers.containsString("Illegal character "));
}
@Test
@ -749,6 +748,40 @@ public class HttpParserTest
parseAll(parser, buffer);
Assert.assertThat(_bad, Matchers.notNullValue());
}
@Test
public void testBadHeaderNames() throws Exception
{
String[] bad = new String[]
{
"Foo\\Bar: value\r\n",
"Foo@Bar: value\r\n",
"Foo,Bar: value\r\n",
"Foo}Bar: value\r\n",
"Foo{Bar: value\r\n",
"Foo=Bar: value\r\n",
"Foo>Bar: value\r\n",
"Foo<Bar: value\r\n",
"Foo)Bar: value\r\n",
"Foo(Bar: value\r\n",
"Foo?Bar: value\r\n",
"Foo\"Bar: value\r\n",
"Foo/Bar: value\r\n",
"Foo]Bar: value\r\n",
"Foo[Bar: value\r\n",
};
for (int i=0; i<bad.length; i++)
{
ByteBuffer buffer= BufferUtil.toBuffer(
"GET / HTTP/1.0\r\n" + bad[i]+ "\r\n");
HttpParser.RequestHandler handler = new Handler();
HttpParser parser= new HttpParser(handler);
parseAll(parser,buffer);
Assert.assertThat(bad[i],_bad,Matchers.notNullValue());
}
}
@Test
public void testHeaderTab() throws Exception

View File

@ -482,6 +482,49 @@ public class MultiPartParserTest
}
@Test
public void testBadHeaderNames() throws Exception
{
String[] bad = new String[]
{
"Foo\\Bar: value\r\n",
"Foo@Bar: value\r\n",
"Foo,Bar: value\r\n",
"Foo}Bar: value\r\n",
"Foo{Bar: value\r\n",
"Foo=Bar: value\r\n",
"Foo>Bar: value\r\n",
"Foo<Bar: value\r\n",
"Foo)Bar: value\r\n",
"Foo(Bar: value\r\n",
"Foo?Bar: value\r\n",
"Foo\"Bar: value\r\n",
"Foo/Bar: value\r\n",
"Foo]Bar: value\r\n",
"Foo[Bar: value\r\n",
"\u0192\u00f8\u00f8\u00df\u00e5\u00ae: value\r\n"
};
for (int i=0; i<bad.length; i++)
{
ByteBuffer buffer= BufferUtil.toBuffer(
"--AaB03x\r\n" + bad[i] + "\r\n--AaB03x--\r\n");
MultiPartParser.Handler handler = new TestHandler();
MultiPartParser parser= new MultiPartParser(handler, "AaB03x");
try
{
parser.parse(buffer, true);
}
catch(BadMessageException e)
{
assertTrue(e.getMessage().contains("Illegal character"));
}
}
}
@Test
public void splitTest()
{

View File

@ -1,3 +1,4 @@
org.eclipse.jetty.util.log.class=org.eclipse.jetty.util.log.StdErrLog
#org.eclipse.jetty.LEVEL=DEBUG
#org.eclipse.jetty.server.LEVEL=DEBUG
#org.eclipse.jetty.http.LEVEL=DEBUG