Issue #2532 Better handling of tokens in parser (#2533)

Issue #2532 Better handling of tokens in parser

Handle TCHAR and VCHAR from abnf separately

Signed-off-by: Greg Wilkins <gregw@webtide.com>
This commit is contained in:
Greg Wilkins 2018-05-17 09:50:49 +10:00 committed by GitHub
parent aa97518d0b
commit 715a637c2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 802 additions and 469 deletions

View File

@ -18,21 +18,174 @@
package org.eclipse.jetty.http; package org.eclipse.jetty.http;
import org.eclipse.jetty.util.TypeUtil;
/** /**
* HTTP constants * HTTP constants
*/ */
public interface HttpTokens public class HttpTokens
{ {
// Terminal symbols.
static final byte COLON= (byte)':'; static final byte COLON= (byte)':';
static final byte TAB= 0x09; static final byte TAB= 0x09;
static final byte LINE_FEED= 0x0A; static final byte LINE_FEED= 0x0A;
static final byte CARRIAGE_RETURN= 0x0D; static final byte CARRIAGE_RETURN= 0x0D;
static final byte SPACE= 0x20; static final byte SPACE= 0x20;
static final byte[] CRLF = {CARRIAGE_RETURN,LINE_FEED}; static final byte[] CRLF = {CARRIAGE_RETURN,LINE_FEED};
static final byte SEMI_COLON= (byte)';';
public enum EndOfContent { UNKNOWN_CONTENT,NO_CONTENT,EOF_CONTENT,CONTENT_LENGTH,CHUNKED_CONTENT } public enum EndOfContent { UNKNOWN_CONTENT,NO_CONTENT,EOF_CONTENT,CONTENT_LENGTH,CHUNKED_CONTENT }
public enum Type
{
CNTL, // Control characters excluding LF, CR
HTAB, // Horizontal tab
LF, // Line feed
CR, // Carriage return
SPACE, // Space
COLON, // Colon character
DIGIT, // Digit
ALPHA, // Alpha
TCHAR, // token characters excluding COLON,DIGIT,ALPHA, which is equivalent to VCHAR excluding delimiters
VCHAR, // Visible characters excluding COLON,DIGIT,ALPHA
OTEXT // Obsolete text
}
public static class Token
{
private final Type _type;
private final byte _b;
private final char _c;
private final int _x;
private Token(byte b, Type type)
{
_type = type;
_b = b;
_c = (char)(0xff&b);
char lc = (_c>='A' & _c<='Z')?((char)(_c-'A'+'a')):_c;
_x = (_type==Type.DIGIT || _type==Type.ALPHA && lc>='a' && lc<='f')?TypeUtil.convertHexDigit(b):-1;
}
public Type getType()
{
return _type;
}
public byte getByte()
{
return _b;
}
public char getChar()
{
return _c;
}
public boolean isHexDigit()
{
return _x>=0;
}
public int getHexDigit()
{
return _x;
}
@Override
public String toString()
{
switch(_type)
{
case SPACE:
case COLON:
case ALPHA:
case DIGIT:
case TCHAR:
case VCHAR:
return _type+"='"+_c+"'";
case CR:
return "CR=\\r";
case LF:
return "LF=\\n";
default:
return String.format("%s=0x%x",_type,_b);
}
}
}
public final static Token[] TOKENS = new Token[256];
static
{
for (int b=0; b<256; b++)
{
// token = 1*tchar
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
// / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
// / DIGIT / ALPHA
// ; any VCHAR, except delimiters
// quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
// qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
// obs-text = %x80-FF
// comment = "(" *( ctext / quoted-pair / comment ) ")"
// ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
// quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
switch (b)
{
case LINE_FEED:
TOKENS[b] = new Token((byte)b,Type.LF);
break;
case CARRIAGE_RETURN:
TOKENS[b] = new Token((byte)b,Type.CR);
break;
case SPACE:
TOKENS[b] = new Token((byte)b,Type.SPACE);
break;
case TAB:
TOKENS[b] = new Token((byte)b,Type.HTAB);
break;
case COLON:
TOKENS[b] = new Token((byte)b,Type.COLON);
break;
case '!':
case '#':
case '$':
case '%':
case '&':
case '\'':
case '*':
case '+':
case '-':
case '.':
case '^':
case '_':
case '`':
case '|':
case '~':
TOKENS[b] = new Token((byte)b,Type.TCHAR);
break;
default:
if (b>=0x30 &&b<=0x39) // DIGIT
TOKENS[b] = new Token((byte)b,Type.DIGIT);
else if (b>=0x41 &&b<=0x5A) // ALPHA (uppercase)
TOKENS[b] = new Token((byte)b,Type.ALPHA);
else if (b>=0x61 &&b<=0x7A) // ALPHA (lowercase)
TOKENS[b] = new Token((byte)b,Type.ALPHA);
else if (b>=0x21 &&b<=0x7E) // Visible
TOKENS[b] = new Token((byte)b,Type.VCHAR);
else if (b>=0x80) // OBS
TOKENS[b] = new Token((byte)b,Type.OTEXT);
else
TOKENS[b] = new Token((byte)b,Type.CNTL);
}
}
}
} }

View File

@ -40,13 +40,7 @@ import org.eclipse.jetty.util.log.Logger;
public class MultiPartParser public class MultiPartParser
{ {
public static final Logger LOG = Log.getLogger(MultiPartParser.class); public static final Logger LOG = Log.getLogger(MultiPartParser.class);
private static final byte COLON = (byte)':';
private static final byte TAB = 0x09;
private static final byte LINE_FEED = 0x0A;
private static final byte CARRIAGE_RETURN = 0x0D;
private static final byte SPACE = 0x20;
// States // States
public enum FieldState public enum FieldState
{ {
@ -134,41 +128,49 @@ public class MultiPartParser
} }
/* ------------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------------- */
private byte getNextByte(ByteBuffer buffer) private HttpTokens.Token next(ByteBuffer buffer)
{ {
byte ch = buffer.get(); byte ch = buffer.get();
HttpTokens.Token t = HttpTokens.TOKENS[0xff & ch];
HttpParser.CharState s = HttpParser.TOKEN_CHAR[0xff & ch]; if (DEBUG)
switch (s) LOG.debug("token={}",t);
switch(t.getType())
{ {
case CNTL:
throw new IllegalCharacterException(_state,t,buffer);
case LF: case LF:
_cr = false; _cr=false;
return ch; break;
case CR: case CR:
if (_cr) if (_cr)
throw new BadMessageException("Bad EOL"); throw new BadMessageException("Bad EOL");
_cr = true; _cr=true;
if (buffer.hasRemaining()) return null;
return getNextByte(buffer);
case ALPHA:
// Can return 0 here to indicate the need for more characters, case DIGIT:
// because a real 0 in the buffer would cause a BadMessage below case TCHAR:
return 0; case VCHAR:
case HTAB:
case LEGAL: case SPACE:
case OTEXT:
case COLON:
if (_cr) if (_cr)
throw new BadMessageException("Bad EOL"); throw new BadMessageException("Bad EOL");
break;
return ch;
case ILLEGAL:
default: default:
throw new IllegalCharacterException(_state, ch, buffer); break;
} }
}
return t;
}
/* ------------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------------- */
private void setString(String s) private void setString(String s)
@ -307,11 +309,11 @@ public class MultiPartParser
{ {
while (__delimiterStates.contains(_state) && hasNextByte(buffer)) while (__delimiterStates.contains(_state) && hasNextByte(buffer))
{ {
byte b = getNextByte(buffer); HttpTokens.Token t = next(buffer);
if (b == 0) if (t == null)
return; return;
if (b == '\n') if (t.getType()==HttpTokens.Type.LF)
{ {
setState(State.BODY_PART); setState(State.BODY_PART);
@ -325,14 +327,14 @@ public class MultiPartParser
switch (_state) switch (_state)
{ {
case DELIMITER: case DELIMITER:
if (b == '-') if (t.getChar() == '-')
setState(State.DELIMITER_CLOSE); setState(State.DELIMITER_CLOSE);
else else
setState(State.DELIMITER_PADDING); setState(State.DELIMITER_PADDING);
continue; continue;
case DELIMITER_CLOSE: case DELIMITER_CLOSE:
if (b == '-') if (t.getChar() == '-')
{ {
setState(State.EPILOGUE); setState(State.EPILOGUE);
return; return;
@ -356,11 +358,11 @@ public class MultiPartParser
while (_state == State.BODY_PART && hasNextByte(buffer)) while (_state == State.BODY_PART && hasNextByte(buffer))
{ {
// process each character // process each character
byte b = getNextByte(buffer); HttpTokens.Token t = next(buffer);
if (b == 0) if (t == null)
break; break;
if (b != LINE_FEED) if (t.getType() != HttpTokens.Type.LF)
_totalHeaderLineLength++; _totalHeaderLineLength++;
if (_totalHeaderLineLength > MAX_HEADER_LINE_LENGTH) if (_totalHeaderLineLength > MAX_HEADER_LINE_LENGTH)
@ -369,10 +371,10 @@ public class MultiPartParser
switch (_fieldState) switch (_fieldState)
{ {
case FIELD: case FIELD:
switch (b) switch (t.getType())
{ {
case SPACE: case SPACE:
case TAB: case HTAB:
{ {
// Folded field value! // Folded field value!
@ -395,8 +397,7 @@ public class MultiPartParser
break; break;
} }
case LINE_FEED: case LF:
{
handleField(); handleField();
setState(State.FIRST_OCTETS); setState(State.FIRST_OCTETS);
_partialBoundary = 2; // CRLF is option for empty parts _partialBoundary = 2; // CRLF is option for empty parts
@ -407,24 +408,28 @@ public class MultiPartParser
if (_handler.headerComplete()) if (_handler.headerComplete())
return true; return true;
break; break;
}
case ALPHA:
default: case DIGIT:
{ case TCHAR:
// process previous header // process previous header
handleField(); handleField();
// New header // New header
setState(FieldState.IN_NAME); setState(FieldState.IN_NAME);
_string.reset(); _string.reset();
_string.append(b); _string.append(t.getChar());
_length = 1; _length = 1;
}
break;
default:
throw new IllegalCharacterException(_state,t,buffer);
} }
break; break;
case IN_NAME: case IN_NAME:
switch (b) switch(t.getType())
{ {
case COLON: case COLON:
_fieldName = takeString(); _fieldName = takeString();
@ -437,7 +442,7 @@ public class MultiPartParser
setState(FieldState.AFTER_NAME); setState(FieldState.AFTER_NAME);
break; break;
case LINE_FEED: case LF:
{ {
if (LOG.isDebugEnabled()) if (LOG.isDebugEnabled())
LOG.debug("Line Feed in Name {}", this); LOG.debug("Line Feed in Name {}", this);
@ -446,16 +451,21 @@ public class MultiPartParser
setState(FieldState.FIELD); setState(FieldState.FIELD);
break; break;
} }
default: case ALPHA:
_string.append(b); case DIGIT:
case TCHAR:
_string.append(t.getChar());
_length = _string.length(); _length = _string.length();
break; break;
default:
throw new IllegalCharacterException(_state,t,buffer);
} }
break; break;
case AFTER_NAME: case AFTER_NAME:
switch (b) switch(t.getType())
{ {
case COLON: case COLON:
_fieldName = takeString(); _fieldName = takeString();
@ -463,7 +473,7 @@ public class MultiPartParser
setState(FieldState.VALUE); setState(FieldState.VALUE);
break; break;
case LINE_FEED: case LF:
_fieldName = takeString(); _fieldName = takeString();
_string.reset(); _string.reset();
_fieldValue = ""; _fieldValue = "";
@ -474,14 +484,14 @@ public class MultiPartParser
break; break;
default: default:
throw new IllegalCharacterException(_state, b, buffer); throw new IllegalCharacterException(_state, t, buffer);
} }
break; break;
case VALUE: case VALUE:
switch (b) switch(t.getType())
{ {
case LINE_FEED: case LF:
_string.reset(); _string.reset();
_fieldValue = ""; _fieldValue = "";
_length = -1; _length = -1;
@ -490,25 +500,34 @@ public class MultiPartParser
break; break;
case SPACE: case SPACE:
case TAB: case HTAB:
break; break;
default: case ALPHA:
_string.append(b); case DIGIT:
case TCHAR:
case VCHAR:
case COLON:
case OTEXT:
_string.append(t.getByte());
_length = _string.length(); _length = _string.length();
setState(FieldState.IN_VALUE); setState(FieldState.IN_VALUE);
break; break;
default:
throw new IllegalCharacterException(_state,t,buffer);
} }
break; break;
case IN_VALUE: case IN_VALUE:
switch (b) switch(t.getType())
{ {
case SPACE: case SPACE:
_string.append(b); case HTAB:
_string.append(' ');
break; break;
case LINE_FEED: case LF:
if (_length > 0) if (_length > 0)
{ {
_fieldValue = takeString(); _fieldValue = takeString();
@ -517,12 +536,19 @@ public class MultiPartParser
} }
setState(FieldState.FIELD); setState(FieldState.FIELD);
break; break;
default: case ALPHA:
_string.append(b); case DIGIT:
if (b > SPACE || b < 0) case TCHAR:
_length = _string.length(); case VCHAR:
case COLON:
case OTEXT:
_string.append(t.getByte());
_length=_string.length();
break; break;
default:
throw new IllegalCharacterException(_state,t,buffer);
} }
break; break;
@ -694,16 +720,16 @@ public class MultiPartParser
{ {
} }
} }
/* ------------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------------- */
@SuppressWarnings("serial") @SuppressWarnings("serial")
private static class IllegalCharacterException extends IllegalArgumentException private static class IllegalCharacterException extends BadMessageException
{ {
private IllegalCharacterException(State state, byte ch, ByteBuffer buffer) private IllegalCharacterException(State state,HttpTokens.Token token,ByteBuffer buffer)
{ {
super(String.format("Illegal character 0x%X", ch)); super(400,String.format("Illegal character %s",token));
// Bug #460642 - don't reveal buffers to end user if (LOG.isDebugEnabled())
LOG.warn(String.format("Illegal character 0x%X in state=%s for buffer %s", ch, state, BufferUtil.toDetailString(buffer))); LOG.debug(String.format("Illegal character %s in state=%s for buffer %s",token,state,BufferUtil.toDetailString(buffer)));
} }
} }
} }

View File

@ -238,10 +238,9 @@ public class HttpParserTest
HttpParser.RequestHandler handler = new Handler(); HttpParser.RequestHandler handler = new Handler();
HttpParser parser= new HttpParser(handler); HttpParser parser= new HttpParser(handler);
parseAll(parser,buffer); parseAll(parser,buffer);
Assert.assertEquals("Bad preamble", _bad); Assert.assertEquals("Illegal character SPACE=' '", _bad);
} }
@Test @Test
public void testConnect() throws Exception public void testConnect() throws Exception
{ {
@ -472,7 +471,7 @@ public class HttpParserTest
Assert.assertEquals("HTTP/1.1", _methodOrVersion); Assert.assertEquals("HTTP/1.1", _methodOrVersion);
Assert.assertEquals("204", _uriOrStatus); Assert.assertEquals("204", _uriOrStatus);
Assert.assertEquals("No Content", _versionOrReason); Assert.assertEquals("No Content", _versionOrReason);
Assert.assertThat(_bad, Matchers.containsString("Illegal character 0x20")); Assert.assertThat(_bad, Matchers.containsString("Illegal character "));
} }
@Test @Test
@ -749,6 +748,40 @@ public class HttpParserTest
parseAll(parser, buffer); parseAll(parser, buffer);
Assert.assertThat(_bad, Matchers.notNullValue()); Assert.assertThat(_bad, Matchers.notNullValue());
} }
@Test
public void testBadHeaderNames() throws Exception
{
String[] bad = new String[]
{
"Foo\\Bar: value\r\n",
"Foo@Bar: value\r\n",
"Foo,Bar: value\r\n",
"Foo}Bar: value\r\n",
"Foo{Bar: value\r\n",
"Foo=Bar: value\r\n",
"Foo>Bar: value\r\n",
"Foo<Bar: value\r\n",
"Foo)Bar: value\r\n",
"Foo(Bar: value\r\n",
"Foo?Bar: value\r\n",
"Foo\"Bar: value\r\n",
"Foo/Bar: value\r\n",
"Foo]Bar: value\r\n",
"Foo[Bar: value\r\n",
};
for (int i=0; i<bad.length; i++)
{
ByteBuffer buffer= BufferUtil.toBuffer(
"GET / HTTP/1.0\r\n" + bad[i]+ "\r\n");
HttpParser.RequestHandler handler = new Handler();
HttpParser parser= new HttpParser(handler);
parseAll(parser,buffer);
Assert.assertThat(bad[i],_bad,Matchers.notNullValue());
}
}
@Test @Test
public void testHeaderTab() throws Exception public void testHeaderTab() throws Exception

View File

@ -482,6 +482,49 @@ public class MultiPartParserTest
} }
@Test
public void testBadHeaderNames() throws Exception
{
String[] bad = new String[]
{
"Foo\\Bar: value\r\n",
"Foo@Bar: value\r\n",
"Foo,Bar: value\r\n",
"Foo}Bar: value\r\n",
"Foo{Bar: value\r\n",
"Foo=Bar: value\r\n",
"Foo>Bar: value\r\n",
"Foo<Bar: value\r\n",
"Foo)Bar: value\r\n",
"Foo(Bar: value\r\n",
"Foo?Bar: value\r\n",
"Foo\"Bar: value\r\n",
"Foo/Bar: value\r\n",
"Foo]Bar: value\r\n",
"Foo[Bar: value\r\n",
"\u0192\u00f8\u00f8\u00df\u00e5\u00ae: value\r\n"
};
for (int i=0; i<bad.length; i++)
{
ByteBuffer buffer= BufferUtil.toBuffer(
"--AaB03x\r\n" + bad[i] + "\r\n--AaB03x--\r\n");
MultiPartParser.Handler handler = new TestHandler();
MultiPartParser parser= new MultiPartParser(handler, "AaB03x");
try
{
parser.parse(buffer, true);
}
catch(BadMessageException e)
{
assertTrue(e.getMessage().contains("Illegal character"));
}
}
}
@Test @Test
public void splitTest() public void splitTest()
{ {

View File

@ -1,3 +1,4 @@
org.eclipse.jetty.util.log.class=org.eclipse.jetty.util.log.StdErrLog org.eclipse.jetty.util.log.class=org.eclipse.jetty.util.log.StdErrLog
#org.eclipse.jetty.LEVEL=DEBUG #org.eclipse.jetty.LEVEL=DEBUG
#org.eclipse.jetty.server.LEVEL=DEBUG #org.eclipse.jetty.server.LEVEL=DEBUG
#org.eclipse.jetty.http.LEVEL=DEBUG

View File

@ -17,8 +17,7 @@ Content-Transfer-Encoding: 8bit
Value 3 Value 3
--z5xWs05oeiE0TAdFlrrlAX5RSgHrHzVcgskrru --z5xWs05oeiE0TAdFlrrlAX5RSgHrHzVcgskrru
Content-Disposition: form-data; name="other\"; Content-Disposition: form-data; name="other\"; what=\"Something\""
what=\"Something\""
Content-Type: text/plain; charset=UTF-8 Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit Content-Transfer-Encoding: 8bit