diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8LineParser.java b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8LineParser.java
new file mode 100644
index 00000000000..b4fc20ee7f5
--- /dev/null
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8LineParser.java
@@ -0,0 +1,110 @@
+// ========================================================================
+// Copyright 2011-2012 Mort Bay Consulting Pty. Ltd.
+// ------------------------------------------------------------------------
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Eclipse Public License v1.0
+// and Apache License v2.0 which accompanies this distribution.
+//
+// The Eclipse Public License is available at
+// http://www.eclipse.org/legal/epl-v10.html
+//
+// The Apache License v2.0 is available at
+// http://www.opensource.org/licenses/apache2.0.php
+//
+// You may elect to redistribute this code under either of these licenses.
+//========================================================================
+package org.eclipse.jetty.util;
+
+import java.nio.ByteBuffer;
+
+import org.eclipse.jetty.util.Utf8StringBuilder;
+import org.eclipse.jetty.util.Utf8Appendable.NotUtf8Exception;
+
+/**
+ * Stateful parser for lines of UTF8 formatted text, looking for "\n" as a line termination character.
+ * <p>
+ * Parsing state is kept between calls to {@link #parse(ByteBuffer)}, so the bytes of one line may arrive spread over several buffers.
+ * A "\r" immediately before the terminating "\n" is not made part of the returned line.
+ * <p>
+ * For use with new IO framework that is based on ByteBuffer parsing.
+ */
+public class Utf8LineParser
+{
+    private enum State
+    {
+        START,
+        PARSE,
+        END;
+    }
+
+    private State state;
+    private Utf8StringBuilder utf;
+
+    public Utf8LineParser()
+    {
+        state = State.START;
+    }
+
+    /**
+     * Consume bytes from the buffer until one complete line of UTF8 text has been read.
+     *
+     * @param buf
+     *            the buffer to read from (may hold only part of a line)
+     * @return the parsed line without its line ending, or null if the {@link ByteBuffer#remaining() remaining} bytes ran out before a line ending
+     *         was reached. (In the case of a null, call again with a subsequent ByteBuffer to continue the same line)
+     * @throws NotUtf8Exception
+     *             if the input buffer has bytes that do not conform to UTF8 validation (validation performed by {@link Utf8StringBuilder})
+     */
+    public String parse(ByteBuffer buf)
+    {
+        while (buf.hasRemaining())
+        {
+            boolean lineComplete = parseByte(buf.get());
+            if (lineComplete)
+            {
+                state = State.START;
+                return utf.toString();
+            }
+        }
+        // buffer exhausted, line not terminated (yet)
+        return null;
+    }
+
+    /**
+     * Advance the state machine by a single byte.
+     *
+     * @param b
+     *            the byte to process
+     * @return true if this byte terminated the current line, false otherwise
+     */
+    private boolean parseByte(byte b)
+    {
+        if (state == State.START)
+        {
+            // first byte of a new line: begin with a fresh builder
+            utf = new Utf8StringBuilder();
+            state = State.PARSE;
+        }
+
+        if (state == State.PARSE)
+        {
+            boolean eolByte = (b == '\r') || (b == '\n');
+            // an eol byte only counts when not waiting on more UTF sequence parts.
+            if (!(utf.isUtf8SequenceComplete() && eolByte))
+            {
+                utf.append(b);
+                return false;
+            }
+            state = State.END;
+        }
+
+        // state is END at this point: any byte other than '\n' is dropped
+        if (b != '\n')
+        {
+            return false;
+        }
+        // we've reached the end
+        state = State.START;
+        return true;
+    }
+}
diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8LineParserTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8LineParserTest.java
new file mode 100644
index 00000000000..b588fc2687d
--- /dev/null
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8LineParserTest.java
@@ -0,0 +1,189 @@
+// ========================================================================
+// Copyright 2011-2012 Mort Bay Consulting Pty. Ltd.
+// ------------------------------------------------------------------------
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Eclipse Public License v1.0
+// and Apache License v2.0 which accompanies this distribution.
+//
+// The Eclipse Public License is available at
+// http://www.eclipse.org/legal/epl-v10.html
+//
+// The Apache License v2.0 is available at
+// http://www.opensource.org/licenses/apache2.0.php
+//
+// You may elect to redistribute this code under either of these licenses.
+//========================================================================
+package org.eclipse.jetty.util;
+
+import static org.hamcrest.Matchers.*;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.jetty.util.BufferUtil;
+import org.eclipse.jetty.util.StringUtil;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests of {@link Utf8LineParser} against input terminated by "\n" and by "\r\n".
+ * Every test hands its whole input to the parser in a single buffer.
+ */
+public class Utf8LineParserTest
+{
+    /**
+     * Append the UTF8 bytes of a String to a buffer.
+     *
+     * @param buf
+     *            the buffer to append to
+     * @param line
+     *            the text to encode and append
+     */
+    private void appendUtf8(ByteBuffer buf, String line)
+    {
+        buf.put(ByteBuffer.wrap(StringUtil.getBytes(line,StringUtil.__UTF8)));
+    }
+
+    /**
+     * Assert that two lists of lines have the same size and the same line at every index.
+     *
+     * @param expected
+     *            the lines that should have been parsed
+     * @param actual
+     *            the lines that were parsed
+     */
+    private void assertEquals(List<String> expected, List<String> actual)
+    {
+        Assert.assertThat("Expected Line Count",actual.size(),is(expected.size()));
+        int len = expected.size();
+        for (int i = 0; i < len; i++)
+        {
+            String expectedLine = expected.get(i);
+            String actualLine = actual.get(i);
+
+            Assert.assertThat("Line[" + i + "]",actualLine,is(expectedLine));
+        }
+    }
+
+    /**
+     * Write every expected line into one buffer, each terminated by "\r\n", then parse that buffer line by line until the parser
+     * returns null and assert that the parsed lines are the expected lines.
+     *
+     * @param expected
+     *            the lines to write and to expect back from the parser
+     */
+    private void assertHttpLinesParse(List<String> expected)
+    {
+        // Prepare Buffer
+        ByteBuffer buf = ByteBuffer.allocate(512);
+        for (String line : expected)
+        {
+            appendUtf8(buf,line + "\r\n");
+        }
+
+        BufferUtil.flipToFlush(buf,0);
+
+        // Parse Buffer
+        Utf8LineParser utfparser = new Utf8LineParser();
+
+        List<String> actual = new ArrayList<>();
+        int count = 0;
+        int excessive = expected.size() + 10; // fail-safe for bad code
+        boolean done = false;
+        while (!done)
+        {
+            String line = utfparser.parse(buf);
+            if (line != null)
+            {
+                actual.add(line);
+            }
+            else
+            {
+                done = true;
+            }
+            count++;
+            Assert.assertThat("Parse Count is excessive (bug in code!)",count,lessThan(excessive));
+        }
+
+        // Validate Results
+        assertEquals(expected,actual);
+    }
+
+    /**
+     * Parse a basic line, with UNIX style line endings "\n"
+     */
+    @Test
+    public void testBasicParse()
+    {
+        ByteBuffer buf = ByteBuffer.allocate(64);
+        appendUtf8(buf,"Hello World\n");
+        BufferUtil.flipToFlush(buf,0);
+
+        Utf8LineParser utfparser = new Utf8LineParser();
+
+        String line = utfparser.parse(buf);
+        Assert.assertThat("Line",line,is("Hello World"));
+    }
+
+    /**
+     * Parsing of a single line of HTTP header style line ending "\r\n"
+     */
+    @Test
+    public void testHttpLineParse()
+    {
+        ByteBuffer buf = ByteBuffer.allocate(64);
+        appendUtf8(buf,"Hello World\r\n");
+        BufferUtil.flipToFlush(buf,0);
+
+        Utf8LineParser utfparser = new Utf8LineParser();
+
+        String line = utfparser.parse(buf);
+        Assert.assertThat("Line",line,is("Hello World"));
+    }
+
+    /**
+     * Parsing of an "in the wild" set of HTTP request header lines.
+     */
+    @Test
+    public void testWildHttpRequestParse()
+    {
+        // Arbitrary Http Request Headers seen in the wild.
+        // Request URI -> http://www.eclipse.org/jetty/
+        List<String> expected = new ArrayList<>();
+        expected.add("HEAD /jetty/ HTTP/1.0");
+        expected.add("User-Agent: \"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.6) Gecko/20060601 Firefox/2.0.0.6 (Ubuntu-feisty)\"");
+        expected.add("Accept: */*");
+        expected.add("Host: www.eclipse.org");
+        expected.add("Connection: Keep-Alive");
+        expected.add("");
+
+        assertHttpLinesParse(expected);
+    }
+
+    /**
+     * Parsing of an "in the wild" set of HTTP response header lines.
+     */
+    @Test
+    public void testWildHttpResponseParse()
+    {
+        // Arbitrary Http Response Headers seen in the wild.
+        // Request URI -> https://ssl.google-analytics.com/__utm.gif
+        List<String> expected = new ArrayList<>();
+        expected.add("HTTP/1.0 200 OK");
+        expected.add("Date: Thu, 09 Aug 2012 16:16:39 GMT");
+        expected.add("Content-Length: 35");
+        expected.add("X-Content-Type-Options: nosniff");
+        expected.add("Pragma: no-cache");
+        expected.add("Expires: Wed, 19 Apr 2000 11:43:00 GMT");
+        expected.add("Last-Modified: Wed, 21 Jan 2004 19:51:30 GMT");
+        expected.add("Content-Type: image/gif");
+        expected.add("Cache-Control: private, no-cache, no-cache=Set-Cookie, proxy-revalidate");
+        expected.add("Age: 518097");
+        expected.add("Server: GFE/2.0");
+        expected.add("Connection: Keep-Alive");
+        expected.add("");
+
+        assertHttpLinesParse(expected);
+    }
+}