From caff41bebbbac7be7854844075f3e09b06f8b3fd Mon Sep 17 00:00:00 2001
From: Greg Wilkins
Date: Mon, 31 Jan 2011 05:13:57 +0000
Subject: [PATCH] 333481 Handle UTF-32 codepoints in decode and encode

git-svn-id: svn+ssh://dev.eclipse.org/svnroot/rt/org.eclipse.jetty/jetty/trunk@2713 7e9141cc-0065-0410-87d8-b60c137991c4
---
Review notes (commentary only; ignored when the patch is applied):

 * HttpWriter, hunk at line 47: _surrogate is reset to 0, so 0 means
   "no high surrogate pending".
 * HttpWriter, hunk at line 159: for each char of the chunk
     - nothing pending + high surrogate  -> remember it in _surrogate and
       move on to the next char without emitting anything;
     - pending + low surrogate           -> combine both halves with
       Character.toCodePoint() and encode the resulting code point;
     - pending + anything else           -> encode the pending surrogate
       value as-is and step the index back (i--) so the current char is
       processed again on the next iteration.
   NOTE(review): in the last case the code being encoded is a lone
   surrogate; the encoding branches are outside these hunks, so confirm
   that they do not emit it as a 3-byte sequence (String.getBytes("UTF-8")
   would substitute '?'). A lone low surrogate with nothing pending is not
   intercepted by this hunk either.
 * HttpWriterTest: both new tests compare the HttpWriter output with
   String.getBytes("UTF-8") and with a java.io.OutputStreamWriter. The
   OutputStreamWriter is now created with StringUtil.__UTF8 explicitly;
   previously it used the platform default charset, which made the second
   assertion depend on the JVM's file.encoding.
 * The line structure of this patch was reconstructed from a
   whitespace-collapsed copy. Hunk line counts match the headers, but
   indentation and the content of whitespace-only removed lines are a
   best-effort guess - apply with --ignore-whitespace if needed.

 VERSION.txt                                   |  2 +-
 .../org/eclipse/jetty/server/HttpWriter.java  | 32 +++++++-
 .../eclipse/jetty/server/HttpWriterTest.java  | 79 +++++++++++++++++++
 3 files changed, 109 insertions(+), 4 deletions(-)

diff --git a/VERSION.txt b/VERSION.txt
index 0f8df93d726..62dee9903ce 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -17,7 +17,7 @@ jetty-7.3.0-SNAPSHOT
  + 332937 Added Destroyable interface and reworked dependent lifecycles, specially of JNDI
  + 333247 fix api compat issue in ConstraintSecurityHandler
  + 333415 wired up HttpInput.available and added test harnesses
- + 333481 Handle UTF-32 codepoints
+ + 333481 Handle UTF-32 codepoints in decode and encode
  + 333608 tlds defined in web.xml are not picked up
  + 333679 Refactored jetty-jmx. Moved mbeans to modules
  + 333679 refactor jetty-jmx to support the OSGi PAX tests.
diff --git a/jetty-server/src/main/java/org/eclipse/jetty/server/HttpWriter.java b/jetty-server/src/main/java/org/eclipse/jetty/server/HttpWriter.java
index f25089bf730..47ba4357a09 100644
--- a/jetty-server/src/main/java/org/eclipse/jetty/server/HttpWriter.java
+++ b/jetty-server/src/main/java/org/eclipse/jetty/server/HttpWriter.java
@@ -47,7 +47,7 @@ public class HttpWriter extends Writer
     {
         _out=out;
         _generator=_out._generator;
-         
+        _surrogate=0; // AS lastUTF16CodePoint
     }
 
     /* ------------------------------------------------------------ */
@@ -159,6 +159,30 @@ public class HttpWriter extends Writer
                 {
                     int code = s[offset+i];
 
+                    // Do we already have a surrogate?
+                    if(_surrogate==0)
+                    {
+                        // No - is this char code a surrogate?
+                        if(Character.isHighSurrogate((char)code))
+                        {
+                            _surrogate=code; // UCS-?
+                            continue;
+                        }
+                    }
+                    // else handle a low surrogate
+                    else if(Character.isLowSurrogate((char)code))
+                    {
+                        code = Character.toCodePoint((char)_surrogate, (char)code); // UCS-4
+                        _surrogate=0; // USED
+                    }
+                    // else UCS-2
+                    else
+                    {
+                        code=_surrogate; // UCS-2
+                        _surrogate=0; // USED
+                        i--;
+                    }
+
                     if ((code & 0xffffff80) == 0)
                     {
                         // 1b
@@ -239,7 +263,8 @@ public class HttpWriter extends Writer
                     else
                     {
                         buffer[bytes++]=(byte)('?');
-                    } 
+                    }
+
 
                     if (bytes==buffer.length)
                     {
@@ -260,7 +285,8 @@ public class HttpWriter extends Writer
             offset+=chars;
         }
     }
-     
+
+
     /* ------------------------------------------------------------ */
     private Writer getConverter() throws IOException
     {
diff --git a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpWriterTest.java b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpWriterTest.java
index 73bc4ba9dea..936bca05d66 100644
--- a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpWriterTest.java
+++ b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpWriterTest.java
@@ -156,6 +156,85 @@ public class HttpWriterTest
         assertTrue(response.startsWith("HTTP/1.1 200 OK\r\nContent-Length: 1025\r\n\r\n\u05531234567890"));
     }
 
+    @Test
+    public void testUTF16x2() throws Exception
+    {
+        _writer.setCharacterEncoding(StringUtil.__UTF8);
+
+        String source = "\uD842\uDF9F";
+        // int codePoint = 134047;
+        // char[] codeUnits = new char[Character.charCount(codePoint)];
+        // int count = Character.toChars(codePoint, codeUnits, 0);
+        // String source = new String(codeUnits, 0, count);
+
+        byte[] bytes = source.getBytes("UTF-8"/* StringUtil.__UTF81 */);
+        _writer.write(source.toCharArray(),0,source.toCharArray().length);
+
+        java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
+        java.io.OutputStreamWriter osw = new java.io.OutputStreamWriter(baos,StringUtil.__UTF8);
+        osw.write(source.toCharArray(),0,source.toCharArray().length);
+        osw.flush();
+
+        myReportBytes(bytes);
+        myReportBytes(baos.toByteArray());
+        myReportBytes(_bytes.asArray());
+
+        assertArrayEquals(bytes,_bytes.asArray());
+        assertArrayEquals(baos.toByteArray(),_bytes.asArray());
+    }
+
+    @Test
+    public void testMultiByteOverflowUTF16x2() throws Exception
+    {
+        _writer.setCharacterEncoding(StringUtil.__UTF8);
+
+        final String singleByteStr = "a";
+        int remainSize = 1;
+        final String multiByteDuplicateStr = "\uD842\uDF9F"; // valid(High + Low)
+        // final String multiByteDuplicateStr = "\uD842\uD842"; // invalid(High + High)
+        // final String multiByteDuplicateStr = "\uDF9F\uDF9F"; // invalid(Low + Low)
+        // final String multiByteDuplicateStr = "\uDF9F\uD842"; // invalid(Low + High)
+        // int adjustSize = 0;
+        int adjustSize = -1;
+
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < HttpWriter.MAX_OUTPUT_CHARS + adjustSize; i++)
+        {
+            sb.append(singleByteStr);
+        }
+        sb.append(multiByteDuplicateStr);
+        for (int i = 0; i < remainSize; i++)
+        {
+            sb.append(singleByteStr);
+        }
+        String source = sb.toString();
+
+        byte[] bytes = source.getBytes("UTF-8"/* StringUtil.__UTF81 */);
+        _writer.write(source.toCharArray(),0,source.toCharArray().length);
+
+        java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
+        java.io.OutputStreamWriter osw = new java.io.OutputStreamWriter(baos,StringUtil.__UTF8);
+        osw.write(source.toCharArray(),0,source.toCharArray().length);
+        osw.flush();
+
+        myReportBytes(bytes);
+        myReportBytes(baos.toByteArray());
+        myReportBytes(_bytes.asArray());
+
+        assertArrayEquals(bytes,_bytes.asArray());
+        assertArrayEquals(baos.toByteArray(),_bytes.asArray());
+    }
+
+    private void myReportBytes(byte[] bytes) throws Exception
+    {
+        for (int i = 0; i < bytes.length; i++)
+        {
+            // System.err.format("%s%x",(i == 0)?"[":(i % (HttpWriter.MAX_OUTPUT_CHARS) == 0)?"][":",",bytes[i]);
+        }
+        // System.err.format("]->%s\n",new String(bytes,StringUtil.__UTF8));
+    }
+
+
     private void assertArrayEquals(byte[] b1, byte[] b2)
     {
         assertEquals(b1.length,b2.length);