Reintroduce an Exception type for invalid UTF-8 (#10553)

Introduce `Utf8CharacterCodingException` and `Utf8IllegalArgumentException` as substitutes for the removed `Utf8Appendable.NotUtf8Exception`.

* Updates from review
This commit is contained in:
Greg Wilkins 2023-09-22 04:24:42 +02:00 committed by GitHub
parent 812d65d7ae
commit 57b953be67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 68 additions and 36 deletions

View File

@ -329,7 +329,7 @@ public class UrlEncoded
switch (c)
{
case '&':
value = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid value: Bad UTF-8"));
value = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
if (key != null)
{
adder.accept(key, value);
@ -347,7 +347,7 @@ public class UrlEncoded
buffer.append(c);
break;
}
key = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid key: Bad UTF-8"));
key = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
break;
case '+':
@ -363,7 +363,7 @@ public class UrlEncoded
}
else
{
throw new IllegalArgumentException("Incomplete % encoding");
throw new Utf8StringBuilder.Utf8IllegalArgumentException();
}
break;
@ -375,7 +375,7 @@ public class UrlEncoded
if (key != null)
{
value = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid value: Bad UTF-8"));
value = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
adder.accept(key, value);
}
else if (buffer.length() > 0)

View File

@ -54,7 +54,7 @@ public class Utf8LineParser
if (parseByte(b))
{
state = State.START;
return utf.takeCompleteString(() -> new IllegalArgumentException("Bad UTF-8"));
return utf.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
}
}
// have not reached end of line (yet)

View File

@ -387,16 +387,29 @@ public class Utf8StringBuilder implements CharsetStringBuilder
@Override
public String build() throws CharacterCodingException
{
return takeCompleteString(Utf8StringBuilder::newUtf8CharacterCodingException);
return takeCompleteString(Utf8CharacterCodingException::new);
}
private static CharacterCodingException newUtf8CharacterCodingException()
public static class Utf8CharacterCodingException extends CharacterCodingException
{
return new CharacterCodingException()
@Override
public String getMessage()
{
{
initCause(new IllegalArgumentException("Bad UTF-8 encoding"));
}
};
return "Invalid UTF-8";
}
@Override
public String toString()
{
return "%s@%x: Invalid UTF-8".formatted(CharacterCodingException.class.getSimpleName(), hashCode());
}
}
/**
 * Unchecked exception signalling invalid UTF-8: an {@link IllegalArgumentException}
 * whose cause is a newly created {@code Utf8CharacterCodingException}.
 */
public static class Utf8IllegalArgumentException extends IllegalArgumentException
{
/**
 * Creates the exception with a fresh {@code Utf8CharacterCodingException} as its cause.
 * The no-argument form allows it to be passed as a constructor reference
 * ({@code Utf8IllegalArgumentException::new}).
 */
public Utf8IllegalArgumentException()
{
// Only a cause is supplied; per the JDK's IllegalArgumentException(Throwable)
// constructor, the detail message is then derived from the cause's toString().
super(new Utf8CharacterCodingException());
}
}
}

View File

@ -30,6 +30,7 @@ import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -42,7 +43,7 @@ import static org.junit.jupiter.api.Assertions.fail;
public class Utf8StringBuilderTest
{
@Test
public void testUtf() throws Exception
public void testUtf()
{
String source = "abcd012345\n\r\u0000¤჻\ufffdjetty";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
@ -56,7 +57,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testUtf8WithMissingByte() throws Exception
public void testUtf8WithMissingByte()
{
String source = "abc჻";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
@ -71,7 +72,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testUtf8WithAdditionalByte() throws Exception
public void testUtf8WithAdditionalByte()
{
String source = "abcXX";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
@ -88,7 +89,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testUTF32codes() throws Exception
public void testUTF32codes()
{
String source = "\uD842\uDF9F";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
@ -103,7 +104,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testGermanUmlauts() throws Exception
public void testGermanUmlauts()
{
byte[] bytes = new byte[6];
bytes[0] = (byte)0xC3;
@ -121,7 +122,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testInvalidUTF8() throws Exception
public void testInvalidUTF8()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();
utf8.append((byte)0xC2); // start of sequence
@ -129,10 +130,19 @@ public class Utf8StringBuilderTest
assertThat(utf8.toPartialString(), equalTo("<EFBFBD>")); // only first sequence is reported as BAD
assertThat(utf8.toCompleteString(), equalTo("<EFBFBD><EFBFBD>")); // now both sequences are reported as BAD
assertThrows(CharacterCodingException.class, utf8::build);
try
{
utf8.build();
}
catch (Throwable t)
{
assertThat(t.toString(), containsString("Invalid UTF-8"));
}
}
@Test
public void testInvalidZeroUTF8() throws Exception
public void testInvalidZeroUTF8()
{
// From https://datatracker.ietf.org/doc/html/rfc3629#section-10
Utf8StringBuilder utf8 = new Utf8StringBuilder();
@ -144,7 +154,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testInvalidAlternateDotEncodingUTF8() throws Exception
public void testInvalidAlternateDotEncodingUTF8()
{
// From https://datatracker.ietf.org/doc/html/rfc3629#section-10
Utf8StringBuilder utf8 = new Utf8StringBuilder();
@ -160,7 +170,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testFastFail1() throws Exception
public void testFastFail1()
{
byte[] part1 = StringUtil.fromHexString("cebae1bdb9cf83cebcceb5");
byte[] part2 = StringUtil.fromHexString("f4908080"); // INVALID
@ -178,7 +188,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testFastFail2() throws Exception
public void testFastFail2()
{
byte[] part1 = StringUtil.fromHexString("cebae1bdb9cf83cebcceb5f4");
byte[] part2 = StringUtil.fromHexString("90"); // INVALID
@ -196,7 +206,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testPartialSplitSingleCodepoint() throws Exception
public void testPartialSplitSingleCodepoint()
{
// GOTHIC LETTER HWAIR
final String gothicUnicode = "𐍈";
@ -223,7 +233,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testPartialUnsplitCodepoint() throws Exception
public void testPartialUnsplitCodepoint()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();
@ -245,7 +255,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testPartialSplitCodepoint() throws Exception
public void testPartialSplitCodepoint()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();
@ -267,7 +277,7 @@ public class Utf8StringBuilderTest
}
@Test
public void testPartialSplitCodepointWithNoBuf() throws Exception
public void testPartialSplitCodepointWithNoBuf()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

View File

@ -170,7 +170,7 @@ public class CloseStatus
Utf8StringBuilder utf = new Utf8StringBuilder();
// if this throws, we know we have bad UTF8
utf.append(reasonBytes, 0, reasonBytes.length);
String reason = utf.takeCompleteString(() -> new BadPayloadException("Invalid UTF8 in CLOSE Reason"));
String reason = utf.takeCompleteString(BadPayloadException.InvalidUtf8::new);
this.code = statusCode;
this.reason = reason;

View File

@ -13,6 +13,7 @@
package org.eclipse.jetty.websocket.core.exception;
import org.eclipse.jetty.util.Utf8StringBuilder;
import org.eclipse.jetty.websocket.core.CloseStatus;
/**
@ -38,4 +39,12 @@ public class BadPayloadException extends CloseException
{
super(CloseStatus.BAD_PAYLOAD, t);
}
/**
 * A {@link BadPayloadException} specialised for invalid UTF-8, carrying the fixed
 * message {@code "Invalid UTF-8"}.
 */
public static class InvalidUtf8 extends BadPayloadException
{
/**
 * Creates the exception with the message {@code "Invalid UTF-8"} and a new
 * {@code Utf8StringBuilder.Utf8CharacterCodingException}.
 * The no-argument form allows it to be passed as a constructor reference
 * ({@code BadPayloadException.InvalidUtf8::new}).
 */
public InvalidUtf8()
{
// NOTE(review): the second argument is presumably recorded as the cause by the
// BadPayloadException(String, Throwable) constructor - confirm against that constructor.
super("Invalid UTF-8", new Utf8StringBuilder.Utf8CharacterCodingException());
}
}
}

View File

@ -199,13 +199,13 @@ public class MessageHandler implements FrameHandler
if (frame.isFin())
{
onText(textBuffer.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8")), callback);
onText(textBuffer.takeCompleteString(BadPayloadException.InvalidUtf8::new), callback);
textBuffer.reset();
}
else
{
if (textBuffer.hasCodingErrors())
throw new BadPayloadException("Invalid UTF-8");
throw new BadPayloadException.InvalidUtf8();
else
callback.succeeded();
}

View File

@ -54,12 +54,12 @@ public class PartialStringMessageSink extends AbstractMessageSink
if (frame.isFin())
{
String complete = accumulator.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8"));
String complete = accumulator.takeCompleteString(BadPayloadException.InvalidUtf8::new);
getMethodHandle().invoke(complete, true);
}
else
{
String partial = accumulator.takePartialString(() -> new BadPayloadException("Invalid UTF-8"));
String partial = accumulator.takePartialString(BadPayloadException.InvalidUtf8::new);
getMethodHandle().invoke(partial, false);
}

View File

@ -65,7 +65,7 @@ public class StringMessageSink extends AbstractMessageSink
if (frame.isFin())
{
getMethodHandle().invoke(out.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8")));
getMethodHandle().invoke(out.takeCompleteString(BadPayloadException.InvalidUtf8::new));
callback.succeeded();
autoDemand();
}

View File

@ -84,10 +84,10 @@ public class PartialStringMessageSinkTest
// Check decoding
Utf8StringBuilder check = new Utf8StringBuilder();
check.append(utf8Bytes, 0, 2);
String partial = check.takePartialString(IllegalStateException::new);
String partial = check.takePartialString(Utf8StringBuilder.Utf8CharacterCodingException::new);
assertThat(partial, equalTo(""));
check.append(utf8Bytes, 2, 2);
String complete = check.takeCompleteString(IllegalStateException::new);
String complete = check.takeCompleteString(Utf8StringBuilder.Utf8CharacterCodingException::new);
assertThat(complete, equalTo(gothicUnicode));
FutureCallback callback = new FutureCallback();
@ -142,7 +142,7 @@ public class PartialStringMessageSinkTest
public static class OnMessageEndpoint
{
private BlockingArrayQueue<List<String>> messages;
private final BlockingArrayQueue<List<String>> messages;
public OnMessageEndpoint()
{

View File

@ -111,7 +111,7 @@ public class HttpWriterTest
Utf8StringBuilder buf = new Utf8StringBuilder();
buf.append(BufferUtil.toArray(_bytes), 0, _bytes.remaining());
assertEquals(data, buf.takeCompleteString(IllegalArgumentException::new));
assertEquals(data, buf.takeCompleteString(Utf8StringBuilder.Utf8CharacterCodingException::new));
}
@Test