diff --git a/core-java-modules/core-java/src/main/java/com/baeldung/encoding/CharacterEncodingExamples.java b/core-java-modules/core-java/src/main/java/com/baeldung/encoding/CharacterEncodingExamples.java index bdd92e37f6..1ed5f9e62a 100644 --- a/core-java-modules/core-java/src/main/java/com/baeldung/encoding/CharacterEncodingExamples.java +++ b/core-java-modules/core-java/src/main/java/com/baeldung/encoding/CharacterEncodingExamples.java @@ -1,10 +1,15 @@ package com.baeldung.encoding; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; public class CharacterEncodingExamples { @@ -29,4 +34,12 @@ public class CharacterEncodingExamples { } return buffer.toString(); } + + static String decodeText(String input, Charset charset, CodingErrorAction codingErrorAction) throws IOException { + CharsetDecoder charsetDecoder = charset.newDecoder(); + charsetDecoder.onMalformedInput(codingErrorAction); + return new BufferedReader( + new InputStreamReader( + new ByteArrayInputStream(input.getBytes()), charsetDecoder)).readLine(); + } } diff --git a/core-java-modules/core-java/src/test/java/com/baeldung/encoding/CharacterEncodingExamplesUnitTest.java b/core-java-modules/core-java/src/test/java/com/baeldung/encoding/CharacterEncodingExamplesUnitTest.java index 95b3605d95..fe3867a3c3 100644 --- a/core-java-modules/core-java/src/test/java/com/baeldung/encoding/CharacterEncodingExamplesUnitTest.java +++ b/core-java-modules/core-java/src/test/java/com/baeldung/encoding/CharacterEncodingExamplesUnitTest.java @@ -1,9 +1,24 @@ package com.baeldung.encoding; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.MalformedInputException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import org.junit.Assert; import org.junit.Test; +import org.junit.jupiter.api.Assertions; public class CharacterEncodingExamplesUnitTest { @@ -58,4 +73,52 @@ public class CharacterEncodingExamplesUnitTest { "0 0 10001010 10011110 "); } + @Test + public void givenUTF8String_whenDecodeByUS_ASCII_thenIgnoreMalformedInputSequence() throws IOException { + Assertions.assertEquals("The faade pattern is a software design pattern.", CharacterEncodingExamples.decodeText("The façade pattern is a software design pattern.", StandardCharsets.US_ASCII, CodingErrorAction.IGNORE)); + } + + @Test + public void givenUTF8String_whenDecodeByUS_ASCII_thenReplaceMalformedInputSequence() throws IOException { + Assertions.assertEquals( + "The fa��ade pattern is a software design pattern.", + CharacterEncodingExamples.decodeText( + "The façade pattern is a software design pattern.", + StandardCharsets.US_ASCII, + CodingErrorAction.REPLACE)); + } + + @Test + public void givenUTF8String_whenDecodeByUS_ASCII_thenReportMalformedInputSequence() { + Assertions.assertThrows( + MalformedInputException.class, + () -> CharacterEncodingExamples.decodeText( + "The façade pattern is a software design pattern.", + StandardCharsets.US_ASCII, + CodingErrorAction.REPORT)); + } + + @Test + public void givenTextFile_whenLoopOverAllCandidateEncodings_thenProduceSuitableCandidateEncodings() { + Path path = Paths.get("src/test/resources/encoding.txt"); + List allCandidateCharSets = Arrays.asList( + StandardCharsets.US_ASCII, StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1); + + List suitableCharsets = new ArrayList<>(); + allCandidateCharSets.forEach(charset -> { + try { + CharsetDecoder charsetDecoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT); + Reader reader = new InputStreamReader(Files.newInputStream(path), charsetDecoder); + BufferedReader bufferedReader = new BufferedReader(reader); + bufferedReader.readLine(); + suitableCharsets.add(charset); + } catch (MalformedInputException ignored) { + } catch (IOException ex) { + ex.printStackTrace(); + } + }); + + Assertions.assertEquals(suitableCharsets, Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1)); + } + }