HADOOP-13192. org.apache.hadoop.util.LineReader cannot handle multibyte delimiters correctly. Contributed by binde.

(cherry picked from commit fc6b50cc57)
This commit is contained in:
Akira Ajisaka 2016-06-20 17:07:26 +09:00
parent 3f27f40503
commit 39ea0891d2
2 changed files with 42 additions and 24 deletions

View File

@ -318,7 +318,10 @@ public class LineReader implements Closeable {
break;
}
} else if (delPosn != 0) {
bufferPosn--;
bufferPosn -= delPosn;
if(bufferPosn < -1) {
bufferPosn = -1;
}
delPosn = 0;
}
}

View File

@ -80,7 +80,8 @@ public class TestLineReader {
String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
int BufferSize=64 * 1024;
int numberOfCharToFillTheBuffer=BufferSize-CurrentBufferTailToken.length();
int numberOfCharToFillTheBuffer =
BufferSize - CurrentBufferTailToken.length();
StringBuilder fillerString=new StringBuilder();
for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
fillerString.append('a'); // char 'a' as a filler for the test string
@ -112,7 +113,8 @@ public class TestLineReader {
TestStringBuilder.append(Delimiter + " North Korea");
TestStringBuilder.append(Delimiter + Delimiter+
"Guantanamo");
TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're'
TestStringBuilder.append(Delimiter + "ecord"
+ "recor" + "core"); //~EOF with 're'
TestData=TestStringBuilder.toString();
@ -137,5 +139,18 @@ public class TestLineReader {
lineReader.readLine(line);
Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());
// Test 3
// Scenario: the delimiter's leading bytes also occur in the data just before it.
// "aaaabccc" split by "aaab" must yield "a" and then "ccc".
TestData = "aaaabccc";
Delimiter = "aaab";
lineReader = new LineReader(
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
lineReader.readLine(line);
Assert.assertEquals("a", line.toString());
lineReader.readLine(line);
Assert.assertEquals("ccc", line.toString());
}
}