HADOOP-13192. org.apache.hadoop.util.LineReader cannot handle multibyte delimiters correctly. Contributed by binde.
(cherry picked from commit fc6b50cc57)
This commit is contained in:
parent
3f27f40503
commit
39ea0891d2
|
@ -318,7 +318,10 @@ public class LineReader implements Closeable {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (delPosn != 0) {
|
} else if (delPosn != 0) {
|
||||||
bufferPosn--;
|
bufferPosn -= delPosn;
|
||||||
|
if(bufferPosn < -1) {
|
||||||
|
bufferPosn = -1;
|
||||||
|
}
|
||||||
delPosn = 0;
|
delPosn = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,20 +80,21 @@ public class TestLineReader {
|
||||||
String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
|
String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
|
||||||
|
|
||||||
int BufferSize=64 * 1024;
|
int BufferSize=64 * 1024;
|
||||||
int numberOfCharToFillTheBuffer=BufferSize-CurrentBufferTailToken.length();
|
int numberOfCharToFillTheBuffer =
|
||||||
|
BufferSize - CurrentBufferTailToken.length();
|
||||||
StringBuilder fillerString=new StringBuilder();
|
StringBuilder fillerString=new StringBuilder();
|
||||||
for (int i=0;i<numberOfCharToFillTheBuffer;i++) {
|
for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
|
||||||
fillerString.append('a'); // char 'a' as a filler for the test string
|
fillerString.append('a'); // char 'a' as a filler for the test string
|
||||||
}
|
}
|
||||||
|
|
||||||
TestData = fillerString + TestPartOfInput;
|
TestData = fillerString + TestPartOfInput;
|
||||||
lineReader = new LineReader(
|
lineReader = new LineReader(
|
||||||
new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
|
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
|
||||||
|
|
||||||
line = new Text();
|
line = new Text();
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals(fillerString.toString(),line.toString());
|
Assert.assertEquals(fillerString.toString(), line.toString());
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals(Expected, line.toString());
|
Assert.assertEquals(Expected, line.toString());
|
||||||
|
@ -107,35 +108,49 @@ public class TestLineReader {
|
||||||
Delimiter = "record";
|
Delimiter = "record";
|
||||||
StringBuilder TestStringBuilder = new StringBuilder();
|
StringBuilder TestStringBuilder = new StringBuilder();
|
||||||
|
|
||||||
TestStringBuilder.append(Delimiter+"Kerala ");
|
TestStringBuilder.append(Delimiter + "Kerala ");
|
||||||
TestStringBuilder.append(Delimiter+"Bangalore");
|
TestStringBuilder.append(Delimiter + "Bangalore");
|
||||||
TestStringBuilder.append(Delimiter+" North Korea");
|
TestStringBuilder.append(Delimiter + " North Korea");
|
||||||
TestStringBuilder.append(Delimiter+Delimiter+
|
TestStringBuilder.append(Delimiter + Delimiter+
|
||||||
"Guantanamo");
|
"Guantanamo");
|
||||||
TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're'
|
TestStringBuilder.append(Delimiter + "ecord"
|
||||||
|
+ "recor" + "core"); //~EOF with 're'
|
||||||
|
|
||||||
TestData=TestStringBuilder.toString();
|
TestData=TestStringBuilder.toString();
|
||||||
|
|
||||||
lineReader = new LineReader(
|
lineReader = new LineReader(
|
||||||
new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
|
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals("",line.toString());
|
Assert.assertEquals("", line.toString());
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals("Kerala ",line.toString());
|
Assert.assertEquals("Kerala ", line.toString());
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals("Bangalore",line.toString());
|
Assert.assertEquals("Bangalore", line.toString());
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals(" North Korea",line.toString());
|
Assert.assertEquals(" North Korea", line.toString());
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals("",line.toString());
|
Assert.assertEquals("", line.toString());
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals("Guantanamo",line.toString());
|
Assert.assertEquals("Guantanamo", line.toString());
|
||||||
|
|
||||||
lineReader.readLine(line);
|
lineReader.readLine(line);
|
||||||
Assert.assertEquals(("ecord"+"recor"+"core"),line.toString());
|
Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());
|
||||||
|
|
||||||
|
// Test 3
|
||||||
|
// The test scenario is such that,
|
||||||
|
// aaaabccc split by aaab
|
||||||
|
TestData = "aaaabccc";
|
||||||
|
Delimiter = "aaab";
|
||||||
|
lineReader = new LineReader(
|
||||||
|
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
|
||||||
|
|
||||||
|
lineReader.readLine(line);
|
||||||
|
Assert.assertEquals("a", line.toString());
|
||||||
|
lineReader.readLine(line);
|
||||||
|
Assert.assertEquals("ccc", line.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue