From 2bc3351eaf240ea685bcf5042d79f1554bf89e00 Mon Sep 17 00:00:00 2001 From: Sean Mackrory Date: Wed, 21 Feb 2018 12:53:18 -0700 Subject: [PATCH] HADOOP-6852. apparent bug in concatenated-bzip2 support (decoding). Contributed by Zsolt Venczel. --- .../hadoop-client-minicluster/pom.xml | 1 + .../apache/hadoop/io/compress/BZip2Codec.java | 3 +- .../TestConcatenatedCompressedInput.java | 84 ++++++++---------- .../src/test/resources/testdata/concat.bz2 | Bin 0 -> 208 bytes .../src/test/resources/testdata/concat.gz | Bin 0 -> 148 bytes .../testdata/testCompressThenConcat.txt.bz2 | Bin 0 -> 3056 bytes .../testdata/testCompressThenConcat.txt.gz | Bin 0 -> 3413 bytes .../testdata/testConcatThenCompress.txt.bz2 | Bin 0 -> 2567 bytes .../testdata/testConcatThenCompress.txt.gz | Bin 0 -> 2734 bytes 9 files changed, 42 insertions(+), 46 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2 create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2 create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2 create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 905d53a38fa..a443648fdc3 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -615,6 +615,7 @@ testjar/* testshell/* + testdata/* diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java index 3c78cfce55c..99590eda679 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java @@ -180,7 +180,8 @@ public CompressionInputStream createInputStream(InputStream in, new DecompressorStream(in, decompressor, conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT)) : - new BZip2CompressionInputStream(in); + new BZip2CompressionInputStream( + in, 0L, Long.MAX_VALUE, READ_MODE.BYBLOCK); } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java index 977d083dff7..af6b9529e02 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java @@ -18,18 +18,6 @@ package org.apache.hadoop.mapred; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.Inflater; - import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; @@ -42,16 +30,26 @@ import org.apache.hadoop.util.LineReader; import org.apache.hadoop.util.ReflectionUtils; import org.junit.After; -import org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@Ignore +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.Inflater; + +import static org.junit.Assert.*; + +/** + * Test class for concatenated {@link CompressionInputStream}. + */ public class TestConcatenatedCompressedInput { private static final Logger LOG = LoggerFactory.getLogger(TestConcatenatedCompressedInput.class); - private static int MAX_LENGTH = 10000; private static JobConf defaultConf = new JobConf(); private static FileSystem localFs = null; @@ -85,13 +83,15 @@ public class TestConcatenatedCompressedInput { public void after() { ZlibFactory.loadNativeZLib(); } + + private static final String DEFAULT_WORK_DIR = "target/test-classes/testdata"; private static Path workDir = localFs.makeQualified(new Path( - System.getProperty("test.build.data", "/tmp"), + System.getProperty("test.build.data", DEFAULT_WORK_DIR), "TestConcatenatedCompressedInput")); private static LineReader makeStream(String str) throws IOException { - return new LineReader(new ByteArrayInputStream(str.getBytes("UTF-8")), - defaultConf); + return new LineReader(new ByteArrayInputStream( + str.getBytes("UTF-8")), defaultConf); } private static void writeFile(FileSystem fs, Path name, @@ -190,7 +190,8 @@ public void testGzip() throws IOException { // copy prebuilt (correct!) version of concat.gz to HDFS final String fn = "concat" + gzip.getDefaultExtension(); - Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn); + Path fnLocal = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn); Path fnHDFS = new Path(workDir, fn); localFs.copyFromLocalFile(fnLocal, fnHDFS); @@ -227,7 +228,7 @@ public void testGzip() throws IOException { @Test public void testPrototypeInflaterGzip() throws IOException { CompressionCodec gzip = new GzipCodec(); // used only for file extension - localFs.delete(workDir, true); // localFs = FileSystem instance + localFs.delete(workDir, true); // localFs = FileSystem instance System.out.println(COLOR_BR_BLUE + "testPrototypeInflaterGzip() using " + "non-native/Java Inflater and manual gzip header/trailer parsing" + @@ -235,7 +236,8 @@ public void testPrototypeInflaterGzip() throws IOException { // copy prebuilt (correct!) version of concat.gz to HDFS final String fn = "concat" + gzip.getDefaultExtension(); - Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn); + Path fnLocal = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn); Path fnHDFS = new Path(workDir, fn); localFs.copyFromLocalFile(fnLocal, fnHDFS); @@ -326,14 +328,16 @@ public void testBuiltInGzipDecompressor() throws IOException { // copy single-member test file to HDFS String fn1 = "testConcatThenCompress.txt" + gzip.getDefaultExtension(); - Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1); + Path fnLocal1 = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn1); Path fnHDFS1 = new Path(workDir, fn1); localFs.copyFromLocalFile(fnLocal1, fnHDFS1); // copy multiple-member test file to HDFS // (actually in "seekable gzip" format, a la JIRA PIG-42) String fn2 = "testCompressThenConcat.txt" + gzip.getDefaultExtension(); - Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2); + Path fnLocal2 = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn2); Path fnHDFS2 = new Path(workDir, fn2); localFs.copyFromLocalFile(fnLocal2, fnHDFS2); @@ -439,7 +443,8 @@ private static void doSingleGzipBufferSize(JobConf jConf) throws IOException { InputSplit[] splits = format.getSplits(jConf, 100); assertEquals("compressed splits == 2", 2, splits.length); FileSplit tmp = (FileSplit) splits[0]; - if (tmp.getPath().getName().equals("testCompressThenConcat.txt.gz")) { + if (tmp.getPath() + .getName().equals("testdata/testCompressThenConcat.txt.gz")) { System.out.println(" (swapping)"); splits[0] = splits[1]; splits[1] = tmp; @@ -481,7 +486,8 @@ public void testBzip2() throws IOException { // copy prebuilt (correct!) version of concat.bz2 to HDFS final String fn = "concat" + bzip2.getDefaultExtension(); - Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn); + Path fnLocal = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn); Path fnHDFS = new Path(workDir, fn); localFs.copyFromLocalFile(fnLocal, fnHDFS); @@ -531,13 +537,15 @@ public void testMoreBzip2() throws IOException { // copy single-member test file to HDFS String fn1 = "testConcatThenCompress.txt" + bzip2.getDefaultExtension(); - Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1); + Path fnLocal1 = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn1); Path fnHDFS1 = new Path(workDir, fn1); localFs.copyFromLocalFile(fnLocal1, fnHDFS1); // copy multiple-member test file to HDFS String fn2 = "testCompressThenConcat.txt" + bzip2.getDefaultExtension(); - Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2); + Path fnLocal2 = new Path( + System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn2); Path fnHDFS2 = new Path(workDir, fn2); localFs.copyFromLocalFile(fnLocal2, fnHDFS2); @@ -549,21 +557,6 @@ public void testMoreBzip2() throws IOException { assertEquals("concat bytes available", 2567, in1.available()); assertEquals("concat bytes available", 3056, in2.available()); -/* - // FIXME - // The while-loop below dies at the beginning of the 2nd concatenated - // member (after 17 lines successfully read) with: - // - // java.io.IOException: bad block header - // at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock( - // CBZip2InputStream.java:527) - // - // It is not critical to concatenated-gzip support, HADOOP-6835, so it's - // simply commented out for now (and HADOOP-6852 filed). If and when the - // latter issue is resolved--perhaps by fixing an error here--this code - // should be reenabled. Note that the doMultipleBzip2BufferSizes() test - // below uses the same testCompressThenConcat.txt.bz2 file but works fine. - CompressionInputStream cin2 = bzip2.createInputStream(in2); LineReader in = new LineReader(cin2); Text out = new Text(); @@ -578,7 +571,6 @@ public void testMoreBzip2() throws IOException { 5346, totalBytes); assertEquals("total uncompressed lines in concatenated test file", 84, lineNum); - */ // test CBZip2InputStream with lots of different input-buffer sizes doMultipleBzip2BufferSizes(jobConf); @@ -645,7 +637,8 @@ private static void doMultipleBzip2BufferSizes(JobConf jConf) // this tests both files (testCompressThenConcat, testConcatThenCompress); all // should work with existing Java bzip2 decoder and any future native version - private static void doSingleBzip2BufferSize(JobConf jConf) throws IOException { + private static void doSingleBzip2BufferSize(JobConf jConf) + throws IOException { TextInputFormat format = new TextInputFormat(); format.configure(jConf); format.setMinSplitSize(5500); // work around 256-byte/22-splits issue @@ -654,7 +647,8 @@ private static void doSingleBzip2BufferSize(JobConf jConf) throws IOException { InputSplit[] splits = format.getSplits(jConf, 100); assertEquals("compressed splits == 2", 2, splits.length); FileSplit tmp = (FileSplit) splits[0]; - if (tmp.getPath().getName().equals("testCompressThenConcat.txt.gz")) { + if (tmp.getPath() + .getName().equals("testdata/testCompressThenConcat.txt.gz")) { System.out.println(" (swapping)"); splits[0] = splits[1]; splits[1] = tmp; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2 b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f31fb0c32bb5e5b7db4e5622d78c21202dcaf30a GIT binary patch literal 208 zcmZ>Y%CIzaj8qGbyyK_&f`NhaW&?wO1A_pAb^4tN3JiumYO|PqoaKT~Nxtg&GH+?9 z%`I1+JKLxCKRevi#W?A(WBIATcYH_hO!N~`F&FNtVsL_+SEgzt&A`9}GtW->ihu%x z(k!{fR}xnQGq7B$T($6q(b@JO;VFVYc2Ckg)Y80BbHWXTe#-}wZZI&gfb@GXNHADI m^f&u^nA~(~o0Y)WDXD)x&rQ)I%4hzgcOJ{RgxGC_q8R|WP)b<< literal 0 HcmV?d00001 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz new file mode 100644 index 0000000000000000000000000000000000000000..53d5a07fcaede992597112d3d05af8a71557fc30 GIT binary patch literal 148 zcmb2|=3tPx^wpbz+55DPmR_L8`cpaq9_P+``{_IkI^lcP*Gu=2)_I+i0U9SYynI7X zoIK;L`;3X<%dI~vv>6x}ac_V8Mgz9Btc&Dt?Ee=)c1hEOpj#FSOf&$Ed Nh9=1$IUWE2 literal 0 HcmV?d00001 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2 b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a21c0e2c10b6e7572cb330bf793d4e3aea7e25db GIT binary patch literal 3056 zcmV zlrFZ~gAIvN)I$F0I>X?nA8#etbWU6qJWKA&vp134Le4RDBSjJ1HZRb43HNhm@ygmekMb$y7F-9i5}1m=jes?lJR0t%3a)( zcUM;5k2QzE0e_7(j^@q;nOp<^UViOG}h%f|_i?~clAWpL0PZqFZSdb~msW93B=(lQp zP;ZIne*bm1Jv`ea3WG>$+uJlaD1@T1f1sVa8-_-QT81L6NL-kaq-@u3T21#oUoj6eJPi`Ds8xT4*ydL0KkKS=jNn6952rUw{A* zKnDN_xaM4dKkwi0Kmv((S8hvc!&4AyVI~DVqMnd?nFo|2Z2*YXG-TRApa9TB^*oh4 zQ1pg?WB?igDWMfVB4C=2O;aJD$&(Fv_(OGk?Z>yuKi(Y9UtS2srC~h`Jvg1EB~+Mk zwvPBm)vmc?gHNSsSr_)B>kD%cC;FdnT2Vv@+#s7Kvy^-+k%6;P04fzvAC-byvE!&L z3>pMTJA;I2n!4kK@MbZBJWsXeln;`&!Wa@36ogxmUr<3?OfOi?-Zz4&9JeWwWUhhG z)&9~5*Fz~eTP1)vgf!KMXO8$d29dEkf{Uj1LzQihD?~Poz-@M&oR;55YRhyIN!`)} z*+RDeBw(3~`YH-^sV5jj^9IGH zCJQVN9Zx6|x{jHestfO;RClhEp7G;%Unei&$QEWzPvia)Lli z2O}!JtY@0y)!EdnR9rK&)l#G87$$k72YSw~(1))A<|)&HCEj#+J;Cy+5*2|>tVPDX z;*G3Q0(_tALjx01oE$)1l!=d(Z0~p^iQlclh}ko>mV1*9@Nt7A9sVxlig2MJvEyzg zLRx4xF+o`-Q&~fy6pR1>hhKmI5MT!Y2(TrLAV2@#;6MV%aJHm?XiSYH@{_|&QN1Y8 z(<4GD`3aFUG*HkQ1JnZA=UuK*l!yg1!o8zPsKJ3S$I`DznzGa`EW;o;vEF;{){);A6Dmf>AOwnf z)Ypj>tT-`m)gEC>j8!nE!Mzh8NJ@RMG$3GsN=y(sQGp{Or;0 zN&5=ch}IP_#0B6Xz@f;|F#B!fy1u1W_EORb%7uP(i?XVVE>Do}rjm~tyU^s5bJJ{2 zF;u7(Au?Z#rfne9*rkD~7vO?dDA{(D5n&Rktk}>fI~_JEjB}cs2#P8-cPoiq5w}Nn zc)+SKfR!kCM$yYTd`q1SE|7-ZEFYcmQ%;=XD^qvK%yP21eJr(NR*@Hb*OGag(=YoG z5-p~UgnrvWKx&z` zLtb4Vep7o<`}Ed7dh56^(^sc$_6lEWloe#dtN;g{$e5X1O9suQ$I949)InWRI#L2Y zK3<^beqt8+J44ijID!$EKSDL8wG6|9s$mA#zw&#KapU)=c|C(2cW^W$suGV$LSPXbVV=`wJ>=_ml9NQ~Oy9^9M z&qV@k4-)-dZ#eVX9$XQn_Xh<$6eJP7`aWP5XqZU@O$(6$EG#{D?jAVDcz`ZN%Qn{N zm!X>toSCc2aopom7-NBbbkb^nZYXu^o@d+E(Ka3k3j4dAh#j4_htUwS^4yeB4x6?& z6b$~Z*2%`yP&`v|#JOm()iaTSYP6Ps`ynR-`1Q*Eab_-S7?SMjog{)-wF`l=VWnbI zXex@Jb=+FSJ6oo(&_OM9tU8_hi@744C`c64OYHzVKZp(h literal 0 HcmV?d00001 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..75e5f8c7f7405238ec5f36380b4026aa20411128 GIT binary patch literal 3413 zcmai!Wmppm8-_=Uv`9>Hph%3AkdBRX0#ZY17z0Mb1`0>|Qvxkzr;s-hA`7_Rs9oNDh^7(gEz?d*_9}0&^Vy#hMK+!9qf=+HZ*MUm%PU?=Z zhvaP`Uf2S*OeiMmLi|%s<#tD~cDto z9p~LmKTXUazZG?^mZ?=DRnkO&9v_Ga z9Xb@+Z{PVTniI3bccy;8s~E|8>qsKZib-?PjR_jLRbWFlP#_l%2&zFfk&zdpcB=PY zOqxOjwmWFIw{$nFKSJXa@pJR0ba(mq4HSvzD$ed2x#GS)kBF+J90Gg+hBv8MnDQHe zI#bmLJL%3+u{N89xguJXR3rHy<&h%xa1cfi##Ref%Z*bn^4s0Ey9P7j=fx4B=B%8VRZ zk{+H0JyJr&orhmo6ojwvRojK`9_p)AuIfA&C9ZfAFc_n-4z}Tjdr9R*=2~k+(J-s| z?xP!cd)mz-%E87JoJb9Hlf}#N_50ue`PYo#p)-iW1A(I}X-RE52y8b;sSmQv>L%XH zh!Ugziae6gv(3x-%H*w+y2m=*U0MJ@h`<0CdLtnNprHZ$@BU;AmE|J00W|+&`3->5 zBVE6ba?JWa)aclp$LU@Go9-!6*@8S7-Aq0-i6%?&mhum%M*gKZ_$JQ8xTq*~T3hy> z__}WQ+O(#nQH$Z2Vq8$hm{r$=kZB>zyn#rcsDFK^qbf1(tw7;HC$M1>ol7kyu7AoV zmXfP#!JSjTW9VQrSeEv!T|k);PrTEGi{~wnJ-jLPiXEytWZyyh$jF&wzL;l4zyzdY z0}5phliRS)J(7M?SGhErGs69%5rZCvhII|a)SPO1CDwayp5jK!I8`)(q8OzHKCU`x z70`|ci;z}wVYa&}8J9ju`l7kUnkZQxQB(dTXTUIatnUo|@=_377BNdhhp5ll;rY!; zc9t0KYad7(i2M=^_-RqU)XVdrQiAE3*vtLsBxgjNj)e)&uL?yEhjnu_=h$kY*H9^g zktCH#Xo`zB^3zR-N=l4i;o&Ou(ztv`bOzzC11!z8XrFA870M4;ujWtkfLC}12Pqv5 zW4&?2wO3}4Bm4FHYc_p%0YkekP5!yhPtTa>!5pq~aAU=Iw(592HAaIAcSU=S@s3>5 z`qmQV_)wSRF9H?50WNHA?*ZTirN}CF3iU$x;(7#>_L8^?emhd~dxC|3J7yAwmC z(W&Uhq+w)3&C9z5Xt4=V1`Lum!(nJn!U!y7PTmWNcl~yjUF`AeJ zLeIRqjkoBN&D!RgH-Eq`ZQatQII!EweHt9q6$x+h%<$gSDml5URYP2zX+Pf(J5(3` z7hmZs{*SK;0DY_Wpbv8F-}bmR3#ZxI$SMx*H%wiWB!q*EMf$tDW%Lhr4!AS>ABRfK zzasowf72B4g_1sAc6l{6Cv>9T zC(P$+VSm6^rpN{R!k_qgf((6AJh0(9*h8sUueIJm<@eN3B3&$m@fOmHAOi1-LA=(Y zc}=e+L0%fbi8Q7N_7!%1vT+=`F7>t2KGQ9)QZ9NVqJ1KG5jPbltpGl2WRlG+`DRDz zx!l~|oDjvLdG}jxz zMXh&V5Jfa=zIHju68O9o`#K_f?8y?PQQaf@FjB@ki};@4N*zlgQGW~AhC~T(ZZ{V6>t>5XPx{VUCmQyQTS!nxRE6E1lG4dX!K@tDjMD< zfxTrAZ^ZPN*+<`?nSQsareSMt*BmO;?#;TiaJKg=d5a#$9++t6zS?S5f7QTdMC2IP zN;Ui2J$SUWfBm|+T!ukm%6;lAKHPjZ5jCU9Qi1+jv=6nS`!zMN6@gF z=N8<)UT*rTh{-ENvf16X0P6P*Dk`v&T$R0l3hco+Y91*KkB@W(WaIZ*(t#?IZ>ew;A^!T`ah)PB%^9EPfIPqriY(;^g!u)Tqt%<(& z^YPdk^PzvSTkMbB)c>}|iU0>|UE+!|bJr%65C}%btGqul)_&yQ!*ZHWt7CcGx znRxSZmzI9JI&QSjw^cmpi6qIiyKz?o9T}v*3djb5#^&DL z7o2)L&m#jTohe8;Ue5s4>4CG|K0(wD@$>D9YDqi zM}%Pf_#i38!3K<5LHqDuL))zv4;pg`-PMYKSIlsID(>S%_lp%~9 zcfx92G~(rH=AECl2O>68DX<_$KWtbxP#|q4lp}v)k5pa}@d)^^36MfxH5-@-WM%MS zkMd{y@TiAB)slq;ctjPmxxmu>Bg4Ft3N@1lJP8MWhX*gY0wb79_Q(ho{yY38x5T>* z*GlsbXh(?dbWJV-x5JOw3-c;Y!7Ml9Hr8kKKIaCmS}kI(aN>e2ouqRD#Mhe52GG0b zt))6+rjV2Z8dNU4WNA;wxF%TMjZcAJLoA|p_o6dRHI?Si^vWF_R|8zdnJ?dsnw)9|se=T2M)^dJ$-lkU3?n1BZ z^L(CPta;HErYH2mYz}CZgDW`~ zi(P4}RXI4n9_QVc`a|q!BS$fpQkJk4azy0ugS0nj&9bfPPLT9|Z-pIE6Iq-@z z#}D!4x6Ji|ao22bOj89a%8Zibz}tZ+q%-M=)>B72yh8iDvwjR>8$qq@Es7SBiwcpb ioY`()s2VWYqn(KHpPK%f2X-tt;d%Z<_F2&d0N_8eqBH^k literal 0 HcmV?d00001 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2 b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5983e52cc034a6a9b482e7c0bdca8515bb99dad0 GIT binary patch literal 2567 zcmV+i3i$OxT4*^jL0KkKS+$^s@Pri3F*LSUz&o{69h0MHtz=$M%kK@{~SjG6|3 z8USi~G(e}Sc{LeEjDP?D2AH7KG(aY!)bxR%003r@5T~knBSsL=&}avu07!x$Aroa! zDTL4f05hn8M7d7tg)e^Ct?BYU81O5`etAv4yI1vcCl9>gmRT+Nwh0lPB|`l1VW-3C z`T5mqpTTwdl(Yr!_WGv8WmM#PxjWQ0c?lOpN@+pP7S@z!1tp$$l` zNIgn?TTj7h+CnMHM}$tqKPT-W5R6|AB$-dq;kUe_de^7sV?eddx{dh4=FisEbwHt@JYNbu6glC7tgPu+sw6BA@o&=tvKARfP5j}wx2VSATDuE>799&7G(StRP4@vUVJw7iP2FI#kFbh~*78*@28JQLeftmaS{Jelsq&$sLfQ zyM`teGj!2h#;-XL9KjT%AtABONSV6?fx;q-<{!(5IE(R_ed#j?IJDmkl#p#@qZW43 z3ov5ttL5%X+n9%3c)Mn+5-51`+Mt?y<9#)_)b1NLwRy}{=x||ubrmFceOR@xj-g3H z-Sq+aRmQuri5aOJ*PUZtFN{)d66p({CrZb;*ZYVbr7m{uE;;V;%a#f1Q0f{cda|%L zN>tZOJy;|c*Mt(qm(lSK+CHm{%Tf@O(`KdA+Xm8ds;+Fnm_f`(AzsgujOZ7-O}J$w z;6+FfF3c@8vT~t!oNVNyWV=eFUw4#9#V;_|5Rh4P94;XU0n8t&Zw3sFzoWOYu#~xk^_Rj)Zj+b5P4sUU9giActm;Q{ zfHvVquB-}zr5DQP#v;%&qYjz)Z1I&BI*$cqw&Lz8t1x7C!{pCbu+K&-H#9a!hAqZ6 zh=75$1ffEe8ElPH1TtW|!orvo#kyH{#&g>j##aox2v-L(8Fc0*aN&@}d$Y@k6_78N zUL82$xT!`how@U^ccq005D(Q1o*NQ@LXA&aK-0c^P8!J1D9QNyphISjf_JV zBdowsPS2I(!50!^Wgs7ms+&%20N_*6FTxkKYOdUUc~zRu-oHnVG*E>D0c;Uw+@Ec! zLFn5Tp4$9bc5@EUi?yR}G^T5arIeWx7Dr&~3(ut3FnF1irecwKD%gyJ;}qYl^~ebb z`?RzvRjOv%GlDpBN$OEcQUP1;`OMX$-cjw4EJX-G%x{t$UU;Q8;;xEryhwJ5uG$<) z^J~LH8A1&%sxs-zW&h9m-IWPDZOyihJZ8@(D-M(6?5hZ&% zwg$sV+xYjbQc9P^N(0i;IfM)nwS5}qt_Fgl$0ePa=DkD`8bu=wunLM_YIbY4q!Td| z(BX)QF)aYwU=75{wP?|x)6tYkmbDp^>V}CkDVl_#D;6nU9&RO$@nF>=&}$T85VkLA zZlYcF9|S0IOj-j9-{-%dtA4h3Z-G6XWiDVagaC@)2*u?B3739FfQ59H1A|{)R9r^x zVRWvZ8#FPrHgc^_DwnGed2U4)i`~7kSMl&(G?19PMsIGG#P8)#zcO1`fgEcg~tf&ZAP`0zm+w2uLFOBps{9jx!*5s?>oEFxP&h1}PndDmLE?(X>Unjzw%mTq5(9R8fvsTfzY`a%LW8SH(eOmY4PH185YYv)M1q8 zMx8pAC!F5UZu?_D8pl&WcHy9GT(MYBo}1@`QNkCrO=oU?$?l7AyRO*+2A#k-OjlN> z{LaS4niW(>1u~-u+UdpZ+2eUyontFHZMI&6To4LgnY$YD7mOz+3E5GWtzCuP2}N*2 z1c0?lg;K6iDji=T9b!m8S>UpPMQqHSWLkyS&E^zDf3lDSD?KbKejHcUau!sZ+CGV#!2SW%g*`X z-t<5O$itZ%)3LbtEgjd)%>)Sc&$<$dqMP2fp$P6rz&r*vOfjDSA4e62J2lPM-YNmi zhpqK?I;)4{XQ*1r%LE@dfm}+CwAlhpF%4!nd&qpKqU092X~Q7Ah6{z>weXHN9qHR# z(oDp%nAfztxg?%*=$#!}p^7;kPNR|xJqr9}IuwXq`s0t_9Yt)T}vb&8l| z=apabtQjEN%us0W?5GWbnO4usNc6@oF?v+&Kt_s-mNb^T>sV!GV?guYGZ9WWcXZn@ z$gu-;EuAmXM!7A-Nq6Q1k_|{z<&AyMuqT3+k@Th?+X9yXSRJU1zF3&Y$@UX8O6;0I z4W&i}ln{j#vvDLCqcOQCwU%=ZRdhpZgtEhyY{Q48lO8$U>w^P=+>a1lMl^z=3Xa#A z1`*;4*;ROI7xy?vFqb0?8Bv+kUDVa;T|>1qiAqejR5U9z6IiQYhL0n;w8M>BA^lof^Uxs^2|#^{g-XGB)pOfODg9gXLVs1`T15 zq6SI^IW|XhNa|UR1t0HUJGJ2m$s*>8w_17v;nM`?80E|hpBa7bFtH^#tiwc;o|Wx- zuvldc1$#npJ`S7EBYUZ3+jS7eK8iB&*{qu) dRko)yK|*LTRT&7Sh8zFI+>uTcBrRvOlYseC%kuyL literal 0 HcmV?d00001 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..6e8eaa56f78953871b3f1dd0081f7719c1e4b4fe GIT binary patch literal 2734 zcmV;f3Q_eRiwFpK?;}hC19W9`bVF}$V_|euXk~6gZ*6dLWpi^bba-?CZCF8$+&B`v z^A+@IE>-=3J~TXDVBCXQuQ6@F*dt3yN!+GLElHI|e*H!yCAYm8z-U*MBr+Kp85yY$ zE(AM^eJ<1NBs|!cgm1gJT&x?@^%~_a+0H)pc1oKKDUNo@()l7`zg{PI_i;4A*BZp8 zY-6%>in3cD?Y!G%%|$NO;qjR2%s@`)IeHH!{aYKHgJUV|CdFo^~zV_9g>hvmFkD zH8z(8=oKjq_*=$3ylq|dKpEL31YCZ%!x|;KfPH3>`lwGxS&(5+V}FYSkxl!OiXEkz zvUU^h#EJ_pO?(Cm&d6MyT;(~l{CD+8EKXZ$CTr%zZ59O6d4P@N8xZL(QFA}!G}~T+ z2M|alb}m(xg<*ptspY63mDS-jzOB&n7UWxmLx3wOF>ArFt^t$9*iWE>$(Ks z7~AB>$yPrz1|El@6mixSfS)4&f3}@Ny1z`w7ydCV>#bB@*NRxd^Tn5%Lc}I-LbF3iYxZTbADo^6G(l{B)?BU&_TmNwOzj`BCm@rGqgVGT<~;P|7Y2eV zVI*f2l{Ev(((u8JzJTdU<=x&uzt<=m&U!H}%n9Yy>O8ia#b;nW0?i9fOdJRXPKCuK zUu=iJj9Lj2K^I`Gu{dT*kfk^lpa!P(MWalefDRimnB~Ek7H@=NAm$w)VB?}S-}$(S4JwVPu(09imV(rI?%En4FFM;$5sze14egjA*gnMJBjPh8v+nv zgGiJ{WyKnB)K`szC0h;%QLOHGMrFr^G}NOCZT2ID8AS@x4rF9&N{1{_Iz6Dc2^q(r zh$ci%pa{RV9#hZ;K@}ioNRNWUKPn$YUeRZEaEjQj3-=6~XrQ_TSICzuve|Px)kaX~ zVj>!C`|XCLPULBadhe$8V=v_uFn&a3GwZD=e0j3(b0Zj4C7QuYKvG6nfhWzr?Nbvc zQuE0ehEEXR=?~a{X8q)o(iil$PIazkr~IeresA=xq`)K?lM#Y>Q?Bq9@Y>%6dB$^b zly~T^O)xhi*(IDZDsZ#PBtDp<(7}V?XJ1coBAkzi5q!?tJ!605x=gO$p(JV~_Qc=K zThnbjL8qZT=*Jy``IJ<_cRR1xg{l1yKOpN-ThVze@<2gCpDq4|gLQ|hQVIqnx#|6b zecwu>VZOK8xL#*c_^?0o^$MDgD@p$xc^E2#7N$O;h$}~NjXH}JaaHzmIi=C)Z|Kli z1Wu6Ue6a%NuxqUR!W%Xq_hA2xL%qer0$wAKdoW)_I}hv*WWKo~H%5U4w0EEDDkt7i zq;KIjp-S`ON@?fedH|uwQCpDU?7Ab5$Sm!*0T=#L7L5 z`U;@$?znk!@S?Hl7AkL90;NqIiKAO|%d{6YWlK(jl#()8TU30T6=ptRB{v{4OYf_C z{>E`V9l5kgPv9vveCVMm*_k)Q!Rdwt@lhDRT42PqpwZNm09?>YOtxGuG}d)k(6@&eBvgzz!1GA|+RQ0~S$ z<%hC>#Sd7-!JHiUO(_H_eqQ72#kOwccxvjUsswa7Yved0gIUl+4&suEd%gfc6~P0q04>SDH0CwBs-$oE0WnfLZ@b zG4vxB%<9eDE9`f~e1Y;pN6XQw&fBf*(r#+r==3(GX-$)$bCSe}qBii338UGg&uGJ{ zazNsDjEri3qt-!zAD3dVT>GQ|YA0&0x@T31T&YRc&71|l_z~_S_@L6Xz0uUL20{Eh z<3VLVyy92ayvh$VkowoOe0RQ$Nbl@d{579Ej9qrx;2kODv{}%ZAqkh`je-^|_R9blQ!!6yEN zX>9;1KnnDrl7CM-tJ$$Kwb7$}5f|Sw!#j0K=D2Cw`v$&ksVd=jf6J=QCk?qICg#w$ zmZP0N_H92}hQ@r(qklJ4{gh(ERX)Qk0mVY)Yl-Iz0({wjro(TjUzq+8o|9GpvLuUR zR!uLMv|81(u5Y=)gB^c|mhS9bE16P&ZayZT6i&U z9)mCL>d_esD{+g`pJK>(_@Y1;LmV7rnM(>8YQ-E)3$upwxM>UIu!7i? oAv@043!5TLEBajU!_g^D0D1GPDEqvsBHHu+0Pi;d`Qj7+0K39QG5`Po literal 0 HcmV?d00001