diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 292736e9ecf..b615614150b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -1179,14 +1179,16 @@ public long skip(final long n) throws IOException { checkNotClosed(); streamStatistics.skipOperationStarted(); - long targetPos = pos + n; + // target pos should be less than EOF + long targetPos = Math.min(contentLength, pos + n); + long bytesToSkip = targetPos - pos; long skipped; try { lazySeek(targetPos, 1); - skipped = n; + skipped = bytesToSkip; } catch (EOFException e) { - LOG.debug("Lazy seek failed, attempting default skip"); + LOG.debug("Lazy seek failed, attempting default skip", e); skipped = wrappedStream.skip(n); if (skipped > 0) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java index 4935797b809..93544f5a1d3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java @@ -493,11 +493,13 @@ public void testSkip() throws Throwable { in = openTestFile(S3AInputPolicy.Random, DEFAULT_READAHEAD_RANGE); - in.skip(_4K); + assertEquals("bytes skipped", _4K, in.skip(_4K)); + // Skip within read ahead range, will not make a new get request - in.skip(_8K); + assertEquals("bytes skipped", _8K, in.skip(_8K)); + // Skip outside read ahead range, should make a new get request - in.skip(_256K); + assertEquals("bytes skipped", _256K, in.skip(_256K)); IOStatistics ioStatistics = streamStatistics.getIOStatistics();