HADOOP-17113. Adding ReadAhead Counters in ABFS (#2154)
Contributed by Mehakmeet Singh
This commit is contained in:
parent
d5b4766158
commit
48a7c5b6ba
|
@ -238,6 +238,9 @@ public class AbfsInputStream extends FSInputStream implements CanUnbuffer,
|
||||||
if (receivedBytes > 0) {
|
if (receivedBytes > 0) {
|
||||||
incrementReadOps();
|
incrementReadOps();
|
||||||
LOG.debug("Received data from read ahead, not doing remote read");
|
LOG.debug("Received data from read ahead, not doing remote read");
|
||||||
|
if (streamStatistics != null) {
|
||||||
|
streamStatistics.readAheadBytesRead(receivedBytes);
|
||||||
|
}
|
||||||
return receivedBytes;
|
return receivedBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,6 +295,9 @@ public class AbfsInputStream extends FSInputStream implements CanUnbuffer,
|
||||||
throw new IOException(ex);
|
throw new IOException(ex);
|
||||||
}
|
}
|
||||||
long bytesRead = op.getResult().getBytesReceived();
|
long bytesRead = op.getResult().getBytesReceived();
|
||||||
|
if (streamStatistics != null) {
|
||||||
|
streamStatistics.remoteBytesRead(bytesRead);
|
||||||
|
}
|
||||||
if (bytesRead > Integer.MAX_VALUE) {
|
if (bytesRead > Integer.MAX_VALUE) {
|
||||||
throw new IOException("Unexpected Content-Length");
|
throw new IOException("Unexpected Content-Length");
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,6 +84,18 @@ public interface AbfsInputStreamStatistics {
|
||||||
*/
|
*/
|
||||||
void remoteReadOperation();
|
void remoteReadOperation();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records the bytes read from readAhead buffer.
|
||||||
|
* @param bytes the bytes to be incremented.
|
||||||
|
*/
|
||||||
|
void readAheadBytesRead(long bytes);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records bytes read remotely after nothing from readAheadBuffer was read.
|
||||||
|
* @param bytes the bytes to be incremented.
|
||||||
|
*/
|
||||||
|
void remoteBytesRead(long bytes);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes the string of all the AbfsInputStream statistics.
|
* Makes the string of all the AbfsInputStream statistics.
|
||||||
* @return the string with all the statistics.
|
* @return the string with all the statistics.
|
||||||
|
|
|
@ -33,6 +33,8 @@ public class AbfsInputStreamStatisticsImpl
|
||||||
private long readOperations;
|
private long readOperations;
|
||||||
private long bytesReadFromBuffer;
|
private long bytesReadFromBuffer;
|
||||||
private long remoteReadOperations;
|
private long remoteReadOperations;
|
||||||
|
private long readAheadBytesRead;
|
||||||
|
private long remoteBytesRead;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Seek backwards, incrementing the seek and backward seek counters.
|
* Seek backwards, incrementing the seek and backward seek counters.
|
||||||
|
@ -128,6 +130,30 @@ public class AbfsInputStreamStatisticsImpl
|
||||||
readOperations++;
|
readOperations++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total bytes read from readAhead buffer during a read operation.
|
||||||
|
*
|
||||||
|
* @param bytes the bytes to be incremented.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void readAheadBytesRead(long bytes) {
|
||||||
|
if (bytes > 0) {
|
||||||
|
readAheadBytesRead += bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total bytes read remotely after nothing was read from readAhead buffer.
|
||||||
|
*
|
||||||
|
* @param bytes the bytes to be incremented.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void remoteBytesRead(long bytes) {
|
||||||
|
if (bytes > 0) {
|
||||||
|
remoteBytesRead += bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*
|
*
|
||||||
|
@ -178,6 +204,14 @@ public class AbfsInputStreamStatisticsImpl
|
||||||
return remoteReadOperations;
|
return remoteReadOperations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getReadAheadBytesRead() {
|
||||||
|
return readAheadBytesRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getRemoteBytesRead() {
|
||||||
|
return remoteBytesRead;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* String operator describes all the current statistics.
|
* String operator describes all the current statistics.
|
||||||
* <b>Important: there are no guarantees as to the stability
|
* <b>Important: there are no guarantees as to the stability
|
||||||
|
@ -199,6 +233,8 @@ public class AbfsInputStreamStatisticsImpl
|
||||||
sb.append(", ReadOperations=").append(readOperations);
|
sb.append(", ReadOperations=").append(readOperations);
|
||||||
sb.append(", bytesReadFromBuffer=").append(bytesReadFromBuffer);
|
sb.append(", bytesReadFromBuffer=").append(bytesReadFromBuffer);
|
||||||
sb.append(", remoteReadOperations=").append(remoteReadOperations);
|
sb.append(", remoteReadOperations=").append(remoteReadOperations);
|
||||||
|
sb.append(", readAheadBytesRead=").append(readAheadBytesRead);
|
||||||
|
sb.append(", remoteBytesRead=").append(remoteBytesRead);
|
||||||
sb.append('}');
|
sb.append('}');
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.fs.azurebfs;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.assertj.core.api.Assertions;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -39,6 +40,10 @@ public class ITestAbfsInputStreamStatistics
|
||||||
LoggerFactory.getLogger(ITestAbfsInputStreamStatistics.class);
|
LoggerFactory.getLogger(ITestAbfsInputStreamStatistics.class);
|
||||||
private static final int ONE_MB = 1024 * 1024;
|
private static final int ONE_MB = 1024 * 1024;
|
||||||
private static final int ONE_KB = 1024;
|
private static final int ONE_KB = 1024;
|
||||||
|
private static final int CUSTOM_BLOCK_BUFFER_SIZE = 4 * 1024;
|
||||||
|
private static final int CUSTOM_READ_AHEAD_BUFFER_SIZE = 8 * CUSTOM_BLOCK_BUFFER_SIZE;
|
||||||
|
private static final int THREAD_SLEEP_10_SECONDS = 10;
|
||||||
|
private static final int TIMEOUT_30_SECONDS = 30000;
|
||||||
private byte[] defBuffer = new byte[ONE_MB];
|
private byte[] defBuffer = new byte[ONE_MB];
|
||||||
|
|
||||||
public ITestAbfsInputStreamStatistics() throws Exception {
|
public ITestAbfsInputStreamStatistics() throws Exception {
|
||||||
|
@ -75,6 +80,8 @@ public class ITestAbfsInputStreamStatistics
|
||||||
checkInitValue(stats.getReadOperations(), "readOps");
|
checkInitValue(stats.getReadOperations(), "readOps");
|
||||||
checkInitValue(stats.getBytesReadFromBuffer(), "bytesReadFromBuffer");
|
checkInitValue(stats.getBytesReadFromBuffer(), "bytesReadFromBuffer");
|
||||||
checkInitValue(stats.getRemoteReadOperations(), "remoteReadOps");
|
checkInitValue(stats.getRemoteReadOperations(), "remoteReadOps");
|
||||||
|
checkInitValue(stats.getReadAheadBytesRead(), "readAheadBytesRead");
|
||||||
|
checkInitValue(stats.getRemoteBytesRead(), "readAheadRemoteBytesRead");
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
IOUtils.cleanupWithLogger(LOG, outputStream, inputStream);
|
IOUtils.cleanupWithLogger(LOG, outputStream, inputStream);
|
||||||
|
@ -285,6 +292,94 @@ public class ITestAbfsInputStreamStatistics
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testing readAhead counters in AbfsInputStream with 30 seconds timeout.
|
||||||
|
*/
|
||||||
|
@Test(timeout = TIMEOUT_30_SECONDS)
|
||||||
|
public void testReadAheadCounters() throws IOException, InterruptedException {
|
||||||
|
describe("Test to check correct values for readAhead counters in "
|
||||||
|
+ "AbfsInputStream");
|
||||||
|
|
||||||
|
AzureBlobFileSystem fs = getFileSystem();
|
||||||
|
AzureBlobFileSystemStore abfss = fs.getAbfsStore();
|
||||||
|
Path readAheadCountersPath = path(getMethodName());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setting the block size for readAhead as 4KB.
|
||||||
|
*/
|
||||||
|
abfss.getAbfsConfiguration().setReadBufferSize(CUSTOM_BLOCK_BUFFER_SIZE);
|
||||||
|
|
||||||
|
AbfsOutputStream out = null;
|
||||||
|
AbfsInputStream in = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Creating a file of 1MB size.
|
||||||
|
*/
|
||||||
|
out = createAbfsOutputStreamWithFlushEnabled(fs, readAheadCountersPath);
|
||||||
|
out.write(defBuffer);
|
||||||
|
out.close();
|
||||||
|
|
||||||
|
in = abfss.openFileForRead(readAheadCountersPath, fs.getFsStatistics());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reading 1KB after each i * KB positions. Hence the reads are from 0
|
||||||
|
* to 1KB, 1KB to 2KB, and so on.. for 5 operations.
|
||||||
|
*/
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
in.seek(ONE_KB * i);
|
||||||
|
in.read(defBuffer, ONE_KB * i, ONE_KB);
|
||||||
|
}
|
||||||
|
AbfsInputStreamStatisticsImpl stats =
|
||||||
|
(AbfsInputStreamStatisticsImpl) in.getStreamStatistics();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since, readAhead is done in background threads. Sometimes, the
|
||||||
|
* threads aren't finished in the background and could result in
|
||||||
|
* inaccurate results. So, we wait till we have the accurate values
|
||||||
|
* with a limit of 30 seconds as that's when the test times out.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
while (stats.getRemoteBytesRead() < CUSTOM_READ_AHEAD_BUFFER_SIZE
|
||||||
|
|| stats.getReadAheadBytesRead() < CUSTOM_BLOCK_BUFFER_SIZE) {
|
||||||
|
Thread.sleep(THREAD_SLEEP_10_SECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Verifying the counter values of readAheadBytesRead and remoteBytesRead.
|
||||||
|
*
|
||||||
|
* readAheadBytesRead : Since, we read 1KBs 5 times, that means we go
|
||||||
|
* from 0 to 5KB in the file. The bufferSize is set to 4KB, and since
|
||||||
|
* we have 8 blocks of readAhead buffer. We would have 8 blocks of 4KB
|
||||||
|
* buffer. Our read is till 5KB, hence readAhead would ideally read 2
|
||||||
|
* blocks of 4KB which is equal to 8KB. But, sometimes to get more than
|
||||||
|
* one block from readAhead buffer we might have to wait for background
|
||||||
|
* threads to fill the buffer and hence we might do remote read which
|
||||||
|
* would be faster. Therefore, readAheadBytesRead would be equal to or
|
||||||
|
* greater than 4KB.
|
||||||
|
*
|
||||||
|
* remoteBytesRead : Since, the bufferSize is set to 4KB and the number
|
||||||
|
* of blocks or readAheadQueueDepth is equal to 8. We would read 8 * 4
|
||||||
|
* KB buffer on the first read, which is equal to 32KB. But, if we are not
|
||||||
|
* able to read some bytes that were in the buffer after doing
|
||||||
|
* readAhead, we might use remote read again. Thus, the bytes read
|
||||||
|
* remotely could also be greater than 32Kb.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
Assertions.assertThat(stats.getReadAheadBytesRead()).describedAs(
|
||||||
|
"Mismatch in readAheadBytesRead counter value")
|
||||||
|
.isGreaterThanOrEqualTo(CUSTOM_BLOCK_BUFFER_SIZE);
|
||||||
|
|
||||||
|
Assertions.assertThat(stats.getRemoteBytesRead()).describedAs(
|
||||||
|
"Mismatch in remoteBytesRead counter value")
|
||||||
|
.isGreaterThanOrEqualTo(CUSTOM_READ_AHEAD_BUFFER_SIZE);
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanupWithLogger(LOG, out, in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method to assert the initial values of the statistics.
|
* Method to assert the initial values of the statistics.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue