MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte characters in the job name. Contributed by Kousuke Saruta.
This commit is contained in:
parent
8ee060311c
commit
df99ea8a92
|
@ -709,6 +709,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
MAPREDUCE-6563. Streaming documentation contains a stray '%' character.
|
MAPREDUCE-6563. Streaming documentation contains a stray '%' character.
|
||||||
(cnauroth)
|
(cnauroth)
|
||||||
|
|
||||||
|
MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte
|
||||||
|
characters in the job name. (Kousuke Saruta via aajisaka)
|
||||||
|
|
||||||
Release 2.7.3 - UNRELEASED
|
Release 2.7.3 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.URLDecoder;
|
import java.net.URLDecoder;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -57,7 +58,8 @@ public class FileNameIndexUtils {
|
||||||
* @param indexInfo the index info.
|
* @param indexInfo the index info.
|
||||||
* @return the done job history filename.
|
* @return the done job history filename.
|
||||||
*/
|
*/
|
||||||
public static String getDoneFileName(JobIndexInfo indexInfo) throws IOException {
|
public static String getDoneFileName(JobIndexInfo indexInfo)
|
||||||
|
throws IOException {
|
||||||
return getDoneFileName(indexInfo,
|
return getDoneFileName(indexInfo,
|
||||||
JHAdminConfig.DEFAULT_MR_HS_JOBNAME_LIMIT);
|
JHAdminConfig.DEFAULT_MR_HS_JOBNAME_LIMIT);
|
||||||
}
|
}
|
||||||
|
@ -66,47 +68,56 @@ public class FileNameIndexUtils {
|
||||||
int jobNameLimit) throws IOException {
|
int jobNameLimit) throws IOException {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
//JobId
|
//JobId
|
||||||
sb.append(escapeDelimiters(TypeConverter.fromYarn(indexInfo.getJobId()).toString()));
|
sb.append(encodeJobHistoryFileName(escapeDelimiters(
|
||||||
|
TypeConverter.fromYarn(indexInfo.getJobId()).toString())));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//SubmitTime
|
//SubmitTime
|
||||||
sb.append(indexInfo.getSubmitTime());
|
sb.append(encodeJobHistoryFileName(String.valueOf(
|
||||||
|
indexInfo.getSubmitTime())));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//UserName
|
//UserName
|
||||||
sb.append(escapeDelimiters(getUserName(indexInfo)));
|
sb.append(encodeJobHistoryFileName(escapeDelimiters(
|
||||||
|
getUserName(indexInfo))));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//JobName
|
//JobName
|
||||||
sb.append(escapeDelimiters(trimJobName(
|
sb.append(trimURLEncodedString(encodeJobHistoryFileName(escapeDelimiters(
|
||||||
getJobName(indexInfo), jobNameLimit)));
|
getJobName(indexInfo))), jobNameLimit));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//FinishTime
|
//FinishTime
|
||||||
sb.append(indexInfo.getFinishTime());
|
sb.append(encodeJobHistoryFileName(
|
||||||
|
String.valueOf(indexInfo.getFinishTime())));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//NumMaps
|
//NumMaps
|
||||||
sb.append(indexInfo.getNumMaps());
|
sb.append(encodeJobHistoryFileName(
|
||||||
|
String.valueOf(indexInfo.getNumMaps())));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//NumReduces
|
//NumReduces
|
||||||
sb.append(indexInfo.getNumReduces());
|
sb.append(encodeJobHistoryFileName(
|
||||||
|
String.valueOf(indexInfo.getNumReduces())));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//JobStatus
|
//JobStatus
|
||||||
sb.append(indexInfo.getJobStatus());
|
sb.append(encodeJobHistoryFileName(indexInfo.getJobStatus()));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//QueueName
|
//QueueName
|
||||||
sb.append(escapeDelimiters(getQueueName(indexInfo)));
|
sb.append(escapeDelimiters(encodeJobHistoryFileName(
|
||||||
|
getQueueName(indexInfo))));
|
||||||
sb.append(DELIMITER);
|
sb.append(DELIMITER);
|
||||||
|
|
||||||
//JobStartTime
|
//JobStartTime
|
||||||
sb.append(indexInfo.getJobStartTime());
|
sb.append(encodeJobHistoryFileName(
|
||||||
|
String.valueOf(indexInfo.getJobStartTime())));
|
||||||
|
|
||||||
sb.append(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION);
|
sb.append(encodeJobHistoryFileName(
|
||||||
return encodeJobHistoryFileName(sb.toString());
|
JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
|
||||||
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -116,21 +127,24 @@ public class FileNameIndexUtils {
|
||||||
* @param jhFileName the job history filename.
|
* @param jhFileName the job history filename.
|
||||||
* @return a JobIndexInfo object built from the filename.
|
* @return a JobIndexInfo object built from the filename.
|
||||||
*/
|
*/
|
||||||
public static JobIndexInfo getIndexInfo(String jhFileName) throws IOException {
|
public static JobIndexInfo getIndexInfo(String jhFileName)
|
||||||
String fileName = jhFileName.substring(0, jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
|
throws IOException {
|
||||||
|
String fileName = jhFileName.substring(0,
|
||||||
|
jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
|
||||||
JobIndexInfo indexInfo = new JobIndexInfo();
|
JobIndexInfo indexInfo = new JobIndexInfo();
|
||||||
|
|
||||||
String[] jobDetails = fileName.split(DELIMITER);
|
String[] jobDetails = fileName.split(DELIMITER);
|
||||||
|
|
||||||
JobID oldJobId = JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
|
JobID oldJobId =
|
||||||
|
JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
|
||||||
JobId jobId = TypeConverter.toYarn(oldJobId);
|
JobId jobId = TypeConverter.toYarn(oldJobId);
|
||||||
indexInfo.setJobId(jobId);
|
indexInfo.setJobId(jobId);
|
||||||
|
|
||||||
// Do not fail if there are some minor parse errors
|
// Do not fail if there are some minor parse errors
|
||||||
try {
|
try {
|
||||||
try {
|
try {
|
||||||
indexInfo.setSubmitTime(
|
indexInfo.setSubmitTime(Long.parseLong(
|
||||||
Long.parseLong(decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
|
decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
LOG.warn("Unable to parse submit time from job history file "
|
LOG.warn("Unable to parse submit time from job history file "
|
||||||
+ jhFileName + " : " + e);
|
+ jhFileName + " : " + e);
|
||||||
|
@ -143,24 +157,24 @@ public class FileNameIndexUtils {
|
||||||
decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX]));
|
decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX]));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
indexInfo.setFinishTime(
|
indexInfo.setFinishTime(Long.parseLong(
|
||||||
Long.parseLong(decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
|
decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
LOG.warn("Unable to parse finish time from job history file "
|
LOG.warn("Unable to parse finish time from job history file "
|
||||||
+ jhFileName + " : " + e);
|
+ jhFileName + " : " + e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
indexInfo.setNumMaps(
|
indexInfo.setNumMaps(Integer.parseInt(
|
||||||
Integer.parseInt(decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
|
decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
LOG.warn("Unable to parse num maps from job history file "
|
LOG.warn("Unable to parse num maps from job history file "
|
||||||
+ jhFileName + " : " + e);
|
+ jhFileName + " : " + e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
indexInfo.setNumReduces(
|
indexInfo.setNumReduces(Integer.parseInt(
|
||||||
Integer.parseInt(decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
|
decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
LOG.warn("Unable to parse num reduces from job history file "
|
LOG.warn("Unable to parse num reduces from job history file "
|
||||||
+ jhFileName + " : " + e);
|
+ jhFileName + " : " + e);
|
||||||
|
@ -176,8 +190,8 @@ public class FileNameIndexUtils {
|
||||||
if (jobDetails.length <= JOB_START_TIME_INDEX) {
|
if (jobDetails.length <= JOB_START_TIME_INDEX) {
|
||||||
indexInfo.setJobStartTime(indexInfo.getSubmitTime());
|
indexInfo.setJobStartTime(indexInfo.getSubmitTime());
|
||||||
} else {
|
} else {
|
||||||
indexInfo.setJobStartTime(
|
indexInfo.setJobStartTime(Long.parseLong(
|
||||||
Long.parseLong(decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX])));
|
decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX])));
|
||||||
}
|
}
|
||||||
} catch (NumberFormatException e){
|
} catch (NumberFormatException e){
|
||||||
LOG.warn("Unable to parse start time from job history file "
|
LOG.warn("Unable to parse start time from job history file "
|
||||||
|
@ -208,7 +222,8 @@ public class FileNameIndexUtils {
|
||||||
if (logFileName.contains(DELIMITER_ESCAPE)) {
|
if (logFileName.contains(DELIMITER_ESCAPE)) {
|
||||||
replacementDelimiterEscape = nonOccursString(logFileName);
|
replacementDelimiterEscape = nonOccursString(logFileName);
|
||||||
|
|
||||||
logFileName = logFileName.replaceAll(DELIMITER_ESCAPE, replacementDelimiterEscape);
|
logFileName = logFileName.replaceAll(
|
||||||
|
DELIMITER_ESCAPE, replacementDelimiterEscape);
|
||||||
}
|
}
|
||||||
|
|
||||||
String encodedFileName = null;
|
String encodedFileName = null;
|
||||||
|
@ -223,7 +238,8 @@ public class FileNameIndexUtils {
|
||||||
|
|
||||||
// Restore protected escape delimiters after encoding
|
// Restore protected escape delimiters after encoding
|
||||||
if (replacementDelimiterEscape != null) {
|
if (replacementDelimiterEscape != null) {
|
||||||
encodedFileName = encodedFileName.replaceAll(replacementDelimiterEscape, DELIMITER_ESCAPE);
|
encodedFileName = encodedFileName.replaceAll(
|
||||||
|
replacementDelimiterEscape, DELIMITER_ESCAPE);
|
||||||
}
|
}
|
||||||
|
|
||||||
return encodedFileName;
|
return encodedFileName;
|
||||||
|
@ -289,12 +305,59 @@ public class FileNameIndexUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Trims the job-name if required
|
* Trims the url-encoded string if required
|
||||||
*/
|
*/
|
||||||
private static String trimJobName(String jobName, int jobNameLimit) {
|
private static String trimURLEncodedString(
|
||||||
if (jobName.length() > jobNameLimit) {
|
String encodedString, int limitLength) {
|
||||||
jobName = jobName.substring(0, jobNameLimit);
|
assert(limitLength >= 0) : "limitLength should be positive integer";
|
||||||
|
|
||||||
|
if (encodedString.length() < limitLength) {
|
||||||
|
return encodedString;
|
||||||
}
|
}
|
||||||
return jobName;
|
|
||||||
|
int index = 0;
|
||||||
|
int increase = 0;
|
||||||
|
byte[] strBytes = encodedString.getBytes(UTF_8);
|
||||||
|
|
||||||
|
// calculate effective character length based on UTF-8 specification.
|
||||||
|
// The size of a character coded in UTF-8 should be 4-byte at most.
|
||||||
|
// See RFC3629
|
||||||
|
while (true) {
|
||||||
|
byte b = strBytes[index];
|
||||||
|
if (b == '%') {
|
||||||
|
byte minuend1 = strBytes[index + 1];
|
||||||
|
byte subtrahend1 = (byte)(Character.isDigit(
|
||||||
|
minuend1) ? '0' : 'A' - 10);
|
||||||
|
byte minuend2 = strBytes[index + 2];
|
||||||
|
byte subtrahend2 = (byte)(Character.isDigit(
|
||||||
|
minuend2) ? '0' : 'A' - 10);
|
||||||
|
int initialHex =
|
||||||
|
((Character.toUpperCase(minuend1) - subtrahend1) << 4) +
|
||||||
|
(Character.toUpperCase(minuend2) - subtrahend2);
|
||||||
|
|
||||||
|
if (0x00 <= initialHex && initialHex <= 0x7F) {
|
||||||
|
// For 1-byte UTF-8 characters
|
||||||
|
increase = 3;
|
||||||
|
} else if (0xC2 <= initialHex && initialHex <= 0xDF) {
|
||||||
|
// For 2-byte UTF-8 characters
|
||||||
|
increase = 6;
|
||||||
|
} else if (0xE0 <= initialHex && initialHex <= 0xEF) {
|
||||||
|
// For 3-byte UTF-8 characters
|
||||||
|
increase = 9;
|
||||||
|
} else {
|
||||||
|
// For 4-byte UTF-8 characters
|
||||||
|
increase = 12;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
increase = 1;
|
||||||
|
}
|
||||||
|
if (index + increase > limitLength) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
index += increase;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return encodedString.substring(0, index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.mapreduce.v2.jobhistory;
|
package org.apache.hadoop.mapreduce.v2.jobhistory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
import org.apache.hadoop.mapreduce.JobID;
|
import org.apache.hadoop.mapreduce.JobID;
|
||||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||||
|
@ -30,14 +31,14 @@ import org.junit.Test;
|
||||||
public class TestFileNameIndexUtils {
|
public class TestFileNameIndexUtils {
|
||||||
|
|
||||||
private static final String OLD_JOB_HISTORY_FILE_FORMATTER = "%s"
|
private static final String OLD_JOB_HISTORY_FILE_FORMATTER = "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
|
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
|
||||||
|
|
||||||
private static final String OLD_FORMAT_BEFORE_ADD_START_TIME = "%s"
|
private static final String OLD_FORMAT_BEFORE_ADD_START_TIME = "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
|
@ -51,29 +52,29 @@ public class TestFileNameIndexUtils {
|
||||||
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
|
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
|
||||||
|
|
||||||
private static final String JOB_HISTORY_FILE_FORMATTER = "%s"
|
private static final String JOB_HISTORY_FILE_FORMATTER = "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ FileNameIndexUtils.DELIMITER + "%s"
|
+ FileNameIndexUtils.DELIMITER + "%s"
|
||||||
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
|
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
|
||||||
|
|
||||||
private static final String JOB_ID = "job_1317928501754_0001";
|
private static final String JOB_ID = "job_1317928501754_0001";
|
||||||
private static final String SUBMIT_TIME = "1317928742025";
|
private static final String SUBMIT_TIME = "1317928742025";
|
||||||
private static final String USER_NAME = "username";
|
private static final String USER_NAME = "username";
|
||||||
private static final String USER_NAME_WITH_DELIMITER = "user"
|
private static final String USER_NAME_WITH_DELIMITER = "user"
|
||||||
+ FileNameIndexUtils.DELIMITER + "name";
|
+ FileNameIndexUtils.DELIMITER + "name";
|
||||||
private static final String USER_NAME_WITH_DELIMITER_ESCAPE = "user"
|
private static final String USER_NAME_WITH_DELIMITER_ESCAPE = "user"
|
||||||
+ FileNameIndexUtils.DELIMITER_ESCAPE + "name";
|
+ FileNameIndexUtils.DELIMITER_ESCAPE + "name";
|
||||||
private static final String JOB_NAME = "mapreduce";
|
private static final String JOB_NAME = "mapreduce";
|
||||||
private static final String JOB_NAME_WITH_DELIMITER = "map"
|
private static final String JOB_NAME_WITH_DELIMITER = "map"
|
||||||
+ FileNameIndexUtils.DELIMITER + "reduce";
|
+ FileNameIndexUtils.DELIMITER + "reduce";
|
||||||
private static final String JOB_NAME_WITH_DELIMITER_ESCAPE = "map"
|
private static final String JOB_NAME_WITH_DELIMITER_ESCAPE = "map"
|
||||||
+ FileNameIndexUtils.DELIMITER_ESCAPE + "reduce";
|
+ FileNameIndexUtils.DELIMITER_ESCAPE + "reduce";
|
||||||
private static final String FINISH_TIME = "1317928754958";
|
private static final String FINISH_TIME = "1317928754958";
|
||||||
private static final String NUM_MAPS = "1";
|
private static final String NUM_MAPS = "1";
|
||||||
private static final String NUM_REDUCES = "1";
|
private static final String NUM_REDUCES = "1";
|
||||||
|
@ -123,7 +124,7 @@ public class TestFileNameIndexUtils {
|
||||||
Assert.assertEquals("Queue name different after encoding and decoding",
|
Assert.assertEquals("Queue name different after encoding and decoding",
|
||||||
info.getQueueName(), parsedInfo.getQueueName());
|
info.getQueueName(), parsedInfo.getQueueName());
|
||||||
Assert.assertEquals("Job start time different after encoding and decoding",
|
Assert.assertEquals("Job start time different after encoding and decoding",
|
||||||
info.getJobStartTime(), parsedInfo.getJobStartTime());
|
info.getJobStartTime(), parsedInfo.getJobStartTime());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -173,6 +174,158 @@ public class TestFileNameIndexUtils {
|
||||||
parsedInfo.getJobName());
|
parsedInfo.getJobName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verify the name of jobhistory file is not greater than 255 bytes
|
||||||
|
* even if there are some multibyte characters in the job name.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testJobNameWithMultibyteChars() throws IOException {
|
||||||
|
JobIndexInfo info = new JobIndexInfo();
|
||||||
|
JobID oldJobId = JobID.forName(JOB_ID);
|
||||||
|
JobId jobId = TypeConverter.toYarn(oldJobId);
|
||||||
|
info.setJobId(jobId);
|
||||||
|
info.setSubmitTime(Long.parseLong(SUBMIT_TIME));
|
||||||
|
info.setUser(USER_NAME);
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
info.setFinishTime(Long.parseLong(FINISH_TIME));
|
||||||
|
info.setNumMaps(Integer.parseInt(NUM_MAPS));
|
||||||
|
info.setNumReduces(Integer.parseInt(NUM_REDUCES));
|
||||||
|
info.setJobStatus(JOB_STATUS);
|
||||||
|
info.setQueueName(QUEUE_NAME);
|
||||||
|
info.setJobStartTime(Long.parseLong(JOB_START_TIME));
|
||||||
|
|
||||||
|
// Test for 1 byte UTF-8 character
|
||||||
|
// which is encoded into 1 x 3 = 3 characters by URL encode.
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
sb.append('%');
|
||||||
|
}
|
||||||
|
String longJobName = sb.toString();
|
||||||
|
info.setJobName(longJobName);
|
||||||
|
|
||||||
|
String jobHistoryFile =
|
||||||
|
FileNameIndexUtils.getDoneFileName(info, 50);
|
||||||
|
|
||||||
|
Assert.assertTrue(jobHistoryFile.length() <= 255);
|
||||||
|
String trimedJobName = jobHistoryFile.split(
|
||||||
|
FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
|
||||||
|
|
||||||
|
// 3 x 16 < 50 < 3 x 17 so the length of trimedJobName should be 48
|
||||||
|
Assert.assertEquals(48, trimedJobName.getBytes(UTF_8).length);
|
||||||
|
|
||||||
|
// validate the trimmed job name by testing reversibility
|
||||||
|
byte[] trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
|
||||||
|
String reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
|
||||||
|
Assert.assertArrayEquals(trimedJobNameInByte,
|
||||||
|
reEncodedTrimedJobName.getBytes(UTF_8));
|
||||||
|
sb.setLength(0);
|
||||||
|
|
||||||
|
// Test for 2 bytes UTF-8 character
|
||||||
|
// which is encoded into 2 x 3 = 6 characters by URL encode.
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
sb.append('\u03A9'); // large omega
|
||||||
|
}
|
||||||
|
longJobName = sb.toString();
|
||||||
|
info.setJobName(longJobName);
|
||||||
|
|
||||||
|
jobHistoryFile =
|
||||||
|
FileNameIndexUtils.getDoneFileName(info, 27);
|
||||||
|
|
||||||
|
Assert.assertTrue(jobHistoryFile.length() <= 255);
|
||||||
|
trimedJobName = jobHistoryFile.split(
|
||||||
|
FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
|
||||||
|
|
||||||
|
// 6 x 4 < 27 < 6 x 5 so the length of trimedJobName should be 24
|
||||||
|
Assert.assertEquals(24, trimedJobName.getBytes(UTF_8).length);
|
||||||
|
|
||||||
|
// validate the trimmed job name by testing reversibility
|
||||||
|
trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
|
||||||
|
reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
|
||||||
|
Assert.assertArrayEquals(trimedJobNameInByte,
|
||||||
|
reEncodedTrimedJobName.getBytes(UTF_8));
|
||||||
|
sb.setLength(0);
|
||||||
|
|
||||||
|
// Test for 3 bytes UTF-8 character
|
||||||
|
// which is encoded into 3 x 3 = 9 characters by URL encode.
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
sb.append('\u2192'); // rightwards arrow
|
||||||
|
}
|
||||||
|
longJobName = sb.toString();
|
||||||
|
info.setJobName(longJobName);
|
||||||
|
|
||||||
|
jobHistoryFile =
|
||||||
|
FileNameIndexUtils.getDoneFileName(info, 40);
|
||||||
|
|
||||||
|
Assert.assertTrue(jobHistoryFile.length() <= 255);
|
||||||
|
trimedJobName = jobHistoryFile.split(
|
||||||
|
FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
|
||||||
|
|
||||||
|
// 9 x 4 < 40 < 9 x 5 so the length of trimedJobName should be 36
|
||||||
|
Assert.assertEquals(36, trimedJobName.getBytes(UTF_8).length);
|
||||||
|
|
||||||
|
// validate the trimmed job name by testing reversibility
|
||||||
|
trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
|
||||||
|
reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
|
||||||
|
Assert.assertArrayEquals(trimedJobNameInByte,
|
||||||
|
reEncodedTrimedJobName.getBytes(UTF_8));
|
||||||
|
sb.setLength(0);
|
||||||
|
|
||||||
|
// Test for 4 bytes UTF-8 character
|
||||||
|
// which is encoded into 4 x 3 = 12 characters by URL encode.
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
sb.append("\uD867\uDE3D"); // Mugil cephalus in Kanji.
|
||||||
|
}
|
||||||
|
longJobName = sb.toString();
|
||||||
|
info.setJobName(longJobName);
|
||||||
|
|
||||||
|
jobHistoryFile =
|
||||||
|
FileNameIndexUtils.getDoneFileName(info, 49);
|
||||||
|
|
||||||
|
Assert.assertTrue(jobHistoryFile.length() <= 255);
|
||||||
|
trimedJobName = jobHistoryFile.split(
|
||||||
|
FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
|
||||||
|
|
||||||
|
// 12 x 4 < 49 < 12 x 5 so the length of trimedJobName should be 48
|
||||||
|
Assert.assertEquals(48, trimedJobName.getBytes(UTF_8).length);
|
||||||
|
|
||||||
|
// validate the trimmed job name by testing reversibility
|
||||||
|
trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
|
||||||
|
reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
|
||||||
|
Assert.assertArrayEquals(trimedJobNameInByte,
|
||||||
|
reEncodedTrimedJobName.getBytes(UTF_8));
|
||||||
|
sb.setLength(0);
|
||||||
|
|
||||||
|
// Test for the combination of 1 to 4 bytes UTF-8 characters
|
||||||
|
sb.append('\u732B') // cat in Kanji (encoded into 3 bytes x 3 characters)
|
||||||
|
.append("[") // (encoded into 1 byte x 3 characters)
|
||||||
|
.append('\u03BB') // small lambda (encoded into 2 bytes x 3 characters)
|
||||||
|
.append('/') // (encoded into 1 byte x 3 characters)
|
||||||
|
.append('A') // not url-encoded (1 byte x 1 character)
|
||||||
|
.append("\ud867\ude49") // flying fish in
|
||||||
|
// Kanji (encoded into 4 bytes x 3 characters)
|
||||||
|
.append('\u72AC'); // dog in Kanji (encoded into 3 bytes x 3 characters)
|
||||||
|
|
||||||
|
longJobName = sb.toString();
|
||||||
|
info.setJobName(longJobName);
|
||||||
|
|
||||||
|
jobHistoryFile =
|
||||||
|
FileNameIndexUtils.getDoneFileName(info, 23);
|
||||||
|
|
||||||
|
Assert.assertTrue(jobHistoryFile.length() <= 255);
|
||||||
|
trimedJobName = jobHistoryFile.split(
|
||||||
|
FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
|
||||||
|
|
||||||
|
// total size of the first 5 characters = 22
|
||||||
|
// 23 < total size of the first 6 characters
|
||||||
|
Assert.assertEquals(22, trimedJobName.getBytes(UTF_8).length);
|
||||||
|
|
||||||
|
// validate the trimmed job name by testing reversibility
|
||||||
|
trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
|
||||||
|
reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
|
||||||
|
Assert.assertArrayEquals(trimedJobNameInByte,
|
||||||
|
reEncodedTrimedJobName.getBytes(UTF_8));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUserNamePercentDecoding() throws IOException {
|
public void testUserNamePercentDecoding() throws IOException {
|
||||||
String jobHistoryFile = String.format(JOB_HISTORY_FILE_FORMATTER,
|
String jobHistoryFile = String.format(JOB_HISTORY_FILE_FORMATTER,
|
||||||
|
|
Loading…
Reference in New Issue