delete version, interval, dataSource directories on segment deletion if possible, so that they are not left behind and consume ns quota on HDFS

This commit is contained in:
Himanshu Gupta 2015-08-23 22:06:12 -05:00
parent 0c11097b4f
commit c2bebfe39e
1 changed files with 29 additions and 2 deletions

View File

@ -44,8 +44,25 @@ public class HdfsDataSegmentKiller implements DataSegmentKiller
final FileSystem fs = checkPathAndGetFilesystem(path);
try {
if (path.getName().endsWith(".zip")) {
// delete the parent directory containing the zip file and the descriptor
fs.delete(path.getParent(), true);
// path format -- > .../dataSource/interval/version/partitionNum/xxx.zip
Path partitionNumDir = path.getParent();
if (!fs.delete(partitionNumDir, true)) {
throw new SegmentLoadingException(
"Unable to kill segment, failed to delete dir [%s]",
partitionNumDir.toString()
);
}
//try to delete other directories if possible
Path versionDir = partitionNumDir.getParent();
if (safeNonRecursiveDelete(fs, versionDir)) {
Path intervalDir = versionDir.getParent();
if (safeNonRecursiveDelete(fs, intervalDir)) {
Path dataSourceDir = intervalDir.getParent();
safeNonRecursiveDelete(fs, dataSourceDir);
}
}
} else {
throw new SegmentLoadingException("Unknown file type[%s]", path);
}
@ -55,6 +72,16 @@ public class HdfsDataSegmentKiller implements DataSegmentKiller
}
}
private boolean safeNonRecursiveDelete(FileSystem fs, Path path)
{
try {
return fs.delete(path, false);
}
catch (Exception ex) {
return false;
}
}
private Path getPath(DataSegment segment)
{
return new Path(String.valueOf(segment.getLoadSpec().get("path")));