SOLR-9204: Improve performance of getting directory size with hdfs.

This commit is contained in:
markrmiller 2016-06-13 12:22:50 -04:00
parent 72914198e6
commit 08c14f1356
7 changed files with 86 additions and 3 deletions

View File

@ -254,6 +254,8 @@ Optimizations
* SOLR-8744: Overseer operations performed with fine grained mutual exclusion (noble, Scott Blum)
* SOLR-9204: Improve performance of getting directory size with hdfs. (Mark Miller)
Other Changes
----------------------
* SOLR-8860: Remove back-compat handling of router format made in SOLR-4221 in 4.5.0. (shalin)

View File

@ -497,7 +497,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
return livePaths;
}
@Override
protected boolean deleteOldIndexDirectory(String oldDirPath) throws IOException {
Set<String> livePaths = getLivePaths();
@ -508,4 +508,8 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
return super.deleteOldIndexDirectory(oldDirPath);
}
/**
 * Returns the path recorded in {@code byDirectoryCache} for the given directory.
 *
 * @param directory the directory to look up in the cache
 * @return the {@code path} of the cache entry for {@code directory}
 * @throws IllegalArgumentException if the cache has no entry for {@code directory}
 */
protected synchronized String getPath(Directory directory) {
  // Fail with a descriptive message rather than a bare NullPointerException
  // when the directory has no entry in the cache.
  if (!byDirectoryCache.containsKey(directory)) {
    throw new IllegalArgumentException("Unknown directory: " + directory);
  }
  return byDirectoryCache.get(directory).path;
}
}

View File

@ -145,6 +145,31 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*/
public abstract void remove(String path) throws IOException;
/**
 * Computes the size of the given directory.
 *
 * <p>This implementation delegates to {@code sizeOfDirectory}; subclasses may
 * override it.
 *
 * @param directory the directory to measure
 * @return the size in bytes
 * @throws IOException on low level IO error
 */
public long size(Directory directory) throws IOException {
  final long bytes = sizeOfDirectory(directory);
  return bytes;
}
/**
 * Computes the size of the directory found at the given path.
 *
 * <p>The directory is obtained through {@code get} using the default context
 * and a {@code null} lock type, measured with {@code sizeOfDirectory}, and
 * released again whether or not the measurement succeeds.
 *
 * @param path the path of the directory to measure
 * @return the size in bytes
 * @throws IOException on low level IO error
 */
public long size(String path) throws IOException {
  final Directory directory = get(path, DirContext.DEFAULT, null);
  try {
    return sizeOfDirectory(directory);
  } finally {
    // Always hand the directory back, even when sizing fails.
    release(directory);
  }
}
/**
* Override for more efficient moves.
*

View File

@ -31,6 +31,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.security.UserGroupInformation;
@ -410,6 +411,38 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol
+ cd.getDataDir()));
}
/**
 * Computes the size of the given directory by resolving its path with
 * {@code getPath} and delegating to {@link #size(String)}.
 *
 * @param directory the directory to measure
 * @return the size in bytes
 * @throws IOException on low level IO error
 */
@Override
public long size(Directory directory) throws IOException {
  return size(getPath(directory));
}
/**
 * Computes the size of the given HDFS path from the file system's content
 * summary for that path.
 *
 * <p>A new {@link FileSystem} instance is opened for the call and closed
 * quietly before returning.
 *
 * @param path the HDFS path to calculate the size of
 * @return the size in bytes
 * @throws SolrException with {@code SERVER_ERROR} if opening the file system
 *         or reading the content summary fails with an {@link IOException};
 *         the original exception is kept as the cause
 * @throws IOException declared for compatibility with the overridden method;
 *         IO failures in this implementation surface as {@link SolrException}
 */
@Override
public long size(String path) throws IOException {
  Path hdfsDirPath = new Path(path);
  FileSystem fileSystem = null;
  try {
    fileSystem = FileSystem.newInstance(hdfsDirPath.toUri(), getConf());
    return fileSystem.getContentSummary(hdfsDirPath).getLength();
  } catch (IOException e) {
    // Report what actually failed (a size lookup, not an existence check)
    // and include the path so the log entry is actionable.
    String msg = "Error getting size of hdfs path " + path;
    LOG.error(msg, e);
    throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
  } finally {
    // fileSystem is still null if newInstance itself failed.
    IOUtils.closeQuietly(fileSystem);
  }
}
/**
 * @return the current value of this factory's {@code confDir} field
 */
public String getConfDir() {
  return this.confDir;
}

View File

@ -761,7 +761,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
try {
dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
try {
size = DirectoryFactory.sizeOfDirectory(dir);
size = core.getDirectoryFactory().size(dir);
} finally {
core.getDirectoryFactory().release(dir);
}

View File

@ -988,7 +988,7 @@ enum CoreAdminOperation {
dir = core.getDirectoryFactory().get(core.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
try {
size = DirectoryFactory.sizeOfDirectory(dir);
size = core.getDirectoryFactory().size(dir);
} finally {
core.getDirectoryFactory().release(dir);
}

View File

@ -22,8 +22,13 @@ import java.util.Collection;
import java.util.List;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase.Nightly;
@ -130,6 +135,20 @@ public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {
if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName()
.startsWith(ACOLLECTION)) {
assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory);
Directory dir = core.getDirectoryFactory().get(core.getDataDir(), null, null);
try {
long dataDirSize = core.getDirectoryFactory().size(dir);
FileSystem fileSystem = null;
fileSystem = FileSystem.newInstance(
new Path(core.getDataDir()).toUri(), new Configuration());
long size = fileSystem.getContentSummary(
new Path(core.getDataDir())).getLength();
assertEquals(size, dataDirSize);
} finally {
core.getDirectoryFactory().release(dir);
}
RefCounted<IndexWriter> iwRef = core.getUpdateHandler()
.getSolrCoreState().getIndexWriter(core);
try {