From 08c14f135639beddc0c33c0c087962f8b5f88f33 Mon Sep 17 00:00:00 2001 From: markrmiller Date: Mon, 13 Jun 2016 12:22:50 -0400 Subject: [PATCH] SOLR-9204: Improve performance of getting directory size with hdfs. --- solr/CHANGES.txt | 2 ++ .../solr/core/CachingDirectoryFactory.java | 6 +++- .../apache/solr/core/DirectoryFactory.java | 25 ++++++++++++++ .../solr/core/HdfsDirectoryFactory.java | 33 +++++++++++++++++++ .../solr/handler/ReplicationHandler.java | 2 +- .../handler/admin/CoreAdminOperation.java | 2 +- .../HdfsWriteToMultipleCollectionsTest.java | 19 +++++++++++ 7 files changed, 86 insertions(+), 3 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c886fd07c83..82b8760dfbc 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -254,6 +254,8 @@ Optimizations * SOLR-8744: Overseer operations performed with fine grained mutual exclusion (noble, Scott Blum) +* SOLR-9204: Improve performance of getting directory size with hdfs. (Mark Miller) + Other Changes ---------------------- * SOLR-8860: Remove back-compat handling of router format made in SOLR-4221 in 4.5.0. 
(shalin) diff --git a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java index 11cf479fc9d..5b7ad1b5a53 100644 --- a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java @@ -497,7 +497,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory { } return livePaths; } - + @Override protected boolean deleteOldIndexDirectory(String oldDirPath) throws IOException { Set livePaths = getLivePaths(); @@ -508,4 +508,8 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory { return super.deleteOldIndexDirectory(oldDirPath); } + + protected synchronized String getPath(Directory directory) { + return byDirectoryCache.get(directory).path; + } } diff --git a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java index 8cc9e7d2073..f9539132a35 100644 --- a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java @@ -145,6 +145,31 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin, */ public abstract void remove(String path) throws IOException; + /** + * @param directory to calculate size of + * @return size in bytes + * @throws IOException on low level IO error + */ + public long size(Directory directory) throws IOException { + return sizeOfDirectory(directory); + } + + /** + * @param path to calculate size of + * @return size in bytes + * @throws IOException on low level IO error + */ + public long size(String path) throws IOException { + Directory dir = get(path, DirContext.DEFAULT, null); + long size; + try { + size = sizeOfDirectory(dir); + } finally { + release(dir); + } + return size; + } + /** * Override for more efficient moves. 
* diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java index 25f094e1fe5..ada4af35cdd 100644 --- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java +++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java @@ -31,6 +31,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.security.UserGroupInformation; @@ -410,6 +411,38 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol + cd.getDataDir())); } + /** + * @param directory to calculate size of + * @return size in bytes + * @throws IOException on low level IO error + */ + @Override + public long size(Directory directory) throws IOException { + String hdfsDirPath = getPath(directory); + return size(hdfsDirPath); + } + + /** + * @param path to calculate size of + * @return size in bytes + * @throws IOException on low level IO error + */ + @Override + public long size(String path) throws IOException { + Path hdfsDirPath = new Path(path); + FileSystem fileSystem = null; + try { + fileSystem = FileSystem.newInstance(hdfsDirPath.toUri(), getConf()); + long size = fileSystem.getContentSummary(hdfsDirPath).getLength(); + return size; + } catch (IOException e) { + LOG.error("Error getting size of hdfs path " + path, e); + throw new SolrException(ErrorCode.SERVER_ERROR, "Error getting size of hdfs path " + path, e); + } finally { + IOUtils.closeQuietly(fileSystem); + } + } + public String getConfDir() { return confDir; } diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java index 3785db76b79..14898d796eb 100644 --- 
a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java @@ -761,7 +761,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw try { dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); try { - size = DirectoryFactory.sizeOfDirectory(dir); + size = core.getDirectoryFactory().size(dir); } finally { core.getDirectoryFactory().release(dir); } diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java index 51e776de0ae..3fdf3efde32 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java @@ -988,7 +988,7 @@ enum CoreAdminOperation { dir = core.getDirectoryFactory().get(core.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); try { - size = DirectoryFactory.sizeOfDirectory(dir); + size = core.getDirectoryFactory().size(dir); } finally { core.getDirectoryFactory().release(dir); } diff --git a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java index ddf6e51c224..043cdecf97e 100644 --- a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java @@ -22,8 +22,13 @@ import java.util.Collection; import java.util.List; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.lucene.index.IndexWriter; +import 
org.apache.lucene.store.Directory; import org.apache.lucene.store.NRTCachingDirectory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase.Nightly; @@ -130,6 +135,20 @@ public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest { if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName() .startsWith(ACOLLECTION)) { assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory); + Directory dir = core.getDirectoryFactory().get(core.getDataDir(), null, null); + try { + long dataDirSize = core.getDirectoryFactory().size(dir); + FileSystem fileSystem = null; + + fileSystem = FileSystem.newInstance( + new Path(core.getDataDir()).toUri(), new Configuration()); + long size = fileSystem.getContentSummary( + new Path(core.getDataDir())).getLength(); + assertEquals(size, dataDirSize); + } finally { + core.getDirectoryFactory().release(dir); + } + RefCounted iwRef = core.getUpdateHandler() .getSolrCoreState().getIndexWriter(core); try {