SOLR-9204: Improve performance of getting directory size with hdfs.
parent 72914198e6
commit 08c14f1356
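The slow path this commit replaces is the generic DirectoryFactory.sizeOfDirectory(dir) (removed from the ReplicationHandler and CoreAdminOperation hunks below), which in effect lists the directory and fetches each file's length individually; on HDFS that can mean a remote call per index file. The fix makes size() an overridable DirectoryFactory method and implements it in HdfsDirectoryFactory with a single FileSystem.getContentSummary() call, letting the NameNode aggregate the subtree server-side. A minimal standalone sketch of the difference, not part of the commit (the class name and default URI are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class HdfsDuDemo {

  // Per-file accounting, analogous to the generic sizeOfDirectory helper:
  // enumerate every file and sum the lengths client-side.
  static long sizeByListing(FileSystem fs, Path dir) throws IOException {
    long total = 0;
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
    while (files.hasNext()) {
      total += files.next().getLen();
    }
    return total;
  }

  // What the commit switches to: one round trip, aggregated by the NameNode.
  static long sizeBySummary(FileSystem fs, Path dir) throws IOException {
    return fs.getContentSummary(dir).getLength();
  }

  public static void main(String[] args) throws IOException {
    Path dir = new Path(args.length > 0 ? args[0] : "hdfs://localhost:8020/solr");
    try (FileSystem fs = FileSystem.newInstance(dir.toUri(), new Configuration())) {
      System.out.println("by listing: " + sizeByListing(fs, dir));
      System.out.println("by summary: " + sizeBySummary(fs, dir));
    }
  }
}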
@@ -254,6 +254,8 @@ Optimizations
* SOLR-8744: Overseer operations performed with fine grained mutual exclusion (noble, Scott Blum)

+* SOLR-9204: Improve performance of getting directory size with hdfs. (Mark Miller)
+
Other Changes
----------------------

* SOLR-8860: Remove back-compat handling of router format made in SOLR-4221 in 4.5.0. (shalin)
@@ -508,4 +508,8 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
    return super.deleteOldIndexDirectory(oldDirPath);
  }

+  protected synchronized String getPath(Directory directory) {
+    return byDirectoryCache.get(directory).path;
+  }
+
}
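For context: the new getPath accessor lets a subclass recover the cached filesystem path for a Directory this factory manages; the HdfsDirectoryFactory hunk further down relies on it to map a Directory handle back to an HDFS path before asking the NameNode for its size.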
@@ -145,6 +145,31 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
   */
  public abstract void remove(String path) throws IOException;

+  /**
+   * @param directory to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  public long size(Directory directory) throws IOException {
+    return sizeOfDirectory(directory);
+  }
+
+  /**
+   * @param path to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  public long size(String path) throws IOException {
+    Directory dir = get(path, DirContext.DEFAULT, null);
+    long size;
+    try {
+      size = sizeOfDirectory(dir);
+    } finally {
+      release(dir);
+    }
+    return size;
+  }
+
  /**
   * Override for more efficient moves.
   *
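A hedged usage sketch for the new factory-level API (the helper class is ours, not Solr's; the get/size/release sequence mirrors the ReplicationHandler and CoreAdminOperation hunks below):

import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;

// IndexSizeUtil is illustrative only; it shows how a caller would use the
// new DirectoryFactory.size(Directory) rather than the static helper.
class IndexSizeUtil {
  static long indexSizeInBytes(SolrCore core) throws IOException {
    DirectoryFactory factory = core.getDirectoryFactory();
    Directory dir = factory.get(core.getIndexDir(), DirContext.DEFAULT,
        core.getSolrConfig().indexConfig.lockType);
    try {
      // Virtual dispatch: HdfsDirectoryFactory answers with one NameNode
      // call, while other factories keep the generic per-file sum.
      return factory.size(dir);
    } finally {
      factory.release(dir); // always release what get() handed out
    }
  }
}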
@@ -31,6 +31,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.security.UserGroupInformation;
@@ -410,6 +411,38 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol
        + cd.getDataDir()));
  }

+  /**
+   * @param directory to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  @Override
+  public long size(Directory directory) throws IOException {
+    String hdfsDirPath = getPath(directory);
+    return size(hdfsDirPath);
+  }
+
+  /**
+   * @param path to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  @Override
+  public long size(String path) throws IOException {
+    Path hdfsDirPath = new Path(path);
+    FileSystem fileSystem = null;
+    try {
+      fileSystem = FileSystem.newInstance(hdfsDirPath.toUri(), getConf());
+      long size = fileSystem.getContentSummary(hdfsDirPath).getLength();
+      return size;
+    } catch (IOException e) {
+      LOG.error("Error checking if hdfs path exists", e);
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error checking if hdfs path exists", e);
+    } finally {
+      IOUtils.closeQuietly(fileSystem);
+    }
+  }
+
  public String getConfDir() {
    return confDir;
  }
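Two details in this hunk are worth noting, offered as our reading rather than anything stated by the commit: FileSystem.get() returns a JVM-wide cached instance keyed by URI and configuration, so closing it could break other components sharing it, whereas FileSystem.newInstance() hands back a private instance that is safe to close in the finally block. And because getContentSummary() is answered by the NameNode from its namespace metadata, the cost no longer scales with the number of index files the way a client-side walk does.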
@@ -761,7 +761,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
    try {
      dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
      try {
-       size = DirectoryFactory.sizeOfDirectory(dir);
+       size = core.getDirectoryFactory().size(dir);
      } finally {
        core.getDirectoryFactory().release(dir);
      }
@@ -988,7 +988,7 @@ enum CoreAdminOperation {
    dir = core.getDirectoryFactory().get(core.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);

    try {
-     size = DirectoryFactory.sizeOfDirectory(dir);
+     size = core.getDirectoryFactory().size(dir);
    } finally {
      core.getDirectoryFactory().release(dir);
    }
@@ -22,8 +22,13 @@ import java.util.Collection;
import java.util.List;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase.Nightly;
@@ -130,6 +135,20 @@ public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {
        if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName()
            .startsWith(ACOLLECTION)) {
          assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory);
+         Directory dir = core.getDirectoryFactory().get(core.getDataDir(), null, null);
+         try {
+           long dataDirSize = core.getDirectoryFactory().size(dir);
+           FileSystem fileSystem = null;
+
+           fileSystem = FileSystem.newInstance(
+               new Path(core.getDataDir()).toUri(), new Configuration());
+           long size = fileSystem.getContentSummary(
+               new Path(core.getDataDir())).getLength();
+           assertEquals(size, dataDirSize);
+         } finally {
+           core.getDirectoryFactory().release(dir);
+         }
+
          RefCounted<IndexWriter> iwRef = core.getUpdateHandler()
              .getSolrCoreState().getIndexWriter(core);
          try {