diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2580023c5ff..811f44f68df 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -304,6 +304,8 @@ Improvements * SOLR-13016: Computing suggestions when policy have "#EQUAL" or "#ALL" rules take too long (noble) +* SOLR-13029: solr.hdfs.buffer.size can be configured for HdfsBackupRepository for better performance (Tim Owen via Mikhail Khludnev) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java b/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java index 99f858addc5..64bb14cd49c 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java +++ b/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java @@ -43,18 +43,28 @@ import org.apache.solr.store.hdfs.HdfsDirectory.HdfsIndexInput; public class HdfsBackupRepository implements BackupRepository { private static final String HDFS_UMASK_MODE_PARAM = "solr.hdfs.permissions.umask-mode"; + private static final String HDFS_COPY_BUFFER_SIZE_PARAM = "solr.hdfs.buffer.size"; private HdfsDirectoryFactory factory; private Configuration hdfsConfig = null; private FileSystem fileSystem = null; private Path baseHdfsPath = null; private NamedList config = null; + protected int copyBufferSize = HdfsDirectory.DEFAULT_BUFFER_SIZE; @SuppressWarnings("rawtypes") @Override public void init(NamedList args) { this.config = args; + // Configure the size of the buffer used for copying index files to/from HDFS, if specified. + if (args.get(HDFS_COPY_BUFFER_SIZE_PARAM) != null) { + this.copyBufferSize = (Integer)args.get(HDFS_COPY_BUFFER_SIZE_PARAM); + if (this.copyBufferSize <= 0) { + throw new IllegalArgumentException("Value of " + HDFS_COPY_BUFFER_SIZE_PARAM + " must be > 0"); + } + } + // We don't really need this factory instance. But we want to initialize it here to // make sure that all HDFS related initialization is at one place (and not duplicated here). factory = new HdfsDirectoryFactory(); @@ -174,7 +184,7 @@ public class HdfsBackupRepository implements BackupRepository { @Override public void copyFileFrom(Directory sourceDir, String fileName, URI dest) throws IOException { try (HdfsDirectory dir = new HdfsDirectory(new Path(dest), NoLockFactory.INSTANCE, - hdfsConfig, HdfsDirectory.DEFAULT_BUFFER_SIZE)) { + hdfsConfig, copyBufferSize)) { dir.copyFrom(sourceDir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE); } } @@ -182,7 +192,7 @@ public class HdfsBackupRepository implements BackupRepository { @Override public void copyFileTo(URI sourceRepo, String fileName, Directory dest) throws IOException { try (HdfsDirectory dir = new HdfsDirectory(new Path(sourceRepo), NoLockFactory.INSTANCE, - hdfsConfig, HdfsDirectory.DEFAULT_BUFFER_SIZE)) { + hdfsConfig, copyBufferSize)) { dest.copyFrom(dir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE); } } diff --git a/solr/core/src/test/org/apache/solr/core/backup/repository/HdfsBackupRepositoryTest.java b/solr/core/src/test/org/apache/solr/core/backup/repository/HdfsBackupRepositoryTest.java new file mode 100644 index 00000000000..0da02b02903 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/core/backup/repository/HdfsBackupRepositoryTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.core.backup.repository; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.HdfsDirectoryFactory; +import org.apache.solr.store.hdfs.HdfsDirectory; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class HdfsBackupRepositoryTest { + + @Test(expected = NullPointerException.class) + public void testHdfsHomePropertyMissing() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + hdfsBackupRepository.init(namedList); + } + + @Test + public void testHdfsHomePropertySet() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add(HdfsDirectoryFactory.HDFS_HOME, "hdfs://localhost"); + hdfsBackupRepository.init(namedList); + } + + @Test(expected = ClassCastException.class) + public void testCopyBufferSizeNonNumeric() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add("solr.hdfs.buffer.size", "xyz"); + hdfsBackupRepository.init(namedList); + } + + @Test(expected = ClassCastException.class) + public void testCopyBufferSizeWrongType() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add("solr.hdfs.buffer.size", "8192"); + hdfsBackupRepository.init(namedList); + } + + @Test(expected = IllegalArgumentException.class) + public void testCopyBufferSizeNegative() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add("solr.hdfs.buffer.size", -1); + hdfsBackupRepository.init(namedList); + } + + @Test(expected = IllegalArgumentException.class) + public void testCopyBufferSizeZero() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add("solr.hdfs.buffer.size", 0); + hdfsBackupRepository.init(namedList); + } + + @Test + public void testCopyBufferSet() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add(HdfsDirectoryFactory.HDFS_HOME, "hdfs://localhost"); + namedList.add("solr.hdfs.buffer.size", 32768); + hdfsBackupRepository.init(namedList); + assertEquals(hdfsBackupRepository.copyBufferSize, 32768); + } + + @Test + public void testCopyBufferDefaultSize() { + HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository(); + NamedList namedList = new SimpleOrderedMap<>(); + namedList.add(HdfsDirectoryFactory.HDFS_HOME, "hdfs://localhost"); + hdfsBackupRepository.init(namedList); + assertEquals(hdfsBackupRepository.copyBufferSize, HdfsDirectory.DEFAULT_BUFFER_SIZE); + } +} diff --git a/solr/solr-ref-guide/src/making-and-restoring-backups.adoc b/solr/solr-ref-guide/src/making-and-restoring-backups.adoc index 8d4f33e66b5..fc17ebb6066 100644 --- a/solr/solr-ref-guide/src/making-and-restoring-backups.adoc +++ b/solr/solr-ref-guide/src/making-and-restoring-backups.adoc @@ -230,3 +230,5 @@ Example `solr.xml` section to configure a repository like < ---- + +Better throughput might be achieved by increasing buffer size with `262144`. Buffer size is specified in bytes, by default it's 4KB. \ No newline at end of file