SOLR-13029: configure buffer size in HdfsBackupRepository.

This commit is contained in:
Mikhail Khludnev 2019-01-24 17:46:12 +03:00
parent 6f3d8a9770
commit 5a54c624ca
4 changed files with 111 additions and 2 deletions

View File

@ -304,6 +304,8 @@ Improvements
* SOLR-13016: Computing suggestions when policy have "#EQUAL" or "#ALL" rules take too long (noble) * SOLR-13016: Computing suggestions when policy have "#EQUAL" or "#ALL" rules take too long (noble)
* SOLR-13029: solr.hdfs.buffer.size can be configured for HdfsBackupRepository for better performance (Tim Owen via Mikhail Khludnev)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -43,18 +43,28 @@ import org.apache.solr.store.hdfs.HdfsDirectory.HdfsIndexInput;
public class HdfsBackupRepository implements BackupRepository { public class HdfsBackupRepository implements BackupRepository {
private static final String HDFS_UMASK_MODE_PARAM = "solr.hdfs.permissions.umask-mode"; private static final String HDFS_UMASK_MODE_PARAM = "solr.hdfs.permissions.umask-mode";
private static final String HDFS_COPY_BUFFER_SIZE_PARAM = "solr.hdfs.buffer.size";
private HdfsDirectoryFactory factory; private HdfsDirectoryFactory factory;
private Configuration hdfsConfig = null; private Configuration hdfsConfig = null;
private FileSystem fileSystem = null; private FileSystem fileSystem = null;
private Path baseHdfsPath = null; private Path baseHdfsPath = null;
private NamedList config = null; private NamedList config = null;
protected int copyBufferSize = HdfsDirectory.DEFAULT_BUFFER_SIZE;
@SuppressWarnings("rawtypes") @SuppressWarnings("rawtypes")
@Override @Override
public void init(NamedList args) { public void init(NamedList args) {
this.config = args; this.config = args;
// Configure the size of the buffer used for copying index files to/from HDFS, if specified.
if (args.get(HDFS_COPY_BUFFER_SIZE_PARAM) != null) {
this.copyBufferSize = (Integer)args.get(HDFS_COPY_BUFFER_SIZE_PARAM);
if (this.copyBufferSize <= 0) {
throw new IllegalArgumentException("Value of " + HDFS_COPY_BUFFER_SIZE_PARAM + " must be > 0");
}
}
// We don't really need this factory instance. But we want to initialize it here to // We don't really need this factory instance. But we want to initialize it here to
// make sure that all HDFS related initialization is at one place (and not duplicated here). // make sure that all HDFS related initialization is at one place (and not duplicated here).
factory = new HdfsDirectoryFactory(); factory = new HdfsDirectoryFactory();
@ -174,7 +184,7 @@ public class HdfsBackupRepository implements BackupRepository {
@Override @Override
public void copyFileFrom(Directory sourceDir, String fileName, URI dest) throws IOException { public void copyFileFrom(Directory sourceDir, String fileName, URI dest) throws IOException {
try (HdfsDirectory dir = new HdfsDirectory(new Path(dest), NoLockFactory.INSTANCE, try (HdfsDirectory dir = new HdfsDirectory(new Path(dest), NoLockFactory.INSTANCE,
hdfsConfig, HdfsDirectory.DEFAULT_BUFFER_SIZE)) { hdfsConfig, copyBufferSize)) {
dir.copyFrom(sourceDir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE); dir.copyFrom(sourceDir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
} }
} }
@ -182,7 +192,7 @@ public class HdfsBackupRepository implements BackupRepository {
@Override @Override
public void copyFileTo(URI sourceRepo, String fileName, Directory dest) throws IOException { public void copyFileTo(URI sourceRepo, String fileName, Directory dest) throws IOException {
try (HdfsDirectory dir = new HdfsDirectory(new Path(sourceRepo), NoLockFactory.INSTANCE, try (HdfsDirectory dir = new HdfsDirectory(new Path(sourceRepo), NoLockFactory.INSTANCE,
hdfsConfig, HdfsDirectory.DEFAULT_BUFFER_SIZE)) { hdfsConfig, copyBufferSize)) {
dest.copyFrom(dir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE); dest.copyFrom(dir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
} }
} }

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.backup.repository;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.HdfsDirectoryFactory;
import org.apache.solr.store.hdfs.HdfsDirectory;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
 * Unit tests for the argument handling performed by {@link HdfsBackupRepository#init(NamedList)},
 * in particular the optional copy buffer size setting.
 */
public class HdfsBackupRepositoryTest {

  /**
   * Name of the buffer size setting. Kept as a literal here because the corresponding
   * constant in {@link HdfsBackupRepository} is private.
   */
  private static final String BUFFER_SIZE_PARAM = "solr.hdfs.buffer.size";

  /** HDFS home value used by the tests that expect {@code init} to complete. */
  private static final String HDFS_HOME_VALUE = "hdfs://localhost";

  /**
   * Creates a fresh repository and initializes it with the given arguments.
   *
   * @param args the configuration handed to {@code init}
   * @return the initialized repository
   */
  private static HdfsBackupRepository initRepository(NamedList<Object> args) {
    HdfsBackupRepository repository = new HdfsBackupRepository();
    repository.init(args);
    return repository;
  }

  /** Initializing with an empty configuration (no HDFS home) is expected to fail with an NPE. */
  @Test(expected = NullPointerException.class)
  public void testHdfsHomePropertyMissing() {
    initRepository(new SimpleOrderedMap<>());
  }

  /** Initializing with only the HDFS home configured is expected to succeed. */
  @Test
  public void testHdfsHomePropertySet() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(HdfsDirectoryFactory.HDFS_HOME, HDFS_HOME_VALUE);
    initRepository(namedList);
  }

  /** A non-numeric string for the buffer size is expected to be rejected with a ClassCastException. */
  @Test(expected = ClassCastException.class)
  public void testCopyBufferSizeNonNumeric() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(BUFFER_SIZE_PARAM, "xyz");
    initRepository(namedList);
  }

  /** A numeric value supplied as a String (not an Integer) is also expected to be rejected. */
  @Test(expected = ClassCastException.class)
  public void testCopyBufferSizeWrongType() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(BUFFER_SIZE_PARAM, "8192");
    initRepository(namedList);
  }

  /** A negative buffer size is expected to be rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void testCopyBufferSizeNegative() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(BUFFER_SIZE_PARAM, -1);
    initRepository(namedList);
  }

  /** A buffer size of zero is expected to be rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void testCopyBufferSizeZero() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(BUFFER_SIZE_PARAM, 0);
    initRepository(namedList);
  }

  /** An explicitly configured buffer size is stored on the repository. */
  @Test
  public void testCopyBufferSet() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(HdfsDirectoryFactory.HDFS_HOME, HDFS_HOME_VALUE);
    namedList.add(BUFFER_SIZE_PARAM, 32768);
    HdfsBackupRepository hdfsBackupRepository = initRepository(namedList);
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes the failure message report the right values.
    assertEquals(32768, hdfsBackupRepository.copyBufferSize);
  }

  /** Without an explicit setting the repository keeps the HdfsDirectory default buffer size. */
  @Test
  public void testCopyBufferDefaultSize() {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(HdfsDirectoryFactory.HDFS_HOME, HDFS_HOME_VALUE);
    HdfsBackupRepository hdfsBackupRepository = initRepository(namedList);
    assertEquals(HdfsDirectory.DEFAULT_BUFFER_SIZE, hdfsBackupRepository.copyBufferSize);
  }
}

View File

@ -230,3 +230,5 @@ Example `solr.xml` section to configure a repository like <<running-solr-on-hdfs
</repository> </repository>
</backup> </backup>
---- ----
Better throughput might be achieved by increasing the buffer size, for example with `<int name="solr.hdfs.buffer.size">262144</int>`. The buffer size is specified in bytes; the default is 4KB.