SOLR-10108: bin/solr script recursive copy broken

This commit is contained in:
Erick Erickson 2017-03-27 12:15:05 -07:00
parent 2ba54a36ba
commit 0b3ca1bb61
5 changed files with 213 additions and 47 deletions

View File

@ -135,6 +135,8 @@ Bug Fixes
* SOLR-10281: ADMIN_PATHS is duplicated in two places and inconsistent. This can cause automatic * SOLR-10281: ADMIN_PATHS is duplicated in two places and inconsistent. This can cause automatic
retries to /admin/metrics handler by the CloudSolrClient. (shalin) retries to /admin/metrics handler by the CloudSolrClient. (shalin)
* SOLR-10108: bin/solr script recursive copy broken (Erick Erickson)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -496,17 +496,27 @@ function print_usage() {
echo " NOTE: <src> and <dest> may both be Zookeeper resources prefixed by 'zk:'" echo " NOTE: <src> and <dest> may both be Zookeeper resources prefixed by 'zk:'"
echo " When <src> is a zk resource, <dest> may be '.'" echo " When <src> is a zk resource, <dest> may be '.'"
echo " If <dest> ends with '/', then <dest> will be a local folder or parent znode and the last" echo " If <dest> ends with '/', then <dest> will be a local folder or parent znode and the last"
echo " element of the <src> path will be appended." echo " element of the <src> path will be appended unless <src> also ends in a slash. "
echo " <dest> may be zk:, which may be useful when using the cp -r form to backup/restore "
echo " the entire zk state."
echo " You must enclose local paths that end in a wildcard in quotes or just"
echo " end the local path in a slash. That is,"
echo " 'bin/solr zk cp -r /some/dir/ zk:/ -z localhost:2181' is equivalent to"
echo " 'bin/solr zk cp -r \"/some/dir/*\" zk:/ -z localhost:2181'"
echo " but 'bin/solr zk cp -r /some/dir/* zk:/ -z localhost:2181' will throw an error"
echo "" echo ""
echo " The 'file:' prefix is stripped, thus 'file:/' specifies an absolute local path and" echo " here's an example of backup/restore for a ZK configuration:"
echo " 'file:somewhere' specifies a relative local path. All paths on Zookeeper are absolute" echo " to copy to local: 'bin/solr zk cp -r zk:/ /some/dir -z localhost:2181'"
echo " so the slash is required." echo " to restore to ZK: 'bin/solr zk cp -r /some/dir/ zk:/ -z localhost:2181'"
echo ""
echo " The 'file:' prefix is stripped, thus 'file:/wherever' specifies an absolute local path and"
echo " 'file:somewhere' specifies a relative local path. All paths on Zookeeper are absolute."
echo "" echo ""
echo " Zookeeper nodes CAN have data, so moving a single file to a parent znode" echo " Zookeeper nodes CAN have data, so moving a single file to a parent znode"
echo " will overlay the data on the parent Znode so specifying the trailing slash" echo " will overlay the data on the parent Znode so specifying the trailing slash"
echo " is important." echo " can be important."
echo "" echo ""
echo " Wildcards are not supported" echo " Wildcards are supported when copying from local, trailing only and must be quoted."
echo "" echo ""
echo " rm deletes files or folders on Zookeeper" echo " rm deletes files or folders on Zookeeper"
echo " -r     Recursively delete if <path> is a directory. Command will fail if <path>" echo " -r     Recursively delete if <path> is a directory. Command will fail if <path>"
@ -1093,7 +1103,7 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then
if [ -z "$ZK_DST" ]; then if [ -z "$ZK_DST" ]; then
ZK_DST=$1 ZK_DST=$1
else else
print_short_zk_usage "Unrecognized or misplaced command $1" print_short_zk_usage "Unrecognized or misplaced command $1. 'cp' with trailing asterisk requires quoting, see help text."
fi fi
fi fi
shift shift

View File

@ -479,23 +479,32 @@ echo.
echo. ^<src^>, ^<dest^> : [file:][/]path/to/local/file or zk:/path/to/zk/node echo. ^<src^>, ^<dest^> : [file:][/]path/to/local/file or zk:/path/to/zk/node
echo NOTE: ^<src^> and ^<dest^> may both be Zookeeper resources prefixed by 'zk:' echo NOTE: ^<src^> and ^<dest^> may both be Zookeeper resources prefixed by 'zk:'
echo When ^<src^> is a zk resource, ^<dest^> may be '.' echo When ^<src^> is a zk resource, ^<dest^> may be '.'
echo If ^<dest^> ends with '/', then ^<dest^> will be a local folder or parent znode and the last echo element of the ^<src^> path will be appended unless ^<src^> also ends in a slash.
echo element of the ^<src^> path will be appended. echo ^<dest^> may be zk:, which may be useful when using the cp -r form to backup/restore
echo the entire zk state.
echo You must enclose local paths that end in a wildcard in quotes or just
echo end the local path in a slash. That is,
echo 'bin/solr zk cp -r /some/dir/ zk:/ -z localhost:2181' is equivalent to
echo 'bin/solr zk cp -r ^"/some/dir/*^" zk:/ -z localhost:2181'
echo but 'bin/solr zk cp -r /some/dir/* zk:/ -z localhost:2181' will throw an error
echo.
echo here's an example of backup/restore for a ZK configuration:
echo to copy to local: 'bin/solr zk cp -r zk:/ /some/dir -z localhost:2181'
echo to restore to ZK: 'bin/solr zk cp -r /some/dir/ zk:/ -z localhost:2181'
echo. echo.
echo The 'file:' prefix is stripped, thus 'file:/' specifies an absolute local path and echo The 'file:' prefix is stripped, thus 'file:/wherever' specifies an absolute local path and
echo 'file:somewhere' specifies a relative local path. All paths on Zookeeper are absolute echo 'file:somewhere' specifies a relative local path. All paths on Zookeeper are absolute.
echo so the slash is required.
echo. echo.
echo Zookeeper nodes CAN have data, so moving a single file to a parent znode echo Zookeeper nodes CAN have data, so moving a single file to a parent znode
echo will overlay the data on the parent Znode so specifying the trailing slash echo will overlay the data on the parent Znode so specifying the trailing slash
echo is important. echo can be important.
echo. echo.
echo Wildcards are not supported echo Wildcards are supported when copying from local, trailing only and must be quoted.
echo. echo.
echo rm deletes files or folders on Zookeeper echo rm deletes files or folders on Zookeeper
echo -r Recursively delete if ^<path^> is a directory. Command will fail if ^<path^> echo -r Recursively delete if ^<path^> is a directory. Command will fail if ^<path^>
echo has children and -r is not specified. Optional echo has children and -r is not specified. Optional
echo ^<path^> : [zk:]/path/to/zk/node. ^<path^> may not be the root ('/')" echo ^<path^> : [zk:]/path/to/zk/node. ^<path^> may not be the root ('/')
echo. echo.
echo mv moves (renames) znodes on Zookeeper echo mv moves (renames) znodes on Zookeeper
echo ^<src^>, ^<dest^> : Zookeeper nodes, the 'zk:' prefix is optional. echo ^<src^>, ^<dest^> : Zookeeper nodes, the 'zk:' prefix is optional.
@ -512,7 +521,7 @@ echo.
echo Only the node names are listed, not data echo Only the node names are listed, not data
echo. echo.
echo mkroot makes a znode in Zookeeper with no data. Can be used to make a path of arbitrary echo mkroot makes a znode in Zookeeper with no data. Can be used to make a path of arbitrary
echo depth but primarily intended to create a 'chroot'." echo depth but primarily intended to create a 'chroot'.
echo. echo.
echo ^<path^>: The Zookeeper path to create. Leading slash is assumed if not present. echo ^<path^>: The Zookeeper path to create. Leading slash is assumed if not present.
echo Intermediate nodes are created as needed if not present. echo Intermediate nodes are created as needed if not present.

View File

@ -28,11 +28,14 @@ import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor; import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkMaintenanceUtils; import org.apache.solr.common.cloud.ZkMaintenanceUtils;
import org.apache.solr.util.SolrCLI; import org.apache.solr.util.SolrCLI;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -201,7 +204,6 @@ public class SolrCLIZkUtilsTest extends SolrCloudTestCase {
assertEquals("Copy should have succeeded.", 0, res); assertEquals("Copy should have succeeded.", 0, res);
verifyZkLocalPathsMatch(srcPathCheck, "/cp4"); verifyZkLocalPathsMatch(srcPathCheck, "/cp4");
// try with recurse not specified // try with recurse not specified
args = new String[]{ args = new String[]{
"-src", "file:" + srcPathCheck.toAbsolutePath().toString(), "-src", "file:" + srcPathCheck.toAbsolutePath().toString(),
@ -306,6 +308,70 @@ public class SolrCLIZkUtilsTest extends SolrCloudTestCase {
assertEquals("Copy from somewhere in ZK to ZK root should have succeeded.", 0, res); assertEquals("Copy from somewhere in ZK to ZK root should have succeeded.", 0, res);
assertTrue("Should have found znode /solrconfig.xml: ", zkClient.exists("/solrconfig.xml", true)); assertTrue("Should have found znode /solrconfig.xml: ", zkClient.exists("/solrconfig.xml", true));
// Check that the form path/ works for copying files up. Should append the last bit of the source path to the dst
args = new String[]{
"-src", "file:" + srcPathCheck.toAbsolutePath().toString(),
"-dst", "zk:/cp7/",
"-recurse", "true",
"-zkHost", zkAddr,
};
res = cpTool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(cpTool.getOptions()), args));
assertEquals("Copy should have succeeded.", 0, res);
verifyZkLocalPathsMatch(srcPathCheck, "/cp7/" + srcPathCheck.getFileName().toString());
// Check for an intermediate ZNODE having content. You know cp7/stopwords is a parent node.
tmp = createTempDir("dirdata");
Path file = Paths.get(tmp.toAbsolutePath().toString(), "zknode.data");
List<String> lines = new ArrayList<>();
lines.add("{Some Arbitrary Data}");
Files.write(file, lines, Charset.forName("UTF-8"));
// First, just copy the data up to cp7 since it's a directory.
args = new String[]{
"-src", "file:" + file.toAbsolutePath().toString(),
"-dst", "zk:/cp7/conf/stopwords/",
"-recurse", "false",
"-zkHost", zkAddr,
};
res = cpTool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(cpTool.getOptions()), args));
assertEquals("Copy should have succeeded.", 0, res);
String content = new String(zkClient.getData("/cp7/conf/stopwords", null, null, true), StandardCharsets.UTF_8);
assertTrue("There should be content in the node! ", content.contains("{Some Arbitrary Data}"));
res = cpTool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(cpTool.getOptions()), args));
assertEquals("Copy should have succeeded.", 0, res);
tmp = createTempDir("cp8");
args = new String[]{
"-src", "zk:/cp7",
"-dst", "file:" + tmp.toAbsolutePath().toString(),
"-recurse", "true",
"-zkHost", zkAddr,
};
res = cpTool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(cpTool.getOptions()), args));
assertEquals("Copy should have succeeded.", 0, res);
// Next, copy cp7 down and verify that zknode.data exists for cp7
Path zData = Paths.get(tmp.toAbsolutePath().toString(), "conf/stopwords/zknode.data");
assertTrue("znode.data should have been copied down", zData.toFile().exists());
// Finally, copy the downloaded tree back up to zk:/cp9 and verify that the data is up there.
args = new String[]{
"-src", "file:" + tmp.toAbsolutePath().toString(),
"-dst", "zk:/cp9",
"-recurse", "true",
"-zkHost", zkAddr,
};
res = cpTool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(cpTool.getOptions()), args));
assertEquals("Copy should have succeeded.", 0, res);
content = new String(zkClient.getData("/cp9/conf/stopwords", null, null, true), StandardCharsets.UTF_8);
assertTrue("There should be content in the node! ", content.contains("{Some Arbitrary Data}"));
} }
@Test @Test
@ -577,13 +643,22 @@ public class SolrCLIZkUtilsTest extends SolrCloudTestCase {
verifyAllZNodesAreFiles(fileRoot, zkRoot); verifyAllZNodesAreFiles(fileRoot, zkRoot);
} }
/**
 * Reports whether the znode at {@code zkPath} is ephemeral, judged by its Stat carrying a
 * non-zero ephemeral owner.
 *
 * NOTE(review): assumes the znode exists. exists() presumably returns null for a missing node,
 * which would cause a NullPointerException on the next line -- confirm against SolrZkClient.
 *
 * @param zkPath path of the znode to inspect
 * @return true if the znode's ephemeral owner is non-zero
 */
private static boolean isEphemeral(String zkPath) throws KeeperException, InterruptedException {
Stat znodeStat = zkClient.exists(zkPath, null, true);
return znodeStat.getEphemeralOwner() != 0;
}
void verifyAllZNodesAreFiles(Path fileRoot, String zkRoot) throws KeeperException, InterruptedException { void verifyAllZNodesAreFiles(Path fileRoot, String zkRoot) throws KeeperException, InterruptedException {
for (String node : zkClient.getChildren(zkRoot, null, true)) { for (String child : zkClient.getChildren(zkRoot, null, true)) {
Path thisPath = Paths.get(fileRoot.toAbsolutePath().toString(), node); // Skip ephemeral nodes
assertTrue("Znode " + node + " should have been found on disk at " + fileRoot.toAbsolutePath().toString(), if (zkRoot.endsWith("/") == false) zkRoot += "/";
if (isEphemeral(zkRoot + child)) continue;
Path thisPath = Paths.get(fileRoot.toAbsolutePath().toString(), child);
assertTrue("Znode " + child + " should have been found on disk at " + fileRoot.toAbsolutePath().toString(),
Files.exists(thisPath)); Files.exists(thisPath));
verifyAllZNodesAreFiles(thisPath, zkRoot + "/" + node); verifyAllZNodesAreFiles(thisPath, zkRoot + child);
} }
} }

View File

@ -17,6 +17,7 @@
package org.apache.solr.common.cloud; package org.apache.solr.common.cloud;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.nio.file.FileVisitResult; import java.nio.file.FileVisitResult;
@ -31,6 +32,7 @@ import java.util.regex.Pattern;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -40,6 +42,7 @@ import org.slf4j.LoggerFactory;
*/ */
public class ZkMaintenanceUtils { public class ZkMaintenanceUtils {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String ZKNODE_DATA_FILE = "zknode.data";
private ZkMaintenanceUtils() {} // don't let it be instantiated, all methods are static. private ZkMaintenanceUtils() {} // don't let it be instantiated, all methods are static.
/** /**
@ -119,6 +122,9 @@ public class ZkMaintenanceUtils {
if (srcIsZk == false && dstIsZk == false) { if (srcIsZk == false && dstIsZk == false) {
throw new SolrServerException("At least one of the source and dest parameters must be prefixed with 'zk:' "); throw new SolrServerException("At least one of the source and dest parameters must be prefixed with 'zk:' ");
} }
if (dstIsZk && dst.length() == 0) {
dst = "/"; // for consistency, one can copy from zk: and send to zk:/
}
dst = normalizeDest(src, dst); dst = normalizeDest(src, dst);
if (srcIsZk && dstIsZk) { if (srcIsZk && dstIsZk) {
@ -148,18 +154,26 @@ public class ZkMaintenanceUtils {
Files.write(filename, data); Files.write(filename, data);
} }
private static String normalizeDest(String srcName, String dstName) { private static String normalizeDest(String srcName, String dstName) {
// Pull the last element of the src path and add it to the dst. // Special handling for "."
if (dstName.endsWith("/")) { if (dstName.equals(".")) {
return Paths.get(".").normalize().toAbsolutePath().toString();
}
// Pull the last element of the src path and add it to the dst if the src does NOT end in a slash
// If the source ends in a slash, do not append the last segment to the dest
if (dstName.endsWith("/")) { // Dest is a directory.
int pos = srcName.lastIndexOf("/"); int pos = srcName.lastIndexOf("/");
if (pos < 0) { if (pos < 0) {
dstName += srcName; dstName += srcName;
} else { } else {
dstName += srcName.substring(pos + 1); dstName += srcName.substring(pos + 1);
} }
} else if (dstName.equals(".")) {
dstName = Paths.get(".").normalize().toAbsolutePath().toString();
} }
log.info("copying from '{}' to '{}'", srcName, dstName);
return dstName; return dstName;
} }
@ -227,8 +241,15 @@ public class ZkMaintenanceUtils {
}); });
} }
public static void uploadToZK(SolrZkClient zkClient, final Path rootPath, final String zkPath, public static void uploadToZK(SolrZkClient zkClient, final Path fromPath, final String zkPath,
final Pattern filenameExclusions) throws IOException { final Pattern filenameExclusions) throws IOException {
String path = fromPath.toString();
if (path.endsWith("*")) {
path = path.substring(0, path.length() - 1);
}
final Path rootPath = Paths.get(path);
if (!Files.exists(rootPath)) if (!Files.exists(rootPath))
throw new IOException("Path " + rootPath + " does not exist"); throw new IOException("Path " + rootPath + " does not exist");
@ -243,7 +264,12 @@ public class ZkMaintenanceUtils {
} }
String zkNode = createZkNodeName(zkPath, rootPath, file); String zkNode = createZkNodeName(zkPath, rootPath, file);
try { try {
zkClient.makePath(zkNode, file.toFile(), false, true); // if the path exists (and presumably we're uploading data to it) just set its data
if (file.toFile().getName().equals(ZKNODE_DATA_FILE) && zkClient.exists(zkNode, true)) {
zkClient.setData(zkNode, file.toFile(), true);
} else {
zkClient.makePath(zkNode, file.toFile(), false, true);
}
} catch (KeeperException | InterruptedException e) { } catch (KeeperException | InterruptedException e) {
throw new IOException("Error uploading file " + file.toString() + " to zookeeper path " + zkNode, throw new IOException("Error uploading file " + file.toString() + " to zookeeper path " + zkNode,
SolrZkClient.checkInterrupted(e)); SolrZkClient.checkInterrupted(e));
@ -253,28 +279,58 @@ public class ZkMaintenanceUtils {
@Override @Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
return (dir.getFileName().toString().startsWith(".")) ? FileVisitResult.SKIP_SUBTREE : FileVisitResult.CONTINUE; if (dir.getFileName().toString().startsWith(".")) return FileVisitResult.SKIP_SUBTREE;
return FileVisitResult.CONTINUE;
} }
}); });
} }
public static void downloadFromZK(SolrZkClient zkClient, String zkPath, Path dir) throws IOException { private static boolean isEphemeral(SolrZkClient zkClient, String zkPath) throws KeeperException, InterruptedException {
Stat znodeStat = zkClient.exists(zkPath, null, true);
return znodeStat.getEphemeralOwner() != 0;
}
/**
 * Writes the data stored on the znode at {@code zkPath} into the local {@code file}, but only
 * when the znode holds more than one byte of data.
 *
 * @param zkClient client used to read the znode's data
 * @param zkPath   path of the znode whose data should be written locally
 * @param file     local file that receives the znode's bytes
 * @return the number of bytes written, or 0 if nothing was written (in which case the file is
 *         not created or touched by this method)
 */
private static int copyDataDown(SolrZkClient zkClient, String zkPath, File file) throws IOException, KeeperException, InterruptedException {
byte[] data = zkClient.getData(zkPath, null, null, true);
// Note the "> 1": null, zero-length AND single-byte payloads are all treated as "no data".
if (data != null && data.length > 1) { // There are apparently basically empty ZNodes.
log.info("Writing file {}", file.toString());
Files.write(file.toPath(), data);
return data.length;
}
return 0;
}
public static void downloadFromZK(SolrZkClient zkClient, String zkPath, Path file) throws IOException {
try { try {
List<String> files = zkClient.getChildren(zkPath, null, true); List<String> children = zkClient.getChildren(zkPath, null, true);
Files.createDirectories(dir); // If it has no children, it's a leaf node, write the associated data from the ZNode.
for (String file : files) { // Otherwise, continue recursing, but write the associated data to a special file if any
List<String> children = zkClient.getChildren(zkPath + "/" + file, null, true); if (children.size() == 0) {
if (children.size() == 0) { // If we didn't copy data down, then we also didn't create the file. But we still need a marker on the local
byte[] data = zkClient.getData(zkPath + "/" + file, null, null, true); // disk so create a dir.
Path filename = dir.resolve(file); if (copyDataDown(zkClient, zkPath, file.toFile()) == 0) {
log.info("Writing file {}", filename); Files.createDirectories(file);
Files.write(filename, data); }
} else { } else {
downloadFromZK(zkClient, zkPath + "/" + file, dir.resolve(file)); Files.createDirectories(file); // Make parent dir.
// ZK nodes, whether leaf or not can have data. If it's a non-leaf node and
// has associated data write it into the special file.
copyDataDown(zkClient, zkPath, new File(file.toFile(), ZKNODE_DATA_FILE));
for (String child : children) {
String zkChild = zkPath;
if (zkChild.endsWith("/") == false) zkChild += "/";
zkChild += child;
if (isEphemeral(zkClient, zkChild)) { // Don't copy ephemeral nodes
continue;
}
// Go deeper into the tree now
downloadFromZK(zkClient, zkChild, file.resolve(child));
} }
} }
} catch (KeeperException | InterruptedException e) { } catch (KeeperException | InterruptedException e) {
throw new IOException("Error downloading files from zookeeper path " + zkPath + " to " + dir.toString(), throw new IOException("Error downloading files from zookeeper path " + zkPath + " to " + file.toString(),
SolrZkClient.checkInterrupted(e)); SolrZkClient.checkInterrupted(e));
} }
} }
@ -336,10 +392,24 @@ public class ZkMaintenanceUtils {
if ("\\".equals(separator)) if ("\\".equals(separator))
relativePath = relativePath.replaceAll("\\\\", "/"); relativePath = relativePath.replaceAll("\\\\", "/");
// It's possible that the relative path and file are the same, in which case // It's possible that the relative path and file are the same, in which case
// adding the bare slash is A Bad Idea // adding the bare slash is A Bad Idea unless it's a non-leaf data node
if (relativePath.length() == 0) return zkRoot; boolean isNonLeafData = file.toFile().getName().equals(ZKNODE_DATA_FILE);
if (relativePath.length() == 0 && isNonLeafData == false) return zkRoot;
return zkRoot + "/" + relativePath; // Important to have this check if the source is file:whatever/ and the destination is just zk:/
if (zkRoot.endsWith("/") == false) zkRoot += "/";
String ret = zkRoot + relativePath;
// Special handling for data associated with non-leaf node.
if (isNonLeafData) {
// special handling since what we need to do is add the data to the parent.
ret = ret.substring(0, ret.indexOf(ZKNODE_DATA_FILE));
if (ret.endsWith("/")) {
ret = ret.substring(0, ret.length() - 1);
}
}
return ret;
} }
} }