mirror of https://github.com/apache/druid.git

consolidate CompressionUtils.java since now in the same jar (#6908)

parent 873232954f
commit 3895914aa2
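The practical effect of this commit is that the full implementation now lives in org.apache.druid.utils.CompressionUtils, the duplicate in org.apache.druid.java.util.common is deleted, and call sites across the modules below only swap an import. A minimal sketch of that caller-side migration (the class name and paths in main are illustrative; only the CompressionUtils API and packages come from this diff):

// Before this commit:
//   import org.apache.druid.java.util.common.CompressionUtils;
// After this commit:
import org.apache.druid.utils.CompressionUtils;

import java.io.File;
import java.io.IOException;

public class MigrationSketch
{
  public static void main(String[] args) throws IOException
  {
    // Same static API, new package: zip a directory and fsync the result.
    final long uncompressedBytes =
        CompressionUtils.zip(new File("/tmp/segmentDir"), new File("/tmp/segment.zip"), true);
    System.out.println("zipped " + uncompressedBytes + " uncompressed bytes");
  }
}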
@@ -1,618 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.java.util.common;

import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.io.ByteSink;
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.druid.java.util.common.io.NativeIO;
import org.apache.druid.java.util.common.logger.Logger;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.StandardOpenOption;
import java.util.Enumeration;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

public class CompressionUtils
{
  private static final Logger log = new Logger(CompressionUtils.class);
  private static final int DEFAULT_RETRY_COUNT = 3;
  private static final String BZ2_SUFFIX = ".bz2";
  private static final String GZ_SUFFIX = ".gz";
  private static final String XZ_SUFFIX = ".xz";
  private static final String ZIP_SUFFIX = ".zip";
  private static final String SNAPPY_SUFFIX = ".sz";
  private static final String ZSTD_SUFFIX = ".zst";

  /**
   * Zip the contents of directory into the file indicated by outputZipFile. Sub directories are skipped
   *
   * @param directory     The directory whose contents should be added to the zip in the output stream.
   * @param outputZipFile The output file to write the zipped data to
   * @param fsync         True if the output file should be fsynced to disk
   *
   * @return The number of bytes (uncompressed) read from the input directory.
   *
   * @throws IOException
   */
  public static long zip(File directory, File outputZipFile, boolean fsync) throws IOException
  {
    if (!isZip(outputZipFile.getName())) {
      log.warn("No .zip suffix[%s], putting files from [%s] into it anyway.", outputZipFile, directory);
    }

    if (fsync) {
      return FileUtils.writeAtomically(outputZipFile, out -> zip(directory, out));
    } else {
      try (
          final FileChannel fileChannel = FileChannel.open(
              outputZipFile.toPath(),
              StandardOpenOption.WRITE,
              StandardOpenOption.CREATE
          );
          final OutputStream out = Channels.newOutputStream(fileChannel)
      ) {
        return zip(directory, out);
      }
    }
  }

  /**
   * Zip the contents of directory into the file indicated by outputZipFile. Sub directories are skipped
   *
   * @param directory     The directory whose contents should be added to the zip in the output stream.
   * @param outputZipFile The output file to write the zipped data to
   *
   * @return The number of bytes (uncompressed) read from the input directory.
   *
   * @throws IOException
   */
  public static long zip(File directory, File outputZipFile) throws IOException
  {
    return zip(directory, outputZipFile, false);
  }

  /**
   * Zips the contents of the input directory to the output stream. Sub directories are skipped
   *
   * @param directory The directory whose contents should be added to the zip in the output stream.
   * @param out       The output stream to write the zip data to. Caller is responsible for closing this stream.
   *
   * @return The number of bytes (uncompressed) read from the input directory.
   *
   * @throws IOException
   */
  public static long zip(File directory, OutputStream out) throws IOException
  {
    if (!directory.isDirectory()) {
      throw new IOE("directory[%s] is not a directory", directory);
    }

    final ZipOutputStream zipOut = new ZipOutputStream(out);

    long totalSize = 0;
    for (File file : directory.listFiles()) {
      log.info("Adding file[%s] with size[%,d]. Total size so far[%,d]", file, file.length(), totalSize);
      if (file.length() > Integer.MAX_VALUE) {
        zipOut.finish();
        throw new IOE("file[%s] too large [%,d]", file, file.length());
      }
      zipOut.putNextEntry(new ZipEntry(file.getName()));
      totalSize += Files.asByteSource(file).copyTo(zipOut);
    }
    zipOut.closeEntry();
    // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
    zipOut.flush();
    zipOut.finish();

    return totalSize;
  }

  /**
   * Unzip the byteSource to the output directory. If cacheLocally is true, the byteSource is cached to local disk before unzipping.
   * This may cause more predictable behavior than trying to unzip a large file directly off a network stream, for example.
   *
   * @param byteSource   The ByteSource which supplies the zip data
   * @param outDir       The output directory to put the contents of the zip
   * @param shouldRetry  A predicate expression to determine if a new InputStream should be acquired from ByteSource and the copy attempted again
   * @param cacheLocally A boolean flag to indicate if the data should be cached locally
   *
   * @return A FileCopyResult containing the result of writing the zip entries to disk
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(
      final ByteSource byteSource,
      final File outDir,
      final Predicate<Throwable> shouldRetry,
      boolean cacheLocally
  ) throws IOException
  {
    if (!cacheLocally) {
      try {
        return RetryUtils.retry(
            () -> unzip(byteSource.openStream(), outDir),
            shouldRetry,
            DEFAULT_RETRY_COUNT
        );
      }
      catch (IOException e) {
        throw e;
      }
      catch (Exception e) {
        throw Throwables.propagate(e);
      }
    } else {
      final File tmpFile = File.createTempFile("compressionUtilZipCache", ZIP_SUFFIX);
      try {
        FileUtils.retryCopy(
            byteSource,
            tmpFile,
            shouldRetry,
            DEFAULT_RETRY_COUNT
        );
        return unzip(tmpFile, outDir);
      }
      finally {
        if (!tmpFile.delete()) {
          log.warn("Could not delete zip cache at [%s]", tmpFile.toString());
        }
      }
    }
  }

  /**
   * Unzip the byteSource to the output directory. If cacheLocally is true, the byteSource is cached to local disk before unzipping.
   * This may cause more predictable behavior than trying to unzip a large file directly off a network stream, for example.
   *
   * @param byteSource   The ByteSource which supplies the zip data
   * @param outDir       The output directory to put the contents of the zip
   * @param cacheLocally A boolean flag to indicate if the data should be cached locally
   *
   * @return A FileCopyResult containing the result of writing the zip entries to disk
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(
      final ByteSource byteSource,
      final File outDir,
      boolean cacheLocally
  ) throws IOException
  {
    return unzip(byteSource, outDir, FileUtils.IS_EXCEPTION, cacheLocally);
  }

  /**
   * Unzip the pulled file to an output directory. This is only expected to work on zips with lone files, and is not intended for zips with directory structures.
   *
   * @param pulledFile The file to unzip
   * @param outDir     The directory to store the contents of the file.
   *
   * @return a FileCopyResult of the files which were written to disk
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(final File pulledFile, final File outDir) throws IOException
  {
    if (!(outDir.exists() && outDir.isDirectory())) {
      throw new ISE("outDir[%s] must exist and be a directory", outDir);
    }
    log.info("Unzipping file[%s] to [%s]", pulledFile, outDir);
    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
    try (final ZipFile zipFile = new ZipFile(pulledFile)) {
      final Enumeration<? extends ZipEntry> enumeration = zipFile.entries();
      while (enumeration.hasMoreElements()) {
        final ZipEntry entry = enumeration.nextElement();
        final File outFile = new File(outDir, entry.getName());

        validateZipOutputFile(pulledFile.getCanonicalPath(), outFile, outDir);

        result.addFiles(
            FileUtils.retryCopy(
                new ByteSource()
                {
                  @Override
                  public InputStream openStream() throws IOException
                  {
                    return new BufferedInputStream(zipFile.getInputStream(entry));
                  }
                },
                outFile,
                FileUtils.IS_EXCEPTION,
                DEFAULT_RETRY_COUNT
            ).getFiles()
        );
      }
    }
    return result;
  }

  public static void validateZipOutputFile(
      String sourceFilename,
      final File outFile,
      final File outDir
  ) throws IOException
  {
    // check for evil zip exploit that allows writing output to arbitrary directories
    final File canonicalOutFile = outFile.getCanonicalFile();
    final String canonicalOutDir = outDir.getCanonicalPath();
    if (!canonicalOutFile.toPath().startsWith(canonicalOutDir)) {
      throw new ISE(
          "Unzipped output path[%s] of sourceFile[%s] does not start with outDir[%s].",
          canonicalOutFile,
          sourceFilename,
          canonicalOutDir
      );
    }
  }

  /**
   * Unzip from the input stream to the output directory, using the entry's file name as the file name in the output directory.
   * The behavior of directories in the input stream's zip is undefined.
   * If possible, it is recommended to use unzip(ByteStream, File) instead
   *
   * @param in     The input stream of the zip data. This stream is closed
   * @param outDir The directory to copy the unzipped data to
   *
   * @return The FileUtils.FileCopyResult containing information on all the files which were written
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(InputStream in, File outDir) throws IOException
  {
    try (final ZipInputStream zipIn = new ZipInputStream(in)) {
      final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
      ZipEntry entry;
      while ((entry = zipIn.getNextEntry()) != null) {
        final File file = new File(outDir, entry.getName());

        validateZipOutputFile("", file, outDir);

        NativeIO.chunkedCopy(zipIn, file);

        result.addFile(file);
        zipIn.closeEntry();
      }
      return result;
    }
  }

  /**
   * gunzip the file to the output file.
   *
   * @param pulledFile The source of the gz data
   * @param outFile    A target file to put the contents
   *
   * @return The result of the file copy
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gunzip(final File pulledFile, File outFile)
  {
    return gunzip(Files.asByteSource(pulledFile), outFile);
  }

  /**
   * Unzips the input stream via a gzip filter. use gunzip(ByteSource, File, Predicate) if possible
   *
   * @param in      The input stream to run through the gunzip filter. This stream is closed
   * @param outFile The file to output to
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gunzip(InputStream in, File outFile) throws IOException
  {
    try (GZIPInputStream gzipInputStream = gzipInputStream(in)) {
      NativeIO.chunkedCopy(gzipInputStream, outFile);
      return new FileUtils.FileCopyResult(outFile);
    }
  }

  /**
   * Fixes java bug 7036144 http://bugs.java.com/bugdatabase/view_bug.do?bug_id=7036144 which affects concatenated GZip
   *
   * @param in The raw input stream
   *
   * @return A GZIPInputStream that can handle concatenated gzip streams in the input
   *
   * @see #decompress(InputStream, String) which should be used instead for streams coming from files
   */
  public static GZIPInputStream gzipInputStream(final InputStream in) throws IOException
  {
    return new GZIPInputStream(
        new FilterInputStream(in)
        {
          @Override
          public int available() throws IOException
          {
            final int otherAvailable = super.available();
            // Hack. Docs say available() should return an estimate,
            // so we estimate about 1KB to work around available == 0 bug in GZIPInputStream
            return otherAvailable == 0 ? 1 << 10 : otherAvailable;
          }
        }
    );
  }

  /**
   * gunzip from the source stream to the destination stream.
   *
   * @param in  The input stream which is to be decompressed. This stream is closed.
   * @param out The output stream to write to. This stream is closed
   *
   * @return The number of bytes written to the output stream.
   *
   * @throws IOException
   */
  public static long gunzip(InputStream in, OutputStream out) throws IOException
  {
    try (GZIPInputStream gzipInputStream = gzipInputStream(in)) {
      final long result = ByteStreams.copy(gzipInputStream, out);
      out.flush();
      return result;
    }
    finally {
      out.close();
    }
  }

  /**
   * A gunzip function to store locally
   *
   * @param in          The factory to produce input streams
   * @param outFile     The file to store the result into
   * @param shouldRetry A predicate to indicate if the Throwable is recoverable
   *
   * @return The count of bytes written to outFile
   */
  public static FileUtils.FileCopyResult gunzip(
      final ByteSource in,
      final File outFile,
      Predicate<Throwable> shouldRetry
  )
  {
    return FileUtils.retryCopy(
        new ByteSource()
        {
          @Override
          public InputStream openStream() throws IOException
          {
            return gzipInputStream(in.openStream());
          }
        },
        outFile,
        shouldRetry,
        DEFAULT_RETRY_COUNT
    );
  }

  /**
   * Gunzip from the input stream to the output file
   *
   * @param in      The compressed input stream to read from
   * @param outFile The file to write the uncompressed results to
   *
   * @return A FileCopyResult of the file written
   */
  public static FileUtils.FileCopyResult gunzip(final ByteSource in, File outFile)
  {
    return gunzip(in, outFile, FileUtils.IS_EXCEPTION);
  }

  /**
   * Copy inputStream to out while wrapping out in a GZIPOutputStream
   * Closes both input and output
   *
   * @param inputStream The input stream to copy data from. This stream is closed
   * @param out         The output stream to wrap in a GZIPOutputStream before copying. This stream is closed
   *
   * @return The size of the data copied
   *
   * @throws IOException
   */
  public static long gzip(InputStream inputStream, OutputStream out) throws IOException
  {
    try (GZIPOutputStream outputStream = new GZIPOutputStream(out)) {
      final long result = ByteStreams.copy(inputStream, outputStream);
      out.flush();
      return result;
    }
    finally {
      inputStream.close();
    }
  }

  /**
   * Gzips the input file to the output
   *
   * @param inFile      The file to gzip
   * @param outFile     A target file to copy the uncompressed contents of inFile to
   * @param shouldRetry Predicate on a potential throwable to determine if the copy should be attempted again.
   *
   * @return The result of the file copy
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gzip(final File inFile, final File outFile, Predicate<Throwable> shouldRetry)
  {
    gzip(Files.asByteSource(inFile), Files.asByteSink(outFile), shouldRetry);
    return new FileUtils.FileCopyResult(outFile);
  }

  public static long gzip(final ByteSource in, final ByteSink out, Predicate<Throwable> shouldRetry)
  {
    return StreamUtils.retryCopy(
        in,
        new ByteSink()
        {
          @Override
          public OutputStream openStream() throws IOException
          {
            return new GZIPOutputStream(out.openStream());
          }
        },
        shouldRetry,
        DEFAULT_RETRY_COUNT
    );
  }

  /**
   * GZip compress the contents of inFile into outFile
   *
   * @param inFile  The source of data
   * @param outFile The destination for compressed data
   *
   * @return A FileCopyResult of the resulting file at outFile
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gzip(final File inFile, final File outFile)
  {
    return gzip(inFile, outFile, FileUtils.IS_EXCEPTION);
  }

  /**
   * Checks to see if fName is a valid name for a "*.zip" file
   *
   * @param fName The name of the file in question
   *
   * @return True if fName is properly named for a .zip file, false otherwise
   */
  public static boolean isZip(String fName)
  {
    if (Strings.isNullOrEmpty(fName)) {
      return false;
    }
    return fName.endsWith(ZIP_SUFFIX); // Technically a file named `.zip` would be fine
  }

  /**
   * Checks to see if fName is a valid name for a "*.gz" file
   *
   * @param fName The name of the file in question
   *
   * @return True if fName is a properly named .gz file, false otherwise
   */
  public static boolean isGz(String fName)
  {
    if (Strings.isNullOrEmpty(fName)) {
      return false;
    }
    return fName.endsWith(GZ_SUFFIX) && fName.length() > GZ_SUFFIX.length();
  }

  /**
   * Get the file name without the .gz extension
   *
   * @param fname The name of the gzip file
   *
   * @return fname without the ".gz" extension
   *
   * @throws IAE if fname is not a valid "*.gz" file name
   */
  public static String getGzBaseName(String fname)
  {
    final String reducedFname = Files.getNameWithoutExtension(fname);
    if (isGz(fname) && !reducedFname.isEmpty()) {
      return reducedFname;
    }
    throw new IAE("[%s] is not a valid gz file name", fname);
  }

  /**
   * Decompress an input stream from a file, based on the filename.
   */
  public static InputStream decompress(final InputStream in, final String fileName) throws IOException
  {
    if (fileName.endsWith(GZ_SUFFIX)) {
      return gzipInputStream(in);
    } else if (fileName.endsWith(BZ2_SUFFIX)) {
      return new BZip2CompressorInputStream(in, true);
    } else if (fileName.endsWith(XZ_SUFFIX)) {
      return new XZCompressorInputStream(in, true);
    } else if (fileName.endsWith(SNAPPY_SUFFIX)) {
      return new FramedSnappyCompressorInputStream(in);
    } else if (fileName.endsWith(ZSTD_SUFFIX)) {
      return new ZstdCompressorInputStream(in);
    } else if (fileName.endsWith(ZIP_SUFFIX)) {
      // This reads the first file in the archive.
      final ZipInputStream zipIn = new ZipInputStream(in, StandardCharsets.UTF_8);
      try {
        final ZipEntry nextEntry = zipIn.getNextEntry();
        if (nextEntry == null) {
          zipIn.close();

          // No files in the archive - return an empty stream.
          return new ByteArrayInputStream(new byte[0]);
        }
        return zipIn;
      }
      catch (IOException e) {
        try {
          zipIn.close();
        }
        catch (IOException e2) {
          e.addSuppressed(e2);
        }
        throw e;
      }
    } else {
      return in;
    }
  }

  // Helper method for unit tests (for checking that we fixed https://snyk.io/research/zip-slip-vulnerability)
  public static void makeEvilZip(File outputFile) throws IOException
  {
    ZipOutputStream zipOutputStream = new ZipOutputStream(new FileOutputStream(outputFile));
    ZipEntry zipEntry = new ZipEntry("../../../../../../../../../../../../../../../tmp/evil.txt");
    zipOutputStream.putNextEntry(zipEntry);
    byte[] output = StringUtils.toUtf8("evil text");
    zipOutputStream.write(output);
    zipOutputStream.closeEntry();
    zipOutputStream.close();
  }
}
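For orientation, a hedged round-trip sketch of the zip/unzip API deleted above and re-homed below (assuming the post-commit package; the paths are illustrative, and FileUtils.IS_EXCEPTION is the retry-on-any-exception predicate the class itself uses):

import com.google.common.io.Files;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.utils.CompressionUtils;

import java.io.File;
import java.io.IOException;

public class RoundTripSketch
{
  public static void main(String[] args) throws IOException
  {
    final File src = new File("/tmp/to-zip");        // directory of flat files; sub directories are skipped
    final File zip = new File("/tmp/roundtrip.zip");
    final File outDir = new File("/tmp/unzipped");   // unzip() requires an existing directory
    outDir.mkdirs();

    CompressionUtils.zip(src, zip, false);

    // cacheLocally = true copies the ByteSource to a temp file before extracting,
    // which the javadoc above recommends for data not already on local disk.
    final FileUtils.FileCopyResult result = CompressionUtils.unzip(
        Files.asByteSource(zip),
        outDir,
        FileUtils.IS_EXCEPTION,
        true
    );
    System.out.println("unzipped " + result.getFiles().size() + " files");
  }
}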
@@ -19,66 +19,595 @@
package org.apache.druid.utils;

import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.io.ByteSink;
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.druid.guice.annotations.PublicApi;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.IOE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.java.util.common.StreamUtils;
import org.apache.druid.java.util.common.io.NativeIO;
import org.apache.druid.java.util.common.logger.Logger;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.StandardOpenOption;
import java.util.Enumeration;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

/**
 */
@PublicApi
public class CompressionUtils
{
  private static final Logger log = new Logger(CompressionUtils.class);
  private static final int DEFAULT_RETRY_COUNT = 3;
  private static final String BZ2_SUFFIX = ".bz2";
  private static final String GZ_SUFFIX = ".gz";
  private static final String XZ_SUFFIX = ".xz";
  private static final String ZIP_SUFFIX = ".zip";
  private static final String SNAPPY_SUFFIX = ".sz";
  private static final String ZSTD_SUFFIX = ".zst";

  @Deprecated // Use org.apache.druid.java.util.common.CompressionUtils.zip
  public static long zip(File directory, File outputZipFile) throws IOException
  {
    return org.apache.druid.java.util.common.CompressionUtils.zip(directory, outputZipFile);
  }

  @Deprecated // Use org.apache.druid.java.util.common.CompressionUtils.zip
  public static long zip(File directory, OutputStream out) throws IOException
  {
    return org.apache.druid.java.util.common.CompressionUtils.zip(directory, out);
  }

  @Deprecated // Use org.apache.druid.java.util.common.CompressionUtils.unzip
  public static void unzip(File pulledFile, File outDir) throws IOException
  {
    org.apache.druid.java.util.common.CompressionUtils.unzip(pulledFile, outDir);
  }

  @Deprecated // Use org.apache.druid.java.util.common.CompressionUtils.unzip
  public static void unzip(InputStream in, File outDir) throws IOException
  {
    org.apache.druid.java.util.common.CompressionUtils.unzip(in, outDir);
  }

  /**
   * Uncompress using a gzip uncompress algorithm from the `pulledFile` to the `outDir`.
   * Unlike `org.apache.druid.java.util.common.CompressionUtils.gunzip`, this function takes an output *DIRECTORY* and tries to guess the file name.
   * It is recommended that the caller use `org.apache.druid.java.util.common.CompressionUtils.gunzip` and specify the output file themselves to ensure names are as expected
   *
   * @param pulledFile The source file
   * @param outDir     The destination directory to put the resulting file
   *
   * @throws IOException on propagated IO exception, IAE if it cannot determine the proper new name for `pulledFile`
   */
  @Deprecated // See description for alternative
  public static void gunzip(File pulledFile, File outDir)
  {
    final File outFile = new File(outDir, org.apache.druid.java.util.common.CompressionUtils.getGzBaseName(pulledFile.getName()));
    org.apache.druid.java.util.common.CompressionUtils.gunzip(pulledFile, outFile);
    if (!pulledFile.delete()) {
      log.error("Could not delete tmpFile[%s].", pulledFile);
    }
  }

  /**
   * Zip the contents of directory into the file indicated by outputZipFile. Sub directories are skipped
   *
   * @param directory     The directory whose contents should be added to the zip in the output stream.
   * @param outputZipFile The output file to write the zipped data to
   * @param fsync         True if the output file should be fsynced to disk
   *
   * @return The number of bytes (uncompressed) read from the input directory.
   *
   * @throws IOException
   */
  public static long zip(File directory, File outputZipFile, boolean fsync) throws IOException
  {
    if (!isZip(outputZipFile.getName())) {
      log.warn("No .zip suffix[%s], putting files from [%s] into it anyway.", outputZipFile, directory);
    }

    if (fsync) {
      return FileUtils.writeAtomically(outputZipFile, out -> zip(directory, out));
    } else {
      try (
          final FileChannel fileChannel = FileChannel.open(
              outputZipFile.toPath(),
              StandardOpenOption.WRITE,
              StandardOpenOption.CREATE
          );
          final OutputStream out = Channels.newOutputStream(fileChannel)
      ) {
        return zip(directory, out);
      }
    }
  }

  /**
   * Zip the contents of directory into the file indicated by outputZipFile. Sub directories are skipped
   *
   * @param directory     The directory whose contents should be added to the zip in the output stream.
   * @param outputZipFile The output file to write the zipped data to
   *
   * @return The number of bytes (uncompressed) read from the input directory.
   *
   * @throws IOException
   */
  public static long zip(File directory, File outputZipFile) throws IOException
  {
    return zip(directory, outputZipFile, false);
  }

  /**
   * Zips the contents of the input directory to the output stream. Sub directories are skipped
   *
   * @param directory The directory whose contents should be added to the zip in the output stream.
   * @param out       The output stream to write the zip data to. Caller is responsible for closing this stream.
   *
   * @return The number of bytes (uncompressed) read from the input directory.
   *
   * @throws IOException
   */
  public static long zip(File directory, OutputStream out) throws IOException
  {
    if (!directory.isDirectory()) {
      throw new IOE("directory[%s] is not a directory", directory);
    }

    final ZipOutputStream zipOut = new ZipOutputStream(out);

    long totalSize = 0;
    for (File file : directory.listFiles()) {
      log.info("Adding file[%s] with size[%,d]. Total size so far[%,d]", file, file.length(), totalSize);
      if (file.length() > Integer.MAX_VALUE) {
        zipOut.finish();
        throw new IOE("file[%s] too large [%,d]", file, file.length());
      }
      zipOut.putNextEntry(new ZipEntry(file.getName()));
      totalSize += Files.asByteSource(file).copyTo(zipOut);
    }
    zipOut.closeEntry();
    // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
    zipOut.flush();
    zipOut.finish();

    return totalSize;
  }

  /**
   * Unzip the byteSource to the output directory. If cacheLocally is true, the byteSource is cached to local disk before unzipping.
   * This may cause more predictable behavior than trying to unzip a large file directly off a network stream, for example.
   *
   * @param byteSource   The ByteSource which supplies the zip data
   * @param outDir       The output directory to put the contents of the zip
   * @param shouldRetry  A predicate expression to determine if a new InputStream should be acquired from ByteSource and the copy attempted again
   * @param cacheLocally A boolean flag to indicate if the data should be cached locally
   *
   * @return A FileCopyResult containing the result of writing the zip entries to disk
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(
      final ByteSource byteSource,
      final File outDir,
      final Predicate<Throwable> shouldRetry,
      boolean cacheLocally
  ) throws IOException
  {
    if (!cacheLocally) {
      try {
        return RetryUtils.retry(
            () -> unzip(byteSource.openStream(), outDir),
            shouldRetry,
            DEFAULT_RETRY_COUNT
        );
      }
      catch (IOException e) {
        throw e;
      }
      catch (Exception e) {
        throw Throwables.propagate(e);
      }
    } else {
      final File tmpFile = File.createTempFile("compressionUtilZipCache", ZIP_SUFFIX);
      try {
        FileUtils.retryCopy(
            byteSource,
            tmpFile,
            shouldRetry,
            DEFAULT_RETRY_COUNT
        );
        return unzip(tmpFile, outDir);
      }
      finally {
        if (!tmpFile.delete()) {
          log.warn("Could not delete zip cache at [%s]", tmpFile.toString());
        }
      }
    }
  }

  /**
   * Unzip the byteSource to the output directory. If cacheLocally is true, the byteSource is cached to local disk before unzipping.
   * This may cause more predictable behavior than trying to unzip a large file directly off a network stream, for example.
   *
   * @param byteSource   The ByteSource which supplies the zip data
   * @param outDir       The output directory to put the contents of the zip
   * @param cacheLocally A boolean flag to indicate if the data should be cached locally
   *
   * @return A FileCopyResult containing the result of writing the zip entries to disk
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(
      final ByteSource byteSource,
      final File outDir,
      boolean cacheLocally
  ) throws IOException
  {
    return unzip(byteSource, outDir, FileUtils.IS_EXCEPTION, cacheLocally);
  }

  /**
   * Unzip the pulled file to an output directory. This is only expected to work on zips with lone files, and is not intended for zips with directory structures.
   *
   * @param pulledFile The file to unzip
   * @param outDir     The directory to store the contents of the file.
   *
   * @return a FileCopyResult of the files which were written to disk
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(final File pulledFile, final File outDir) throws IOException
  {
    if (!(outDir.exists() && outDir.isDirectory())) {
      throw new ISE("outDir[%s] must exist and be a directory", outDir);
    }
    log.info("Unzipping file[%s] to [%s]", pulledFile, outDir);
    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
    try (final ZipFile zipFile = new ZipFile(pulledFile)) {
      final Enumeration<? extends ZipEntry> enumeration = zipFile.entries();
      while (enumeration.hasMoreElements()) {
        final ZipEntry entry = enumeration.nextElement();
        final File outFile = new File(outDir, entry.getName());

        validateZipOutputFile(pulledFile.getCanonicalPath(), outFile, outDir);

        result.addFiles(
            FileUtils.retryCopy(
                new ByteSource()
                {
                  @Override
                  public InputStream openStream() throws IOException
                  {
                    return new BufferedInputStream(zipFile.getInputStream(entry));
                  }
                },
                outFile,
                FileUtils.IS_EXCEPTION,
                DEFAULT_RETRY_COUNT
            ).getFiles()
        );
      }
    }
    return result;
  }

  public static void validateZipOutputFile(
      String sourceFilename,
      final File outFile,
      final File outDir
  ) throws IOException
  {
    // check for evil zip exploit that allows writing output to arbitrary directories
    final File canonicalOutFile = outFile.getCanonicalFile();
    final String canonicalOutDir = outDir.getCanonicalPath();
    if (!canonicalOutFile.toPath().startsWith(canonicalOutDir)) {
      throw new ISE(
          "Unzipped output path[%s] of sourceFile[%s] does not start with outDir[%s].",
          canonicalOutFile,
          sourceFilename,
          canonicalOutDir
      );
    }
  }

  /**
   * Unzip from the input stream to the output directory, using the entry's file name as the file name in the output directory.
   * The behavior of directories in the input stream's zip is undefined.
   * If possible, it is recommended to use unzip(ByteStream, File) instead
   *
   * @param in     The input stream of the zip data. This stream is closed
   * @param outDir The directory to copy the unzipped data to
   *
   * @return The FileUtils.FileCopyResult containing information on all the files which were written
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult unzip(InputStream in, File outDir) throws IOException
  {
    try (final ZipInputStream zipIn = new ZipInputStream(in)) {
      final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
      ZipEntry entry;
      while ((entry = zipIn.getNextEntry()) != null) {
        final File file = new File(outDir, entry.getName());

        validateZipOutputFile("", file, outDir);

        NativeIO.chunkedCopy(zipIn, file);

        result.addFile(file);
        zipIn.closeEntry();
      }
      return result;
    }
  }

  /**
   * gunzip the file to the output file.
   *
   * @param pulledFile The source of the gz data
   * @param outFile    A target file to put the contents
   *
   * @return The result of the file copy
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gunzip(final File pulledFile, File outFile)
  {
    return gunzip(Files.asByteSource(pulledFile), outFile);
  }

  /**
   * Unzips the input stream via a gzip filter. use gunzip(ByteSource, File, Predicate) if possible
   *
   * @param in      The input stream to run through the gunzip filter. This stream is closed
   * @param outFile The file to output to
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gunzip(InputStream in, File outFile) throws IOException
  {
    try (GZIPInputStream gzipInputStream = gzipInputStream(in)) {
      NativeIO.chunkedCopy(gzipInputStream, outFile);
      return new FileUtils.FileCopyResult(outFile);
    }
  }

  /**
   * Fixes java bug 7036144 http://bugs.java.com/bugdatabase/view_bug.do?bug_id=7036144 which affects concatenated GZip
   *
   * @param in The raw input stream
   *
   * @return A GZIPInputStream that can handle concatenated gzip streams in the input
   *
   * @see #decompress(InputStream, String) which should be used instead for streams coming from files
   */
  public static GZIPInputStream gzipInputStream(final InputStream in) throws IOException
  {
    return new GZIPInputStream(
        new FilterInputStream(in)
        {
          @Override
          public int available() throws IOException
          {
            final int otherAvailable = super.available();
            // Hack. Docs say available() should return an estimate,
            // so we estimate about 1KB to work around available == 0 bug in GZIPInputStream
            return otherAvailable == 0 ? 1 << 10 : otherAvailable;
          }
        }
    );
  }

  /**
   * gunzip from the source stream to the destination stream.
   *
   * @param in  The input stream which is to be decompressed. This stream is closed.
   * @param out The output stream to write to. This stream is closed
   *
   * @return The number of bytes written to the output stream.
   *
   * @throws IOException
   */
  public static long gunzip(InputStream in, OutputStream out) throws IOException
  {
    try (GZIPInputStream gzipInputStream = gzipInputStream(in)) {
      final long result = ByteStreams.copy(gzipInputStream, out);
      out.flush();
      return result;
    }
    finally {
      out.close();
    }
  }

  /**
   * A gunzip function to store locally
   *
   * @param in          The factory to produce input streams
   * @param outFile     The file to store the result into
   * @param shouldRetry A predicate to indicate if the Throwable is recoverable
   *
   * @return The count of bytes written to outFile
   */
  public static FileUtils.FileCopyResult gunzip(
      final ByteSource in,
      final File outFile,
      Predicate<Throwable> shouldRetry
  )
  {
    return FileUtils.retryCopy(
        new ByteSource()
        {
          @Override
          public InputStream openStream() throws IOException
          {
            return gzipInputStream(in.openStream());
          }
        },
        outFile,
        shouldRetry,
        DEFAULT_RETRY_COUNT
    );
  }

  /**
   * Gunzip from the input stream to the output file
   *
   * @param in      The compressed input stream to read from
   * @param outFile The file to write the uncompressed results to
   *
   * @return A FileCopyResult of the file written
   */
  public static FileUtils.FileCopyResult gunzip(final ByteSource in, File outFile)
  {
    return gunzip(in, outFile, FileUtils.IS_EXCEPTION);
  }

  /**
   * Copy inputStream to out while wrapping out in a GZIPOutputStream
   * Closes both input and output
   *
   * @param inputStream The input stream to copy data from. This stream is closed
   * @param out         The output stream to wrap in a GZIPOutputStream before copying. This stream is closed
   *
   * @return The size of the data copied
   *
   * @throws IOException
   */
  public static long gzip(InputStream inputStream, OutputStream out) throws IOException
  {
    try (GZIPOutputStream outputStream = new GZIPOutputStream(out)) {
      final long result = ByteStreams.copy(inputStream, outputStream);
      out.flush();
      return result;
    }
    finally {
      inputStream.close();
    }
  }

  /**
   * Gzips the input file to the output
   *
   * @param inFile      The file to gzip
   * @param outFile     A target file to copy the uncompressed contents of inFile to
   * @param shouldRetry Predicate on a potential throwable to determine if the copy should be attempted again.
   *
   * @return The result of the file copy
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gzip(final File inFile, final File outFile, Predicate<Throwable> shouldRetry)
  {
    gzip(Files.asByteSource(inFile), Files.asByteSink(outFile), shouldRetry);
    return new FileUtils.FileCopyResult(outFile);
  }

  public static long gzip(final ByteSource in, final ByteSink out, Predicate<Throwable> shouldRetry)
  {
    return StreamUtils.retryCopy(
        in,
        new ByteSink()
        {
          @Override
          public OutputStream openStream() throws IOException
          {
            return new GZIPOutputStream(out.openStream());
          }
        },
        shouldRetry,
        DEFAULT_RETRY_COUNT
    );
  }

  /**
   * GZip compress the contents of inFile into outFile
   *
   * @param inFile  The source of data
   * @param outFile The destination for compressed data
   *
   * @return A FileCopyResult of the resulting file at outFile
   *
   * @throws IOException
   */
  public static FileUtils.FileCopyResult gzip(final File inFile, final File outFile)
  {
    return gzip(inFile, outFile, FileUtils.IS_EXCEPTION);
  }

  /**
   * Checks to see if fName is a valid name for a "*.zip" file
   *
   * @param fName The name of the file in question
   *
   * @return True if fName is properly named for a .zip file, false otherwise
   */
  public static boolean isZip(String fName)
  {
    if (Strings.isNullOrEmpty(fName)) {
      return false;
    }
    return fName.endsWith(ZIP_SUFFIX); // Technically a file named `.zip` would be fine
  }

  /**
   * Checks to see if fName is a valid name for a "*.gz" file
   *
   * @param fName The name of the file in question
   *
   * @return True if fName is a properly named .gz file, false otherwise
   */
  public static boolean isGz(String fName)
  {
    if (Strings.isNullOrEmpty(fName)) {
      return false;
    }
    return fName.endsWith(GZ_SUFFIX) && fName.length() > GZ_SUFFIX.length();
  }

  /**
   * Get the file name without the .gz extension
   *
   * @param fname The name of the gzip file
   *
   * @return fname without the ".gz" extension
   *
   * @throws IAE if fname is not a valid "*.gz" file name
   */
  public static String getGzBaseName(String fname)
  {
    final String reducedFname = Files.getNameWithoutExtension(fname);
    if (isGz(fname) && !reducedFname.isEmpty()) {
      return reducedFname;
    }
    throw new IAE("[%s] is not a valid gz file name", fname);
  }

  /**
   * Decompress an input stream from a file, based on the filename.
   */
  public static InputStream decompress(final InputStream in, final String fileName) throws IOException
  {
    if (fileName.endsWith(GZ_SUFFIX)) {
      return gzipInputStream(in);
    } else if (fileName.endsWith(BZ2_SUFFIX)) {
      return new BZip2CompressorInputStream(in, true);
    } else if (fileName.endsWith(XZ_SUFFIX)) {
      return new XZCompressorInputStream(in, true);
    } else if (fileName.endsWith(SNAPPY_SUFFIX)) {
      return new FramedSnappyCompressorInputStream(in);
    } else if (fileName.endsWith(ZSTD_SUFFIX)) {
      return new ZstdCompressorInputStream(in);
    } else if (fileName.endsWith(ZIP_SUFFIX)) {
      // This reads the first file in the archive.
      final ZipInputStream zipIn = new ZipInputStream(in, StandardCharsets.UTF_8);
      try {
        final ZipEntry nextEntry = zipIn.getNextEntry();
        if (nextEntry == null) {
          zipIn.close();

          // No files in the archive - return an empty stream.
          return new ByteArrayInputStream(new byte[0]);
        }
        return zipIn;
      }
      catch (IOException e) {
        try {
          zipIn.close();
        }
        catch (IOException e2) {
          e.addSuppressed(e2);
        }
        throw e;
      }
    } else {
      return in;
    }
  }
}
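Since decompress() above dispatches purely on the file-name suffix, a caller can wrap any stream without caring which codec produced it. A small sketch of that usage (not part of the commit; the file name is illustrative):

import org.apache.druid.utils.CompressionUtils;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class DecompressSketch
{
  public static void main(String[] args) throws IOException
  {
    final String name = "data.csv.gz"; // .gz, .bz2, .xz, .sz, .zst and .zip are recognized
    try (InputStream in = CompressionUtils.decompress(new FileInputStream(name), name)) {
      final byte[] buf = new byte[8192];
      int total = 0;
      for (int n; (n = in.read(buf)) != -1; ) {
        total += n; // consume the already-decompressed bytes
      }
      System.out.println("decompressed " + total + " bytes");
    }
  }
}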
@@ -29,6 +29,7 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream
import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorOutputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
import org.apache.druid.utils.CompressionUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
@@ -353,7 +354,7 @@ public class CompressionUtilsTest
    File evilZip = new File(tmpDir, "evil.zip");
    java.nio.file.Files.deleteIfExists(evilZip.toPath());
    CompressionUtils.makeEvilZip(evilZip);
    CompressionUtilsTest.makeEvilZip(evilZip);

    try {
      CompressionUtils.unzip(evilZip, tmpDir);
@@ -376,7 +377,7 @@ public class CompressionUtilsTest
    File evilZip = new File(tmpDir, "evil.zip");
    java.nio.file.Files.deleteIfExists(evilZip.toPath());
    CompressionUtils.makeEvilZip(evilZip);
    CompressionUtilsTest.makeEvilZip(evilZip);

    try {
      CompressionUtils.unzip(new FileInputStream(evilZip), tmpDir);
@@ -730,4 +731,16 @@ public class CompressionUtilsTest
      return 0;
    }
  }

  // Helper method for unit tests (for checking that we fixed https://snyk.io/research/zip-slip-vulnerability)
  public static void makeEvilZip(File outputFile) throws IOException
  {
    ZipOutputStream zipOutputStream = new ZipOutputStream(new FileOutputStream(outputFile));
    ZipEntry zipEntry = new ZipEntry("../../../../../../../../../../../../../../../tmp/evil.txt");
    zipOutputStream.putNextEntry(zipEntry);
    byte[] output = StringUtils.toUtf8("evil text");
    zipOutputStream.write(output);
    zipOutputStream.closeEntry();
    zipOutputStream.close();
  }
}
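The helper above moves into the test class so production code no longer ships an exploit generator. A hedged sketch of the kind of assertion the surrounding hunks make (the class name and temp-dir handling here are assumptions, and it presumes the sketch lives in the same package as CompressionUtilsTest; unzip() rejects the traversal entry via validateZipOutputFile, which throws ISE):

import org.apache.druid.java.util.common.ISE;
import org.apache.druid.utils.CompressionUtils;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

public class ZipSlipRejectionSketch
{
  @Test
  public void testEvilZipIsRejected() throws IOException
  {
    final File tmpDir = new File(System.getProperty("java.io.tmpdir"), "zip-slip-sketch");
    Assert.assertTrue(tmpDir.mkdirs() || tmpDir.isDirectory());

    final File evilZip = new File(tmpDir, "evil.zip");
    java.nio.file.Files.deleteIfExists(evilZip.toPath());
    CompressionUtilsTest.makeEvilZip(evilZip); // the helper added by this commit

    try {
      CompressionUtils.unzip(evilZip, tmpDir);
      Assert.fail("expected ISE for an entry that escapes outDir");
    }
    catch (ISE expected) {
      // validateZipOutputFile detected the "../" path traversal
    }
  }
}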
@@ -26,10 +26,10 @@ import io.netty.handler.codec.http.DefaultHttpResponse;
import io.netty.handler.codec.http.HttpHeaders;
import io.netty.handler.codec.http.HttpResponseStatus;
import io.netty.handler.codec.http.HttpVersion;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.lifecycle.Lifecycle;
import org.apache.druid.java.util.emitter.service.UnitEvent;
import org.apache.druid.utils.CompressionUtils;
import org.asynchttpclient.ListenableFuture;
import org.asynchttpclient.Request;
import org.asynchttpclient.Response;
@@ -130,7 +130,7 @@ The following example was retrieved from a Historical process configured to use
00Z_2015-04-14T02:41:09.484Z
2015-04-14T02:42:33,463 INFO [ZkCoordinator-0] org.apache.druid.guice.JsonConfigurator - Loaded class[class org.apache.druid.storage.azure.AzureAccountConfig] from props[druid.azure.] as [org.apache.druid.storage.azure.AzureAccountConfig@759c9ad9]
2015-04-14T02:49:08,275 INFO [ZkCoordinator-0] org.apache.druid.java.util.common.CompressionUtils - Unzipping file[/opt/druid/tmp/compressionUtilZipCache1263964429587449785.zip] to [/opt/druid/zk_druid/dde/2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z/2015-04-14T02:41:09.484Z/0]
2015-04-14T02:49:08,275 INFO [ZkCoordinator-0] org.apache.druid.utils.CompressionUtils - Unzipping file[/opt/druid/tmp/compressionUtilZipCache1263964429587449785.zip] to [/opt/druid/zk_druid/dde/2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z/2015-04-14T02:41:09.484Z/0]
2015-04-14T02:49:08,276 INFO [ZkCoordinator-0] org.apache.druid.storage.azure.AzureDataSegmentPuller - Loaded 1196 bytes from [dde/2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z/2015-04-14T02:41:09.484Z/0/index.zip] to [/opt/druid/zk_druid/dde/2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z/2015-04-14T02:41:09.484Z/0]
@@ -28,10 +28,10 @@ import org.apache.druid.data.input.FiniteFirehoseFactory;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.storage.azure.AzureByteSource;
import org.apache.druid.storage.azure.AzureStorage;
import org.apache.druid.storage.azure.AzureUtils;
import org.apache.druid.utils.CompressionUtils;

import java.io.IOException;
import java.io.InputStream;
@@ -22,9 +22,9 @@ package org.apache.druid.storage.azure;
import com.google.common.io.ByteSource;
import com.google.inject.Inject;
import org.apache.commons.io.FileUtils;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.utils.CompressionUtils;

import java.io.File;
import java.io.IOException;
@@ -24,12 +24,12 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import com.microsoft.azure.storage.StorageException;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.SegmentUtils;
import org.apache.druid.segment.loading.DataSegmentPusher;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.utils.CompressionUtils;
import org.joda.time.format.ISODateTimeFormat;

import java.io.File;
@@ -22,11 +22,11 @@ package org.apache.druid.storage.cassandra;
import com.google.common.base.Predicates;
import com.google.inject.Inject;
import com.netflix.astyanax.recipes.storage.ChunkedStorage;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.utils.CompressionUtils;

import java.io.File;
import java.io.FileOutputStream;
@@ -27,11 +27,11 @@ import com.netflix.astyanax.MutationBatch;
import com.netflix.astyanax.connectionpool.exceptions.NotFoundException;
import com.netflix.astyanax.recipes.storage.ChunkedStorage;
import com.netflix.astyanax.recipes.storage.ChunkedStorageProvider;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.SegmentUtils;
import org.apache.druid.segment.loading.DataSegmentPusher;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.utils.CompressionUtils;

import java.io.File;
import java.io.FileInputStream;
@@ -27,11 +27,11 @@ import org.apache.druid.data.input.FiniteFirehoseFactory;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.storage.cloudfiles.CloudFilesByteSource;
import org.apache.druid.storage.cloudfiles.CloudFilesObjectApiProxy;
import org.apache.druid.storage.cloudfiles.CloudFilesUtils;
import org.apache.druid.utils.CompressionUtils;
import org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi;

import java.io.IOException;
@@ -20,11 +20,11 @@
package org.apache.druid.storage.cloudfiles;

import com.google.inject.Inject;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.utils.CompressionUtils;
import org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi;

import java.io.File;
@@ -23,11 +23,11 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.SegmentUtils;
import org.apache.druid.segment.loading.DataSegmentPusher;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.utils.CompressionUtils;
import org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi;

import java.io.File;
@@ -27,10 +27,10 @@ import org.apache.druid.data.input.FiniteFirehoseFactory;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.storage.google.GoogleByteSource;
import org.apache.druid.storage.google.GoogleStorage;
import org.apache.druid.storage.google.GoogleUtils;
import org.apache.druid.utils.CompressionUtils;

import java.io.IOException;
import java.io.InputStream;
@@ -21,11 +21,11 @@ package org.apache.druid.storage.google;

import com.google.common.base.Predicate;
import com.google.inject.Inject;
import org.apache.druid.java.util.common.CompressionUtils;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.segment.loading.URIDataPuller;
import org.apache.druid.utils.CompressionUtils;

import java.io.File;
import java.io.IOException;
@@ -26,7 +26,6 @@ import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.inject.Inject;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.RE;
 import org.apache.druid.java.util.common.RetryUtils;
 import org.apache.druid.java.util.common.StringUtils;

@@ -34,6 +33,7 @@ import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.segment.SegmentUtils;
 import org.apache.druid.segment.loading.DataSegmentPusher;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;

 import java.io.File;
 import java.io.IOException;
@@ -23,7 +23,6 @@ import com.google.common.base.Predicate;
 import com.google.common.base.Throwables;
 import com.google.common.io.ByteSource;
 import com.google.inject.Inject;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.RetryUtils;

@@ -33,6 +32,7 @@ import org.apache.druid.java.util.common.io.NativeIO;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.segment.loading.SegmentLoadingException;
 import org.apache.druid.segment.loading.URIDataPuller;
+import org.apache.druid.utils.CompressionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -26,13 +26,13 @@ import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableMap;
 import com.google.inject.Inject;
 import org.apache.druid.common.utils.UUIDUtils;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.IOE;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.segment.SegmentUtils;
 import org.apache.druid.segment.loading.DataSegmentPusher;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -21,10 +21,10 @@ package org.apache.druid.storage.hdfs;

 import com.google.common.io.ByteStreams;
 import org.apache.commons.io.FileUtils;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.IOE;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.segment.loading.SegmentLoadingException;
+import org.apache.druid.utils.CompressionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -81,7 +81,6 @@ import org.apache.druid.indexing.seekablestream.SeekableStreamPartitions;
 import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor;
 import org.apache.druid.indexing.test.TestDataSegmentAnnouncer;
 import org.apache.druid.indexing.test.TestDataSegmentKiller;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.Intervals;

@@ -149,6 +148,7 @@ import org.apache.druid.server.DruidNode;
 import org.apache.druid.server.coordination.DataSegmentServerAnnouncer;
 import org.apache.druid.server.coordination.ServerType;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.easymock.EasyMock;
@@ -86,7 +86,6 @@ import org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord;
 import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor;
 import org.apache.druid.indexing.test.TestDataSegmentAnnouncer;
 import org.apache.druid.indexing.test.TestDataSegmentKiller;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.Intervals;

@@ -147,6 +146,7 @@ import org.apache.druid.server.coordination.DataSegmentServerAnnouncer;
 import org.apache.druid.server.coordination.ServerType;
 import org.apache.druid.server.security.AuthorizerMapper;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;
 import org.easymock.EasyMock;
 import org.easymock.EasyMockSupport;
 import org.joda.time.Interval;
@@ -23,7 +23,6 @@ import com.google.common.io.ByteSource;
 import com.google.inject.Inject;
 import org.apache.druid.data.SearchableVersionedDataFinder;
 import org.apache.druid.data.input.MapPopulator;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.RetryUtils;
 import org.apache.druid.java.util.common.StringUtils;

@@ -32,6 +31,7 @@ import org.apache.druid.query.lookup.namespace.CacheGenerator;
 import org.apache.druid.query.lookup.namespace.UriExtractionNamespace;
 import org.apache.druid.segment.loading.URIDataPuller;
 import org.apache.druid.server.lookup.namespace.cache.CacheScheduler;
+import org.apache.druid.utils.CompressionUtils;

 import javax.annotation.Nullable;
 import java.io.FileNotFoundException;
@@ -34,7 +34,6 @@ import org.apache.druid.data.input.FiniteFirehoseFactory;
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.impl.StringInputRowParser;
 import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.IOE;
 import org.apache.druid.java.util.common.ISE;

@@ -42,6 +41,7 @@ import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.storage.s3.S3Utils;
 import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3;
+import org.apache.druid.utils.CompressionUtils;

 import java.io.IOException;
 import java.io.InputStream;
@@ -29,7 +29,6 @@ import com.google.common.base.Throwables;
 import com.google.common.io.ByteSource;
 import com.google.common.io.Files;
 import com.google.inject.Inject;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.IOE;

@@ -40,6 +39,7 @@ import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.segment.loading.SegmentLoadingException;
 import org.apache.druid.segment.loading.URIDataPuller;
+import org.apache.druid.utils.CompressionUtils;

 import javax.tools.FileObject;
 import java.io.File;
@@ -25,12 +25,12 @@ import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.inject.Inject;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.emitter.EmittingLogger;
 import org.apache.druid.segment.SegmentUtils;
 import org.apache.druid.segment.loading.DataSegmentPusher;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;

 import java.io.File;
 import java.io.IOException;
@@ -146,6 +146,13 @@
       <version>${hadoop.compile.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.druid</groupId>
+      <artifactId>druid-core</artifactId>
+      <version>${project.parent.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.druid</groupId>
      <artifactId>druid-server</artifactId>
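This pom.xml hunk adds druid-core's test-jar to the module's test scope. That is what puts test-only helpers such as CompressionUtilsTest on the test classpath; the JobHelperTest hunk further down depends on it. A hedged sketch of the kind of use it enables (tmpDir is a hypothetical temporary directory; the static makeEvilZip call is taken from the JobHelperTest hunk, and the checked IOException is an assumption):

import org.apache.druid.java.util.common.CompressionUtilsTest;

import java.io.File;
import java.io.IOException;

public class EvilZipFixtureSketch
{
  public File buildFixture(File tmpDir) throws IOException
  {
    // Build the zip-slip fixture via the test helper rather than the
    // now-removed production CompressionUtils.makeEvilZip.
    File evilZip = new File(tmpDir, "evil.zip");
    CompressionUtilsTest.makeEvilZip(evilZip);
    return evilZip;
  }
}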
@@ -25,7 +25,6 @@ import com.google.common.base.Strings;
 import com.google.common.base.Throwables;
 import com.google.common.io.Files;
 import org.apache.druid.indexer.updater.HadoopDruidConverterConfig;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.IAE;

@@ -38,6 +37,7 @@ import org.apache.druid.segment.ProgressIndicator;
 import org.apache.druid.segment.SegmentUtils;
 import org.apache.druid.segment.loading.DataSegmentPusher;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -25,7 +25,7 @@ import org.apache.druid.data.input.impl.CSVParseSpec;
 import org.apache.druid.data.input.impl.DimensionsSpec;
 import org.apache.druid.data.input.impl.StringInputRowParser;
 import org.apache.druid.data.input.impl.TimestampSpec;
-import org.apache.druid.java.util.common.CompressionUtils;
+import org.apache.druid.java.util.common.CompressionUtilsTest;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.granularity.Granularities;

@@ -193,7 +193,7 @@ public class JobHelperTest

     File evilZip = new File(tmpDir, "evil.zip");
     Files.deleteIfExists(evilZip.toPath());
-    CompressionUtils.makeEvilZip(evilZip);
+    CompressionUtilsTest.makeEvilZip(evilZip);

     try {
       JobHelper.unzipNoGuava(
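The renamed call above builds an "evil" zip: an archive whose entry name path-traverses out of the extraction directory, used to verify that JobHelper.unzipNoGuava refuses to write outside its target. A self-contained sketch of how such a fixture is generally constructed (the exact entry name and payload used by CompressionUtilsTest.makeEvilZip are assumptions):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class EvilZipSketch
{
  public static void makeEvilZip(File outputFile) throws IOException
  {
    try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(outputFile))) {
      // The traversal in the entry name is the point of the fixture: a naive
      // extractor would write the payload outside the target directory.
      zipOut.putNextEntry(new ZipEntry("../../../evil.txt"));
      zipOut.write("evil text".getBytes(StandardCharsets.UTF_8));
      zipOut.closeEntry();
    }
  }
}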
@@ -22,13 +22,13 @@ package org.apache.druid.segment.loading;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Predicate;
 import com.google.common.io.Files;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.MapUtils;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.UOE;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;

 import javax.tools.FileObject;
 import java.io.File;
@@ -22,11 +22,11 @@ package org.apache.druid.segment.loading;
 import com.google.common.collect.ImmutableMap;
 import com.google.inject.Inject;
 import org.apache.commons.io.FileUtils;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.IOE;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.segment.SegmentUtils;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.utils.CompressionUtils;

 import java.io.File;
 import java.io.IOException;
@@ -28,9 +28,9 @@ import org.apache.druid.data.input.FiniteFirehoseFactory;
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.impl.StringInputRowParser;
 import org.apache.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.utils.CompressionUtils;

 import java.io.IOException;
 import java.io.InputStream;
@@ -29,8 +29,8 @@ import org.apache.druid.data.input.FiniteFirehoseFactory;
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.impl.AbstractTextFilesFirehoseFactory;
 import org.apache.druid.data.input.impl.StringInputRowParser;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.emitter.EmittingLogger;
+import org.apache.druid.utils.CompressionUtils;

 import java.io.File;
 import java.io.IOException;
@@ -21,7 +21,7 @@ package org.apache.druid.segment.loading;

 import com.google.common.io.Files;
 import org.apache.commons.io.FileUtils;
-import org.apache.druid.java.util.common.CompressionUtils;
+import org.apache.druid.utils.CompressionUtils;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -23,11 +23,11 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.io.Files;
 import com.google.common.primitives.Ints;
 import org.apache.commons.io.FileUtils;
-import org.apache.druid.java.util.common.CompressionUtils;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.partition.NoneShardSpec;
+import org.apache.druid.utils.CompressionUtils;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;