mirror of https://github.com/apache/druid.git
1) Add support for storing segments in HDFS
parent cd535fcd79
commit 60b279b0d3
@@ -46,32 +46,49 @@ public class CompressionUtils
 
   public static long zip(File directory, File outputZipFile) throws IOException
   {
-    if (!directory.isDirectory()) {
-      throw new IOException(String.format("directory[%s] is not a directory", directory));
-    }
-
     if (!outputZipFile.getName().endsWith(".zip")) {
       log.warn("No .zip suffix[%s], putting files from [%s] into it anyway.", outputZipFile, directory);
     }
 
+    final FileOutputStream out = new FileOutputStream(outputZipFile);
+    try {
+      final long retVal = zip(directory, out);
+
+      out.close();
+
+      return retVal;
+    }
+    finally {
+      Closeables.closeQuietly(out);
+    }
+  }
+
+  public static long zip(File directory, OutputStream out) throws IOException
+  {
+    if (!directory.isDirectory()) {
+      throw new IOException(String.format("directory[%s] is not a directory", directory));
+    }
+
     long totalSize = 0;
     ZipOutputStream zipOut = null;
     try {
-      zipOut = new ZipOutputStream(new FileOutputStream(outputZipFile));
+      zipOut = new ZipOutputStream(out);
       File[] files = directory.listFiles();
       for (File file : files) {
         log.info("Adding file[%s] with size[%,d]. Total size so far[%,d]", file, file.length(), totalSize);
         if (file.length() >= Integer.MAX_VALUE) {
-          zipOut.close();
-          outputZipFile.delete();
+          zipOut.finish();
           throw new IOException(String.format("file[%s] too large [%,d]", file, file.length()));
         }
         zipOut.putNextEntry(new ZipEntry(file.getName()));
         totalSize += ByteStreams.copy(Files.newInputStreamSupplier(file), zipOut);
       }
+      zipOut.closeEntry();
     }
     finally {
-      Closeables.closeQuietly(zipOut);
+      if (zipOut != null) {
+        zipOut.finish();
+      }
     }
 
     return totalSize;
@@ -100,11 +117,12 @@ public class CompressionUtils
 
     ZipEntry entry;
     while ((entry = zipIn.getNextEntry()) != null) {
-      OutputStream out = null;
+      FileOutputStream out = null;
       try {
         out = new FileOutputStream(new File(outDir, entry.getName()));
         ByteStreams.copy(zipIn, out);
         zipIn.closeEntry();
+        out.close();
       }
       finally {
         Closeables.closeQuietly(out);
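The point of splitting zip() into a File-based wrapper and an OutputStream-based worker is that callers can now compress a segment directory straight onto any stream, which is what the HDFS pusher below does with an FSDataOutputStream. A minimal round-trip sketch of the two utilities as changed here; the paths and class name are invented for the example:

import com.metamx.druid.utils.CompressionUtils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;

class CompressionUtilsRoundTrip
{
  public static void main(String[] args) throws IOException
  {
    File indexDir = new File("/tmp/example-index");       // hypothetical directory to compress
    File restoredDir = new File("/tmp/example-restored"); // hypothetical extraction target
    restoredDir.mkdirs();

    // New overload: zip a directory onto an arbitrary OutputStream (here an in-memory buffer,
    // in the pusher below an HDFS FSDataOutputStream).
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    long zippedBytes = CompressionUtils.zip(indexDir, buffer);

    // unzip() consumes any InputStream, which is how the HDFS puller feeds it an FSDataInputStream.
    CompressionUtils.unzip(new ByteArrayInputStream(buffer.toByteArray()), restoredDir);

    System.out.println("round-tripped " + zippedBytes + " bytes");
  }
}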
@@ -70,13 +70,6 @@
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-core</artifactId>
-            <version>0.20.2</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.mortbay.jetty</groupId>
-                    <artifactId>servlet-api-2.5</artifactId>
-                </exclusion>
-            </exclusions>
         </dependency>
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
@@ -19,6 +19,7 @@
 
 package com.metamx.druid.indexer;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Optional;
 import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
@@ -47,6 +48,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3native.NativeS3FileSystem;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.InvalidJobConfException;
@@ -58,7 +60,6 @@ import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import com.fasterxml.jackson.databind.ObjectMapper;
 import org.joda.time.DateTime;
 import org.joda.time.Interval;
 
@@ -417,6 +418,11 @@ public class IndexGeneratorJob implements Jobby
             "type", "local",
             "path", indexOutURI.getPath()
         );
+      } else if (outputFS instanceof DistributedFileSystem) {
+        loadSpec = ImmutableMap.<String, Object>of(
+            "type", "hdfs",
+            "path", indexOutURI.getPath()
+        );
       } else {
         throw new ISE("Unknown file system[%s]", outputFS.getClass());
       }
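For context, the new branch mirrors the existing "local" case: when the job's output filesystem is a DistributedFileSystem, the segment descriptor gets an "hdfs" loadSpec, which is the key the DelegatingSegmentLoader in ServerInit (below) uses to route the segment to the HDFS puller. An illustrative sketch of the resulting map; the path value is invented, the real one comes from indexOutURI.getPath():

import com.google.common.collect.ImmutableMap;

import java.util.Map;

class HdfsLoadSpecExample
{
  // Illustration only: the shape of the loadSpec emitted for an HDFS output location.
  static final Map<String, Object> HDFS_LOAD_SPEC = ImmutableMap.<String, Object>of(
      "type", "hdfs",
      "path", "/druid/indexer-output/wikipedia/index.zip"  // hypothetical path
  );
}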
pom.xml
@@ -138,6 +138,17 @@
                 <artifactId>curator-x-discovery</artifactId>
                 <version>${netflix.curator.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-core</artifactId>
+                <version>0.20.2</version>
+                <exclusions>
+                    <exclusion>
+                        <groupId>org.mortbay.jetty</groupId>
+                        <artifactId>servlet-api-2.5</artifactId>
+                    </exclusion>
+                </exclusions>
+            </dependency>
             <dependency>
                 <groupId>it.uniroma3.mat</groupId>
                 <artifactId>extendedset</artifactId>
@@ -168,6 +168,10 @@
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-log4j12</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-core</artifactId>
+        </dependency>
 
         <!-- Dependencies required for jets3t b/c emr pom doesn't include them -->
         <dependency>
@@ -27,8 +27,13 @@ import com.google.common.collect.Maps;
 import com.metamx.common.ISE;
 import com.metamx.common.logger.Logger;
 import com.metamx.druid.DruidProcessingConfig;
+import com.metamx.druid.Query;
+import com.metamx.druid.collect.StupidPool;
 import com.metamx.druid.loading.DataSegmentPusher;
 import com.metamx.druid.loading.DelegatingSegmentLoader;
+import com.metamx.druid.loading.HdfsDataSegmentPuller;
+import com.metamx.druid.loading.HdfsDataSegmentPusher;
+import com.metamx.druid.loading.HdfsDataSegmentPusherConfig;
 import com.metamx.druid.loading.LocalDataSegmentPuller;
 import com.metamx.druid.loading.LocalDataSegmentPusher;
 import com.metamx.druid.loading.LocalDataSegmentPusherConfig;
@@ -37,15 +42,13 @@ import com.metamx.druid.loading.QueryableIndexFactory;
 import com.metamx.druid.loading.S3DataSegmentPuller;
 import com.metamx.druid.loading.S3DataSegmentPusher;
 import com.metamx.druid.loading.S3DataSegmentPusherConfig;
+import com.metamx.druid.loading.SegmentLoader;
 import com.metamx.druid.loading.SegmentLoaderConfig;
 import com.metamx.druid.loading.SingleSegmentLoader;
-import com.metamx.druid.query.group.GroupByQueryEngine;
-import com.metamx.druid.query.group.GroupByQueryEngineConfig;
-import com.metamx.druid.Query;
-import com.metamx.druid.collect.StupidPool;
-import com.metamx.druid.loading.SegmentLoader;
 import com.metamx.druid.query.QueryRunnerFactory;
 import com.metamx.druid.query.group.GroupByQuery;
+import com.metamx.druid.query.group.GroupByQueryEngine;
+import com.metamx.druid.query.group.GroupByQueryEngineConfig;
 import com.metamx.druid.query.group.GroupByQueryRunnerFactory;
 import com.metamx.druid.query.group.GroupByQueryRunnerFactoryConfig;
 import com.metamx.druid.query.metadata.SegmentMetadataQuery;
@@ -57,6 +60,7 @@ import com.metamx.druid.query.timeboundary.TimeBoundaryQueryRunnerFactory;
 import com.metamx.druid.query.timeseries.TimeseriesQuery;
 import com.metamx.druid.query.timeseries.TimeseriesQueryRunnerFactory;
 import com.metamx.druid.utils.PropUtils;
+import org.apache.hadoop.conf.Configuration;
 import org.jets3t.service.S3ServiceException;
 import org.jets3t.service.impl.rest.httpclient.RestS3Service;
 import org.jets3t.service.security.AWSCredentials;
@@ -85,13 +89,13 @@ public class ServerInit
     final QueryableIndexFactory factory = new MMappedQueryableIndexFactory();
 
     SingleSegmentLoader s3segmentLoader = new SingleSegmentLoader(segmentGetter, factory, config);
-    SingleSegmentLoader localSegmentLoader = new SingleSegmentLoader(new LocalDataSegmentPuller(), factory, config);
 
     delegateLoader.setLoaderTypes(
         ImmutableMap.<String, SegmentLoader>builder()
                     .put("s3", s3segmentLoader)
                     .put("s3_zip", s3segmentLoader)
-                    .put("local", localSegmentLoader)
+                    .put("local", new SingleSegmentLoader(new LocalDataSegmentPuller(), factory, config))
+                    .put("hdfs", new SingleSegmentLoader(new HdfsDataSegmentPuller(new Configuration()), factory, config))
                     .build()
     );
 
@@ -167,6 +171,11 @@ public class ServerInit
     if (Boolean.parseBoolean(props.getProperty("druid.pusher.local", "false"))) {
       return new LocalDataSegmentPusher(configFactory.build(LocalDataSegmentPusherConfig.class), jsonMapper);
     }
+    else if (Boolean.parseBoolean(props.getProperty("druid.pusher.hdfs", "false"))) {
+      final HdfsDataSegmentPusherConfig config = configFactory.build(HdfsDataSegmentPusherConfig.class);
+
+      return new HdfsDataSegmentPusher(config, new Configuration(), jsonMapper);
+    }
     else {
 
       final RestS3Service s3Client;
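Based on the branch above, the HDFS pusher is switched on by the druid.pusher.hdfs property and configured only by druid.pusher.hdfs.storageDirectory (see HdfsDataSegmentPusherConfig below). A small sketch of that selection logic; the property names come from this commit, the directory value does not:

import java.util.Properties;

class PusherSelectionSketch
{
  public static void main(String[] args)
  {
    Properties props = new Properties();
    props.setProperty("druid.pusher.hdfs", "true");
    props.setProperty("druid.pusher.hdfs.storageDirectory", "/druid/segments"); // example value

    // Mirrors the check added to ServerInit: only an explicit "true" selects the HDFS pusher,
    // otherwise the code falls through to the local/S3 branches.
    boolean useHdfs = Boolean.parseBoolean(props.getProperty("druid.pusher.hdfs", "false"));
    System.out.println(useHdfs ? "would build HdfsDataSegmentPusher" : "would fall back to local/S3");
  }
}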
@@ -0,0 +1,85 @@
+package com.metamx.druid.loading;
+
+import com.google.common.io.Closeables;
+import com.metamx.druid.client.DataSegment;
+import com.metamx.druid.utils.CompressionUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ */
+public class HdfsDataSegmentPuller implements DataSegmentPuller
+{
+  private final Configuration config;
+
+  public HdfsDataSegmentPuller(final Configuration config)
+  {
+    this.config = config;
+  }
+
+  @Override
+  public void getSegmentFiles(DataSegment segment, File dir) throws SegmentLoadingException
+  {
+    final Path path = getPath(segment);
+
+    final FileSystem fs = checkPathAndGetFilesystem(path);
+
+    FSDataInputStream in = null;
+    try {
+      if (path.getName().endsWith(".zip")) {
+        in = fs.open(path);
+        CompressionUtils.unzip(in, dir);
+        in.close();
+      }
+      else {
+        throw new SegmentLoadingException("Unknown file type[%s]", path);
+      }
+    }
+    catch (IOException e) {
+      throw new SegmentLoadingException(e, "Some IOException");
+    }
+    finally {
+      Closeables.closeQuietly(in);
+    }
+  }
+
+  @Override
+  public long getLastModified(DataSegment segment) throws SegmentLoadingException
+  {
+    Path path = getPath(segment);
+    FileSystem fs = checkPathAndGetFilesystem(path);
+
+    try {
+      return fs.getFileStatus(path).getModificationTime();
+    }
+    catch (IOException e) {
+      throw new SegmentLoadingException(e, "Problem loading status of path[%s]", path);
+    }
+  }
+
+  private Path getPath(DataSegment segment) {
+    return new Path(String.valueOf(segment.getLoadSpec().get("path")));
+  }
+
+  private FileSystem checkPathAndGetFilesystem(Path path) throws SegmentLoadingException
+  {
+    FileSystem fs;
+    try {
+      fs = path.getFileSystem(config);
+
+      if (!fs.exists(path)) {
+        throw new SegmentLoadingException("Path[%s] doesn't exist.", path);
+      }
+
+      return fs;
+    }
+    catch (IOException e) {
+      throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", path);
+    }
+  }
+}
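A minimal usage sketch of the new puller, assuming a DataSegment whose loadSpec was written by the HDFS pusher below (type "hdfs" with a path ending in .zip); the local cache directory and class name are invented:

import com.metamx.druid.client.DataSegment;
import com.metamx.druid.loading.HdfsDataSegmentPuller;
import com.metamx.druid.loading.SegmentLoadingException;
import org.apache.hadoop.conf.Configuration;

import java.io.File;

class HdfsPullerSketch
{
  static void pull(DataSegment segment) throws SegmentLoadingException
  {
    // A default Configuration picks up whatever Hadoop settings are on the classpath,
    // matching how ServerInit wires the puller above.
    HdfsDataSegmentPuller puller = new HdfsDataSegmentPuller(new Configuration());

    File localDir = new File("/tmp/druid/segment-cache/example"); // hypothetical cache dir
    localDir.mkdirs();

    puller.getSegmentFiles(segment, localDir);        // unzips index.zip from HDFS into localDir
    long hdfsMtime = puller.getLastModified(segment); // modification time of the HDFS file
  }
}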
@@ -0,0 +1,106 @@
+package com.metamx.druid.loading;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.io.ByteStreams;
+import com.google.common.io.Closeables;
+import com.google.common.io.OutputSupplier;
+import com.metamx.common.logger.Logger;
+import com.metamx.druid.client.DataSegment;
+import com.metamx.druid.index.v1.IndexIO;
+import com.metamx.druid.utils.CompressionUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ */
+public class HdfsDataSegmentPusher implements DataSegmentPusher
+{
+  private static final Logger log = new Logger(HdfsDataSegmentPusher.class);
+
+  private final HdfsDataSegmentPusherConfig config;
+  private final Configuration hadoopConfig;
+  private final ObjectMapper jsonMapper;
+
+  public HdfsDataSegmentPusher(
+      HdfsDataSegmentPusherConfig config,
+      Configuration hadoopConfig,
+      ObjectMapper jsonMapper
+  )
+  {
+    this.config = config;
+    this.hadoopConfig = hadoopConfig;
+    this.jsonMapper = jsonMapper;
+  }
+
+  @Override
+  public DataSegment push(File inDir, DataSegment segment) throws IOException
+  {
+    final String storageDir = DataSegmentPusherUtil.getStorageDir(segment);
+    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
+    FileSystem fs = outFile.getFileSystem(hadoopConfig);
+
+    fs.mkdirs(outFile.getParent());
+    log.info("Compressing files from[%s] to [%s]", inDir, outFile);
+    FSDataOutputStream out = null;
+    long size;
+    try {
+      out = fs.create(outFile);
+
+      size = CompressionUtils.zip(inDir, out);
+
+      out.close();
+    }
+    finally {
+      Closeables.closeQuietly(out);
+    }
+
+    return createDescriptorFile(
+        segment.withLoadSpec(makeLoadSpec(outFile))
+               .withSize(size)
+               .withBinaryVersion(IndexIO.CURRENT_VERSION_ID),
+        outFile.getParent(),
+        fs
+    );
+  }
+
+  private DataSegment createDescriptorFile(DataSegment segment, Path outDir, final FileSystem fs) throws IOException
+  {
+    final Path descriptorFile = new Path(outDir, "descriptor.json");
+    log.info("Creating descriptor file at[%s]", descriptorFile);
+    ByteStreams.copy(
+        ByteStreams.newInputStreamSupplier(jsonMapper.writeValueAsBytes(segment)),
+        new HdfsOutputStreamSupplier(fs, descriptorFile)
+    );
+    return segment;
+  }
+
+  private ImmutableMap<String, Object> makeLoadSpec(Path outFile)
+  {
+    return ImmutableMap.<String, Object>of("type", "hdfs", "path", outFile.toString());
+  }
+
+  private static class HdfsOutputStreamSupplier implements OutputSupplier<OutputStream>
+  {
+    private final FileSystem fs;
+    private final Path descriptorFile;
+
+    public HdfsOutputStreamSupplier(FileSystem fs, Path descriptorFile)
+    {
+      this.fs = fs;
+      this.descriptorFile = descriptorFile;
+    }
+
+    @Override
+    public OutputStream getOutput() throws IOException
+    {
+      return fs.create(descriptorFile);
+    }
+  }
+}
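And the matching pusher-side sketch: push() zips the merged index directory to <storageDirectory>/<segment storage dir>/index.zip, writes descriptor.json next to it, and returns the segment rewritten with an "hdfs" loadSpec. The config would normally come from configFactory.build(HdfsDataSegmentPusherConfig.class) as in ServerInit; the local directory, class name, and the bare ObjectMapper are just for illustration:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.loading.HdfsDataSegmentPusher;
import com.metamx.druid.loading.HdfsDataSegmentPusherConfig;
import org.apache.hadoop.conf.Configuration;

import java.io.File;
import java.io.IOException;

class HdfsPusherSketch
{
  static DataSegment push(HdfsDataSegmentPusherConfig config, DataSegment segment) throws IOException
  {
    HdfsDataSegmentPusher pusher =
        new HdfsDataSegmentPusher(config, new Configuration(), new ObjectMapper());

    File mergedIndexDir = new File("/tmp/druid/merged/example"); // hypothetical local index dir

    // Uploads index.zip and descriptor.json under the configured storageDirectory and
    // returns the segment with loadSpec {"type": "hdfs", "path": ".../index.zip"}.
    return pusher.push(mergedIndexDir, segment);
  }
}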
@@ -0,0 +1,32 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+package com.metamx.druid.loading;
+
+import org.skife.config.Config;
+
+import java.io.File;
+
+/**
+ */
+public abstract class HdfsDataSegmentPusherConfig
+{
+  @Config("druid.pusher.hdfs.storageDirectory")
+  public abstract File getStorageDirectory();
+}