mirror of https://github.com/apache/druid.git
Added support to use Rackspace's cloudfiles as static firehose
This commit is contained in:
parent
67b6556457
commit
3294f33838
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.firehose.cloudfiles;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import javax.validation.constraints.NotNull;
|
||||
|
||||
public class CloudFilesBlob
|
||||
{
|
||||
@JsonProperty
|
||||
@NotNull
|
||||
private String container = null;
|
||||
|
||||
@JsonProperty
|
||||
@NotNull
|
||||
private String path = null;
|
||||
|
||||
@JsonProperty
|
||||
@NotNull
|
||||
private String region = null;
|
||||
|
||||
public CloudFilesBlob()
|
||||
{
|
||||
}
|
||||
|
||||
public CloudFilesBlob(String container, String path, String region)
|
||||
{
|
||||
this.container = container;
|
||||
this.path = path;
|
||||
this.region = region;
|
||||
}
|
||||
|
||||
public String getContainer()
|
||||
{
|
||||
return container;
|
||||
}
|
||||
|
||||
public String getPath()
|
||||
{
|
||||
return path;
|
||||
}
|
||||
|
||||
public String getRegion()
|
||||
{
|
||||
return region;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.firehose.cloudfiles;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.databind.Module;
|
||||
import com.fasterxml.jackson.databind.jsontype.NamedType;
|
||||
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.inject.Binder;
|
||||
|
||||
import io.druid.initialization.DruidModule;
|
||||
|
||||
public class CloudFilesFirehoseDruidModule implements DruidModule
|
||||
{
|
||||
|
||||
@Override
|
||||
public List<? extends Module> getJacksonModules()
|
||||
{
|
||||
return ImmutableList.of(
|
||||
new SimpleModule().registerSubtypes(
|
||||
new NamedType(StaticCloudFilesFirehoseFactory.class, "static-cloudfiles")));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(Binder arg0)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.firehose.cloudfiles;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JacksonInject;
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.metamx.common.CompressionUtils;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import com.metamx.common.parsers.ParseException;
|
||||
|
||||
import io.druid.data.input.impl.FileIteratingFirehose;
|
||||
import io.druid.data.input.Firehose;
|
||||
import io.druid.data.input.FirehoseFactory;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.io.LineIterator;
|
||||
import org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi;
|
||||
|
||||
import io.druid.data.input.impl.StringInputRowParser;
|
||||
import io.druid.storage.cloudfiles.CloudFilesByteSource;
|
||||
import io.druid.storage.cloudfiles.CloudFilesObjectApiProxy;
|
||||
|
||||
public class StaticCloudFilesFirehoseFactory implements FirehoseFactory<StringInputRowParser>
|
||||
{
|
||||
private static final Logger log = new Logger(StaticCloudFilesFirehoseFactory.class);
|
||||
|
||||
private final CloudFilesApi cloudFilesApi;
|
||||
private final List<CloudFilesBlob> blobs;
|
||||
|
||||
@JsonCreator
|
||||
public StaticCloudFilesFirehoseFactory(
|
||||
@JacksonInject("objectApi") CloudFilesApi cloudFilesApi,
|
||||
@JsonProperty("blobs") CloudFilesBlob[] blobs)
|
||||
{
|
||||
this.cloudFilesApi = cloudFilesApi;
|
||||
this.blobs = ImmutableList.copyOf(blobs);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public List<CloudFilesBlob> getBlobs()
|
||||
{
|
||||
return blobs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Firehose connect(StringInputRowParser stringInputRowParser) throws IOException, ParseException
|
||||
{
|
||||
Preconditions.checkNotNull(cloudFilesApi, "null cloudFilesApi");
|
||||
|
||||
final LinkedList<CloudFilesBlob> objectQueue = Lists.newLinkedList(blobs);
|
||||
|
||||
return new FileIteratingFirehose(
|
||||
new Iterator<LineIterator>()
|
||||
{
|
||||
|
||||
@Override
|
||||
public boolean hasNext()
|
||||
{
|
||||
return !objectQueue.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LineIterator next()
|
||||
{
|
||||
final CloudFilesBlob nextURI = objectQueue.poll();
|
||||
|
||||
final String region = nextURI.getRegion();
|
||||
final String container = nextURI.getContainer();
|
||||
final String path = nextURI.getPath();
|
||||
|
||||
log.info("Retrieving file from region[%s], container[%s] and path [%s]",
|
||||
region, container, path);
|
||||
CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(
|
||||
cloudFilesApi, region, container);
|
||||
final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);
|
||||
|
||||
try
|
||||
{
|
||||
final InputStream innerInputStream = byteSource.openStream();
|
||||
final InputStream outerInputStream = path.endsWith(".gz")
|
||||
? CompressionUtils.gzipInputStream(innerInputStream)
|
||||
: innerInputStream;
|
||||
|
||||
return IOUtils.lineIterator(
|
||||
new BufferedReader(
|
||||
new InputStreamReader(outerInputStream, Charsets.UTF_8)));
|
||||
} catch (IOException e)
|
||||
{
|
||||
log.error(e,
|
||||
"Exception opening container[%s] blob[%s] from region[%s]",
|
||||
container, path, region);
|
||||
|
||||
throw Throwables.propagate(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove()
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
},
|
||||
stringInputRowParser);
|
||||
}
|
||||
|
||||
}
|
|
@ -18,3 +18,4 @@
|
|||
#
|
||||
|
||||
io.druid.storage.cloudfiles.CloudFilesStorageDruidModule
|
||||
io.druid.firehose.cloudfiles.CloudFilesFirehoseDruidModule
|
||||
|
|
Loading…
Reference in New Issue