This commit is contained in:
Matt Gilman 2015-04-24 17:28:15 -04:00
commit 9033173eb3
3 changed files with 141 additions and 132 deletions

View File

@ -20,21 +20,20 @@ import java.util.Map;
/**
* <p>
* The NiFiDataPacket provides a packaging around a NiFi FlowFile. It wraps both a FlowFile's
* content and its attributes so that they can be processed by Spark
* The NiFiDataPacket provides a packaging around a NiFi FlowFile. It wraps both
* a FlowFile's content and its attributes so that they can be processed by
* Spark
* </p>
*/
public interface NiFiDataPacket {
/**
* Returns the contents of a NiFi FlowFile.
*
* @return the contents of a NiFi FlowFile
*/
byte[] getContent();
/**
* Returns a Map of attributes that are associated with the NiFi FlowFile.
*
* @return a Map of attributes that are associated with the NiFi FlowFile
*/
Map<String, String> getAttributes();
}

View File

@ -31,34 +31,37 @@ import org.apache.nifi.stream.io.StreamUtils;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.receiver.Receiver;
/**
* <p>
* The <code>NiFiReceiver</code> is a Reliable Receiver that provides a way to pull data
* from Apache NiFi so that it can be processed by Spark Streaming. The NiFi Receiver connects
* to the NiFi instance provided in the config and requests data from
* the OutputPort that is named. In NiFi, when an OutputPort is added to the root process group,
* it acts as a queue of data for remote clients. This receiver is then able to pull that data
* from NiFi reliably.
* The <code>NiFiReceiver</code> is a Reliable Receiver that provides a way to
* pull data from Apache NiFi so that it can be processed by Spark Streaming.
* The NiFi Receiver connects to the NiFi instance provided in the config and
* requests data from the OutputPort that is named. In NiFi, when an OutputPort
* is added to the root process group, it acts as a queue of data for remote
* clients. This receiver is then able to pull that data from NiFi reliably.
* </p>
*
* <p>
* It is important to note that if pulling data from a NiFi cluster, the URL that should be used
* is that of the NiFi Cluster Manager. The Receiver will automatically handle determining the nodes
* in that cluster and pull from those nodes as appropriate.
* It is important to note that if pulling data from a NiFi cluster, the URL
* that should be used is that of the NiFi Cluster Manager. The Receiver will
* automatically handle determining the nodes in that cluster and pull from
* those nodes as appropriate.
* </p>
*
* <p>
* In order to use the NiFiReceiver, you will need to first build a {@link SiteToSiteClientConfig} to provide
* to the constructor. This can be achieved by using the {@link SiteToSiteClient.Builder}.
* Below is an example snippet of driver code to pull data from NiFi that is running on localhost:8080. This
* example assumes that NiFi exposes an OutputPort on the root group named "Data For Spark".
* Additionally, it assumes that the data that it will receive from this OutputPort is text
* data, as it will map the byte array received from NiFi to a UTF-8 Encoded string.
* In order to use the NiFiReceiver, you will need to first build a
* {@link SiteToSiteClientConfig} to provide to the constructor. This can be
* achieved by using the {@link SiteToSiteClient.Builder}. Below is an example
* snippet of driver code to pull data from NiFi that is running on
* localhost:8080. This example assumes that NiFi exposes an OutputPort on the
* root group named "Data For Spark". Additionally, it assumes that the data
* that it will receive from this OutputPort is text data, as it will map the
* byte array received from NiFi to a UTF-8 Encoded string.
* </p>
*
* <code>
* <pre>
* {@code
* Pattern SPACE = Pattern.compile(" ");
*
* // Build a Site-to-site client config
@ -106,10 +109,12 @@ import org.apache.spark.streaming.receiver.Receiver;
* wordCounts.print();
* ssc.start();
* ssc.awaitTermination();
* }
* </pre>
* </code>
*/
public class NiFiReceiver extends Receiver<NiFiDataPacket> {
private static final long serialVersionUID = 3067274587595578836L;
private final SiteToSiteClientConfig clientConfig;
@ -131,9 +136,11 @@ public class NiFiReceiver extends Receiver<NiFiDataPacket> {
}
class ReceiveRunnable implements Runnable {
public ReceiveRunnable() {
}
@Override
public void run() {
try {
final SiteToSiteClient client = new SiteToSiteClient.Builder().fromConfig(clientConfig).build();
@ -148,12 +155,13 @@ public class NiFiReceiver extends Receiver<NiFiDataPacket> {
// no data available. Wait a bit and try again
try {
Thread.sleep(1000L);
} catch (InterruptedException e) {}
} catch (InterruptedException e) {
}
continue;
}
final List<NiFiDataPacket> dataPackets = new ArrayList<NiFiDataPacket>();
final List<NiFiDataPacket> dataPackets = new ArrayList<>();
do {
// Read the data into a byte array and wrap it along with the attributes
// into a NiFiDataPacket.
@ -163,10 +171,12 @@ public class NiFiReceiver extends Receiver<NiFiDataPacket> {
final Map<String, String> attributes = dataPacket.getAttributes();
final NiFiDataPacket NiFiDataPacket = new NiFiDataPacket() {
@Override
public byte[] getContent() {
return data;
}
@Override
public Map<String, String> getAttributes() {
return attributes;
}