mirror of https://github.com/apache/druid.git
updated with further cleanliness
This commit is contained in:
parent
8bc4d7c436
commit
80f4ae25b5
|
@ -1,5 +1,6 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>com.metamx.druid</groupId>
|
<groupId>com.metamx.druid</groupId>
|
||||||
<artifactId>druid-examples</artifactId>
|
<artifactId>druid-examples</artifactId>
|
||||||
|
|
|
@ -13,10 +13,10 @@ import com.metamx.druid.loading.DataSegmentPusher;
|
||||||
import com.metamx.druid.log.LogLevelAdjuster;
|
import com.metamx.druid.log.LogLevelAdjuster;
|
||||||
import com.metamx.druid.realtime.RealtimeNode;
|
import com.metamx.druid.realtime.RealtimeNode;
|
||||||
import com.metamx.druid.realtime.SegmentPublisher;
|
import com.metamx.druid.realtime.SegmentPublisher;
|
||||||
import druid.examples.webStream.WebFirehoseFactory;
|
|
||||||
import druid.examples.flights.FlightsFirehoseFactory;
|
import druid.examples.flights.FlightsFirehoseFactory;
|
||||||
import druid.examples.rand.RandomFirehoseFactory;
|
import druid.examples.rand.RandomFirehoseFactory;
|
||||||
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
|
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
|
||||||
|
import druid.examples.webStream.WebFirehoseFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
|
@ -114,8 +114,7 @@ public class FlightsConverter
|
||||||
|
|
||||||
if (value.equals("NA")) {
|
if (value.equals("NA")) {
|
||||||
event.put(metricDimension, 0);
|
event.put(metricDimension, 0);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
event.put(metricDimension, Integer.parseInt(value));
|
event.put(metricDimension, Integer.parseInt(value));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,8 +80,7 @@ public class FlightsFirehoseFactory implements FirehoseFactory
|
||||||
try {
|
try {
|
||||||
if (line != null) {
|
if (line != null) {
|
||||||
return true;
|
return true;
|
||||||
}
|
} else if (in != null) {
|
||||||
else if (in != null) {
|
|
||||||
line = in.readLine();
|
line = in.readLine();
|
||||||
|
|
||||||
if (line == null) {
|
if (line == null) {
|
||||||
|
@ -90,16 +89,14 @@ public class FlightsFirehoseFactory implements FirehoseFactory
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
} else if (files.hasNext()) {
|
||||||
else if (files.hasNext()) {
|
|
||||||
final File nextFile = files.next();
|
final File nextFile = files.next();
|
||||||
|
|
||||||
if (nextFile.getName().endsWith(".gz")) {
|
if (nextFile.getName().endsWith(".gz")) {
|
||||||
in = new BufferedReader(
|
in = new BufferedReader(
|
||||||
new InputStreamReader(new GZIPInputStream(new FileInputStream(nextFile)), Charsets.UTF_8)
|
new InputStreamReader(new GZIPInputStream(new FileInputStream(nextFile)), Charsets.UTF_8)
|
||||||
);
|
);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
in = new BufferedReader(new FileReader(nextFile));
|
in = new BufferedReader(new FileReader(nextFile));
|
||||||
}
|
}
|
||||||
return hasMore();
|
return hasMore();
|
||||||
|
|
|
@ -35,81 +35,89 @@ import static java.lang.Thread.sleep;
|
||||||
* the moment an event is delivered.)
|
* the moment an event is delivered.)
|
||||||
* Values are offset by adding the modulus of the token number to the random number
|
* Values are offset by adding the modulus of the token number to the random number
|
||||||
* so that token values have distinct, non-overlapping ranges.
|
* so that token values have distinct, non-overlapping ranges.
|
||||||
*
|
* <p/>
|
||||||
* </p>
|
* </p>
|
||||||
* Example spec file:
|
* Example spec file:
|
||||||
* <pre>
|
* <pre>
|
||||||
[{
|
* [{
|
||||||
"schema" : { "dataSource":"randseq",
|
* "schema" : { "dataSource":"randseq",
|
||||||
"aggregators":[ {"type":"count", "name":"events"},
|
* "aggregators":[ {"type":"count", "name":"events"},
|
||||||
{"type":"doubleSum","name":"outColumn","fieldName":"inColumn"} ],
|
* {"type":"doubleSum","name":"outColumn","fieldName":"inColumn"} ],
|
||||||
"indexGranularity":"minute",
|
* "indexGranularity":"minute",
|
||||||
"shardSpec" : { "type": "none" } },
|
* "shardSpec" : { "type": "none" } },
|
||||||
"config" : { "maxRowsInMemory" : 50000,
|
* "config" : { "maxRowsInMemory" : 50000,
|
||||||
"intermediatePersistPeriod" : "PT2m" },
|
* "intermediatePersistPeriod" : "PT2m" },
|
||||||
|
|
||||||
"firehose" : { "type" : "rand",
|
|
||||||
"sleepUsec": 100000,
|
|
||||||
"maxGeneratedRows" : 5000000,
|
|
||||||
"seed" : 0,
|
|
||||||
"nTokens" : 19,
|
|
||||||
"nPerSleep" : 3
|
|
||||||
},
|
|
||||||
|
|
||||||
"plumber" : { "type" : "realtime",
|
|
||||||
"windowPeriod" : "PT5m",
|
|
||||||
"segmentGranularity":"hour",
|
|
||||||
"basePersistDirectory" : "/tmp/realtime/basePersist" }
|
|
||||||
}]
|
|
||||||
* </pre>
|
|
||||||
*
|
*
|
||||||
|
* "firehose" : { "type" : "rand",
|
||||||
|
* "sleepUsec": 100000,
|
||||||
|
* "maxGeneratedRows" : 5000000,
|
||||||
|
* "seed" : 0,
|
||||||
|
* "nTokens" : 19,
|
||||||
|
* "nPerSleep" : 3
|
||||||
|
* },
|
||||||
|
*
|
||||||
|
* "plumber" : { "type" : "realtime",
|
||||||
|
* "windowPeriod" : "PT5m",
|
||||||
|
* "segmentGranularity":"hour",
|
||||||
|
* "basePersistDirectory" : "/tmp/realtime/basePersist" }
|
||||||
|
* }]
|
||||||
|
* </pre>
|
||||||
|
* <p/>
|
||||||
* Example query using POST to /druid/v2/ (where UTC date and time MUST include the current hour):
|
* Example query using POST to /druid/v2/ (where UTC date and time MUST include the current hour):
|
||||||
* <pre>
|
* <pre>
|
||||||
{
|
* {
|
||||||
"queryType": "groupBy",
|
* "queryType": "groupBy",
|
||||||
"dataSource": "randSeq",
|
* "dataSource": "randSeq",
|
||||||
"granularity": "all",
|
* "granularity": "all",
|
||||||
"dimensions": [],
|
* "dimensions": [],
|
||||||
"aggregations":[
|
* "aggregations":[
|
||||||
{ "type": "count", "name": "rows"},
|
* { "type": "count", "name": "rows"},
|
||||||
{ "type": "doubleSum", "fieldName": "events", "name": "e"},
|
* { "type": "doubleSum", "fieldName": "events", "name": "e"},
|
||||||
{ "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum"}
|
* { "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum"}
|
||||||
],
|
* ],
|
||||||
"postAggregations":[
|
* "postAggregations":[
|
||||||
{ "type":"arithmetic",
|
* { "type":"arithmetic",
|
||||||
"name":"avg_random",
|
* "name":"avg_random",
|
||||||
"fn":"/",
|
* "fn":"/",
|
||||||
"fields":[ {"type":"fieldAccess","name":"randomNumberSum","fieldName":"randomNumberSum"},
|
* "fields":[ {"type":"fieldAccess","name":"randomNumberSum","fieldName":"randomNumberSum"},
|
||||||
{"type":"fieldAccess","name":"rows","fieldName":"rows"} ]}
|
* {"type":"fieldAccess","name":"rows","fieldName":"rows"} ]}
|
||||||
],
|
* ],
|
||||||
"intervals":["2012-10-01T00:00/2020-01-01T00"]
|
* "intervals":["2012-10-01T00:00/2020-01-01T00"]
|
||||||
}
|
* }
|
||||||
* </pre>
|
* </pre>
|
||||||
*/
|
*/
|
||||||
@JsonTypeName("rand")
|
@JsonTypeName("rand")
|
||||||
public class RandomFirehoseFactory implements FirehoseFactory
|
public class RandomFirehoseFactory implements FirehoseFactory
|
||||||
{
|
{
|
||||||
private static final Logger log = new Logger(RandomFirehoseFactory.class);
|
private static final Logger log = new Logger(RandomFirehoseFactory.class);
|
||||||
/** msec to sleep before generating a new row; if this and delayNsec are 0, then go as fast as possible.
|
/**
|
||||||
|
* msec to sleep before generating a new row; if this and delayNsec are 0, then go as fast as possible.
|
||||||
* json param sleepUsec (microseconds) is used to initialize this.
|
* json param sleepUsec (microseconds) is used to initialize this.
|
||||||
*/
|
*/
|
||||||
private final long delayMsec;
|
private final long delayMsec;
|
||||||
/** nsec to sleep before generating a new row; if this and delayMsec are 0, then go as fast as possible.
|
/**
|
||||||
|
* nsec to sleep before generating a new row; if this and delayMsec are 0, then go as fast as possible.
|
||||||
* json param sleepUsec (microseconds) is used to initialize this.
|
* json param sleepUsec (microseconds) is used to initialize this.
|
||||||
*/
|
*/
|
||||||
private final int delayNsec;
|
private final int delayNsec;
|
||||||
/** max rows to generate, -1 is infinite, 0 means nothing is generated; use this to prevent
|
/**
|
||||||
|
* max rows to generate, -1 is infinite, 0 means nothing is generated; use this to prevent
|
||||||
* infinite space consumption or to see what happens when a Firehose stops delivering
|
* infinite space consumption or to see what happens when a Firehose stops delivering
|
||||||
* values, or to have hasMore() return false.
|
* values, or to have hasMore() return false.
|
||||||
*/
|
*/
|
||||||
private final long maxGeneratedRows;
|
private final long maxGeneratedRows;
|
||||||
/** seed for random number generator; if 0, then no seed is used. */
|
/**
|
||||||
|
* seed for random number generator; if 0, then no seed is used.
|
||||||
|
*/
|
||||||
private final long seed;
|
private final long seed;
|
||||||
/** number of tokens to randomly associate with values (no heap limits). This can be used to
|
/**
|
||||||
|
* number of tokens to randomly associate with values (no heap limits). This can be used to
|
||||||
* stress test the number of tokens.
|
* stress test the number of tokens.
|
||||||
*/
|
*/
|
||||||
private final int nTokens;
|
private final int nTokens;
|
||||||
/** Number of token events per sleep interval. */
|
/**
|
||||||
|
* Number of token events per sleep interval.
|
||||||
|
*/
|
||||||
private final int nPerSleep;
|
private final int nPerSleep;
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
|
@ -144,7 +152,8 @@ public class RandomFirehoseFactory implements FirehoseFactory
|
||||||
double dmsec = (double) delayMsec + ((double) this.delayNsec) / 1000000.;
|
double dmsec = (double) delayMsec + ((double) this.delayNsec) / 1000000.;
|
||||||
if (dmsec > 0.0) {
|
if (dmsec > 0.0) {
|
||||||
log.info("sleep period=" + dmsec + "msec");
|
log.info("sleep period=" + dmsec + "msec");
|
||||||
log.info("approximate max rate of record generation=" + (nPerSleep * 1000./dmsec) + "/sec" +
|
log.info(
|
||||||
|
"approximate max rate of record generation=" + (nPerSleep * 1000. / dmsec) + "/sec" +
|
||||||
" or " + (60. * nPerSleep * 1000. / dmsec) + "/minute"
|
" or " + (60. * nPerSleep * 1000. / dmsec) + "/minute"
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
|
@ -193,7 +202,8 @@ public class RandomFirehoseFactory implements FirehoseFactory
|
||||||
if (modulus == 0) {
|
if (modulus == 0) {
|
||||||
sleep(sleepMsec, delayNsec);
|
sleep(sleepMsec, delayNsec);
|
||||||
}
|
}
|
||||||
} catch (InterruptedException e) {
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
throw new RuntimeException("InterruptedException");
|
throw new RuntimeException("InterruptedException");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,13 +11,13 @@ import com.metamx.druid.realtime.firehose.Firehose;
|
||||||
import com.metamx.druid.realtime.firehose.FirehoseFactory;
|
import com.metamx.druid.realtime.firehose.FirehoseFactory;
|
||||||
import twitter4j.ConnectionLifeCycleListener;
|
import twitter4j.ConnectionLifeCycleListener;
|
||||||
import twitter4j.HashtagEntity;
|
import twitter4j.HashtagEntity;
|
||||||
|
import twitter4j.StallWarning;
|
||||||
import twitter4j.Status;
|
import twitter4j.Status;
|
||||||
import twitter4j.StatusDeletionNotice;
|
import twitter4j.StatusDeletionNotice;
|
||||||
import twitter4j.StatusListener;
|
import twitter4j.StatusListener;
|
||||||
import twitter4j.TwitterStream;
|
import twitter4j.TwitterStream;
|
||||||
import twitter4j.TwitterStreamFactory;
|
import twitter4j.TwitterStreamFactory;
|
||||||
import twitter4j.User;
|
import twitter4j.User;
|
||||||
import twitter4j.StallWarning;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -49,21 +49,23 @@ import static java.lang.Thread.sleep;
|
||||||
* is UTC):
|
* is UTC):
|
||||||
* <pre>
|
* <pre>
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
* <p/>
|
||||||
*
|
* <p/>
|
||||||
* Notes on twitter.com HTTP (REST) API: v1.0 will be disabled around 2013-03 so v1.1 should be used;
|
* Notes on twitter.com HTTP (REST) API: v1.0 will be disabled around 2013-03 so v1.1 should be used;
|
||||||
* twitter4j 3.0 (not yet released) will support the v1.1 api.
|
* twitter4j 3.0 (not yet released) will support the v1.1 api.
|
||||||
* Specifically, we should be using https://stream.twitter.com/1.1/statuses/sample.json
|
* Specifically, we should be using https://stream.twitter.com/1.1/statuses/sample.json
|
||||||
* See: http://jira.twitter4j.org/browse/TFJ-186
|
* See: http://jira.twitter4j.org/browse/TFJ-186
|
||||||
*
|
* <p/>
|
||||||
* Notes on JSON parsing: as of twitter4j 2.2.x, the json parser has some bugs (ex: Status.toString()
|
* Notes on JSON parsing: as of twitter4j 2.2.x, the json parser has some bugs (ex: Status.toString()
|
||||||
* can have number format exceptions), so it might be necessary to extract raw json and process it
|
* can have number format exceptions), so it might be necessary to extract raw json and process it
|
||||||
* separately. If so, set twitter4.jsonStoreEnabled=true and look at DataObjectFactory#getRawJSON();
|
* separately. If so, set twitter4.jsonStoreEnabled=true and look at DataObjectFactory#getRawJSON();
|
||||||
* com.fasterxml.jackson.databind.ObjectMapper should be used to parse.
|
* com.fasterxml.jackson.databind.ObjectMapper should be used to parse.
|
||||||
|
*
|
||||||
* @author pbaclace
|
* @author pbaclace
|
||||||
*/
|
*/
|
||||||
@JsonTypeName("twitzer")
|
@JsonTypeName("twitzer")
|
||||||
public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
public class TwitterSpritzerFirehoseFactory implements FirehoseFactory
|
||||||
|
{
|
||||||
private static final Logger log = new Logger(TwitterSpritzerFirehoseFactory.class);
|
private static final Logger log = new Logger(TwitterSpritzerFirehoseFactory.class);
|
||||||
/**
|
/**
|
||||||
* max events to receive, -1 is infinite, 0 means nothing is delivered; use this to prevent
|
* max events to receive, -1 is infinite, 0 means nothing is delivered; use this to prevent
|
||||||
|
@ -94,7 +96,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
@Override
|
@Override
|
||||||
public Firehose connect() throws IOException
|
public Firehose connect() throws IOException
|
||||||
{
|
{
|
||||||
final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
|
final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener()
|
||||||
|
{
|
||||||
@Override
|
@Override
|
||||||
public void onConnect()
|
public void onConnect()
|
||||||
{
|
{
|
||||||
|
@ -134,7 +137,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
//
|
//
|
||||||
twitterStream = new TwitterStreamFactory().getInstance();
|
twitterStream = new TwitterStreamFactory().getInstance();
|
||||||
twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
|
twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
|
||||||
statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j
|
statusListener = new StatusListener()
|
||||||
|
{ // This is what really gets called to deliver stuff from twitter4j
|
||||||
@Override
|
@Override
|
||||||
public void onStatus(Status status)
|
public void onStatus(Status status)
|
||||||
{
|
{
|
||||||
|
@ -147,7 +151,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
log.warn("queue too slow!");
|
log.warn("queue too slow!");
|
||||||
}
|
}
|
||||||
} catch (InterruptedException e) {
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
throw new RuntimeException("InterruptedException", e);
|
throw new RuntimeException("InterruptedException", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,7 +184,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onStallWarning(StallWarning warning) {
|
public void onStallWarning(StallWarning warning)
|
||||||
|
{
|
||||||
System.out.println("Got stall warning:" + warning);
|
System.out.println("Got stall warning:" + warning);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -188,9 +194,11 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
twitterStream.sample(); // creates a generic StatusStream
|
twitterStream.sample(); // creates a generic StatusStream
|
||||||
log.info("returned from sample()");
|
log.info("returned from sample()");
|
||||||
|
|
||||||
return new Firehose() {
|
return new Firehose()
|
||||||
|
{
|
||||||
|
|
||||||
private final Runnable doNothingRunnable = new Runnable() {
|
private final Runnable doNothingRunnable = new Runnable()
|
||||||
|
{
|
||||||
public void run()
|
public void run()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -240,7 +248,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
try {
|
try {
|
||||||
log.info("reached limit, sleeping a long time...");
|
log.info("reached limit, sleeping a long time...");
|
||||||
sleep(2000000000L);
|
sleep(2000000000L);
|
||||||
} catch (InterruptedException e) {
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
throw new RuntimeException("InterruptedException", e);
|
throw new RuntimeException("InterruptedException", e);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -254,7 +263,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory {
|
||||||
Status status;
|
Status status;
|
||||||
try {
|
try {
|
||||||
status = queue.take();
|
status = queue.take();
|
||||||
} catch (InterruptedException e) {
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
throw new RuntimeException("InterruptedException", e);
|
throw new RuntimeException("InterruptedException", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,19 +27,16 @@ import java.io.StringReader;
|
||||||
|
|
||||||
public class TestCaseSupplier implements InputSupplier<BufferedReader>
|
public class TestCaseSupplier implements InputSupplier<BufferedReader>
|
||||||
{
|
{
|
||||||
private final String s;
|
private final String testString;
|
||||||
|
|
||||||
public TestCaseSupplier(String s)
|
public TestCaseSupplier(String testString)
|
||||||
{
|
{
|
||||||
this.s = s;
|
this.testString = testString;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BufferedReader getInput() throws IOException
|
public BufferedReader getInput() throws IOException
|
||||||
{
|
{
|
||||||
StringBuilder buffer = new StringBuilder();
|
return new BufferedReader(new StringReader(testString));
|
||||||
buffer.append(s);
|
|
||||||
BufferedReader br = new BufferedReader(new StringReader(buffer.toString()));
|
|
||||||
return br;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,12 +19,11 @@
|
||||||
|
|
||||||
package druid.examples.webStream;
|
package druid.examples.webStream;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.base.Throwables;
|
import com.google.common.base.Throwables;
|
||||||
import com.google.common.io.InputSupplier;
|
import com.google.common.io.InputSupplier;
|
||||||
import com.metamx.emitter.EmittingLogger;
|
import com.metamx.emitter.EmittingLogger;
|
||||||
import org.codehaus.jackson.JsonParseException;
|
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
|
||||||
import org.codehaus.jackson.type.TypeReference;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -35,17 +34,21 @@ import java.util.concurrent.TimeUnit;
|
||||||
public class UpdateStream implements Runnable
|
public class UpdateStream implements Runnable
|
||||||
{
|
{
|
||||||
private static final EmittingLogger log = new EmittingLogger(UpdateStream.class);
|
private static final EmittingLogger log = new EmittingLogger(UpdateStream.class);
|
||||||
private InputSupplier supplier;
|
|
||||||
private BlockingQueue<Map<String, Object>> queue;
|
|
||||||
private ObjectMapper mapper;
|
|
||||||
private final TypeReference<HashMap<String, Object>> typeRef;
|
|
||||||
private static final long queueWaitTime = 15L;
|
private static final long queueWaitTime = 15L;
|
||||||
|
private final TypeReference<HashMap<String, Object>> typeRef;
|
||||||
|
private final InputSupplier<BufferedReader> supplier;
|
||||||
|
private final BlockingQueue<Map<String, Object>> queue;
|
||||||
|
private final ObjectMapper mapper;
|
||||||
|
|
||||||
public UpdateStream(InputSupplier supplier, BlockingQueue<Map<String, Object>> queue)
|
public UpdateStream(
|
||||||
|
InputSupplier<BufferedReader> supplier,
|
||||||
|
BlockingQueue<Map<String, Object>> queue,
|
||||||
|
ObjectMapper mapper
|
||||||
|
)
|
||||||
{
|
{
|
||||||
this.supplier = supplier;
|
this.supplier = supplier;
|
||||||
this.queue = queue;
|
this.queue = queue;
|
||||||
this.mapper = new ObjectMapper();
|
this.mapper = mapper;
|
||||||
this.typeRef = new TypeReference<HashMap<String, Object>>()
|
this.typeRef = new TypeReference<HashMap<String, Object>>()
|
||||||
{
|
{
|
||||||
};
|
};
|
||||||
|
@ -60,28 +63,18 @@ public class UpdateStream implements Runnable
|
||||||
public void run() throws RuntimeException
|
public void run() throws RuntimeException
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
BufferedReader reader = (BufferedReader) supplier.getInput();
|
BufferedReader reader = supplier.getInput();
|
||||||
String line;
|
String line;
|
||||||
while ((line = reader.readLine()) != null) {
|
while ((line = reader.readLine()) != null) {
|
||||||
if (isValid(line)) {
|
if (isValid(line)) {
|
||||||
try {
|
|
||||||
HashMap<String, Object> map = mapper.readValue(line, typeRef);
|
HashMap<String, Object> map = mapper.readValue(line, typeRef);
|
||||||
queue.offer(map, queueWaitTime, TimeUnit.SECONDS);
|
queue.offer(map, queueWaitTime, TimeUnit.SECONDS);
|
||||||
log.info("Successfully added to queue");
|
log.info("Successfully added to queue");
|
||||||
}
|
}
|
||||||
catch (JsonParseException e) {
|
|
||||||
log.info("Invalid JSON Stream. Please check if the url returns a proper JSON stream.");
|
|
||||||
Throwables.propagate(e);
|
|
||||||
}
|
|
||||||
catch (Exception e) {
|
|
||||||
Throwables.propagate(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
e.printStackTrace();
|
throw Throwables.propagate(e);
|
||||||
Throwables.propagate(e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,14 +22,18 @@ package druid.examples.webStream;
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||||
|
import com.google.common.base.Throwables;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.metamx.common.parsers.TimestampParser;
|
||||||
import com.metamx.druid.input.InputRow;
|
import com.metamx.druid.input.InputRow;
|
||||||
import com.metamx.druid.input.MapBasedInputRow;
|
import com.metamx.druid.input.MapBasedInputRow;
|
||||||
|
import com.metamx.druid.jackson.DefaultObjectMapper;
|
||||||
import com.metamx.druid.realtime.firehose.Firehose;
|
import com.metamx.druid.realtime.firehose.Firehose;
|
||||||
import com.metamx.druid.realtime.firehose.FirehoseFactory;
|
import com.metamx.druid.realtime.firehose.FirehoseFactory;
|
||||||
import com.metamx.emitter.EmittingLogger;
|
import com.metamx.emitter.EmittingLogger;
|
||||||
|
import org.joda.time.DateTime;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
@ -41,11 +45,11 @@ import java.util.concurrent.Executors;
|
||||||
public class WebFirehoseFactory implements FirehoseFactory
|
public class WebFirehoseFactory implements FirehoseFactory
|
||||||
{
|
{
|
||||||
private static final EmittingLogger log = new EmittingLogger(WebFirehoseFactory.class);
|
private static final EmittingLogger log = new EmittingLogger(WebFirehoseFactory.class);
|
||||||
|
private static final int QUEUE_SIZE = 2000;
|
||||||
private final String url;
|
private final String url;
|
||||||
private final List<String> dimensions;
|
private final List<String> dimensions;
|
||||||
private final String timeDimension;
|
private final String timeDimension;
|
||||||
private final List<String> renamedDimensions;
|
private final List<String> renamedDimensions;
|
||||||
private static final int QUEUE_SIZE = 2000;
|
|
||||||
|
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
|
@ -53,13 +57,13 @@ public class WebFirehoseFactory implements FirehoseFactory
|
||||||
@JsonProperty("url") String url,
|
@JsonProperty("url") String url,
|
||||||
@JsonProperty("dimensions") List<String> dimensions,
|
@JsonProperty("dimensions") List<String> dimensions,
|
||||||
@JsonProperty("renamedDimensions") List<String> renamedDimensions,
|
@JsonProperty("renamedDimensions") List<String> renamedDimensions,
|
||||||
@JsonProperty("timeDimension") String s
|
@JsonProperty("timeDimension") String timeDimension
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.dimensions = dimensions;
|
this.dimensions = dimensions;
|
||||||
this.renamedDimensions = renamedDimensions;
|
this.renamedDimensions = renamedDimensions;
|
||||||
this.timeDimension = s;
|
this.timeDimension = timeDimension;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -67,17 +71,7 @@ public class WebFirehoseFactory implements FirehoseFactory
|
||||||
{
|
{
|
||||||
final BlockingQueue<Map<String, Object>> queue = new ArrayBlockingQueue<Map<String, Object>>(QUEUE_SIZE);
|
final BlockingQueue<Map<String, Object>> queue = new ArrayBlockingQueue<Map<String, Object>>(QUEUE_SIZE);
|
||||||
|
|
||||||
Runnable updateStream = new UpdateStream(new WebJsonSupplier(dimensions, url), queue);
|
Runnable updateStream = new UpdateStream(new WebJsonSupplier(dimensions, url), queue, new DefaultObjectMapper());
|
||||||
// Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler()
|
|
||||||
// {
|
|
||||||
// public void uncaughtException(Thread th, Throwable ex)
|
|
||||||
// {
|
|
||||||
// log.info("Uncaught exception: " + ex);
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
// final Thread streamReader = new Thread(updateStream);
|
|
||||||
// streamReader.setUncaughtExceptionHandler(h);
|
|
||||||
// streamReader.start();
|
|
||||||
final ExecutorService service = Executors.newSingleThreadExecutor();
|
final ExecutorService service = Executors.newSingleThreadExecutor();
|
||||||
service.submit(updateStream);
|
service.submit(updateStream);
|
||||||
|
|
||||||
|
@ -95,53 +89,45 @@ public class WebFirehoseFactory implements FirehoseFactory
|
||||||
@Override
|
@Override
|
||||||
public InputRow nextRow()
|
public InputRow nextRow()
|
||||||
{
|
{
|
||||||
if (Thread.currentThread().isInterrupted()) {
|
|
||||||
throw new RuntimeException("Interrupted, time to stop");
|
|
||||||
}
|
|
||||||
Map<String, Object> update;
|
|
||||||
try {
|
try {
|
||||||
update = queue.take();
|
Map<String, Object> processedMap = processMap(queue.take());
|
||||||
|
DateTime date = TimestampParser.createTimestampParser("auto")
|
||||||
|
.apply(processedMap.get(timeDimension).toString());
|
||||||
|
long seconds = (long) date.getMillis();
|
||||||
|
//the parser doesn't check for posix. Only expects iso or millis. This checks for posix
|
||||||
|
if (new DateTime(seconds * 1000).getYear() == new DateTime().getYear()) {
|
||||||
|
seconds = (long) date.getMillis() * 1000;
|
||||||
}
|
}
|
||||||
catch (InterruptedException e) {
|
|
||||||
throw new RuntimeException("InterrutpedException", e);
|
|
||||||
}
|
|
||||||
Map<String, Object> processedMap = processMap(update);
|
|
||||||
return new MapBasedInputRow(
|
return new MapBasedInputRow(
|
||||||
((Integer) processedMap.get(timeDimension)).longValue() * 1000,
|
seconds,
|
||||||
renamedDimensions,
|
renamedDimensions,
|
||||||
processedMap
|
processedMap
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw Throwables.propagate(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private Map<String, Object> renameKeys(Map<String, Object> update)
|
private Map<String, Object> renameKeys(Map<String, Object> update)
|
||||||
{
|
{
|
||||||
Map<String, Object> renamedMap = new HashMap<String, Object>();
|
Map<String, Object> renamedMap = Maps.newHashMap();
|
||||||
int iter = 0;
|
for (int iter = 0; iter < dimensions.size(); iter++) {
|
||||||
while (iter < dimensions.size()) {
|
if (update.get(dimensions.get(iter)) == null) {
|
||||||
|
update.put(dimensions.get(iter), null);
|
||||||
|
}
|
||||||
Object obj = update.get(dimensions.get(iter));
|
Object obj = update.get(dimensions.get(iter));
|
||||||
renamedMap.put(renamedDimensions.get(iter), obj);
|
renamedMap.put(renamedDimensions.get(iter), obj);
|
||||||
iter++;
|
}
|
||||||
|
if (renamedMap.get(timeDimension) == null) {
|
||||||
|
renamedMap.put(timeDimension, System.currentTimeMillis());
|
||||||
}
|
}
|
||||||
return renamedMap;
|
return renamedMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processNullDimensions(Map<String, Object> map)
|
|
||||||
{
|
|
||||||
for (String key : renamedDimensions) {
|
|
||||||
if (map.get(key) == null) {
|
|
||||||
if (key.equals(timeDimension)) {
|
|
||||||
map.put(key, new Integer((int) System.currentTimeMillis() / 1000));
|
|
||||||
} else {
|
|
||||||
map.put(key, null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<String, Object> processMap(Map<String, Object> map)
|
private Map<String, Object> processMap(Map<String, Object> map)
|
||||||
{
|
{
|
||||||
Map<String, Object> renamedMap = renameKeys(map);
|
Map<String, Object> renamedMap = renameKeys(map);
|
||||||
processNullDimensions(renamedMap);
|
|
||||||
return renamedMap;
|
return renamedMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,7 +142,6 @@ public class WebFirehoseFactory implements FirehoseFactory
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException
|
public void close() throws IOException
|
||||||
{
|
{
|
||||||
log.info("CLOSING!!!");
|
|
||||||
service.shutdown();
|
service.shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ package druid.examples.webStream;
|
||||||
|
|
||||||
import com.google.common.io.InputSupplier;
|
import com.google.common.io.InputSupplier;
|
||||||
import com.metamx.druid.input.InputRow;
|
import com.metamx.druid.input.InputRow;
|
||||||
|
import com.metamx.druid.jackson.DefaultObjectMapper;
|
||||||
import com.metamx.druid.realtime.firehose.Firehose;
|
import com.metamx.druid.realtime.firehose.Firehose;
|
||||||
import com.metamx.druid.realtime.firehose.FirehoseFactory;
|
import com.metamx.druid.realtime.firehose.FirehoseFactory;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
@ -128,7 +129,7 @@ public class WebFirehoseFactoryTest
|
||||||
InputSupplier testCaseSupplier = new TestCaseSupplier(
|
InputSupplier testCaseSupplier = new TestCaseSupplier(
|
||||||
"{ \"a\": \"Mozilla\\/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko\\/20100101 Firefox\\/21.0\", \"c\": \"US\", \"nk\": 1, \"tz\": \"America\\/New_York\", \"gr\": \"NY\", \"g\": \"1Chgyj\", \"h\": \"15vMQjX\", \"l\": \"o_d63rn9enb\", \"al\": \"en-US,en;q=0.5\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\/\\/forecast.weather.gov\\/MapClick.php?site=okx&FcstType=text&zmx=1&zmy=1&map.x=98&map.y=200&site=OKX\", \"u\": \"http:\\/\\/www.spc.ncep.noaa.gov\\/\", \"t\": 1372121562, \"hc\": 1368193091, \"cy\": \"New York\", \"ll\": [ 40.862598, -73.921799 ] }"
|
"{ \"a\": \"Mozilla\\/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko\\/20100101 Firefox\\/21.0\", \"c\": \"US\", \"nk\": 1, \"tz\": \"America\\/New_York\", \"gr\": \"NY\", \"g\": \"1Chgyj\", \"h\": \"15vMQjX\", \"l\": \"o_d63rn9enb\", \"al\": \"en-US,en;q=0.5\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\/\\/forecast.weather.gov\\/MapClick.php?site=okx&FcstType=text&zmx=1&zmy=1&map.x=98&map.y=200&site=OKX\", \"u\": \"http:\\/\\/www.spc.ncep.noaa.gov\\/\", \"t\": 1372121562, \"hc\": 1368193091, \"cy\": \"New York\", \"ll\": [ 40.862598, -73.921799 ] }"
|
||||||
);
|
);
|
||||||
UpdateStream updateStream = new UpdateStream(testCaseSupplier, queue);
|
UpdateStream updateStream = new UpdateStream(testCaseSupplier, queue, new DefaultObjectMapper());
|
||||||
Thread t = new Thread(updateStream);
|
Thread t = new Thread(updateStream);
|
||||||
t.start();
|
t.start();
|
||||||
Map<String, Object> expectedAnswer = new HashMap<String, Object>();
|
Map<String, Object> expectedAnswer = new HashMap<String, Object>();
|
||||||
|
@ -188,7 +189,7 @@ public class WebFirehoseFactoryTest
|
||||||
InputSupplier testCaseSupplier = new TestCaseSupplier(
|
InputSupplier testCaseSupplier = new TestCaseSupplier(
|
||||||
"{ \"a\": \"Mozilla\\/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko\\/20100101 Firefox\\/21.0\", \"nk\": 1, \"tz\": \"America\\/New_York\", \"g\": \"1Chgyj\", \"h\": \"15vMQjX\", \"l\": \"o_d63rn9enb\", \"al\": \"en-US,en;q=0.5\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\/\\/forecast.weather.gov\\/MapClick.php?site=okx&FcstType=text&zmx=1&zmy=1&map.x=98&map.y=200&site=OKX\", \"u\": \"http:\\/\\/www.spc.ncep.noaa.gov\\/\", \"t\": 1372121562, \"hc\": 1368193091, \"kw\": \"spcnws\", \"cy\": \"New York\", \"ll\": [ 40.862598, -73.921799 ] }"
|
"{ \"a\": \"Mozilla\\/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko\\/20100101 Firefox\\/21.0\", \"nk\": 1, \"tz\": \"America\\/New_York\", \"g\": \"1Chgyj\", \"h\": \"15vMQjX\", \"l\": \"o_d63rn9enb\", \"al\": \"en-US,en;q=0.5\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\/\\/forecast.weather.gov\\/MapClick.php?site=okx&FcstType=text&zmx=1&zmy=1&map.x=98&map.y=200&site=OKX\", \"u\": \"http:\\/\\/www.spc.ncep.noaa.gov\\/\", \"t\": 1372121562, \"hc\": 1368193091, \"kw\": \"spcnws\", \"cy\": \"New York\", \"ll\": [ 40.862598, -73.921799 ] }"
|
||||||
);
|
);
|
||||||
UpdateStream updateStream = new UpdateStream(testCaseSupplier, queue);
|
UpdateStream updateStream = new UpdateStream(testCaseSupplier, queue, new DefaultObjectMapper());
|
||||||
Thread t = new Thread(updateStream);
|
Thread t = new Thread(updateStream);
|
||||||
t.start();
|
t.start();
|
||||||
InputRow row = webbieHose.nextRow();
|
InputRow row = webbieHose.nextRow();
|
||||||
|
|
Loading…
Reference in New Issue