mirror of https://github.com/apache/druid.git

commit d9062d91fa (parent edba3ba40e)

    fix endpoint bugs and more docs

@@ -54,10 +54,8 @@ The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Tim
     "gran": "day"
   },
   "pathSpec": {
-    "type": "granularity",
-    "dataGranularity": "hour",
-    "inputPath": "s3n:\/\/billy-bucket\/the\/data\/is\/here",
-    "filePattern": ".*"
+    "type": "static",
+    "paths" : "example/path/data.gz,example/path/moredata.gz"
   },
   "rollupSpec": {
     "aggs": [
@@ -116,6 +114,20 @@ The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Tim
 
 There are multiple types of path specification:
 
+##### `static`
+
+Is a type of data loader where a static path to where the data files are located is passed.
+
+|property|description|required?|
+|--------|-----------|---------|
+|paths|A String of input paths indicating where the raw data is located.|yes|
+
+For example, using the static input paths:
+
+```
+"paths" : "s3n://billy-bucket/the/data/is/here/data.gz, s3n://billy-bucket/the/data/is/here/moredata.gz, s3n://billy-bucket/the/data/is/here/evenmoredata.gz"
+```
+
 ##### `granularity`
 
 Is a type of data loader that expects data to be laid out in a specific path format. Specifically, it expects it to be segregated by day in this directory format `y=XXXX/m=XX/d=XX/H=XX/M=XX/S=XX` (dates are represented by lowercase, time is represented by uppercase).
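For comparison with the `static` example added above, a `granularity` pathSpec assembled from the fields shown in the removed example (`dataGranularity`, `inputPath`, `filePattern`) would look roughly like the sketch below; the values are illustrative and not part of this commit.

```
"pathSpec": {
  "type": "granularity",
  "dataGranularity": "hour",
  "inputPath": "s3n://billy-bucket/the/data/is/here",
  "filePattern": ".*"
}
```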
@@ -21,6 +21,10 @@ druid.storage.bucket=druid
 druid.storage.baseKey=sample
 ```
 
+## How do I get HDFS to work?
+
+Make sure to include the `druid-hdfs-storage` module as one of your extensions and set `druid.storage.type=hdfs`.
+
 ## I don't see my Druid segments on my historical nodes
 You can check the coordinator console located at `<COORDINATOR_IP>:<PORT>/cluster.html`. Make sure that your segments have actually loaded on [historical nodes](Historical.html). If your segments are not present, check the coordinator logs for messages about capacity of replication errors. One reason that segments are not downloaded is because historical nodes have maxSizes that are too small, making them incapable of downloading more data. You can change that with (for example):
 
@@ -31,7 +35,7 @@ You can check the coordinator console located at `<COORDINATOR_IP>:<PORT>/cluste
 
 ## My queries are returning empty results
 
-You can check `<BROKER_IP>:<PORT>/druid/v2/datasources/<YOUR_DATASOURCE>` for the dimensions and metrics that have been created for your datasource. Make sure that the name of the aggregators you use in your query match one of these metrics. Also make sure that the query interval you specify match a valid time range where data exists.
+You can check `<BROKER_IP>:<PORT>/druid/v2/datasources/<YOUR_DATASOURCE>?interval=0/3000` for the dimensions and metrics that have been created for your datasource. Make sure that the name of the aggregators you use in your query match one of these metrics. Also make sure that the query interval you specify match a valid time range where data exists. Note: the broker endpoint will only return valid results on historical segments.
 
 ## More information
 
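As a rough illustration of what that broker endpoint returns, the response is a JSON object listing the dimensions and metrics known for the datasource, which you can compare against the aggregator names and interval in your query. The datasource contents below are hypothetical and do not come from this commit.

```
{
  "dimensions": ["page", "language", "user"],
  "metrics": ["count", "added", "deleted"]
}
```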
@@ -7,7 +7,7 @@ The plumber handles generated segments both while they are being generated and w
 
 |Field|Type|Description|Required|
 |-----|----|-----------|--------|
-|type|String|Specifies the type of plumber. Each value will have its own configuration schema. Plumbers packaged with Druid are described below.|yes|
+|type|String|Specifies the type of plumber. Each value will have its own configuration schema. Plumbers packaged with Druid are described below. The default type is "realtime".|yes|
 
 The following can be configured on the plumber:
 
@@ -16,12 +16,11 @@ The following can be configured on the plumber:
 * `maxPendingPersists` is how many persists a plumber can do concurrently without starting to block.
 * `segmentGranularity` specifies the granularity of the segment, or the amount of time a segment will represent.
 * `rejectionPolicy` controls how data sets the data acceptance policy for creating and handing off segments. The following policies are available:
-    * `serverTime` – The default policy, it is optimal for current data that is generated and ingested in real time. Uses `windowPeriod` to accept only those events that are inside the window looking forward and back.
+    * `serverTime` – The recommended policy for "current time" data, it is optimal for current data that is generated and ingested in real time. Uses `windowPeriod` to accept only those events that are inside the window looking forward and back.
+    * `messageTime` – Can be used for non-"current time" as long as that data is relatively in sequence. Events are rejected if they are less than `windowPeriod` from the event with the latest timestamp. Hand off only occurs if an event is seen after the segmentGranularity and `windowPeriod`.
     * `none` – Never hands off data unless shutdown() is called on the configured firehose.
     * `test` – Useful for testing that handoff is working, *not useful in terms of data integrity*. It uses the sum of `segmentGranularity` plus `windowPeriod` as a window.
 
 
 Available Plumbers
 ------------------
 
@@ -118,10 +118,10 @@ The Plumber handles generated segments both while they are being generated and w
 * `windowPeriod` is the amount of lag time to allow events. The example configures a 10 minute window, meaning that any event more than 10 minutes ago will be thrown away and not included in the segment generated by the realtime server.
 * `segmentGranularity` specifies the granularity of the segment, or the amount of time a segment will represent.
 * `basePersistDirectory` is the directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.
+* `rejectionPolicy` determines what events are rejected upon ingestion.
 
 See [Plumber](Plumber.html) for a fuller discussion of Plumber configuration.
 
 Constraints
 -----------
 
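A minimal sketch of how these plumber fields might be grouped in a realtime spec, assuming they nest under a single plumber object as in the example specs of this era; the values (window, granularity, directory, policy type) are illustrative and not taken from this commit.

```
"plumber": {
  "type": "realtime",
  "windowPeriod": "PT10m",
  "segmentGranularity": "hour",
  "basePersistDirectory": "/tmp/realtime/basePersist",
  "rejectionPolicy": { "type": "serverTime" }
}
```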
@@ -159,6 +159,10 @@ public class DatasourcesResource
     for (DruidServer druidServer : serverInventoryView.getInventory()) {
       DruidDataSource druidDataSource = druidServer.getDataSource(dataSourceName);
 
+      if (druidDataSource == null) {
+        continue;
+      }
+
       long dataSourceSegmentSize = 0;
       for (DataSegment dataSegment : druidDataSource.getSegments()) {
         dataSourceSegmentSize += dataSegment.getSize();
@@ -270,10 +274,7 @@ public class DatasourcesResource
         }
 
         Pair<DataSegment, Set<String>> val = getSegment(dataSegment.getIdentifier());
-        segments.put("id", dataSegment.getIdentifier());
-        segments.put("metadata", val.lhs);
-        segments.put("servers", val.rhs);
+        segments.put(dataSegment.getIdentifier(), ImmutableMap.of("metadata", val.lhs, "servers", val.rhs));
       }
 
     return Response.ok(retVal).build();
@@ -335,10 +336,7 @@ public class DatasourcesResource
         }
 
         Pair<DataSegment, Set<String>> val = getSegment(dataSegment.getIdentifier());
-        segments.put("id", dataSegment.getIdentifier());
-        segments.put("metadata", val.lhs);
-        segments.put("servers", val.rhs);
+        segments.put(dataSegment.getIdentifier(), ImmutableMap.of("metadata", val.lhs, "servers", val.rhs));
       }
     }
 
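Both hunks above fix the same endpoint bug: the old code reused the fixed keys "id", "metadata", and "servers" for every segment in the loop, so each iteration overwrote the previous one; the new code keys the map by segment identifier. A sketch of the resulting segment map, using a hypothetical segment identifier and server, with the serialized segment metadata abbreviated:

```
{
  "wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-01T12:30:00.000Z": {
    "metadata": { "dataSource": "wikipedia", "size": 10240 },
    "servers": ["historical1.example.com:8081"]
  }
}
```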
@@ -366,14 +364,14 @@ public class DatasourcesResource
       return Response.ok(retVal).build();
     }
 
-    final Set<Interval> intervals = Sets.newTreeSet(comparator);
+    final Set<String> retVal = Sets.newTreeSet(Comparators.inverse(String.CASE_INSENSITIVE_ORDER));
     for (DataSegment dataSegment : dataSource.getSegments()) {
       if (theInterval.contains(dataSegment.getInterval())) {
-        intervals.add(dataSegment.getInterval());
+        retVal.add(dataSegment.getIdentifier());
       }
     }
 
-    return Response.ok(intervals).build();
+    return Response.ok(retVal).build();
   }
 
   @GET
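With this change the endpoint returns segment identifiers (sorted case-insensitively in reverse order) rather than raw intervals. A sketch of the new response body, using hypothetical identifiers:

```
[
  "wikipedia_2013-08-02T00:00:00.000Z_2013-08-03T00:00:00.000Z_2013-08-02T04:00:00.000Z",
  "wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-01T12:30:00.000Z"
]
```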
@@ -22,12 +22,17 @@ package io.druid.server.http;
 import com.google.api.client.util.Lists;
 import com.google.api.client.util.Maps;
 import com.google.common.base.Function;
+import com.google.common.collect.HashBasedTable;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Sets;
+import com.google.common.collect.Table;
 import com.google.inject.Inject;
+import com.metamx.common.MapUtils;
 import io.druid.client.DruidDataSource;
 import io.druid.client.DruidServer;
 import io.druid.client.InventoryView;
+import io.druid.timeline.DataSegment;
+import org.joda.time.Interval;
 
 import javax.ws.rs.GET;
 import javax.ws.rs.Path;
@@ -92,9 +97,29 @@ public class TiersResource
   @Path("/{tierName}")
   @Produces("application/json")
   public Response getTierDatasources(
-      @PathParam("tierName") String tierName
+      @PathParam("tierName") String tierName,
+      @QueryParam("simple") String simple
   )
   {
+    if (simple != null) {
+      Table<String, Interval, Map<String, Object>> retVal = HashBasedTable.create();
+      for (DruidServer druidServer : serverInventoryView.getInventory()) {
+        if (druidServer.getTier().equalsIgnoreCase(tierName)) {
+          for (DataSegment dataSegment : druidServer.getSegments().values()) {
+            Map<String, Object> properties = retVal.get(dataSegment.getDataSource(), dataSegment.getInterval());
+            if (properties == null) {
+              properties = Maps.newHashMap();
+              retVal.put(dataSegment.getDataSource(), dataSegment.getInterval(), properties);
+            }
+            properties.put("size", MapUtils.getLong(properties, "size", 0L) + dataSegment.getSize());
+            properties.put("count", MapUtils.getInt(properties, "count", 0) + 1);
+          }
+        }
+      }
+
+      return Response.ok(retVal.rowMap()).build();
+    }
+
     Set<String> retVal = Sets.newHashSet();
     for (DruidServer druidServer : serverInventoryView.getInventory()) {
       if (druidServer.getTier().equalsIgnoreCase(tierName)) {
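The new `simple` branch returns `retVal.rowMap()`, that is, a map keyed first by datasource and then by interval, with the summed segment size and segment count per cell. A sketch of the response shape, with hypothetical names and numbers:

```
{
  "wikipedia": {
    "2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z": {
      "size": 348490,
      "count": 2
    }
  }
}
```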
@@ -91,7 +91,7 @@ public class QueryHostFinder<T>
       return null;
     }
 
-    log.info("Selected [%s]", server.getHost());
+    log.debug("Selected [%s]", server.getHost());
 
     return server.getHost();
   }
@@ -91,8 +91,8 @@ public class CliCoordinator extends ServerRunnable
         JsonConfigProvider.bind(binder, "druid.manager.segments", DatabaseSegmentManagerConfig.class);
         JsonConfigProvider.bind(binder, "druid.manager.rules", DatabaseRuleManagerConfig.class);
 
-        binder.bind(RedirectFilter.class).in(LazySingleton.class);
-        binder.bind(RedirectInfo.class).to(CoordinatorRedirectInfo.class).in(LazySingleton.class);
+        //binder.bind(RedirectFilter.class).in(LazySingleton.class);
+        //binder.bind(RedirectInfo.class).to(CoordinatorRedirectInfo.class).in(LazySingleton.class);
 
         binder.bind(DatabaseSegmentManager.class)
             .toProvider(DatabaseSegmentManagerProvider.class)
@@ -46,7 +46,7 @@ class CoordinatorJettyServerInitializer implements JettyServerInitializer
 
     root.addServlet(holderPwd, "/");
     root.setResourceBase(DruidCoordinator.class.getClassLoader().getResource("static").toExternalForm());
-    root.addFilter(new FilterHolder(injector.getInstance(RedirectFilter.class)), "/*", null);
+    //root.addFilter(new FilterHolder(injector.getInstance(RedirectFilter.class)), "/*", null);
     root.addFilter(GzipFilter.class, "/*", null);
 
     // Can't use '/*' here because of Guice and Jetty static content conflicts