Merge remote-tracking branch 'origin/master' into druid-0.7.x-compressionstrategy

commit ccc757dc64 by Charles Allen, 2014-11-19 09:39:35 -08:00
158 changed files with 4707 additions and 2813 deletions

@@ -18,55 +18,68 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-cassandra-storage</artifactId>
<name>druid-cassandra-storage</name>
<description>druid-cassandra-storage</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<dependency>
<groupId>com.netflix.astyanax</groupId>
<artifactId>astyanax</artifactId>
<version>1.0.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

@@ -18,142 +18,155 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<name>druid-common</name>
<description>druid-common</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>java-util</artifactId>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
</dependency>
<dependency>
<groupId>commons-pool</groupId>
<artifactId>commons-pool</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>org.skife.config</groupId>
<artifactId>config-magic</artifactId>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-validator</artifactId>
</dependency>
<dependency>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-guava</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-joda</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-smile</artifactId>
</dependency>
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
</dependency>
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
</dependency>
<dependency>
<groupId>org.jdbi</groupId>
<artifactId>jdbi</artifactId>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>java-util</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

@@ -21,6 +21,7 @@ package io.druid.common.utils;
import com.google.common.io.ByteStreams;
import com.google.common.io.OutputSupplier;
import com.google.common.primitives.Ints;
import io.druid.collections.IntList;
import java.io.IOException;
@@ -262,4 +263,9 @@ public class SerializerUtils
return retVal;
}
public int getSerializedStringByteSize(String str)
{
return Ints.BYTES + str.getBytes(UTF8).length;
}
}

@@ -29,7 +29,13 @@ public class MetadataStorageConnectorConfig
private boolean createTables = true;
@JsonProperty
private String host = "localhost";
@JsonProperty
private int port = 1527;
@JsonProperty
private String connectURI;
@JsonProperty
private String user = null;
@@ -37,16 +43,26 @@ MetadataStorageConnectorConfig
@JsonProperty
private String password = null;
@JsonProperty
private String validationQuery = "SELECT 1";
public boolean isCreateTables()
{
return createTables;
}
public String getHost()
{
return host;
}
public int getPort()
{
return port;
}
public String getConnectURI()
{
if (connectURI == null) {
return String.format("jdbc:derby://%s:%s/druid;create=true", host, port);
}
return connectURI;
}
@@ -60,20 +76,14 @@ MetadataStorageConnectorConfig
return password;
}
public String getValidationQuery()
{
return validationQuery;
}
@Override
public String toString()
{
return "DbConnectorConfig{" +
"createTables=" + createTables +
", connectURI='" + getConnectURI() + '\'' +
", user='" + user + '\'' +
", password=****" +
", validationQuery='" + validationQuery + '\'' +
'}';
}
}

@@ -20,6 +20,7 @@
package io.druid.collections;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.metamx.common.guava.BaseSequence;
@@ -28,7 +29,7 @@ import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.SequenceTestHelper;
import com.metamx.common.guava.Sequences;
import com.metamx.common.guava.TestSequence;
import org.junit.Assert;
import org.junit.Test;
import javax.annotation.Nullable;
@@ -59,6 +60,61 @@ public class OrderedMergeSequenceTest
}
}
@Test
public void testMergeEmptySequence() throws Exception
{
final ArrayList<TestSequence<Integer>> testSequences = Lists.newArrayList(
TestSequence.create(ImmutableList.<Integer>of()),
TestSequence.create(2, 8),
TestSequence.create(4, 6, 8)
);
OrderedMergeSequence<Integer> seq = makeMergedSequence(Ordering.<Integer>natural(), testSequences);
SequenceTestHelper.testAll(seq, Arrays.asList(2, 4, 6, 8, 8));
for (TestSequence<Integer> sequence : testSequences) {
Assert.assertTrue(sequence.isClosed());
}
}
@Test
public void testMergeEmptySequenceAtEnd() throws Exception
{
final ArrayList<TestSequence<Integer>> testSequences = Lists.newArrayList(
TestSequence.create(2, 8),
TestSequence.create(4, 6, 8),
TestSequence.create(ImmutableList.<Integer>of())
);
OrderedMergeSequence<Integer> seq = makeMergedSequence(Ordering.<Integer>natural(), testSequences);
SequenceTestHelper.testAll(seq, Arrays.asList(2, 4, 6, 8, 8));
for (TestSequence<Integer> sequence : testSequences) {
Assert.assertTrue(sequence.isClosed());
}
}
@Test
public void testMergeEmptySequenceMiddle() throws Exception
{
final ArrayList<TestSequence<Integer>> testSequences = Lists.newArrayList(
TestSequence.create(2, 8),
TestSequence.create(ImmutableList.<Integer>of()),
TestSequence.create(4, 6, 8)
);
OrderedMergeSequence<Integer> seq = makeMergedSequence(Ordering.<Integer>natural(), testSequences);
SequenceTestHelper.testAll(seq, Arrays.asList(2, 4, 6, 8, 8));
for (TestSequence<Integer> sequence : testSequences) {
Assert.assertTrue(sequence.isClosed());
}
}
@Test
public void testScrewsUpOnOutOfOrderBeginningOfList() throws Exception
{

@@ -82,7 +82,7 @@ Data segment announcers are used to announce segments.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.type`|Choices: legacy or batch. The type of data segment announcer to use.|batch|
##### Single Data Segment Announcer

@@ -1,18 +1,34 @@
---
layout: doc_page
---
## Datasources
A data source is the Druid equivalent of a database table. However, a query can also masquerade as a data source, providing subquery-like functionality. Query data sources are currently supported only by [GroupBy](GroupByQuery.html) queries.
### Table Data Source
The table data source is the most common type. It's represented by a string, or by the full structure:
```json
{
"type": "table",
"name": <string_value>
"name": "<string_value>"
}
```
### Union Data Source
This data source unions two or more table data sources.
```json
{
"type": "union",
"dataSources": ["<string_value1>", "<string_value2>", "<string_value3>", ... ]
}
```
Note that the data sources being unioned should have the same schema.
### Query Data Source
```json
{

@@ -29,9 +29,9 @@ Returns dimension values transformed using the given [DimExtractionFn](#toc_3)
}
```
## <a id="toc_3"></a>DimExtractionFn
`DimExtractionFn`s define the transformation applied to each dimension value
### RegexDimExtractionFn

@@ -93,6 +93,22 @@ A sample ingest firehose spec is shown below -
|metrics|The list of metrics to select. If left empty, no metrics are returned. If left null or not defined, all metrics are selected.|no|
|filter| See [Filters](Filters.html)|yes|
#### CombiningFirehose
This firehose can be used to combine and merge data from a list of different firehoses.
```json
{
"type" : "combining",
"delegates" : [ { firehose1 }, { firehose2 }, ..... ]
}
```
|property|description|required?|
|--------|-----------|---------|
|type|combining|yes|
|delegates|list of firehoses to combine data from|yes|
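For concreteness, a sketch of a combining spec with two delegates; the `ingestSegment` and `local` delegate specs below are illustrative stand-ins, not part of this change:

```json
{
  "type" : "combining",
  "delegates" : [
    { "type" : "ingestSegment", "dataSource" : "wikipedia", "interval" : "2013-01-01/2013-01-02" },
    { "type" : "local", "baseDir" : "/tmp/data", "filter" : "*.json" }
  ]
}
```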
Parsing Data
------------

@@ -17,25 +17,25 @@ Supported granularity strings are: `all`, `none`, `minute`, `fifteen_minute`, `t
### Duration Granularities
Duration granularities are specified as an exact duration in milliseconds and timestamps are returned as UTC.
They also support specifying an optional origin, which defines where to start counting time buckets from (defaults to 1970-01-01T00:00:00Z).
```javascript
{"type": "duration", "duration": "7200000"}
{"type": "duration", "duration": 7200000}
```
This chunks up every 2 hours.
```javascript
{"type": "duration", "duration": "3600000", "origin": "2012-01-01T00:30:00Z"}
{"type": "duration", "duration": 3600000, "origin": "2012-01-01T00:30:00Z"}
```
This chunks up every hour on the half-hour.
### Period Granularities
Period granularities are specified as arbitrary period combinations of years, months, weeks, hours, minutes and seconds (e.g. P2W, P3M, PT1H30M, PT0.750S) in [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. They support specifying a time zone which determines where period boundaries start as well as the timezone of the returned timestamps. By default, years start on the first of January, months start on the first of the month and weeks start on Mondays unless an origin is specified.
Time zone is optional (defaults to UTC). Origin is optional (defaults to 1970-01-01T00:00:00 in the given time zone).
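For example, a period granularity that buckets by day in a specific time zone might be written as follows (a sketch in the same style as the duration examples above):

```javascript
{"type": "period", "period": "P1D", "timeZone": "America/Los_Angeles"}
```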

@@ -49,7 +49,7 @@ There are 11 main parts to a groupBy query:
|--------|-----------|---------|
|queryType|This String should always be "groupBy"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database, or a [DataSource](DataSource.html) structure.|yes|
|dimensions|A JSON list of dimensions to do the groupBy over; or see [DimensionSpecs](DimensionSpecs.html) for ways to extract dimensions.|yes|
|limitSpec|See [LimitSpec](LimitSpec.html).|no|
|having|See [Having](Having.html).|no|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
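As a sketch of how these parts fit together (the dimension, aggregator, and interval values here are illustrative, and the remaining required fields follow the rest of this table):

```json
{
  "queryType": "groupBy",
  "dataSource": "sample_datasource",
  "dimensions": ["country", "device"],
  "granularity": "day",
  "aggregations": [ { "type": "longSum", "name": "total_count", "fieldName": "count" } ],
  "intervals": [ "2012-01-01T00:00:00.000/2012-01-03T00:00:00.000" ]
}
```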

@@ -13,6 +13,15 @@ Druid supports the following types of having clauses.
The simplest having clause is a numeric filter.
Numeric filters can be used as the base filters for more complex boolean expressions of filters.
Here's an example of a having-clause numeric filter:
```json
{
"type": "greaterThan",
"aggregation": "myAggMetric",
"value": 100
}
```
#### Equal To
The equalTo filter will match rows with a specific aggregate value.
@@ -21,7 +30,7 @@ The grammar for an `equalTo` filter is as follows:
```json
{
"type": "equalTo",
"aggregation": <aggregate_metric>,
"aggregation": "<aggregate_metric>",
"value": <numeric_value>
}
```
@@ -36,7 +45,7 @@ The grammar for a `greaterThan` filter is as follows:
```json
{
"type": "greaterThan",
"aggregation": <aggregate_metric>,
"aggregation": "<aggregate_metric>",
"value": <numeric_value>
}
```
@@ -51,7 +60,7 @@ The grammar for a `greaterThan` filter is as follows:
```json
{
"type": "lessThan",
"aggregation": <aggregate_metric>,
"aggregation": "<aggregate_metric>",
"value": <numeric_value>
}
```
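As noted above, numeric filters can serve as the base of more complex boolean expressions. A sketch of an `and` having spec combining two of the filters above (the `and` grammar is assumed here, not shown in this diff):

```json
{
  "type": "and",
  "havingSpecs": [
    { "type": "greaterThan", "aggregation": "myAggMetric", "value": 10 },
    { "type": "lessThan", "aggregation": "myAggMetric", "value": 100 }
  ]
}
```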

@@ -18,11 +18,13 @@ The default limit spec takes a limit and the list of columns to do an orderBy op
#### OrderByColumnSpec
OrderByColumnSpecs indicate how to do order by operations. Each order-by condition can be a `jsonString` or a map of the following form:
```json
{
"dimension" : <Any dimension or metric>,
"direction" : "ASCENDING OR DESCENDING"
"dimension" : "<Any dimension or metric name>",
"direction" : <"ascending"|"descending">
}
```
If only the dimension is provided (as a JSON string), the default order-by is ascending.
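Putting this together, a sketch of a default limit spec that mixes both forms (the column names and limit are illustrative):

```json
{
  "type": "default",
  "limit": 5,
  "columns": [
    "dim1",
    { "dimension": "metric1", "direction": "descending" }
  ]
}
```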

@@ -4,7 +4,7 @@ layout: doc_page
# Extending Druid With Custom Modules
Druid uses a module system that allows for the addition of extensions at runtime.
## Specifying extensions
@@ -20,11 +20,12 @@ Druid has the ability to automatically load extension jars from maven at runtime
## Configuring the extensions
Druid provides the following settings to configure the loading of extensions:
* `druid.extensions.coordinates`
This is a JSON Array list of "groupId:artifactId:version" maven coordinates. Defaults to `[]`
This is a JSON array of "groupId:artifactId[:version]" maven coordinates. For artifacts without version specified, Druid will append the default version. Defaults to `[]`
* `druid.extensions.defaultVersion`
Version to use for extension artifacts without version information. Defaults to the `druid-server` artifact version.
* `druid.extensions.localRepository`
This specifies where to look for the "local repository". The way maven gets dependencies is that it downloads them to a "local repository" on your local disk and then collects the paths to each of the jars. This specifies the directory to consider the "local repository". Defaults to `~/.m2/repository`
* `druid.extensions.remoteRepositories`

@@ -119,15 +119,15 @@ Including this strategy means all timeBoundary queries are always routed to the
Queries with a priority set to less than minPriority are routed to the lowest priority broker. Queries with priority set to greater than maxPriority are routed to the highest priority broker. By default, minPriority is 0 and maxPriority is 1. Using these default values, if a query with priority 0 (the default query priority is 0) is sent, the query skips the priority selection logic.
### JavaScript
Allows defining arbitrary routing rules using a JavaScript function. The function is passed the configuration and the query to be executed, and returns the tier it should be routed to, or null for the default tier.
*Example*: a function that sends queries containing three or more aggregators to the lowest priority broker.
```json
{
"type" : "javascript",
"function" : "function (config, query) { if (config.getTierToBrokerMap().values().size() > 0 && query.getAggregatorSpecs && query.getAggregatorSpecs().size() <= 2) { return config.getTierToBrokerMap().values().toArray()[0] } else { return config.getDefaultBrokerServiceName() } }"
"function" : "function (config, query) { if (query.getAggregatorSpecs && query.getAggregatorSpecs().size() >= 3) { var size = config.getTierToBrokerMap().values().size(); if (size > 0) { return config.getTierToBrokerMap().values().toArray()[size-1] } else { return config.getDefaultBrokerServiceName() } } else { return null } }"
}
```

@@ -27,7 +27,7 @@ Forever load rules are of the form:
}
```
* `type` - this should always be "loadByInterval"
* `type` - this should always be "loadForever"
* `tieredReplicants` - A JSON Object where the keys are the tier names and values are the number of replicas for that tier.
@@ -86,7 +86,7 @@ Forever drop rules are of the form:
}
```
* `type` - this should always be "dropByPeriod"
* `type` - this should always be "dropForever"
All segments that match this rule are dropped from the cluster.
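With the corrected type names, minimal sketches of a forever load rule and a forever drop rule look like this (the tier names are illustrative):

```json
{
  "type": "loadForever",
  "tieredReplicants": { "hot": 2, "_default_tier": 1 }
}
```

```json
{
  "type": "dropForever"
}
```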

@@ -30,14 +30,14 @@ There are several main parts to a search query:
|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be "search"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
|filter|See [Filters](Filters.html)|no|
|queryType|This String should always be "search"; this is the first thing Druid looks at to figure out how to interpret the query.|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database.|yes|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html).|yes|
|filter|See [Filters](Filters.html).|no|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no|
|query|See [SearchQuerySpec](SearchQuerySpec.html).|yes|
|sort|An object specifying how the results of the search should be sorted. Two possible types here are "lexicographic" (the default sort) and "strlen".|no|
|context|An additional JSON Object which can be used to specify certain flags.|no|
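A minimal sketch tying these parts together (the dimension names and query spec values are illustrative):

```json
{
  "queryType": "search",
  "dataSource": "sample_datasource",
  "granularity": "all",
  "searchDimensions": [ "dim1", "dim2" ],
  "query": { "type": "insensitive_contains", "value": "foo" },
  "sort": { "type": "lexicographic" },
  "intervals": [ "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000" ]
}
```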
The format of the result is:

@@ -26,9 +26,9 @@ There are several main parts to a select query:
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|filter|See [Filters](Filters.html)|no|
|dimensions|A String array of dimensions to select. If left empty, all dimensions are returned.|no|
|metrics|A String array of metrics to select. If left empty, all metrics are returned.|no|
|pagingSpec|A JSON object indicating offsets into different scanned segments. Query results will return a `pagingIdentifiers` value that can be reused in the next query for pagination.|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|
The format of the result is:
@@ -140,4 +140,30 @@ The format of the result is:
} ]
```
The `threshold` determines how many hits are returned, with each hit indexed by an offset.
The results above include:
```json
"pagingIdentifiers" : {
"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9" : 4
},
```
This can be used with the next query's pagingSpec:
```json
{
"queryType": "select",
"dataSource": "wikipedia",
"dimensions":[],
"metrics":[],
"granularity": "all",
"intervals": [
"2013-01-01/2013-01-02"
],
"pagingSpec":{"pagingIdentifiers": {"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9" : 5}, "threshold":5}
}
```
Note that the second query specifies an offset that is 1 greater than the largest offset found in the initial results. To return the next "page", this offset must be incremented by 1 with each new query. When an empty result set is received, the very last page has been returned.

@@ -11,7 +11,7 @@ The topN metric spec specifies how topN values should be sorted.
The simplest metric specification is a String value indicating the metric to sort topN results by. They are included in a topN query with:
```json
"metric": <metric_value_string>
"metric": "<metric_name>"
```
The metric field can also be given as a JSON object. The grammar for dimension values sorted by numeric value is shown below:
@@ -19,7 +19,7 @@ The metric field can also be given as a JSON object. The grammar for dimension v
```json
"metric": {
"type": "numeric",
"metric": "<metric_value>"
"metric": "<metric_name>"
}
```
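The sort type need not be numeric. A lexicographic metric spec is expressed the same way; the `previousStop` field (the dimension value to start from) is assumed here rather than taken from this diff:

```json
"metric": {
  "type": "lexicographic",
  "previousStop": "<previousDimValue>"
}
```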

@@ -72,9 +72,9 @@ There are 10 parts to a topN query, but 7 of them are shared with [TimeseriesQue
|property|description|required?|
|--------|-----------|---------|
|dimension|A String or JSON object defining the dimension that you want the top taken for. For more info, see [DimensionSpecs](DimensionSpecs.html)|yes|
|threshold|An integer defining the N in the topN (i.e. how many you want in the top list)|yes|
|metric|A String or JSON object specifying the metric to sort by for the top list. For more info, see [TopNMetricSpec](TopNMetricSpec.html).|yes|
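A minimal sketch combining these three parts with the shared timeseries fields (all values illustrative):

```json
{
  "queryType": "topN",
  "dataSource": "sample_datasource",
  "dimension": "dim1",
  "threshold": 5,
  "metric": "count",
  "granularity": "all",
  "aggregations": [ { "type": "longSum", "name": "count", "fieldName": "count" } ],
  "intervals": [ "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000" ]
}
```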
Please note the context JSON object is also available for topN queries and should be used with the same caution as the timeseries case.
The format of the results would look like so:

@@ -75,9 +75,13 @@ Setting up Zookeeper
Before we get started, we need to start Apache Zookeeper.
Download zookeeper from [http://www.apache.org/dyn/closer.cgi/zookeeper/](http://www.apache.org/dyn/closer.cgi/zookeeper/) and install it, e.g.:

```bash
curl http://www.gtlib.gatech.edu/pub/apache/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz -o zookeeper-3.4.6.tar.gz
tar xzf zookeeper-3.4.6.tar.gz
cd zookeeper-3.4.6
cp conf/zoo_sample.cfg conf/zoo.cfg
./bin/zkServer.sh start
cd ..
```

@@ -48,9 +48,13 @@ CREATE database druid;
#### Setting up Zookeeper
Download zookeeper from [http://www.apache.org/dyn/closer.cgi/zookeeper/](http://www.apache.org/dyn/closer.cgi/zookeeper/) and install it, e.g.:

```bash
curl http://www.gtlib.gatech.edu/pub/apache/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz -o zookeeper-3.4.6.tar.gz
tar xzf zookeeper-3.4.6.tar.gz
cd zookeeper-3.4.6
cp conf/zoo_sample.cfg conf/zoo.cfg
./bin/zkServer.sh start
cd ..
```

@@ -18,106 +18,119 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-examples</artifactId>
<name>druid-examples</name>
<description>druid-examples</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-core</artifactId>
<version>3.0.3</version>
</dependency>
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-async</artifactId>
<version>3.0.3</version>
</dependency>
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-stream</artifactId>
<version>3.0.3</version>
</dependency>
<dependency>
<groupId>commons-validator</groupId>
<artifactId>commons-validator</artifactId>
<version>1.4.0</version>
</dependency>
<!-- For tests! -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<outputFile>
${project.build.directory}/${project.artifactId}-${project.version}-selfcontained.jar
</outputFile>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

@@ -18,73 +18,86 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-hdfs-storage</artifactId>
<name>druid-hdfs-storage</name>
<description>druid-hdfs-storage</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<!-- override jets3t from hadoop-core -->
<dependency>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
</dependency>
<!-- override httpclient / httpcore version from jets3t -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>emitter</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

@@ -17,54 +17,67 @@
~ along with this program; if not, write to the Free Software
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-histogram</artifactId>
<name>druid-histogram</name>
<description>druid-histogram</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-processing</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- Tests -->
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-processing</artifactId>
<version>${project.parent.version}</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

@@ -18,116 +18,129 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-indexing-hadoop</artifactId>
<name>druid-indexing-hadoop</name>
<description>Druid Indexing Hadoop</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>java-util</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<!-- override jets3t from hadoop-core -->
<dependency>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
</dependency>
<!-- override httpclient / httpcore version from jets3t -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>org.jdbi</groupId>
<artifactId>jdbi</artifactId>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<outputFile>
${project.build.directory}/${project.artifactId}-${project.version}-selfcontained.jar
</outputFile>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

@@ -38,14 +38,18 @@ public class HadoopDruidIndexerJob implements Jobby
@Inject
public HadoopDruidIndexerJob(
HadoopDruidIndexerConfig config,
MetadataStorageUpdaterJobHandler handler
)
{
config.verify();
this.config = config;
if (config.isUpdaterJobSpecSet()) {
metadataStorageUpdaterJob = new MetadataStorageUpdaterJob(
config,
handler
);
} else {
metadataStorageUpdaterJob = null;
}

@@ -19,7 +19,6 @@
package io.druid.indexer;
import io.druid.timeline.DataSegment;
import java.util.List;
@@ -29,15 +28,15 @@ import java.util.List;
public class MetadataStorageUpdaterJob implements Jobby
{
private final HadoopDruidIndexerConfig config;
private final MetadataStorageUpdaterJobHandler handler;
public MetadataStorageUpdaterJob(
HadoopDruidIndexerConfig config,
MetadataStorageUpdaterJobHandler handler
)
{
this.config = config;
this.handler = handler;
}
@Override

@@ -26,7 +26,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.Jobby;
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = SingleDimensionPartitionsSpec.class)
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = HashedPartitionsSpec.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "dimension", value = SingleDimensionPartitionsSpec.class),
@JsonSubTypes.Type(name = "random", value = RandomPartitionsSpec.class),

@@ -23,10 +23,16 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Supplier;
import io.druid.metadata.MetadataStorageConnectorConfig;
import javax.validation.constraints.NotNull;
/**
*/
public class MetadataStorageUpdaterJobSpec implements Supplier<MetadataStorageConnectorConfig>
{
@JsonProperty("type")
@NotNull
public String type;
@JsonProperty("connectURI")
public String connectURI;
@@ -44,6 +50,11 @@ public class MetadataStorageUpdaterJobSpec implements Supplier<MetadataStorageCo
return segmentTable;
}
public String getType()
{
return type;
}
@Override
public MetadataStorageConnectorConfig get()
{

@@ -22,6 +22,7 @@ package io.druid.indexer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import io.druid.indexer.partitions.HashedPartitionsSpec;
import io.druid.metadata.MetadataStorageConnectorConfig;
import io.druid.indexer.partitions.PartitionsSpec;
import io.druid.indexer.partitions.RandomPartitionsSpec;
@@ -133,7 +134,7 @@ public class HadoopIngestionSpecTest
}
@Test
public void testPartitionsSpecAutoDHashed()
{
final HadoopIngestionSpec schema;
@@ -167,55 +168,7 @@ public class HadoopIngestionSpecTest
Assert.assertTrue(
"partitionSpec",
partitionsSpec instanceof SingleDimensionPartitionsSpec
);
}
@Test
public void testPartitionsSpecSpecificDimensionLegacy()
{
final HadoopIngestionSpec schema;
try {
schema = jsonReadWriteRead(
"{"
+ "\"partitionsSpec\":{"
+ " \"targetPartitionSize\":100,"
+ " \"partitionDimension\":\"foo\""
+ " }"
+ "}",
HadoopIngestionSpec.class
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
final PartitionsSpec partitionsSpec = schema.getTuningConfig().getPartitionsSpec();
Assert.assertEquals(
"isDeterminingPartitions",
partitionsSpec.isDeterminingPartitions(),
true
);
Assert.assertEquals(
"getTargetPartitionSize",
partitionsSpec.getTargetPartitionSize(),
100
);
Assert.assertEquals(
"getMaxPartitionSize",
partitionsSpec.getMaxPartitionSize(),
150
);
Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
Assert.assertEquals(
"getPartitionDimension",
((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
"foo"
partitionsSpec instanceof HashedPartitionsSpec
);
}
@@ -274,6 +227,7 @@ public class HadoopIngestionSpecTest
schema = jsonReadWriteRead(
"{"
+ "\"partitionsSpec\":{"
+ " \"type\":\"dimension\","
+ " \"targetPartitionSize\":100,"
+ " \"maxPartitionSize\":200,"
+ " \"partitionDimension\":\"foo\""

@@ -18,72 +18,85 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-indexing-service</artifactId>
<name>druid-indexing-service</name>
<description>druid-indexing-service</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-indexing-hadoop</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>


@ -26,7 +26,8 @@ import com.google.common.base.Throwables;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.ToStringResponseHandler;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.client.selector.Server;
import io.druid.curator.discovery.ServerDiscoverySelector;
import io.druid.indexing.common.RetryPolicy;
@ -37,6 +38,7 @@ import org.joda.time.Duration;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
public class RemoteTaskActionClient implements TaskActionClient
@ -75,22 +77,25 @@ public class RemoteTaskActionClient implements TaskActionClient
while (true) {
try {
final Server server;
final URI serviceUri;
try {
serviceUri = getServiceUri();
server = getServiceInstance();
serviceUri = makeServiceUri(server);
}
catch (Exception e) {
// Want to retry, so throw an IOException.
throw new IOException("Failed to locate service uri", e);
}
final String response;
final StatusResponseHolder response;
log.info("Submitting action for task[%s] to overlord[%s]: %s", task.getId(), serviceUri, taskAction);
try {
response = httpClient.post(serviceUri.toURL())
.setContent("application/json", dataToSend)
.go(new ToStringResponseHandler(Charsets.UTF_8))
.go(new StatusResponseHandler(Charsets.UTF_8))
.get();
}
catch (Exception e) {
@ -99,13 +104,24 @@ public class RemoteTaskActionClient implements TaskActionClient
throw Throwables.propagate(e);
}
final Map<String, Object> responseDict = jsonMapper.readValue(
response, new TypeReference<Map<String, Object>>()
{
if (response.getStatus().getCode() / 200 == 1) {
final Map<String, Object> responseDict = jsonMapper.readValue(
response.getContent(),
new TypeReference<Map<String, Object>>()
{
}
);
return jsonMapper.convertValue(responseDict.get("result"), taskAction.getReturnTypeReference());
} else {
// Want to retry, so throw an IOException.
throw new IOException(
String.format(
"Scary HTTP status returned: %s. Check your overlord[%s] logs for exceptions.",
response.getStatus(),
server.getHost()
)
);
}
);
return jsonMapper.convertValue(responseDict.get("result"), taskAction.getReturnTypeReference());
}
catch (IOException | ChannelException e) {
log.warn(e, "Exception submitting action for task[%s]", task.getId());
@ -127,13 +143,18 @@ public class RemoteTaskActionClient implements TaskActionClient
}
}
private URI getServiceUri() throws Exception
private URI makeServiceUri(final Server instance) throws URISyntaxException
{
return new URI(String.format("%s://%s%s", instance.getScheme(), instance.getHost(), "/druid/indexer/v1/action"));
}
private Server getServiceInstance()
{
final Server instance = selector.pick();
if (instance == null) {
throw new ISE("Cannot find instance of indexer to talk to!");
} else {
return instance;
}
return new URI(String.format("%s://%s%s", instance.getScheme(), instance.getHost(), "/druid/indexer/v1/action"));
}
}
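A note on the status handling above: swapping ToStringResponseHandler for StatusResponseHandler is what lets the client branch on the HTTP status instead of blindly parsing the body. Observe also that `response.getStatus().getCode() / 200 == 1` accepts every code from 200 through 399, so 3xx responses count as success; the conventional 2xx test divides by 100. A minimal, self-contained sketch of the check-then-retry shape, where Client, Response and MAX_TRIES are illustrative stand-ins rather than Druid's HttpClient API:

import java.io.IOException;

// Hedged sketch: retry a submission until a 2xx response arrives or the try
// budget runs out. Client, Response and MAX_TRIES are hypothetical stand-ins.
public class RetryOnStatus
{
  private static final int MAX_TRIES = 3;

  interface Response { int statusCode(); String body(); }
  interface Client { Response submit(byte[] payload) throws IOException; }

  public static String submitWithRetry(Client client, byte[] payload) throws IOException
  {
    IOException lastError = null;
    for (int i = 0; i < MAX_TRIES; i++) {
      try {
        final Response response = client.submit(payload);
        if (response.statusCode() / 100 == 2) { // success only on 2xx
          return response.body();
        }
        // Non-2xx: record as IOException so the loop retries, mirroring the
        // "want to retry, so throw an IOException" pattern above.
        lastError = new IOException("Unexpected HTTP status: " + response.statusCode());
      }
      catch (IOException e) {
        lastError = e;
      }
    }
    throw lastError;
  }
}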


@ -27,7 +27,7 @@ import com.google.common.base.Predicate;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMaker;
import io.druid.segment.IndexMerger;
import io.druid.segment.IndexableAdapter;
import io.druid.segment.QueryableIndexIndexableAdapter;
import io.druid.segment.Rowboat;
@ -37,7 +37,6 @@ import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.File;
import java.util.List;
import java.util.Map;
@ -106,7 +105,7 @@ public class AppendTask extends MergeTaskBase
);
}
return IndexMaker.append(adapters, outDir);
return IndexMerger.append(adapters, outDir);
}
@Override


@ -29,6 +29,7 @@ import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.inject.Injector;
import com.metamx.common.logger.Logger;
import io.druid.common.utils.JodaUtils;
import io.druid.guice.ExtensionsConfig;
@ -38,6 +39,7 @@ import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.HadoopDruidIndexerJob;
import io.druid.indexer.HadoopIngestionSpec;
import io.druid.indexer.Jobby;
import io.druid.indexer.MetadataStorageUpdaterJobHandler;
import io.druid.indexing.common.TaskLock;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.TaskToolbox;
@ -63,8 +65,9 @@ public class HadoopIndexTask extends AbstractTask
private static final Logger log = new Logger(HadoopIndexTask.class);
private static final ExtensionsConfig extensionsConfig;
final static Injector injector = GuiceInjectors.makeStartupInjector();
static {
extensionsConfig = GuiceInjectors.makeStartupInjector().getInstance(ExtensionsConfig.class);
extensionsConfig = injector.getInstance(ExtensionsConfig.class);
}
private static String getTheDataSource(HadoopIngestionSpec spec, HadoopIngestionSpec config)
@ -184,7 +187,7 @@ public class HadoopIndexTask extends AbstractTask
final List<URL> extensionURLs = Lists.newArrayList();
for (String coordinate : extensionsConfig.getCoordinates()) {
final ClassLoader coordinateLoader = Initialization.getClassLoaderForCoordinates(
aetherClient, coordinate
aetherClient, coordinate, extensionsConfig.getDefaultVersion()
);
extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs()));
}
@ -197,7 +200,7 @@ public class HadoopIndexTask extends AbstractTask
// put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) {
final ClassLoader hadoopLoader = Initialization.getClassLoaderForCoordinates(
aetherClient, hadoopDependencyCoordinate
aetherClient, hadoopDependencyCoordinate, extensionsConfig.getDefaultVersion()
);
driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
}
@ -288,7 +291,10 @@ public class HadoopIndexTask extends AbstractTask
.withTuningConfig(theSchema.getTuningConfig().withVersion(version))
);
HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);
HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(
config,
injector.getInstance(MetadataStorageUpdaterJobHandler.class)
);
log.info("Starting a hadoop index generator job...");
if (job.run()) {


@ -28,7 +28,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMaker;
import io.druid.segment.IndexMerger;
import io.druid.segment.QueryableIndex;
import io.druid.timeline.DataSegment;
@ -60,7 +60,7 @@ public class MergeTask extends MergeTaskBase
public File merge(final Map<DataSegment, File> segments, final File outDir)
throws Exception
{
return IndexMaker.mergeQueryableIndex(
return IndexMerger.mergeQueryableIndex(
Lists.transform(
ImmutableList.copyOf(segments.values()),
new Function<File, QueryableIndex>()


@ -18,61 +18,74 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-kafka-eight</artifactId>
<name>druid-kafka-eight</name>
<description>druid-kafka-eight</description>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-kafka-eight</artifactId>
<name>druid-kafka-eight</name>
<description>druid-kafka-eight</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.9.2</artifactId>
<version>0.8.0</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.9.2</artifactId>
<version>0.8.0</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>


@ -18,60 +18,73 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-kafka-seven</artifactId>
<name>druid-kafka-seven</name>
<description>druid-kafka-seven</description>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-kafka-seven</artifactId>
<name>druid-kafka-seven</name>
<description>druid-kafka-seven</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<dependency>
<groupId>kafka</groupId>
<artifactId>core-kafka</artifactId>
<version>0.7.2-mmx4</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<dependency>
<groupId>kafka</groupId>
<artifactId>core-kafka</artifactId>
<version>0.7.2-mmx4</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>


@ -61,6 +61,18 @@
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>


@ -72,7 +72,7 @@ public class MySQLConnector extends SQLMetadataConnector
}
@Override
protected boolean tableExists(Handle handle, String tableName)
public boolean tableExists(Handle handle, String tableName)
{
return !handle.createQuery("SHOW tables LIKE :tableName")
.bind("tableName", tableName)

pom.xml

@ -92,7 +92,7 @@
<dependency>
<groupId>com.metamx</groupId>
<artifactId>bytebuffer-collections</artifactId>
<version>0.0.4</version>
<version>0.1.1</version>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
@ -194,11 +194,6 @@
<artifactId>curator-x-discovery</artifactId>
<version>${apache.curator.version}</version>
</dependency>
<dependency>
<groupId>it.uniroma3.mat</groupId>
<artifactId>extendedset</artifactId>
<version>1.3.7</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
@ -447,6 +442,16 @@
<artifactId>mapdb</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.derby</groupId>
<artifactId>derbynet</artifactId>
<version>10.11.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.derby</groupId>
<artifactId>derbyclient</artifactId>
<version>10.11.1.1</version>
</dependency>
<!-- Test Scope -->
<dependency>
@ -468,6 +473,12 @@
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.carrotsearch</groupId>
<artifactId>junit-benchmarks</artifactId>
<version>0.7.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.caliper</groupId>
<artifactId>caliper</artifactId>


@ -61,6 +61,18 @@
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>


@ -65,7 +65,7 @@ public class PostgreSQLConnector extends SQLMetadataConnector
}
@Override
protected boolean tableExists(final Handle handle, final String tableName)
public boolean tableExists(final Handle handle, final String tableName)
{
return !handle.createQuery(
"SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = 'public' AND tablename LIKE :tableName"


@ -20,116 +20,130 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-processing</artifactId>
<name>druid-processing</name>
<description>A module that is everything required to understand Druid Segments</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid-processing</artifactId>
<name>druid-processing</name>
<description>A module that is everything required to understand Druid Segments</description>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>bytebuffer-collections</artifactId>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>emitter</artifactId>
</dependency>
<dependency>
<groupId>com.ning</groupId>
<artifactId>compress-lzf</artifactId>
</dependency>
<dependency>
<groupId>org.skife.config</groupId>
<artifactId>config-magic</artifactId>
</dependency>
<dependency>
<groupId>it.uniroma3.mat</groupId>
<artifactId>extendedset</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
</dependency>
<dependency>
<groupId>org.mozilla</groupId>
<artifactId>rhino</artifactId>
<version>1.7R4</version>
</dependency>
<dependency>
<groupId>com.davekoelle</groupId>
<artifactId>alphanum</artifactId>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
</dependency>
<dependency>
<groupId>org.mapdb</groupId>
<artifactId>mapdb</artifactId>
</dependency>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>bytebuffer-collections</artifactId>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>emitter</artifactId>
</dependency>
<dependency>
<groupId>com.ning</groupId>
<artifactId>compress-lzf</artifactId>
</dependency>
<dependency>
<groupId>org.skife.config</groupId>
<artifactId>config-magic</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
</dependency>
<dependency>
<groupId>org.mozilla</groupId>
<artifactId>rhino</artifactId>
<version>1.7R4</version>
</dependency>
<dependency>
<groupId>com.davekoelle</groupId>
<artifactId>alphanum</artifactId>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
</dependency>
<dependency>
<groupId>org.mapdb</groupId>
<artifactId>mapdb</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.caliper</groupId>
<artifactId>caliper</artifactId>
</dependency>
</dependencies>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.carrotsearch</groupId>
<artifactId>junit-benchmarks</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.caliper</groupId>
<artifactId>caliper</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>


@ -326,7 +326,7 @@ public class PeriodGranularity extends BaseQueryGranularity
@Override
public byte[] cacheKey()
{
return (period.toString() + ":" + chronology.getZone().toString()).getBytes(Charsets.UTF_8);
return (period.toString() + ":" + chronology.getZone().toString() + ":" + origin).getBytes(Charsets.UTF_8);
}
@Override
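The one-line change above matters for cache correctness: two PeriodGranularity instances that differ only in origin previously produced identical cache keys and could share cached results. A hedged sketch of the underlying rule, that every result-affecting field belongs in the key (the names here are illustrative, not Druid's):

import java.nio.charset.StandardCharsets;

// Sketch: a query-cache key must encode every field that changes results.
// Period, time zone and origin together identify a period granularity.
public final class GranularityCacheKey
{
  private GranularityCacheKey() {}

  public static byte[] cacheKey(String period, String zoneId, long originMillis)
  {
    // Pre-patch, the origin was omitted, so "P1D anchored at X" and
    // "P1D anchored at Y" hashed to the same cache entry.
    return (period + ":" + zoneId + ":" + originMillis).getBytes(StandardCharsets.UTF_8);
  }
}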


@ -29,6 +29,8 @@ import java.util.List;
*/
public class ExtensionsConfig
{
public static final String PACKAGE_VERSION = ExtensionsConfig.class.getPackage().getImplementationVersion();
@JsonProperty
@NotNull
private boolean searchCurrentClassloader = true;
@ -37,6 +39,10 @@ public class ExtensionsConfig
@NotNull
private List<String> coordinates = ImmutableList.of();
// default version to use for extensions without version info
@JsonProperty
private String defaultVersion;
@JsonProperty
@NotNull
private String localRepository = String.format("%s/%s", System.getProperty("user.home"), ".m2/repository");
@ -58,6 +64,11 @@ public class ExtensionsConfig
return coordinates;
}
public String getDefaultVersion()
{
return defaultVersion != null ? defaultVersion : PACKAGE_VERSION;
}
public String getLocalRepository()
{
return localRepository;
@ -74,6 +85,7 @@ public class ExtensionsConfig
return "ExtensionsConfig{" +
"searchCurrentClassloader=" + searchCurrentClassloader +
", coordinates=" + coordinates +
", defaultVersion='" + getDefaultVersion() + '\'' +
", localRepository='" + localRepository + '\'' +
", remoteRepositories=" + remoteRepositories +
'}';
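The defaultVersion fallback above means extension coordinates that omit a version resolve against the version of the running Druid jar. A hedged sketch of the same manifest-fallback pattern in isolation:

// Sketch: prefer an explicitly configured version, else fall back to the
// Implementation-Version from the jar manifest. Note getImplementationVersion()
// returns null when the class is not loaded from a jar with a manifest entry.
public class VersionedConfig
{
  private static final String PACKAGE_VERSION =
      VersionedConfig.class.getPackage().getImplementationVersion();

  private String defaultVersion; // null unless set by configuration

  public String getDefaultVersion()
  {
    return defaultVersion != null ? defaultVersion : PACKAGE_VERSION;
  }
}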


@ -48,7 +48,9 @@ public class JacksonModule implements Module
@Provides @LazySingleton @Smile
public ObjectMapper smileMapper()
{
ObjectMapper retVal = new DefaultObjectMapper(new SmileFactory());
final SmileFactory smileFactory = new SmileFactory();
smileFactory.delegateToTextual(true);
final ObjectMapper retVal = new DefaultObjectMapper(smileFactory);
retVal.getFactory().setCodec(retVal);
return retVal;
}
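delegateToTextual(true) lets serializers that only know how to write textual JSON still be used with the binary Smile encoding. A hedged, self-contained sketch of building and exercising such a mapper (DefaultObjectMapper is Druid-specific, so a plain ObjectMapper stands in here):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.smile.SmileFactory;
import java.util.Map;

public class SmileRoundTrip
{
  public static void main(String[] args) throws Exception
  {
    final SmileFactory smileFactory = new SmileFactory();
    smileFactory.delegateToTextual(true); // as configured in the module above
    final ObjectMapper mapper = new ObjectMapper(smileFactory);
    mapper.getFactory().setCodec(mapper);

    // Values round-trip through the binary encoding transparently.
    final byte[] bytes = mapper.writeValueAsBytes(Map.of("rows", 42));
    System.out.println(mapper.readValue(bytes, Map.class)); // {rows=42}
  }
}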


@ -19,15 +19,17 @@
package io.druid.query;
import org.joda.time.Interval;
import java.util.List;
/**
*/
public interface BySegmentResultValue<T>
{
public List<Result<T>> getResults();
public List<T> getResults();
public String getSegmentId();
public String getIntervalString();
public Interval getInterval();
}


@ -26,7 +26,7 @@ import java.util.List;
/**
*/
public class BySegmentResultValueClass<T>
public class BySegmentResultValueClass<T> implements BySegmentResultValue<T>
{
private final List<T> results;
private final String segmentId;
@ -43,18 +43,21 @@ public class BySegmentResultValueClass<T>
this.interval = interval;
}
@Override
@JsonProperty("results")
public List<T> getResults()
{
return results;
}
@Override
@JsonProperty("segment")
public String getSegmentId()
{
return segmentId;
}
@Override
@JsonProperty("interval")
public Interval getInterval()
{


@ -84,7 +84,7 @@ public class FinalizeResultsQueryRunner<T> implements QueryRunner<T>
throw new ISE("Cannot have a null result!");
}
BySegmentResultValueClass<T> resultsClass = result.getValue();
BySegmentResultValue<T> resultsClass = result.getValue();
return (T) new Result<BySegmentResultValueClass>(
result.getTimestamp(),


@ -22,11 +22,12 @@ package io.druid.query;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import com.google.common.collect.MapMaker;
import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences;
import com.metamx.common.guava.Yielder;
import com.metamx.common.guava.YieldingAccumulator;
import com.metamx.common.guava.YieldingSequenceBase;
import com.metamx.emitter.EmittingLogger;
import io.druid.query.spec.MultipleSpecificSegmentSpec;
import io.druid.segment.SegmentMissingException;
@ -36,18 +37,22 @@ import java.util.Map;
public class RetryQueryRunner<T> implements QueryRunner<T>
{
public static String MISSING_SEGMENTS_KEY = "missingSegments";
private static final EmittingLogger log = new EmittingLogger(RetryQueryRunner.class);
private final QueryRunner<T> baseRunner;
private final QueryToolChest<T, Query<T>> toolChest;
private final RetryQueryRunnerConfig config;
private final ObjectMapper jsonMapper;
public RetryQueryRunner(
QueryRunner<T> baseRunner,
QueryToolChest<T, Query<T>> toolChest,
RetryQueryRunnerConfig config,
ObjectMapper jsonMapper
)
{
this.baseRunner = baseRunner;
this.toolChest = toolChest;
this.config = config;
this.jsonMapper = jsonMapper;
}
@ -55,7 +60,8 @@ public class RetryQueryRunner<T> implements QueryRunner<T>
@Override
public Sequence<T> run(final Query<T> query, final Map<String, Object> context)
{
final Sequence<T> returningSeq = baseRunner.run(query, context);
final List<Sequence<T>> listOfSequences = Lists.newArrayList();
listOfSequences.add(baseRunner.run(query, context));
return new YieldingSequenceBase<T>()
{
@ -64,33 +70,32 @@ public class RetryQueryRunner<T> implements QueryRunner<T>
OutType initValue, YieldingAccumulator<OutType, T> accumulator
)
{
Yielder<OutType> yielder = returningSeq.toYielder(initValue, accumulator);
final List<SegmentDescriptor> missingSegments = getMissingSegments(context);
if (missingSegments.isEmpty()) {
return yielder;
}
if (!missingSegments.isEmpty()) {
for (int i = 0; i < config.getNumTries(); i++) {
log.info("[%,d] missing segments found. Retry attempt [%,d]", missingSegments.size(), i);
for (int i = 0; i < config.numTries(); i++) {
context.put(MISSING_SEGMENTS_KEY, Lists.newArrayList());
final Query<T> retryQuery = query.withQuerySegmentSpec(
new MultipleSpecificSegmentSpec(
missingSegments
)
);
yielder = baseRunner.run(retryQuery, context).toYielder(initValue, accumulator);
if (getMissingSegments(context).isEmpty()) {
break;
context.put(MISSING_SEGMENTS_KEY, Lists.newArrayList());
final Query<T> retryQuery = query.withQuerySegmentSpec(
new MultipleSpecificSegmentSpec(
missingSegments
)
);
Sequence<T> retrySequence = baseRunner.run(retryQuery, context);
listOfSequences.add(retrySequence);
if (getMissingSegments(context).isEmpty()) {
break;
}
}
final List<SegmentDescriptor> finalMissingSegs = getMissingSegments(context);
if (!config.isReturnPartialResults() && !finalMissingSegs.isEmpty()) {
throw new SegmentMissingException("No results found for segments[%s]", finalMissingSegs);
}
}
final List<SegmentDescriptor> finalMissingSegs = getMissingSegments(context);
if (!config.returnPartialResults() && !finalMissingSegs.isEmpty()) {
throw new SegmentMissingException("No results found for segments[%s]", finalMissingSegs);
}
return yielder;
return toolChest.mergeSequencesUnordered(Sequences.simple(listOfSequences)).toYielder(initValue, accumulator);
}
};
}
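The rewrite above changes the retry strategy from "replace the yielder on each retry" to "collect one sequence per attempt and merge them all at the end", so rows already fetched by earlier attempts are no longer discarded. A hedged sketch of that accumulate-then-merge shape over plain lists, where fetch and missing stand in for the runner and the MISSING_SEGMENTS_KEY bookkeeping:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;

public class AccumulateThenMerge
{
  public static <T> List<T> runWithRetries(
      Supplier<List<T>> fetch,          // one query attempt
      Supplier<List<String>> missing,   // segments still unaccounted for
      int numTries,
      boolean returnPartialResults
  )
  {
    final List<List<T>> partials = new ArrayList<>();
    partials.add(fetch.get()); // the initial attempt always runs
    for (int i = 0; i < numTries && !missing.get().isEmpty(); i++) {
      partials.add(fetch.get()); // retry; in Druid, scoped to the missing segments
    }
    if (!returnPartialResults && !missing.get().isEmpty()) {
      throw new IllegalStateException("No results found for segments " + missing.get());
    }
    final List<T> merged = new ArrayList<>();
    partials.forEach(merged::addAll); // stand-in for mergeSequencesUnordered
    return merged;
  }
}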


@ -24,10 +24,16 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class RetryQueryRunnerConfig
{
@JsonProperty
private int numTries = 0;
@JsonProperty
private boolean returnPartialResults = false;
private int numTries = 1;
public int numTries() { return numTries; }
public boolean returnPartialResults() { return returnPartialResults; }
public int getNumTries()
{
return numTries;
}
// exists for testing and overrides
public boolean isReturnPartialResults()
{
return false;
}
}
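Since isReturnPartialResults() is now a plain method rather than a Jackson-bound field, tests opt into partial results by overriding it. A small, hedged example, assuming io.druid.query.RetryQueryRunnerConfig is on the classpath:

import io.druid.query.RetryQueryRunnerConfig;

public class RetryConfigOverrides
{
  // One-line override, matching the "exists for testing and overrides" note.
  public static RetryQueryRunnerConfig allowPartialResults()
  {
    return new RetryQueryRunnerConfig()
    {
      @Override
      public boolean isReturnPartialResults()
      {
        return true;
      }
    };
  }
}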


@ -19,9 +19,10 @@
package io.druid.query.filter;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import io.druid.segment.data.Indexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
/**
*/
@ -29,7 +30,8 @@ public interface BitmapIndexSelector
{
public Indexed<String> getDimensionValues(String dimension);
public int getNumRows();
public ImmutableConciseSet getConciseInvertedIndex(String dimension, String value);
public ImmutableConciseSet getConciseInvertedIndex(String dimension, int idx);
public BitmapFactory getBitmapFactory();
public ImmutableBitmap getBitmapIndex(String dimension, String value);
public ImmutableBitmap getBitmapIndex(String dimension, int idx);
public ImmutableRTree getSpatialIndex(String dimension);
}


@ -19,12 +19,12 @@
package io.druid.query.filter;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import com.metamx.collections.bitmap.ImmutableBitmap;
/**
*/
public interface Filter
{
public ImmutableConciseSet goConcise(BitmapIndexSelector selector);
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector);
public ValueMatcher makeMatcher(ValueMatcherFactory factory);
}
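With getBitmapIndex returning the ImmutableBitmap abstraction, a Filter no longer names a concrete bitmap implementation; the choice of CONCISE versus Roaring lives behind the selector's BitmapFactory. A hedged sketch of a trivial "dimension equals value" filter against the new interface (EqualsFilter is illustrative, not Druid's SelectorFilter, and it assumes ValueMatcherFactory exposes makeValueMatcher(dimension, value)):

import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;

public class EqualsFilter implements Filter
{
  private final String dimension;
  private final String value;

  public EqualsFilter(String dimension, String value)
  {
    this.dimension = dimension;
    this.value = value;
  }

  @Override
  public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
  {
    // Which bitmap implementation backs this is now the selector's concern.
    return selector.getBitmapIndex(dimension, value);
  }

  @Override
  public ValueMatcher makeMatcher(ValueMatcherFactory factory)
  {
    return factory.makeValueMatcher(dimension, value);
  }
}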


@ -122,7 +122,7 @@ public class SegmentAnalyzer
String value = bitmapIndex.getValue(i);
if (value != null) {
size += value.getBytes(Charsets.UTF_8).length * bitmapIndex.getConciseSet(value).size();
size += value.getBytes(Charsets.UTF_8).length * bitmapIndex.getBitmap(value).size();
}
}


@ -24,28 +24,30 @@ import com.fasterxml.jackson.annotation.JsonValue;
import io.druid.query.BySegmentResultValue;
import io.druid.query.Result;
import io.druid.query.search.search.SearchHit;
import org.joda.time.Interval;
import java.util.List;
/**
*/
public class BySegmentSearchResultValue extends SearchResultValue implements BySegmentResultValue<SearchResultValue>
public class BySegmentSearchResultValue extends SearchResultValue
implements BySegmentResultValue<Result<SearchResultValue>>
{
private final List<Result<SearchResultValue>> results;
private final String segmentId;
private final String intervalString;
private final Interval interval;
public BySegmentSearchResultValue(
@JsonProperty("results") List<Result<SearchResultValue>> results,
@JsonProperty("segment") String segmentId,
@JsonProperty("interval") String intervalString
@JsonProperty("interval") Interval interval
)
{
super(null);
this.results = results;
this.segmentId = segmentId;
this.intervalString = intervalString;
this.interval = interval;
}
@Override
@ -71,9 +73,9 @@ public class BySegmentSearchResultValue extends SearchResultValue implements ByS
@Override
@JsonProperty("interval")
public String getIntervalString()
public Interval getInterval()
{
return intervalString;
return interval;
}
@Override
@ -82,7 +84,7 @@ public class BySegmentSearchResultValue extends SearchResultValue implements ByS
return "BySegmentSearchResultValue{" +
"results=" + results +
", segmentId='" + segmentId + '\'' +
", intervalString='" + intervalString + '\'' +
", interval='" + interval.toString() + '\'' +
'}';
}
}


@ -329,7 +329,7 @@ public class SearchQueryQueryToolChest extends QueryToolChest<Result<SearchResul
}
),
value.getSegmentId(),
value.getIntervalString()
value.getInterval()
)
);
}


@ -21,9 +21,12 @@ package io.druid.query.search;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.common.ISE;
import com.metamx.common.guava.Accumulator;
import com.metamx.common.guava.FunctionalIterable;
@ -48,8 +51,8 @@ import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.Column;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.filter.Filters;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
@ -94,12 +97,14 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
dimsToSearch = dimensions;
}
BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
final ImmutableConciseSet baseFilter;
final ImmutableBitmap baseFilter;
if (filter == null) {
baseFilter = ImmutableConciseSet.complement(new ImmutableConciseSet(), index.getNumRows());
baseFilter = bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), index.getNumRows());
} else {
baseFilter = filter.goConcise(new ColumnSelectorBitmapIndexSelector(index));
ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(bitmapFactory, index);
baseFilter = filter.getBitmapIndex(selector);
}
for (String dimension : dimsToSearch) {
@ -113,7 +118,7 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
for (int i = 0; i < bitmapIndex.getCardinality(); ++i) {
String dimVal = Strings.nullToEmpty(bitmapIndex.getValue(i));
if (searchQuerySpec.accept(dimVal) &&
ImmutableConciseSet.intersection(baseFilter, bitmapIndex.getConciseSet(i)).size() > 0) {
bitmapFactory.intersection(Arrays.asList(baseFilter, bitmapIndex.getBitmap(i))).size() > 0) {
retVal.add(new SearchHit(dimension, dimVal));
if (retVal.size() >= limit) {
return makeReturnResult(limit, retVal);


@ -41,7 +41,11 @@ public class LexicographicSearchSortSpec implements SearchSortSpec
@Override
public int compare(SearchHit searchHit, SearchHit searchHit1)
{
return searchHit.getValue().compareTo(searchHit1.getValue());
int retVal = searchHit.getValue().compareTo(searchHit1.getValue());
if (retVal == 0) {
retVal = searchHit.getDimension().compareTo(searchHit1.getDimension());
}
return retVal;
}
};
}
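The added tie-break makes the sort total: hits with equal values now order by dimension instead of falling back to whatever the underlying sort happens to do. The same comparator expressed with Comparator chaining, as a hedged sketch in which Hit stands in for SearchHit:

import java.util.Arrays;
import java.util.Comparator;

public class TieBreakExample
{
  static class Hit
  {
    final String dimension;
    final String value;

    Hit(String dimension, String value)
    {
      this.dimension = dimension;
      this.value = value;
    }

    @Override
    public String toString()
    {
      return dimension + ":" + value;
    }
  }

  static final Comparator<Hit> LEXICOGRAPHIC =
      Comparator.comparing((Hit h) -> h.value)  // primary: the hit's value
          .thenComparing(h -> h.dimension);     // tie-break: its dimension

  public static void main(String[] args)
  {
    Hit[] hits = {new Hit("country", "aaa"), new Hit("city", "aaa")};
    Arrays.sort(hits, LEXICOGRAPHIC);
    // Equal values now order deterministically by dimension.
    System.out.println(Arrays.toString(hits)); // [city:aaa, country:aaa]
  }
}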


@ -23,7 +23,6 @@ import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.metamx.common.guava.Accumulator;
import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences;
import com.metamx.common.guava.Yielder;
import com.metamx.common.guava.YieldingAccumulator;
import io.druid.query.Query;
@ -68,20 +67,7 @@ public class SpecificSegmentQueryRunner<T> implements QueryRunner<T>
@Override
public Sequence<T> call() throws Exception
{
Sequence<T> returningSeq;
try {
returningSeq = base.run(query, context);
}
catch (SegmentMissingException e) {
List<SegmentDescriptor> missingSegments = (List<SegmentDescriptor>) context.get(RetryQueryRunner.MISSING_SEGMENTS_KEY);
if (missingSegments == null) {
missingSegments = Lists.newArrayList();
context.put(RetryQueryRunner.MISSING_SEGMENTS_KEY, missingSegments);
}
missingSegments.add(specificSpec.getDescriptor());
returningSeq = Sequences.empty();
}
return returningSeq;
return base.run(query, context);
}
}
);
@ -97,7 +83,18 @@ public class SpecificSegmentQueryRunner<T> implements QueryRunner<T>
@Override
public OutType call() throws Exception
{
return baseSequence.accumulate(initValue, accumulator);
try {
return baseSequence.accumulate(initValue, accumulator);
}
catch (SegmentMissingException e) {
List<SegmentDescriptor> missingSegments = (List<SegmentDescriptor>) context.get(RetryQueryRunner.MISSING_SEGMENTS_KEY);
if (missingSegments == null) {
missingSegments = Lists.newArrayList();
context.put(RetryQueryRunner.MISSING_SEGMENTS_KEY, missingSegments);
}
missingSegments.add(specificSpec.getDescriptor());
return initValue;
}
}
}
);


@ -23,30 +23,32 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import io.druid.query.BySegmentResultValue;
import io.druid.query.BySegmentResultValueClass;
import io.druid.query.Result;
import org.joda.time.Interval;
import java.util.List;
/**
*/
public class BySegmentTopNResultValue extends TopNResultValue implements BySegmentResultValue<TopNResultValue>
public class BySegmentTopNResultValue extends TopNResultValue implements BySegmentResultValue<Result<TopNResultValue>>
{
private final List<Result<TopNResultValue>> results;
private final String segmentId;
private final String intervalString;
private final Interval interval;
@JsonCreator
public BySegmentTopNResultValue(
@JsonProperty("results") List<Result<TopNResultValue>> results,
@JsonProperty("segment") String segmentId,
@JsonProperty("interval") String intervalString
@JsonProperty("interval") Interval interval
)
{
super(null);
this.results = results;
this.segmentId = segmentId;
this.intervalString = intervalString;
this.interval = interval;
}
@Override
@ -73,9 +75,9 @@ public class BySegmentTopNResultValue extends TopNResultValue implements BySegme
@Override
@JsonProperty("interval")
public String getIntervalString()
public Interval getInterval()
{
return intervalString;
return interval;
}
@Override
@ -84,7 +86,7 @@ public class BySegmentTopNResultValue extends TopNResultValue implements BySegme
return "BySegmentTopNResultValue{" +
"results=" + results +
", segmentId='" + segmentId + '\'' +
", intervalString='" + intervalString + '\'' +
", interval='" + interval.toString() + '\'' +
'}';
}
}


@ -40,6 +40,7 @@ public class PooledTopNAlgorithm
private final Capabilities capabilities;
private final TopNQuery query;
private final StupidPool<ByteBuffer> bufferPool;
private static final int AGG_UNROLL_COUNT = 8; // Must be able to fit loop below
public PooledTopNAlgorithm(
Capabilities capabilities,
@ -143,13 +144,30 @@ public class PooledTopNAlgorithm
{
return makeBufferAggregators(params.getCursor(), query.getAggregatorSpecs());
}
/**
* Use aggressive loop unrolling to aggregate the data.
*
* How this works: the aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 somewhat arbitrarily.
* The offsets into the output buffer are precalculated and stored in aggregatorOffsets.
*
* For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
* See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach.
*
* This allows out-of-order execution of the code. In local tests, the JVM inlines all the way to this function.
*
* If there are more than AGG_UNROLL_COUNT aggregates, the remainder is handled by the switch and the full blocks
* of AGG_UNROLL_COUNT are handled in a partially unrolled for-loop.
*
* Putting the switch first optimizes the common case (fewer than AGG_UNROLL_COUNT aggs) while still covering
* queries with many aggregators, which benefit the most from any speed improvement (they simply take longer
* to begin with).
*/
@Override
protected void scanAndAggregate(
PooledTopNParams params,
int[] positions,
BufferAggregator[] theAggregators,
int numProcessed
final PooledTopNParams params,
final int[] positions,
final BufferAggregator[] theAggregators,
final int numProcessed
)
{
final ByteBuffer resultsBuf = params.getResultsBuf();
@ -158,35 +176,102 @@ public class PooledTopNAlgorithm
final Cursor cursor = params.getCursor();
final DimensionSelector dimSelector = params.getDimSelector();
final int[] aggregatorOffsets = new int[aggregatorSizes.length];
for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
aggregatorOffsets[j] = offset;
offset += aggregatorSizes[j];
}
final int aggSize = theAggregators.length;
final int aggExtra = aggSize % AGG_UNROLL_COUNT;
while (!cursor.isDone()) {
final IndexedInts dimValues = dimSelector.getRow();
for (int i = 0; i < dimValues.size(); ++i) {
final int dimIndex = dimValues.get(i);
int position = positions[dimIndex];
switch (position) {
case SKIP_POSITION_VALUE:
break;
case INIT_POSITION_VALUE:
positions[dimIndex] = (dimIndex - numProcessed) * numBytesPerRecord;
position = positions[dimIndex];
for (int j = 0; j < theAggregators.length; ++j) {
theAggregators[j].init(resultsBuf, position);
position += aggregatorSizes[j];
}
position = positions[dimIndex];
default:
for (int j = 0; j < theAggregators.length; ++j) {
theAggregators[j].aggregate(resultsBuf, position);
position += aggregatorSizes[j];
}
}
final int dimSize = dimValues.size();
final int dimExtra = dimSize % AGG_UNROLL_COUNT;
switch(dimExtra){
case 7:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6));
case 6:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5));
case 5:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4));
case 4:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3));
case 3:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2));
case 2:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1));
case 1:
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0));
}
for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+1));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+2));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+3));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+4));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+5));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+6));
aggregateDimValue(positions, theAggregators, numProcessed, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i+7));
}
cursor.advance();
}
}
private static void aggregateDimValue(
final int[] positions,
final BufferAggregator[] theAggregators,
final int numProcessed,
final ByteBuffer resultsBuf,
final int numBytesPerRecord,
final int[] aggregatorOffsets,
final int aggSize,
final int aggExtra,
final int dimIndex
)
{
if (SKIP_POSITION_VALUE == positions[dimIndex]) {
return;
}
if (INIT_POSITION_VALUE == positions[dimIndex]) {
positions[dimIndex] = (dimIndex - numProcessed) * numBytesPerRecord;
final int pos = positions[dimIndex];
for (int j = 0; j < aggSize; ++j) {
theAggregators[j].init(resultsBuf, pos + aggregatorOffsets[j]);
}
}
final int position = positions[dimIndex];
switch(aggExtra) {
case 7:
theAggregators[6].aggregate(resultsBuf, position + aggregatorOffsets[6]);
case 6:
theAggregators[5].aggregate(resultsBuf, position + aggregatorOffsets[5]);
case 5:
theAggregators[4].aggregate(resultsBuf, position + aggregatorOffsets[4]);
case 4:
theAggregators[3].aggregate(resultsBuf, position + aggregatorOffsets[3]);
case 3:
theAggregators[2].aggregate(resultsBuf, position + aggregatorOffsets[2]);
case 2:
theAggregators[1].aggregate(resultsBuf, position + aggregatorOffsets[1]);
case 1:
theAggregators[0].aggregate(resultsBuf, position + aggregatorOffsets[0]);
}
for (int j = aggExtra; j < aggSize; j += AGG_UNROLL_COUNT) {
theAggregators[j].aggregate(resultsBuf, position + aggregatorOffsets[j]);
theAggregators[j+1].aggregate(resultsBuf, position + aggregatorOffsets[j+1]);
theAggregators[j+2].aggregate(resultsBuf, position + aggregatorOffsets[j+2]);
theAggregators[j+3].aggregate(resultsBuf, position + aggregatorOffsets[j+3]);
theAggregators[j+4].aggregate(resultsBuf, position + aggregatorOffsets[j+4]);
theAggregators[j+5].aggregate(resultsBuf, position + aggregatorOffsets[j+5]);
theAggregators[j+6].aggregate(resultsBuf, position + aggregatorOffsets[j+6]);
theAggregators[j+7].aggregate(resultsBuf, position + aggregatorOffsets[j+7]);
}
}
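The Javadoc and code above are easier to follow on a toy example. A hedged, stripped-down sketch of the same remainder-switch-plus-unrolled-loop shape, applied to summing an array (an UNROLL_COUNT of 4 instead of 8 to keep it short; the fall-through cases are deliberate, as in Duff's device):

public class UnrolledSum
{
  private static final int UNROLL_COUNT = 4;

  public static long sum(long[] xs)
  {
    long total = 0;
    final int extra = xs.length % UNROLL_COUNT;
    // Handle the leftover elements first, exactly as the dimExtra switch does.
    switch (extra) {
      case 3: total += xs[2]; // falls through
      case 2: total += xs[1]; // falls through
      case 1: total += xs[0];
    }
    // Then process full blocks of UNROLL_COUNT with an unrolled body.
    for (int i = extra; i < xs.length; i += UNROLL_COUNT) {
      total += xs[i];
      total += xs[i + 1];
      total += xs[i + 2];
      total += xs[i + 3];
    }
    return total;
  }

  public static void main(String[] args)
  {
    System.out.println(sum(new long[]{1, 2, 3, 4, 5, 6, 7})); // 28
  }
}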
@Override
protected void updateResults(
PooledTopNParams params,


@ -19,8 +19,8 @@
package io.druid.query.topn;
import com.google.common.collect.Maps;
import com.google.common.collect.MinMaxPriorityQueue;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import io.druid.query.Result;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.AggregatorUtil;
@ -28,22 +28,50 @@ import io.druid.query.aggregation.PostAggregator;
import io.druid.query.dimension.DimensionSpec;
import org.joda.time.DateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
/**
*
*/
public class TopNNumericResultBuilder implements TopNResultBuilder
{
private final DateTime timestamp;
private final DimensionSpec dimSpec;
private final String metricName;
private final List<AggregatorFactory> aggFactories;
private final List<PostAggregator> postAggs;
private MinMaxPriorityQueue<DimValHolder> pQueue = null;
private final PriorityQueue<DimValHolder> pQueue;
private final Comparator<DimValHolder> dimValComparator;
private static final Comparator<String> dimNameComparator = new Comparator<String>()
{
@Override
public int compare(String o1, String o2)
{
int retval;
if (null == o1) {
if (null == o2) {
retval = 0;
} else {
retval = -1;
}
} else if (null == o2) {
retval = 1;
} else {
retval = o1.compareTo(o2);
}
return retval;
}
};
private final int threshold;
private final Comparator metricComparator;
public TopNNumericResultBuilder(
DateTime timestamp,
@ -60,18 +88,35 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
this.metricName = metricName;
this.aggFactories = aggFactories;
this.postAggs = AggregatorUtil.pruneDependentPostAgg(postAggs, this.metricName);
this.threshold = threshold;
this.metricComparator = comparator;
this.dimValComparator = new Comparator<DimValHolder>()
{
@Override
public int compare(DimValHolder d1, DimValHolder d2)
{
int retVal = metricComparator.compare(d1.getTopNMetricVal(), d2.getTopNMetricVal());
instantiatePQueue(threshold, comparator);
if (retVal == 0) {
retVal = dimNameComparator.compare(d1.getDimName(), d2.getDimName());
}
return retVal;
}
};
// The logic in addEntry first adds, then removes if needed. So it can at any point have up to threshold + 1 entries.
pQueue = new PriorityQueue<>(this.threshold + 1, this.dimValComparator);
}
@Override
public TopNResultBuilder addEntry(
public TopNNumericResultBuilder addEntry(
String dimName,
Object dimValIndex,
Object[] metricVals
)
{
Map<String, Object> metricValues = Maps.newLinkedHashMap();
final Map<String, Object> metricValues = new LinkedHashMap<>(metricVals.length + postAggs.size());
metricValues.put(dimSpec.getOutputName(), dimName);
@ -85,27 +130,47 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
}
Object topNMetricVal = metricValues.get(metricName);
pQueue.add(
new DimValHolder.Builder().withTopNMetricVal(topNMetricVal)
.withDirName(dimName)
.withDimValIndex(dimValIndex)
.withMetricValues(metricValues)
.build()
);
if (shouldAdd(topNMetricVal)) {
DimValHolder dimValHolder = new DimValHolder.Builder()
.withTopNMetricVal(topNMetricVal)
.withDirName(dimName)
.withDimValIndex(dimValIndex)
.withMetricValues(metricValues)
.build();
pQueue.add(dimValHolder);
}
if (this.pQueue.size() > this.threshold) {
pQueue.poll();
}
return this;
}
private boolean shouldAdd(Object topNMetricVal)
{
final boolean belowThreshold = pQueue.size() < this.threshold;
final boolean belowMax = belowThreshold
|| this.metricComparator.compare(pQueue.peek().getTopNMetricVal(), topNMetricVal) < 0;
return belowMax;
}
@Override
public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor)
{
pQueue.add(
new DimValHolder.Builder().withTopNMetricVal(dimensionAndMetricValueExtractor.getDimensionValue(metricName))
.withDirName(dimSpec.getOutputName())
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
.build()
);
final Object dimValue = dimensionAndMetricValueExtractor.getDimensionValue(metricName);
if (shouldAdd(dimValue)) {
final DimValHolder valHolder = new DimValHolder.Builder()
.withTopNMetricVal(dimValue)
.withDirName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName()))
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
.build();
pQueue.add(valHolder);
}
if (pQueue.size() > this.threshold) {
pQueue.poll(); // throw away
}
return this;
}
@ -118,41 +183,41 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
@Override
public Result<TopNResultValue> build()
{
// Pull out top aggregated values
List<Map<String, Object>> values = new ArrayList<Map<String, Object>>(pQueue.size());
while (!pQueue.isEmpty()) {
values.add(pQueue.remove().getMetricValues());
}
return new Result<TopNResultValue>(
timestamp,
new TopNResultValue(values)
);
}
private void instantiatePQueue(int threshold, final Comparator comparator)
{
this.pQueue = MinMaxPriorityQueue.orderedBy(
new Comparator<DimValHolder>()
final DimValHolder[] holderValueArray = pQueue.toArray(new DimValHolder[0]);
Arrays.sort(
holderValueArray, new Comparator<DimValHolder>()
{
@Override
public int compare(DimValHolder d1, DimValHolder d2)
{
int retVal = comparator.compare(d2.getTopNMetricVal(), d1.getTopNMetricVal());
// Values flipped compared to earlier
int retVal = metricComparator.compare(d2.getTopNMetricVal(), d1.getTopNMetricVal());
if (retVal == 0) {
if (d1.getDimName() == null) {
retVal = -1;
} else if (d2.getDimName() == null) {
retVal = 1;
} else {
retVal = d1.getDimName().compareTo(d2.getDimName());
}
retVal = dimNameComparator.compare(d1.getDimName(), d2.getDimName());
}
return retVal;
}
}
).maximumSize(threshold).create();
);
List<DimValHolder> holderValues = Arrays.asList(holderValueArray);
// Pull out top aggregated values
final List<Map<String, Object>> values = Lists.transform(
holderValues,
new Function<DimValHolder, Map<String, Object>>()
{
@Override
public Map<String, Object> apply(DimValHolder valHolder)
{
return valHolder.getMetricValues();
}
}
);
return new Result<TopNResultValue>(
timestamp,
new TopNResultValue(values)
);
}
}
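The structural change above replaces Guava's size-capped MinMaxPriorityQueue with a plain min-heap of capacity threshold + 1: add first, then evict the minimum when the heap overflows, with shouldAdd() skipping values that could never survive. A hedged, generic sketch of that bounded top-K pattern:

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class BoundedTopK<T>
{
  private final PriorityQueue<T> heap; // min-heap: the head is the weakest entry
  private final Comparator<T> comparator;
  private final int k;

  public BoundedTopK(int k, Comparator<T> comparator)
  {
    this.k = k;
    this.comparator = comparator;
    // k + 1 capacity because we add before evicting, as addEntry does.
    this.heap = new PriorityQueue<>(k + 1, comparator);
  }

  public void add(T value)
  {
    // shouldAdd()-style pre-check: once full, only values beating the current
    // minimum are worth inserting.
    if (heap.size() < k || comparator.compare(heap.peek(), value) < 0) {
      heap.add(value);
    }
    if (heap.size() > k) {
      heap.poll(); // throw away
    }
  }

  public List<T> topDescending()
  {
    final List<T> out = new ArrayList<>(heap);
    out.sort(Collections.reverseOrder(comparator)); // best first, as in build()
    return out;
  }
}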


@ -35,6 +35,8 @@ import com.metamx.common.guava.nary.BinaryFn;
import com.metamx.emitter.service.ServiceMetricEvent;
import io.druid.collections.OrderedMergeSequence;
import io.druid.granularity.QueryGranularity;
import io.druid.query.BySegmentResultValue;
import io.druid.query.BySegmentResultValueClass;
import io.druid.query.CacheStrategy;
import io.druid.query.IntervalChunkingQueryRunner;
import io.druid.query.Query;
@ -154,6 +156,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
return new Function<Result<TopNResultValue>, Result<TopNResultValue>>()
{
private String dimension = query.getDimensionSpec().getOutputName();
final List<PostAggregator> prunedAggs = prunePostAggregators(query);
@Override
public Result<TopNResultValue> apply(Result<TopNResultValue> result)
@ -170,7 +173,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
for (AggregatorFactory agg : query.getAggregatorSpecs()) {
values.put(agg.getName(), fn.manipulate(agg, input.getMetric(agg.getName())));
}
for (PostAggregator postAgg : prunePostAggregators(query)) {
for (PostAggregator postAgg : prunedAggs) {
Object calculatedPostAgg = input.getMetric(postAgg.getName());
if (calculatedPostAgg != null) {
values.put(postAgg.getName(), calculatedPostAgg);
@ -433,7 +436,8 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
public Result<TopNResultValue> apply(Result<TopNResultValue> input)
{
if (isBySegment) {
BySegmentTopNResultValue value = (BySegmentTopNResultValue) input.getValue();
BySegmentResultValue<Result<TopNResultValue>> value = (BySegmentResultValue<Result<TopNResultValue>>) input
.getValue();
return new Result<TopNResultValue>(
input.getTimestamp(),
@ -460,7 +464,7 @@ public class TopNQueryQueryToolChest extends QueryToolChest<Result<TopNResultVal
}
),
value.getSegmentId(),
value.getIntervalString()
value.getInterval()
)
);
}

View File

@ -19,30 +19,35 @@
package io.druid.segment;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.segment.data.Offset;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import it.uniroma3.mat.extendedset.intset.IntSet;
import org.roaringbitmap.IntIterator;
/**
*/
public class ConciseOffset implements Offset
public class BitmapOffset implements Offset
{
private static final int INVALID_VALUE = -1;
IntSet.IntIterator itr;
private final ImmutableConciseSet invertedIndex;
private final IntIterator itr;
private final BitmapFactory bitmapFactory;
private final ImmutableBitmap bitmapIndex;
private volatile int val;
public ConciseOffset(ImmutableConciseSet invertedIndex)
public BitmapOffset(BitmapFactory bitmapFactory, ImmutableBitmap bitmapIndex)
{
this.invertedIndex = invertedIndex;
this.itr = invertedIndex.iterator();
this.bitmapFactory = bitmapFactory;
this.bitmapIndex = bitmapIndex;
this.itr = bitmapIndex.iterator();
increment();
}
private ConciseOffset(ConciseOffset otherOffset)
private BitmapOffset(BitmapOffset otherOffset)
{
this.invertedIndex = otherOffset.invertedIndex;
this.bitmapFactory = otherOffset.bitmapFactory;
this.bitmapIndex = otherOffset.bitmapIndex;
this.itr = otherOffset.itr.clone();
this.val = otherOffset.val;
}
@ -66,11 +71,11 @@ public class ConciseOffset implements Offset
@Override
public Offset clone()
{
if (invertedIndex == null || invertedIndex.size() == 0) {
return new ConciseOffset(new ImmutableConciseSet());
if (bitmapIndex == null || bitmapIndex.size() == 0) {
return new BitmapOffset(bitmapFactory, bitmapFactory.makeEmptyImmutableBitmap());
}
return new ConciseOffset(this);
return new BitmapOffset(this);
}
@Override

View File

@ -19,6 +19,8 @@
package io.druid.segment;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.common.guava.CloseQuietly;
import io.druid.query.filter.BitmapIndexSelector;
@ -27,20 +29,22 @@ import io.druid.segment.column.DictionaryEncodedColumn;
import io.druid.segment.column.GenericColumn;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedIterable;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.util.Iterator;
/**
*/
*/
public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
{
private final BitmapFactory bitmapFactory;
private final ColumnSelector index;
public ColumnSelectorBitmapIndexSelector(
final BitmapFactory bitmapFactory,
final ColumnSelector index
)
{
this.bitmapFactory = bitmapFactory;
this.index = index;
}
@ -100,33 +104,40 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
}
@Override
public ImmutableConciseSet getConciseInvertedIndex(String dimension, String value)
public BitmapFactory getBitmapFactory()
{
final Column column = index.getColumn(dimension.toLowerCase());
if (column == null) {
return new ImmutableConciseSet();
}
if (!column.getCapabilities().hasBitmapIndexes()) {
return new ImmutableConciseSet();
}
return column.getBitmapIndex().getConciseSet(value);
return bitmapFactory;
}
@Override
public ImmutableConciseSet getConciseInvertedIndex(String dimension, int idx)
public ImmutableBitmap getBitmapIndex(String dimension, String value)
{
final Column column = index.getColumn(dimension.toLowerCase());
if (column == null) {
return new ImmutableConciseSet();
return bitmapFactory.makeEmptyImmutableBitmap();
}
if (!column.getCapabilities().hasBitmapIndexes()) {
return new ImmutableConciseSet();
return bitmapFactory.makeEmptyImmutableBitmap();
}
return column.getBitmapIndex().getBitmap(value);
}
@Override
public ImmutableBitmap getBitmapIndex(String dimension, int idx)
{
final Column column = index.getColumn(dimension.toLowerCase());
if (column == null || column.getCapabilities() == null) {
return bitmapFactory.makeEmptyImmutableBitmap();
}
if (!column.getCapabilities().hasBitmapIndexes()) {
return bitmapFactory.makeEmptyImmutableBitmap();
}
// This is a workaround given the current state of indexing, I feel shame
final int index1 = column.getBitmapIndex().hasNulls() ? idx + 1 : idx;
return column.getBitmapIndex().getConciseSet(index1);
return column.getBitmapIndex().getBitmap(index1);
}
@Override

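Both lookup methods are meant to return an empty bitmap when a column is missing or carries no bitmap index, so callers can union and intersect results without null checks. A hedged sketch of that contract (queryableIndex stands in for any ColumnSelector):

import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.segment.ColumnSelector;
import io.druid.segment.ColumnSelectorBitmapIndexSelector;

class SelectorContractSketch
{
  static ImmutableBitmap lookup(ColumnSelector queryableIndex)
  {
    ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(
        new ConciseBitmapFactory(),
        queryableIndex
    );
    // Missing column or no bitmap index: an empty bitmap, never null.
    return selector.getBitmapIndex("no_such_dim", "anything");
  }
}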
View File

@ -35,6 +35,7 @@ public class CompressedPools
{
private static final Logger log = new Logger(CompressedPools.class);
public static final int BUFFER_SIZE = 0x10000;
private static final StupidPool<ChunkEncoder> chunkEncoderPool = new StupidPool<ChunkEncoder>(
new Supplier<ChunkEncoder>()
{
@ -44,7 +45,7 @@ public class CompressedPools
public ChunkEncoder get()
{
log.info("Allocating new chunkEncoder[%,d]", counter.incrementAndGet());
return new ChunkEncoder(0xFFFF);
return new ChunkEncoder(BUFFER_SIZE);
}
}
);
@ -63,7 +64,7 @@ public class CompressedPools
public byte[] get()
{
log.info("Allocating new outputBytesPool[%,d]", counter.incrementAndGet());
return new byte[0xFFFF];
return new byte[BUFFER_SIZE];
}
}
);
@ -82,7 +83,7 @@ public class CompressedPools
public ByteBuffer get()
{
log.info("Allocating new bigEndByteBuf[%,d]", counter.incrementAndGet());
return ByteBuffer.allocateDirect(0xFFFF).order(ByteOrder.BIG_ENDIAN);
return ByteBuffer.allocateDirect(BUFFER_SIZE).order(ByteOrder.BIG_ENDIAN);
}
}
);
@ -96,7 +97,7 @@ public class CompressedPools
public ByteBuffer get()
{
log.info("Allocating new littleEndByteBuf[%,d]", counter.incrementAndGet());
return ByteBuffer.allocateDirect(0xFFFF).order(ByteOrder.LITTLE_ENDIAN);
return ByteBuffer.allocateDirect(BUFFER_SIZE).order(ByteOrder.LITTLE_ENDIAN);
}
}
);

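One subtlety worth calling out: the old literal 0xFFFF is 65,535 while BUFFER_SIZE is 0x10000 = 65,536, so this change rounds every pooled allocation up to a power of two rather than merely naming the constant. A quick check:

class BufferSizeCheck
{
  public static void main(String[] args)
  {
    int oldSize = 0xFFFF;  // 65535: what the pools allocated before
    int newSize = 0x10000; // 65536: CompressedPools.BUFFER_SIZE
    System.out.println(Integer.bitCount(newSize) == 1); // true: a power of two
    System.out.println(newSize - oldSize);              // 1: one byte larger
  }
}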
View File

@ -35,6 +35,10 @@ import com.google.common.primitives.Ints;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.common.IAE;
import com.metamx.common.ISE;
@ -47,7 +51,7 @@ import com.metamx.emitter.EmittingLogger;
import io.druid.common.utils.SerializerUtils;
import io.druid.guice.ConfigProvider;
import io.druid.guice.GuiceInjectors;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.guice.JsonConfigProvider;
import io.druid.query.DruidProcessingConfig;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnBuilder;
@ -55,9 +59,10 @@ import io.druid.segment.column.ColumnConfig;
import io.druid.segment.column.ColumnDescriptor;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.ArrayIndexed;
import io.druid.segment.data.BitmapSerde;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.ByteBufferSerializer;
import io.druid.segment.data.CompressedLongsIndexedSupplier;
import io.druid.segment.data.ConciseCompressedIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.IndexedIterable;
import io.druid.segment.data.IndexedRTree;
@ -73,8 +78,6 @@ import io.druid.segment.serde.FloatGenericColumnSupplier;
import io.druid.segment.serde.LongGenericColumnPartSerde;
import io.druid.segment.serde.LongGenericColumnSupplier;
import io.druid.segment.serde.SpatialIndexColumnPartSupplier;
import it.uniroma3.mat.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import org.joda.time.Interval;
import java.io.ByteArrayOutputStream;
@ -120,6 +123,8 @@ public class IndexIO
private static final SerializerUtils serializerUtils = new SerializerUtils();
private static final ObjectMapper mapper;
private static final BitmapSerdeFactory bitmapSerdeFactory;
protected static final ColumnConfig columnConfig;
static {
@ -136,26 +141,19 @@ public class IndexIO
ImmutableMap.of("base_path", "druid.processing")
);
binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class);
JsonConfigProvider.bind(binder, "druid.processing.bitmap", BitmapSerdeFactory.class);
}
}
)
);
mapper = injector.getInstance(ObjectMapper.class);
columnConfig = injector.getInstance(ColumnConfig.class);
}
private static volatile IndexIOHandler handler = null;
@Deprecated
public static MMappedIndex mapDir(final File inDir) throws IOException
{
init();
return handler.mapDir(inDir);
bitmapSerdeFactory = injector.getInstance(BitmapSerdeFactory.class);
}
public static QueryableIndex loadIndex(File inDir) throws IOException
{
init();
final int version = SegmentUtils.getVersionFromDir(inDir);
final IndexLoader loader = indexLoaders.get(version);
@ -167,27 +165,6 @@ public class IndexIO
}
}
public static boolean hasHandler()
{
return (IndexIO.handler != null);
}
public static void registerHandler(IndexIOHandler handler)
{
if (IndexIO.handler == null) {
IndexIO.handler = handler;
} else {
throw new ISE("Already have a handler[%s], cannot register another[%s]", IndexIO.handler, handler);
}
}
private static void init()
{
if (handler == null) {
handler = new DefaultIndexIOHandler();
}
}
public static int getVersionFromDir(File inDir) throws IOException
{
File versionFile = new File(inDir, "version.bin");
@ -228,7 +205,7 @@ public class IndexIO
case 6:
case 7:
log.info("Old version, re-persisting.");
IndexMaker.append(
IndexMerger.append(
Arrays.<IndexableAdapter>asList(new QueryableIndexIndexableAdapter(loadIndex(toConvert))),
converted
);
@ -280,6 +257,7 @@ public class IndexIO
indexBuffer, GenericIndexed.stringStrategy
);
final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer));
final BitmapSerdeFactory bitmapSerdeFactory = new BitmapSerde.LegacyBitmapSerdeFactory();
CompressedLongsIndexedSupplier timestamps = CompressedLongsIndexedSupplier.fromByteBuffer(
smooshedFiles.mapFile(makeTimeFile(inDir, BYTE_ORDER).getName()), BYTE_ORDER
@ -298,7 +276,7 @@ public class IndexIO
Map<String, GenericIndexed<String>> dimValueLookups = Maps.newHashMap();
Map<String, VSizeIndexed> dimColumns = Maps.newHashMap();
Map<String, GenericIndexed<ImmutableConciseSet>> invertedIndexed = Maps.newHashMap();
Map<String, GenericIndexed<ImmutableBitmap>> bitmaps = Maps.newHashMap();
for (String dimension : IndexedIterable.create(availableDimensions)) {
ByteBuffer dimBuffer = smooshedFiles.mapFile(makeDimFile(inDir, dimension).getName());
@ -316,9 +294,9 @@ public class IndexIO
ByteBuffer invertedBuffer = smooshedFiles.mapFile("inverted.drd");
for (int i = 0; i < availableDimensions.size(); ++i) {
invertedIndexed.put(
bitmaps.put(
serializerUtils.readString(invertedBuffer),
GenericIndexed.read(invertedBuffer, ConciseCompressedIndexedInts.objectStrategy)
GenericIndexed.read(invertedBuffer, bitmapSerdeFactory.getObjectStrategy())
);
}
@ -327,7 +305,10 @@ public class IndexIO
while (spatialBuffer != null && spatialBuffer.hasRemaining()) {
spatialIndexed.put(
serializerUtils.readString(spatialBuffer),
ByteBufferSerializer.read(spatialBuffer, IndexedRTree.objectStrategy)
ByteBufferSerializer.read(
spatialBuffer,
new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory())
)
);
}
@ -339,7 +320,7 @@ public class IndexIO
metrics,
dimValueLookups,
dimColumns,
invertedIndexed,
bitmaps,
spatialIndexed,
smooshedFiles
);
@ -371,13 +352,14 @@ public class IndexIO
final FileSmoosher v9Smoosher = new FileSmoosher(v9Dir);
ByteStreams.write(Ints.toByteArray(9), Files.newOutputStreamSupplier(new File(v9Dir, "version.bin")));
Map<String, GenericIndexed<ImmutableConciseSet>> bitmapIndexes = Maps.newHashMap();
Map<String, GenericIndexed<ImmutableBitmap>> bitmapIndexes = Maps.newHashMap();
final ByteBuffer invertedBuffer = v8SmooshedFiles.mapFile("inverted.drd");
while (invertedBuffer.hasRemaining()) {
final String dimName = serializerUtils.readString(invertedBuffer);
bitmapIndexes.put(
serializerUtils.readString(invertedBuffer),
GenericIndexed.read(invertedBuffer, ConciseCompressedIndexedInts.objectStrategy)
dimName,
GenericIndexed.read(invertedBuffer, bitmapSerdeFactory.getObjectStrategy())
);
}
@ -386,7 +368,11 @@ public class IndexIO
while (spatialBuffer != null && spatialBuffer.hasRemaining()) {
spatialIndexes.put(
serializerUtils.readString(spatialBuffer),
ByteBufferSerializer.read(spatialBuffer, IndexedRTree.objectStrategy)
ByteBufferSerializer.read(
spatialBuffer, new IndexedRTree.ImmutableRTreeObjectStrategy(
bitmapSerdeFactory.getBitmapFactory()
)
)
);
}
@ -422,11 +408,12 @@ public class IndexIO
VSizeIndexedInts singleValCol = null;
VSizeIndexed multiValCol = VSizeIndexed.readFromByteBuffer(dimBuffer.asReadOnlyBuffer());
GenericIndexed<ImmutableConciseSet> bitmaps = bitmapIndexes.get(dimension);
GenericIndexed<ImmutableBitmap> bitmaps = bitmapIndexes.get(dimension);
ImmutableRTree spatialIndex = spatialIndexes.get(dimension);
final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
boolean onlyOneValue = true;
ConciseSet nullsSet = null;
MutableBitmap nullsSet = null;
for (int i = 0; i < multiValCol.size(); ++i) {
VSizeIndexedInts rowValue = multiValCol.get(i);
if (!onlyOneValue) {
@ -437,7 +424,7 @@ public class IndexIO
}
if (rowValue.size() == 0) {
if (nullsSet == null) {
nullsSet = new ConciseSet();
nullsSet = bitmapFactory.makeEmptyMutableBitmap();
}
nullsSet.add(i);
}
@ -448,7 +435,7 @@ public class IndexIO
final boolean bumpedDictionary;
if (nullsSet != null) {
log.info("Dimension[%s] has null rows.", dimension);
final ImmutableConciseSet theNullSet = ImmutableConciseSet.newImmutableFromMutable(nullsSet);
final ImmutableBitmap theNullSet = bitmapFactory.makeImmutableBitmap(nullsSet);
if (dictionary.get(0) != null) {
log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension);
@ -463,16 +450,19 @@ public class IndexIO
bitmaps = GenericIndexed.fromIterable(
Iterables.concat(Arrays.asList(theNullSet), bitmaps),
ConciseCompressedIndexedInts.objectStrategy
bitmapSerdeFactory.getObjectStrategy()
);
} else {
bumpedDictionary = false;
bitmaps = GenericIndexed.fromIterable(
Iterables.concat(
Arrays.asList(ImmutableConciseSet.union(theNullSet, bitmaps.get(0))),
Arrays.asList(
bitmapFactory
.union(Arrays.asList(theNullSet, bitmaps.get(0)))
),
Iterables.skip(bitmaps, 1)
),
ConciseCompressedIndexedInts.objectStrategy
bitmapSerdeFactory.getObjectStrategy()
);
}
} else {
@ -508,6 +498,7 @@ public class IndexIO
dictionary,
singleValCol,
multiValCol,
bitmapSerdeFactory,
bitmaps,
spatialIndex
)
@ -606,13 +597,13 @@ public class IndexIO
final GenericIndexed<String> dims9 = GenericIndexed.fromIterable(
Iterables.filter(
dims8, new Predicate<String>()
{
@Override
public boolean apply(String s)
{
return !skippedDimensions.contains(s);
}
}
),
GenericIndexed.stringStrategy
);
@ -620,6 +611,10 @@ public class IndexIO
indexBuffer, GenericIndexed.stringStrategy
);
final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer));
final BitmapSerdeFactory segmentBitmapSerdeFactory = mapper.readValue(
serializerUtils.readString(indexBuffer),
BitmapSerdeFactory.class
);
Set<String> columns = Sets.newTreeSet();
columns.addAll(Lists.newArrayList(dims9));
@ -627,12 +622,17 @@ public class IndexIO
GenericIndexed<String> cols = GenericIndexed.fromIterable(columns, GenericIndexed.stringStrategy);
final long numBytes = cols.getSerializedSize() + dims9.getSerializedSize() + 16;
final String segmentBitmapSerdeFactoryString = mapper.writeValueAsString(segmentBitmapSerdeFactory);
final long numBytes = cols.getSerializedSize() + dims9.getSerializedSize() + 16
+ serializerUtils.getSerializedStringByteSize(segmentBitmapSerdeFactoryString);
final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
cols.writeToChannel(writer);
dims9.writeToChannel(writer);
serializerUtils.writeLong(writer, dataInterval.getStartMillis());
serializerUtils.writeLong(writer, dataInterval.getEndMillis());
serializerUtils.writeString(writer, segmentBitmapSerdeFactoryString);
writer.close();
log.info("Skipped files[%s]", skippedFiles);
@ -648,10 +648,12 @@ public class IndexIO
static class LegacyIndexLoader implements IndexLoader
{
private static final IndexIOHandler legacyHandler = new DefaultIndexIOHandler();
@Override
public QueryableIndex load(File inDir) throws IOException
{
MMappedIndex index = IndexIO.mapDir(inDir);
MMappedIndex index = legacyHandler.mapDir(inDir);
Map<String, Column> columns = Maps.newHashMap();
@ -669,7 +671,9 @@ public class IndexIO
)
.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
index.getInvertedIndexes().get(dimension), index.getDimValueLookup(dimension)
new ConciseBitmapFactory(),
index.getBitmapIndexes().get(dimension),
index.getDimValueLookup(dimension)
)
);
if (index.getSpatialIndexes().get(dimension) != null) {
@ -719,14 +723,17 @@ public class IndexIO
}
String[] cols = colSet.toArray(new String[colSet.size()]);
columns.put(Column.TIME_COLUMN_NAME, new ColumnBuilder()
.setType(ValueType.LONG)
.setGenericColumn(new LongGenericColumnSupplier(index.timestamps))
.build());
columns.put(
Column.TIME_COLUMN_NAME, new ColumnBuilder()
.setType(ValueType.LONG)
.setGenericColumn(new LongGenericColumnSupplier(index.timestamps))
.build()
);
return new SimpleQueryableIndex(
index.getDataInterval(),
new ArrayIndexed<>(cols, String.class),
index.getAvailableDimensions(),
new ConciseBitmapFactory(),
columns,
index.getFileMapper()
);
@ -749,9 +756,24 @@ public class IndexIO
SmooshedFileMapper smooshedFiles = Smoosh.map(inDir);
ByteBuffer indexBuffer = smooshedFiles.mapFile("index.drd");
/**
* Index.drd should consist of the segment version, the columns and dimensions of the segment as generic
* indexes, the interval start and end millis as longs (in 16 bytes), and a bitmap index type.
*/
final GenericIndexed<String> cols = GenericIndexed.read(indexBuffer, GenericIndexed.stringStrategy);
final GenericIndexed<String> dims = GenericIndexed.read(indexBuffer, GenericIndexed.stringStrategy);
final Interval dataInterval = new Interval(indexBuffer.getLong(), indexBuffer.getLong());
final BitmapSerdeFactory segmentBitmapSerdeFactory;
/**
* Workaround for the fact that v8 segments carry no information about which bitmap index type to use.
* Since we cannot very cleanly build v9 segments directly, the conversion appends this information to
* the end of index.drd; files without it fall back to the legacy serde.
*/
if (indexBuffer.hasRemaining()) {
segmentBitmapSerdeFactory = mapper.readValue(serializerUtils.readString(indexBuffer), BitmapSerdeFactory.class);
} else {
segmentBitmapSerdeFactory = new BitmapSerde.LegacyBitmapSerdeFactory();
}
Map<String, Column> columns = Maps.newHashMap();
@ -762,7 +784,7 @@ public class IndexIO
columns.put(Column.TIME_COLUMN_NAME, deserializeColumn(mapper, smooshedFiles.mapFile("__time")));
final QueryableIndex index = new SimpleQueryableIndex(
dataInterval, cols, dims, columns, smooshedFiles
dataInterval, cols, dims, segmentBitmapSerdeFactory.getBitmapFactory(), columns, smooshedFiles
);
log.debug("Mapped v9 index[%s] in %,d millis", inDir, System.currentTimeMillis() - startTime);

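The null-row handling spread across the hunks above follows a single pattern; a condensed, hedged sketch using only factory calls from this commit (rowNum and existingNullBitmap are placeholders for the loop state):

import java.util.Arrays;

import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;

class NullRowBitmapSketch
{
  static ImmutableBitmap mergeNullRow(
      BitmapFactory bitmapFactory,
      int rowNum,                        // a row with no value for the dimension
      ImmutableBitmap existingNullBitmap // bitmap of the dictionary's null entry
  )
  {
    // Collect the rows that have no value for the dimension.
    MutableBitmap nullsSet = bitmapFactory.makeEmptyMutableBitmap();
    nullsSet.add(rowNum);
    ImmutableBitmap theNullSet = bitmapFactory.makeImmutableBitmap(nullsSet);
    // If the dictionary already has a null entry, union instead of bumping it.
    return bitmapFactory.union(Arrays.asList(theNullSet, existingNullBitmap));
  }
}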
View File

@ -24,6 +24,7 @@ import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
@ -33,7 +34,12 @@ import com.google.common.collect.Sets;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.common.primitives.Ints;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.collections.spatial.RTree;
import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy;
@ -46,21 +52,20 @@ import com.metamx.common.io.smoosh.FileSmoosher;
import com.metamx.common.io.smoosh.SmooshedWriter;
import com.metamx.common.logger.Logger;
import io.druid.collections.CombiningIterable;
import io.druid.collections.ResourceHolder;
import io.druid.collections.StupidPool;
import io.druid.common.utils.JodaUtils;
import io.druid.common.utils.SerializerUtils;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.JsonConfigProvider;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.ToLowerCaseAggregatorFactory;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ColumnDescriptor;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.CompressedFloatsIndexedSupplier;
import io.druid.segment.data.CompressedLongsIndexedSupplier;
import io.druid.segment.data.CompressedObjectStrategy;
import io.druid.segment.data.ConciseCompressedIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
@ -76,15 +81,12 @@ import io.druid.segment.serde.ComplexMetrics;
import io.druid.segment.serde.DictionaryEncodedColumnPartSerde;
import io.druid.segment.serde.FloatGenericColumnPartSerde;
import io.druid.segment.serde.LongGenericColumnPartSerde;
import it.uniroma3.mat.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import org.apache.commons.io.FileUtils;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -109,10 +111,23 @@ public class IndexMaker
private static final int INVALID_ROW = -1;
private static final Splitter SPLITTER = Splitter.on(",");
private static final ObjectMapper mapper;
private static final BitmapSerdeFactory bitmapSerdeFactory;
static {
final Injector injector = GuiceInjectors.makeStartupInjector();
final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(
ImmutableList.<Module>of(
new Module()
{
@Override
public void configure(Binder binder)
{
JsonConfigProvider.bind(binder, "druid.processing.bitmap", BitmapSerdeFactory.class);
}
}
)
);
mapper = injector.getInstance(ObjectMapper.class);
bitmapSerdeFactory = injector.getInstance(BitmapSerdeFactory.class);
}
public static File persist(final IncrementalIndex index, File outDir) throws IOException
@ -170,7 +185,7 @@ public class IndexMaker
log.info("Starting persist for interval[%s], rows[%,d]", dataInterval, index.size());
return merge(
Arrays.<IndexableAdapter>asList(new IncrementalIndexAdapter(dataInterval, index)),
Arrays.<IndexableAdapter>asList(new IncrementalIndexAdapter(dataInterval, index, bitmapSerdeFactory.getBitmapFactory())),
index.getMetricAggs(),
outDir,
progress
@ -839,13 +854,14 @@ public class IndexMaker
? new MultiValColumnDictionaryEntryStore()
: new SingleValColumnDictionaryEntryStore();
ConciseSet nullSet = null;
final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
MutableBitmap nullSet = null;
int rowCount = 0;
for (Rowboat theRow : theRows) {
if (dimIndex > theRow.getDims().length) {
if (nullSet == null) {
nullSet = new ConciseSet();
nullSet = bitmapFactory.makeEmptyMutableBitmap();
}
nullSet.add(rowCount);
adder.add(null);
@ -853,7 +869,7 @@ public class IndexMaker
int[] dimVals = theRow.getDims()[dimIndex];
if (dimVals == null || dimVals.length == 0) {
if (nullSet == null) {
nullSet = new ConciseSet();
nullSet = bitmapFactory.makeEmptyMutableBitmap();
}
nullSet.add(rowCount);
}
@ -1062,18 +1078,18 @@ public class IndexMaker
}
// Make bitmap indexes
List<ConciseSet> conciseSets = Lists.newArrayList();
List<MutableBitmap> mutableBitmaps = Lists.newArrayList();
for (String dimVal : dimensionValues) {
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
for (int j = 0; j < adapters.size(); ++j) {
convertedInverteds.add(
new ConvertingIndexedInts(
adapters.get(j).getInverteds(dimension, dimVal), rowNumConversions.get(j)
adapters.get(j).getBitmapIndex(dimension, dimVal), rowNumConversions.get(j)
)
);
}
ConciseSet bitset = new ConciseSet();
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
for (Integer row : CombiningIterable.createSplatted(
convertedInverteds,
Ordering.<Integer>natural().nullsFirst()
@ -1083,40 +1099,40 @@ public class IndexMaker
}
}
conciseSets.add(bitset);
mutableBitmaps.add(bitset);
}
GenericIndexed<ImmutableConciseSet> bitmaps;
GenericIndexed<ImmutableBitmap> bitmaps;
if (nullSet != null) {
final ImmutableConciseSet theNullSet = ImmutableConciseSet.newImmutableFromMutable(nullSet);
final ImmutableBitmap theNullSet = bitmapFactory.makeImmutableBitmap(nullSet);
if (bumpDictionary) {
bitmaps = GenericIndexed.fromIterable(
Iterables.concat(
Arrays.asList(theNullSet),
Iterables.transform(
conciseSets,
new Function<ConciseSet, ImmutableConciseSet>()
mutableBitmaps,
new Function<MutableBitmap, ImmutableBitmap>()
{
@Override
public ImmutableConciseSet apply(ConciseSet input)
public ImmutableBitmap apply(MutableBitmap input)
{
return ImmutableConciseSet.newImmutableFromMutable(input);
return bitmapFactory.makeImmutableBitmap(input);
}
}
)
),
ConciseCompressedIndexedInts.objectStrategy
bitmapSerdeFactory.getObjectStrategy()
);
} else {
Iterable<ImmutableConciseSet> immutableConciseSets = Iterables.transform(
conciseSets,
new Function<ConciseSet, ImmutableConciseSet>()
Iterable<ImmutableBitmap> immutableBitmaps = Iterables.transform(
mutableBitmaps,
new Function<MutableBitmap, ImmutableBitmap>()
{
@Override
public ImmutableConciseSet apply(ConciseSet input)
public ImmutableBitmap apply(MutableBitmap input)
{
return ImmutableConciseSet.newImmutableFromMutable(input);
return bitmapFactory.makeImmutableBitmap(input);
}
}
);
@ -1124,30 +1140,27 @@ public class IndexMaker
bitmaps = GenericIndexed.fromIterable(
Iterables.concat(
Arrays.asList(
ImmutableConciseSet.union(
theNullSet,
Iterables.getFirst(immutableConciseSets, null)
)
theNullSet.union(Iterables.getFirst(immutableBitmaps, null))
),
Iterables.skip(immutableConciseSets, 1)
Iterables.skip(immutableBitmaps, 1)
),
ConciseCompressedIndexedInts.objectStrategy
bitmapSerdeFactory.getObjectStrategy()
);
}
} else {
bitmaps = GenericIndexed.fromIterable(
Iterables.transform(
conciseSets,
new Function<ConciseSet, ImmutableConciseSet>()
mutableBitmaps,
new Function<MutableBitmap, ImmutableBitmap>()
{
@Override
public ImmutableConciseSet apply(ConciseSet input)
public ImmutableBitmap apply(MutableBitmap input)
{
return ImmutableConciseSet.newImmutableFromMutable(input);
return bitmapFactory.makeImmutableBitmap(input);
}
}
),
ConciseCompressedIndexedInts.objectStrategy
bitmapSerdeFactory.getObjectStrategy()
);
}
@ -1156,7 +1169,11 @@ public class IndexMaker
boolean hasSpatialIndexes = columnCapabilities.get(dimension).hasSpatialIndexes();
RTree tree = null;
if (hasSpatialIndexes) {
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50));
tree = new RTree(
2,
new LinearGutmanSplitStrategy(0, 50, bitmapSerdeFactory.getBitmapFactory()),
bitmapSerdeFactory.getBitmapFactory()
);
}
int dimValIndex = 0;
@ -1168,7 +1185,7 @@ public class IndexMaker
for (int j = 0; j < coords.length; j++) {
coords[j] = Float.valueOf(stringCoords.get(j));
}
tree.insert(coords, conciseSets.get(dimValIndex));
tree.insert(coords, mutableBitmaps.get(dimValIndex));
}
dimValIndex++;
}
@ -1185,6 +1202,7 @@ public class IndexMaker
dictionary,
singleValCol,
multiValCol,
bitmapSerdeFactory,
bitmaps,
spatialIndex
),
@ -1354,7 +1372,12 @@ public class IndexMaker
GenericIndexed<String> cols = GenericIndexed.fromIterable(finalColumns, GenericIndexed.stringStrategy);
GenericIndexed<String> dims = GenericIndexed.fromIterable(finalDimensions, GenericIndexed.stringStrategy);
final long numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16;
final String bitmapSerdeFactoryType = mapper.writeValueAsString(bitmapSerdeFactory);
final long numBytes = cols.getSerializedSize()
+ dims.getSerializedSize()
+ 16
+ serializerUtils.getSerializedStringByteSize(bitmapSerdeFactoryType);
final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
cols.writeToChannel(writer);
@ -1371,6 +1394,9 @@ public class IndexMaker
serializerUtils.writeLong(writer, dataInterval.getStartMillis());
serializerUtils.writeLong(writer, dataInterval.getEndMillis());
serializerUtils.writeString(
writer, bitmapSerdeFactoryType
);
writer.close();
IndexIO.checkFileSize(new File(outDir, "index.drd"));

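For the spatial path above, the split strategy and the tree itself now share the configured bitmap factory. A small hedged sketch of building one (the coordinates are made up; each spatial dim value such as "lat,lon" maps to the bitmap of rows containing it):

import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.collections.spatial.RTree;
import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy;

class SpatialIndexSketch
{
  public static void main(String[] args)
  {
    BitmapFactory bf = new ConciseBitmapFactory();
    RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf);

    MutableBitmap rows = bf.makeEmptyMutableBitmap();
    rows.add(0); // row 0 contains this coordinate pair
    tree.insert(new float[]{37.77f, -122.42f}, rows);
  }
}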
View File

@ -19,9 +19,11 @@
package io.druid.segment;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
@ -32,6 +34,12 @@ import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.common.io.OutputSupplier;
import com.google.common.primitives.Ints;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.collections.spatial.RTree;
import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy;
@ -48,15 +56,17 @@ import io.druid.common.guava.FileOutputSupplier;
import io.druid.common.guava.GuavaUtils;
import io.druid.common.utils.JodaUtils;
import io.druid.common.utils.SerializerUtils;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.JsonConfigProvider;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.ToLowerCaseAggregatorFactory;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.ByteBufferWriter;
import io.druid.segment.data.CompressedLongsSupplierSerializer;
import io.druid.segment.data.CompressedObjectStrategy;
import io.druid.segment.data.ConciseCompressedIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.GenericIndexedWriter;
import io.druid.segment.data.IOPeon;
@ -71,8 +81,6 @@ import io.druid.segment.incremental.IncrementalIndexAdapter;
import io.druid.segment.serde.ComplexMetricColumnSerializer;
import io.druid.segment.serde.ComplexMetricSerde;
import io.druid.segment.serde.ComplexMetrics;
import it.uniroma3.mat.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import org.apache.commons.io.FileUtils;
import org.joda.time.DateTime;
import org.joda.time.Interval;
@ -103,6 +111,27 @@ public class IndexMerger
private static final int INVALID_ROW = -1;
private static final Splitter SPLITTER = Splitter.on(",");
private static final ObjectMapper mapper;
private static final BitmapSerdeFactory bitmapSerdeFactory;
static {
final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(
ImmutableList.<Module>of(
new Module()
{
@Override
public void configure(Binder binder)
{
JsonConfigProvider.bind(binder, "druid.processing.bitmap", BitmapSerdeFactory.class);
}
}
)
);
mapper = injector.getInstance(ObjectMapper.class);
bitmapSerdeFactory = injector.getInstance(BitmapSerdeFactory.class);
}
public static File persist(final IncrementalIndex index, File outDir) throws IOException
{
return persist(index, index.getInterval(), outDir);
@ -153,7 +182,13 @@ public class IndexMerger
log.info("Starting persist for interval[%s], rows[%,d]", dataInterval, index.size());
return merge(
Arrays.<IndexableAdapter>asList(new IncrementalIndexAdapter(dataInterval, index)),
Arrays.<IndexableAdapter>asList(
new IncrementalIndexAdapter(
dataInterval,
index,
bitmapSerdeFactory.getBitmapFactory()
)
),
index.getMetricAggs(),
outDir,
progress
@ -458,6 +493,7 @@ public class IndexMerger
dataInterval = new Interval(minTime, maxTime);
serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
serializerUtils.writeString(channel, mapper.writeValueAsString(bitmapSerdeFactory));
}
finally {
CloseQuietly.close(channel);
@ -756,8 +792,8 @@ public class IndexMerger
Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.stringStrategy);
log.info("Starting dimension[%s] with cardinality[%,d]", dimension, dimVals.size());
GenericIndexedWriter<ImmutableConciseSet> writer = new GenericIndexedWriter<ImmutableConciseSet>(
ioPeon, dimension, ConciseCompressedIndexedInts.objectStrategy
GenericIndexedWriter<ImmutableBitmap> writer = new GenericIndexedWriter<>(
ioPeon, dimension, bitmapSerdeFactory.getObjectStrategy()
);
writer.open();
@ -766,11 +802,12 @@ public class IndexMerger
RTree tree = null;
IOPeon spatialIoPeon = new TmpFileIOPeon();
if (isSpatialDim) {
BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
spatialWriter = new ByteBufferWriter<ImmutableRTree>(
spatialIoPeon, dimension, IndexedRTree.objectStrategy
spatialIoPeon, dimension, new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapFactory)
);
spatialWriter.open();
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50));
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
}
for (String dimVal : IndexedIterable.create(dimVals)) {
@ -779,12 +816,12 @@ public class IndexMerger
for (int j = 0; j < indexes.size(); ++j) {
convertedInverteds.add(
new ConvertingIndexedInts(
indexes.get(j).getInverteds(dimension, dimVal), rowNumConversions.get(j)
indexes.get(j).getBitmapIndex(dimension, dimVal), rowNumConversions.get(j)
)
);
}
ConciseSet bitset = new ConciseSet();
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
for (Integer row : CombiningIterable.createSplatted(
convertedInverteds,
Ordering.<Integer>natural().nullsFirst()
@ -794,7 +831,9 @@ public class IndexMerger
}
}
writer.write(ImmutableConciseSet.newImmutableFromMutable(bitset));
writer.write(
bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset)
);
if (isSpatialDim && dimVal != null) {
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
@ -906,6 +945,9 @@ public class IndexMerger
serializerUtils.writeString(
channel, String.format("%s/%s", dataInterval.getStart(), dataInterval.getEnd())
);
serializerUtils.writeString(
channel, mapper.writeValueAsString(bitmapSerdeFactory)
);
}
finally {
CloseQuietly.close(channel);

View File

@ -42,7 +42,7 @@ public interface IndexableAdapter
Iterable<Rowboat> getRows();
IndexedInts getInverteds(String dimension, String value);
IndexedInts getBitmapIndex(String dimension, String value);
String getMetricType(String metric);

View File

@ -19,13 +19,13 @@
package io.druid.segment;
import com.google.common.collect.Maps;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.WrappedImmutableConciseBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.common.io.smoosh.SmooshedFileMapper;
import com.metamx.common.logger.Logger;
import io.druid.segment.data.CompressedLongsIndexedSupplier;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.IndexedLongs;
import io.druid.segment.data.VSizeIndexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import org.joda.time.Interval;
@ -38,7 +38,6 @@ import java.util.Map;
public class MMappedIndex
{
private static final Logger log = new Logger(MMappedIndex.class);
private static final ImmutableConciseSet emptySet = new ImmutableConciseSet();
final GenericIndexed<String> availableDimensions;
final GenericIndexed<String> availableMetrics;
@ -47,12 +46,10 @@ public class MMappedIndex
final Map<String, MetricHolder> metrics;
final Map<String, GenericIndexed<String>> dimValueLookups;
final Map<String, VSizeIndexed> dimColumns;
final Map<String, GenericIndexed<ImmutableConciseSet>> invertedIndexes;
final Map<String, GenericIndexed<ImmutableBitmap>> invertedIndexes;
final Map<String, ImmutableRTree> spatialIndexes;
final SmooshedFileMapper fileMapper;
private final Map<String, Integer> metricIndexes = Maps.newHashMap();
public MMappedIndex(
GenericIndexed<String> availableDimensions,
GenericIndexed<String> availableMetrics,
@ -61,7 +58,7 @@ public class MMappedIndex
Map<String, MetricHolder> metrics,
Map<String, GenericIndexed<String>> dimValueLookups,
Map<String, VSizeIndexed> dimColumns,
Map<String, GenericIndexed<ImmutableConciseSet>> invertedIndexes,
Map<String, GenericIndexed<ImmutableBitmap>> invertedIndexes,
Map<String, ImmutableRTree> spatialIndexes,
SmooshedFileMapper fileMapper
)
@ -76,10 +73,6 @@ public class MMappedIndex
this.invertedIndexes = invertedIndexes;
this.spatialIndexes = spatialIndexes;
this.fileMapper = fileMapper;
for (int i = 0; i < availableMetrics.size(); i++) {
metricIndexes.put(availableMetrics.get(i), i);
}
}
public CompressedLongsIndexedSupplier getTimestamps()
@ -102,21 +95,11 @@ public class MMappedIndex
return metrics;
}
public Integer getMetricIndex(String metricName)
{
return metricIndexes.get(metricName);
}
public Interval getDataInterval()
{
return dataInterval;
}
public IndexedLongs getReadOnlyTimestamps()
{
return timestamps.get();
}
public MetricHolder getMetricHolder(String metric)
{
final MetricHolder retVal = metrics.get(metric);
@ -138,7 +121,7 @@ public class MMappedIndex
return dimColumns.get(dimension);
}
public Map<String, GenericIndexed<ImmutableConciseSet>> getInvertedIndexes()
public Map<String, GenericIndexed<ImmutableBitmap>> getBitmapIndexes()
{
return invertedIndexes;
}
@ -148,22 +131,6 @@ public class MMappedIndex
return spatialIndexes;
}
public ImmutableConciseSet getInvertedIndex(String dimension, String value)
{
final GenericIndexed<String> lookup = dimValueLookups.get(dimension);
if (lookup == null) {
return emptySet;
}
int indexOf = lookup.indexOf(value);
if (indexOf < 0) {
return emptySet;
}
ImmutableConciseSet retVal = invertedIndexes.get(dimension).get(indexOf);
return (retVal == null) ? emptySet : retVal;
}
public SmooshedFileMapper getFileMapper()
{
return fileMapper;

View File

@ -17,7 +17,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment;import io.druid.segment.data.Indexed;
package io.druid.segment;
import com.metamx.collections.bitmap.BitmapFactory;
import io.druid.segment.data.Indexed;
import org.joda.time.Interval;
import java.io.IOException;
@ -30,6 +33,7 @@ public interface QueryableIndex extends ColumnSelector
public int getNumRows();
public Indexed<String> getColumnNames();
public Indexed<String> getAvailableDimensions();
public BitmapFactory getBitmapFactoryForDimensions();
/**
* The close method shouldn't actually be here as this is nasty. We will adjust it in the future.

View File

@ -35,7 +35,7 @@ import io.druid.segment.column.IndexedFloatsGenericColumn;
import io.druid.segment.column.IndexedLongsGenericColumn;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.ArrayBasedIndexedInts;
import io.druid.segment.data.ConciseCompressedIndexedInts;
import io.druid.segment.data.BitmapCompressedIndexedInts;
import io.druid.segment.data.EmptyIndexedInts;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
@ -279,7 +279,7 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
}
@Override
public IndexedInts getInverteds(String dimension, String value)
public IndexedInts getBitmapIndex(String dimension, String value)
{
final Column column = input.getColumn(dimension);
@ -292,7 +292,7 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
return new EmptyIndexedInts();
}
return new ConciseCompressedIndexedInts(bitmaps.getConciseSet(value));
return new BitmapCompressedIndexedInts(bitmaps.getBitmap(value));
}
@Override

View File

@ -154,16 +154,22 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
actualInterval = actualInterval.withEnd(dataInterval.getEnd());
}
final Sequence<Cursor> sequence;
final Offset offset;
if (filter == null) {
sequence = new NoFilterCursorSequenceBuilder(index, actualInterval, gran).build();
offset = new NoFilterOffset(0, index.getNumRows());
} else {
Offset offset = new ConciseOffset(filter.goConcise(new ColumnSelectorBitmapIndexSelector(index)));
final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(
index.getBitmapFactoryForDimensions(),
index
);
sequence = new CursorSequenceBuilder(index, actualInterval, gran, offset).build();
offset = new BitmapOffset(selector.getBitmapFactory(), filter.getBitmapIndex(selector));
}
return Sequences.filter(sequence, Predicates.<Cursor>notNull());
return Sequences.filter(
new CursorSequenceBuilder(index, actualInterval, gran, offset).build(),
Predicates.<Cursor>notNull()
);
}
private static class CursorSequenceBuilder
@ -267,8 +273,8 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
final Column columnDesc = index.getColumn(dimensionName);
if (cachedColumn == null && columnDesc != null) {
cachedColumn = columnDesc.getDictionaryEncoding();
dictionaryColumnCache.put(dimensionName, cachedColumn);
}
final DictionaryEncodedColumn column = cachedColumn;
@ -539,7 +545,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
return columnVals.lookupName(multiValueRow.get(0));
} else {
final String[] strings = new String[multiValueRow.size()];
for (int i = 0 ; i < multiValueRow.size() ; i++) {
for (int i = 0; i < multiValueRow.size(); i++) {
strings[i] = columnVals.lookupName(multiValueRow.get(i));
}
return strings;
@ -600,7 +606,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
CloseQuietly.close(complexColumn);
}
for (Object column : objectColumnCache.values()) {
if(column instanceof Closeable) {
if (column instanceof Closeable) {
CloseQuietly.close((Closeable) column);
}
}
@ -615,6 +621,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
private final Offset baseOffset;
private final GenericColumn timestamps;
private final long threshold;
private final boolean allWithinThreshold;
public TimestampCheckingOffset(
Offset baseOffset,
@ -625,6 +632,8 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
this.baseOffset = baseOffset;
this.timestamps = timestamps;
this.threshold = threshold;
// If the last (and therefore maximum) timestamp is below the threshold, every row is, so withinBounds() can skip the per-row timestamp lookup.
this.allWithinThreshold = timestamps.getLongSingleValueRow(timestamps.length() - 1) < threshold;
}
@Override
@ -642,7 +651,8 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
@Override
public boolean withinBounds()
{
return baseOffset.withinBounds() && timestamps.getLongSingleValueRow(baseOffset.getOffset()) < threshold;
return baseOffset.withinBounds() && (allWithinThreshold
|| timestamps.getLongSingleValueRow(baseOffset.getOffset()) < threshold);
}
@Override
@ -652,441 +662,39 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
}
}
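The precomputed allWithinThreshold flag above works because segment rows are time-ordered: if the last timestamp is under the threshold, all of them are, and the disjunction short-circuits. A self-contained sketch with hypothetical values:

class ThresholdSketch
{
  public static void main(String[] args)
  {
    long[] timestamps = {100L, 200L, 300L}; // time-ordered, as in a segment
    long threshold = 500L;
    boolean allWithinThreshold = timestamps[timestamps.length - 1] < threshold;

    int row = 1;
    boolean withinBounds = row < timestamps.length
                           && (allWithinThreshold || timestamps[row] < threshold);
    System.out.println(withinBounds); // true, without a per-row lookup
  }
}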
private static class NoFilterCursorSequenceBuilder
private static class NoFilterOffset implements Offset
{
private final ColumnSelector index;
private final Interval interval;
private final QueryGranularity gran;
private final int rowCount;
private volatile int currentOffset;
public NoFilterCursorSequenceBuilder(
ColumnSelector index,
Interval interval,
QueryGranularity gran
)
NoFilterOffset(int currentOffset, int rowCount)
{
this.index = index;
this.interval = interval;
this.gran = gran;
this.currentOffset = currentOffset;
this.rowCount = rowCount;
}
/**
* This produces iterators of Cursor objects that must be fully processed (until isDone() returns true) before the
* next Cursor is processed. It is *not* safe to pass these cursors off to another thread for parallel processing.
*/
public Sequence<Cursor> build()
@Override
public void increment()
{
final Map<String, DictionaryEncodedColumn> dictionaryColumnCache = Maps.newHashMap();
final Map<String, GenericColumn> genericColumnCache = Maps.newHashMap();
final Map<String, ComplexColumn> complexColumnCache = Maps.newHashMap();
final Map<String, Object> objectColumnCache = Maps.newHashMap();
currentOffset++;
}
final GenericColumn timestamps = index.getColumn(Column.TIME_COLUMN_NAME).getGenericColumn();
@Override
public boolean withinBounds()
{
return currentOffset < rowCount;
}
return Sequences.withBaggage(
Sequences.map(
Sequences.simple(gran.iterable(interval.getStartMillis(), interval.getEndMillis())),
new Function<Long, Cursor>()
{
private int currRow = 0;
@Override
public Offset clone()
{
return new NoFilterOffset(currentOffset, rowCount);
}
@Override
public Cursor apply(final Long input)
{
final long timeStart = Math.max(interval.getStartMillis(), input);
while (currRow < timestamps.length() && timestamps.getLongSingleValueRow(currRow) < timeStart) {
++currRow;
}
return new Cursor()
{
private final DateTime myBucket = gran.toDateTime(input);
private final long nextBucket = Math.min(gran.next(myBucket.getMillis()), interval.getEndMillis());
private final int initRow = currRow;
@Override
public DateTime getTime()
{
return myBucket;
}
@Override
public void advance()
{
if (Thread.interrupted()) {
throw new QueryInterruptedException();
}
++currRow;
}
@Override
public void advanceTo(int offset)
{
currRow += offset;
}
@Override
public boolean isDone()
{
return currRow >= timestamps.length() || timestamps.getLongSingleValueRow(currRow) >= nextBucket;
}
@Override
public void reset()
{
currRow = initRow;
}
@Override
public DimensionSelector makeDimensionSelector(String dimension)
{
final String dimensionName = dimension.toLowerCase();
DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimensionName);
final Column columnDesc = index.getColumn(dimensionName);
if (cachedColumn == null && columnDesc != null) {
cachedColumn = columnDesc.getDictionaryEncoding();
dictionaryColumnCache.put(dimensionName, cachedColumn);
}
final DictionaryEncodedColumn column = cachedColumn;
if (column == null) {
return null;
} else if (columnDesc.getCapabilities().hasMultipleValues()) {
return new DimensionSelector()
{
@Override
public IndexedInts getRow()
{
return column.getMultiValueRow(currRow);
}
@Override
public int getValueCardinality()
{
return column.getCardinality();
}
@Override
public String lookupName(int id)
{
final String retVal = column.lookupName(id);
return retVal == null ? "" : retVal;
}
@Override
public int lookupId(String name)
{
return column.lookupId(name);
}
};
} else {
return new DimensionSelector()
{
@Override
public IndexedInts getRow()
{
// using an anonymous class is faster than creating a class that stores a copy of the value
return new IndexedInts()
{
@Override
public int size()
{
return 1;
}
@Override
public int get(int index)
{
return column.getSingleValueRow(currRow);
}
@Override
public Iterator<Integer> iterator()
{
return Iterators.singletonIterator(column.getSingleValueRow(currRow));
}
};
}
@Override
public int getValueCardinality()
{
return column.getCardinality();
}
@Override
public String lookupName(int id)
{
return column.lookupName(id);
}
@Override
public int lookupId(String name)
{
return column.lookupId(name);
}
};
}
}
@Override
public FloatColumnSelector makeFloatColumnSelector(String columnName)
{
final String metricName = columnName.toLowerCase();
GenericColumn cachedMetricVals = genericColumnCache.get(metricName);
if (cachedMetricVals == null) {
Column holder = index.getColumn(metricName);
if (holder != null && (holder.getCapabilities().getType() == ValueType.LONG
|| holder.getCapabilities().getType() == ValueType.FLOAT)) {
cachedMetricVals = holder.getGenericColumn();
genericColumnCache.put(metricName, cachedMetricVals);
}
}
if (cachedMetricVals == null) {
return new FloatColumnSelector()
{
@Override
public float get()
{
return 0.0f;
}
};
}
final GenericColumn metricVals = cachedMetricVals;
return new FloatColumnSelector()
{
@Override
public float get()
{
return metricVals.getFloatSingleValueRow(currRow);
}
};
}
@Override
public LongColumnSelector makeLongColumnSelector(String columnName)
{
final String metricName = columnName.toLowerCase();
GenericColumn cachedMetricVals = genericColumnCache.get(metricName);
if (cachedMetricVals == null) {
Column holder = index.getColumn(metricName);
if (holder != null && (holder.getCapabilities().getType() == ValueType.LONG
|| holder.getCapabilities().getType() == ValueType.FLOAT)) {
cachedMetricVals = holder.getGenericColumn();
genericColumnCache.put(metricName, cachedMetricVals);
}
}
if (cachedMetricVals == null) {
return new LongColumnSelector()
{
@Override
public long get()
{
return 0L;
}
};
}
final GenericColumn metricVals = cachedMetricVals;
return new LongColumnSelector()
{
@Override
public long get()
{
return metricVals.getLongSingleValueRow(currRow);
}
};
}
@Override
public ObjectColumnSelector makeObjectColumnSelector(String column)
{
final String columnName = column.toLowerCase();
Object cachedColumnVals = objectColumnCache.get(columnName);
if (cachedColumnVals == null) {
Column holder = index.getColumn(columnName);
if (holder != null) {
final ValueType type = holder.getCapabilities().getType();
if (holder.getCapabilities().isDictionaryEncoded()) {
cachedColumnVals = holder.getDictionaryEncoding();
} else if (type == ValueType.COMPLEX) {
cachedColumnVals = holder.getComplexColumn();
} else {
cachedColumnVals = holder.getGenericColumn();
}
}
if (cachedColumnVals != null) {
objectColumnCache.put(columnName, cachedColumnVals);
}
}
if (cachedColumnVals == null) {
return null;
}
if (cachedColumnVals instanceof GenericColumn) {
final GenericColumn columnVals = (GenericColumn) cachedColumnVals;
final ValueType type = columnVals.getType();
if (columnVals.hasMultipleValues()) {
throw new UnsupportedOperationException(
"makeObjectColumnSelector does not support multivalued GenericColumns"
);
}
if (type == ValueType.FLOAT) {
return new ObjectColumnSelector<Float>()
{
@Override
public Class classOfObject()
{
return Float.TYPE;
}
@Override
public Float get()
{
return columnVals.getFloatSingleValueRow(currRow);
}
};
}
if (type == ValueType.LONG) {
return new ObjectColumnSelector<Long>()
{
@Override
public Class classOfObject()
{
return Long.TYPE;
}
@Override
public Long get()
{
return columnVals.getLongSingleValueRow(currRow);
}
};
}
if (type == ValueType.STRING) {
return new ObjectColumnSelector<String>()
{
@Override
public Class classOfObject()
{
return String.class;
}
@Override
public String get()
{
return columnVals.getStringSingleValueRow(currRow);
}
};
}
}
if (cachedColumnVals instanceof DictionaryEncodedColumn) {
final DictionaryEncodedColumn columnVals = (DictionaryEncodedColumn) cachedColumnVals;
if (columnVals.hasMultipleValues()) {
return new ObjectColumnSelector<Object>()
{
@Override
public Class classOfObject()
{
return Object.class;
}
@Override
public Object get()
{
final IndexedInts multiValueRow = columnVals.getMultiValueRow(currRow);
if (multiValueRow.size() == 0) {
return null;
} else if (multiValueRow.size() == 1) {
return columnVals.lookupName(multiValueRow.get(0));
} else {
final String[] strings = new String[multiValueRow.size()];
for (int i = 0 ; i < multiValueRow.size() ; i++) {
strings[i] = columnVals.lookupName(multiValueRow.get(i));
}
return strings;
}
}
};
} else {
return new ObjectColumnSelector<String>()
{
@Override
public Class classOfObject()
{
return String.class;
}
@Override
public String get()
{
return columnVals.lookupName(columnVals.getSingleValueRow(currRow));
}
};
}
}
final ComplexColumn columnVals = (ComplexColumn) cachedColumnVals;
return new ObjectColumnSelector()
{
@Override
public Class classOfObject()
{
return columnVals.getClazz();
}
@Override
public Object get()
{
return columnVals.getRowValue(currRow);
}
};
}
};
}
}
),
new Closeable()
{
@Override
public void close() throws IOException
{
CloseQuietly.close(timestamps);
for (DictionaryEncodedColumn column : dictionaryColumnCache.values()) {
CloseQuietly.close(column);
}
for (GenericColumn column : genericColumnCache.values()) {
CloseQuietly.close(column);
}
for (ComplexColumn complexColumn : complexColumnCache.values()) {
CloseQuietly.close(complexColumn);
}
for (Object column : objectColumnCache.values()) {
if (column instanceof Closeable) {
CloseQuietly.close((Closeable) column);
}
}
}
}
);
@Override
public int getOffset()
{
return currentOffset;
}
}
}

View File

@ -76,9 +76,9 @@ public class RowboatFilteringIndexAdapter implements IndexableAdapter
}
@Override
public IndexedInts getInverteds(String dimension, String value)
public IndexedInts getBitmapIndex(String dimension, String value)
{
return baseAdapter.getInverteds(dimension, value);
return baseAdapter.getBitmapIndex(dimension, value);
}
@Override

View File

@ -20,6 +20,7 @@
package io.druid.segment;
import com.google.common.base.Preconditions;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.common.io.smoosh.SmooshedFileMapper;
import io.druid.segment.column.Column;
import io.druid.segment.data.Indexed;
@ -35,6 +36,7 @@ public class SimpleQueryableIndex implements QueryableIndex
private final Interval dataInterval;
private final Indexed<String> columnNames;
private final Indexed<String> availableDimensions;
private final BitmapFactory bitmapFactory;
private final Map<String, Column> columns;
private final SmooshedFileMapper fileMapper;
@ -42,6 +44,7 @@ public class SimpleQueryableIndex implements QueryableIndex
Interval dataInterval,
Indexed<String> columnNames,
Indexed<String> dimNames,
BitmapFactory bitmapFactory,
Map<String, Column> columns,
SmooshedFileMapper fileMapper
)
@ -50,6 +53,7 @@ public class SimpleQueryableIndex implements QueryableIndex
this.dataInterval = dataInterval;
this.columnNames = columnNames;
this.availableDimensions = dimNames;
this.bitmapFactory = bitmapFactory;
this.columns = columns;
this.fileMapper = fileMapper;
}
@ -78,6 +82,12 @@ public class SimpleQueryableIndex implements QueryableIndex
return availableDimensions;
}
@Override
public BitmapFactory getBitmapFactoryForDimensions()
{
return bitmapFactory;
}
@Override
public Column getColumn(String columnName)
{

View File

@ -19,15 +19,22 @@
package io.druid.segment.column;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
/**
*/
public interface BitmapIndex
{
public int getCardinality();
public String getValue(int index);
public boolean hasNulls();
public ImmutableConciseSet getConciseSet(String value);
public ImmutableConciseSet getConciseSet(int idx);
public BitmapFactory getBitmapFactory();
public ImmutableBitmap getBitmap(String value);
public ImmutableBitmap getBitmap(int idx);
}

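A hedged usage sketch of the reworked interface; the dictionaryId parameter is a stand-in for an id obtained from the column's value lookup:

import java.util.Arrays;

import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.segment.column.BitmapIndex;

class BitmapIndexUsageSketch
{
  static ImmutableBitmap lookup(BitmapIndex bitmapIndex, String value, int dictionaryId)
  {
    ImmutableBitmap byValue = bitmapIndex.getBitmap(value);     // lookup by value
    ImmutableBitmap byId = bitmapIndex.getBitmap(dictionaryId); // lookup by dictionary id
    // Combine with the index's own factory so bitmap implementations stay consistent.
    return bitmapIndex.getBitmapFactory().union(Arrays.asList(byValue, byId));
  }
}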
View File

@ -0,0 +1,109 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment.data;
import com.google.common.collect.Ordering;
import com.metamx.collections.bitmap.ImmutableBitmap;
import org.roaringbitmap.IntIterator;
import javax.annotation.Nullable;
import java.util.Iterator;
/**
*/
public class BitmapCompressedIndexedInts implements IndexedInts, Comparable<ImmutableBitmap>
{
private static Ordering<ImmutableBitmap> comparator = new Ordering<ImmutableBitmap>()
{
@Override
public int compare(
ImmutableBitmap set, ImmutableBitmap set1
)
{
if (set.size() == 0 && set1.size() == 0) {
return 0;
}
if (set.size() == 0) {
return -1;
}
if (set1.size() == 0) {
return 1;
}
return set.compareTo(set1);
}
}.nullsFirst();
private final ImmutableBitmap immutableBitmap;
public BitmapCompressedIndexedInts(ImmutableBitmap immutableBitmap)
{
this.immutableBitmap = immutableBitmap;
}
@Override
public int compareTo(@Nullable ImmutableBitmap otherBitmap)
{
return comparator.compare(immutableBitmap, otherBitmap);
}
@Override
public int size()
{
return immutableBitmap.size();
}
@Override
public int get(int index)
{
throw new UnsupportedOperationException("This is really slow, so it's just not supported.");
}
public ImmutableBitmap getImmutableBitmap()
{
return immutableBitmap;
}
@Override
public Iterator<Integer> iterator()
{
return new Iterator<Integer>()
{
IntIterator baseIterator = immutableBitmap.iterator();
@Override
public boolean hasNext()
{
return baseIterator.hasNext();
}
@Override
public Integer next()
{
return baseIterator.next();
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
};
}
}

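A small sketch of the intended access pattern: iteration is supported, random access deliberately is not:

import java.util.Iterator;

import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.MutableBitmap;
import io.druid.segment.data.BitmapCompressedIndexedInts;

class IndexedIntsSketch
{
  public static void main(String[] args)
  {
    BitmapFactory bf = new ConciseBitmapFactory();
    MutableBitmap m = bf.makeEmptyMutableBitmap();
    m.add(2);
    m.add(5);
    BitmapCompressedIndexedInts ints = new BitmapCompressedIndexedInts(bf.makeImmutableBitmap(m));

    Iterator<Integer> it = ints.iterator();
    while (it.hasNext()) {
      System.out.println(it.next()); // 2, then 5
    }
    // ints.get(0) would throw UnsupportedOperationException, as documented above.
  }
}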
View File

@ -0,0 +1,37 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment.data;
import com.metamx.common.ISE;
public class BitmapSerde
{
// default bitmap indices for Druid >= 0.7.x
public static class DefaultBitmapSerdeFactory extends ConciseBitmapSerdeFactory {}
// default bitmap indices in Druid <= 0.6.x
public static class LegacyBitmapSerdeFactory extends ConciseBitmapSerdeFactory {}
public static BitmapSerdeFactory createLegacyFactory()
{
return new LegacyBitmapSerdeFactory();
}
}

View File

@ -17,31 +17,23 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.metadata;
package io.druid.segment.data;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import org.skife.jdbi.v2.tweak.ConnectionFactory;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class DerbyConnectionFactory implements ConnectionFactory
/**
*/
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = BitmapSerde.DefaultBitmapSerdeFactory.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "concise", value = ConciseBitmapSerdeFactory.class),
@JsonSubTypes.Type(name = "roaring", value = RoaringBitmapSerdeFactory.class)
})
public interface BitmapSerdeFactory
{
final private String dbName;
public ObjectStrategy<ImmutableBitmap> getObjectStrategy();
public DerbyConnectionFactory(String dbName) {
this.dbName = dbName;
}
public Connection openConnection() throws SQLException {
final String nsURL=String.format("jdbc:derby://localhost:1527/%s;create=true", dbName);
try {
Class.forName("org.apache.derby.jdbc.ClientDriver");
} catch (Exception e) {
throw Throwables.propagate(e);
}
return DriverManager.getConnection(nsURL);
}
public BitmapFactory getBitmapFactory();
}
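
The annotations above are the whole selection mechanism: segment metadata names a bitmap implementation through its "type" property, and a missing property falls back to defaultImpl. A minimal sketch, assuming only Jackson 2.x and this package on the classpath; class and variable names are illustrative.

import com.fasterxml.jackson.databind.ObjectMapper;

public class BitmapSerdeFactorySketch
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    // "type": "roaring" resolves via @JsonSubTypes
    BitmapSerdeFactory roaring =
        mapper.readValue("{\"type\": \"roaring\"}", BitmapSerdeFactory.class);
    // no "type" at all resolves via defaultImpl, i.e. concise bitmaps
    BitmapSerdeFactory fallback = mapper.readValue("{}", BitmapSerdeFactory.class);
    System.out.println(roaring.getBitmapFactory().getClass().getSimpleName());  // RoaringBitmapFactory
    System.out.println(fallback.getBitmapFactory().getClass().getSimpleName()); // ConciseBitmapFactory
  }
}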


@ -28,6 +28,7 @@ import com.metamx.common.IAE;
import com.metamx.common.guava.CloseQuietly;
import io.druid.collections.ResourceHolder;
import io.druid.collections.StupidResourceHolder;
import io.druid.segment.CompressedPools;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -42,7 +43,7 @@ public class CompressedFloatsIndexedSupplier implements Supplier<IndexedFloats>
{
public static final byte LZF_VERSION = 0x1;
public static final byte version = 0x2;
public static final int MAX_FLOATS_IN_BUFFER = (0xFFFF >> 2);
public static final int MAX_FLOATS_IN_BUFFER = CompressedPools.BUFFER_SIZE / Floats.BYTES;
private final int totalSize;
private final int sizePer;
@ -70,87 +71,28 @@ public class CompressedFloatsIndexedSupplier implements Supplier<IndexedFloats>
@Override
public IndexedFloats get()
{
return new IndexedFloats()
{
int currIndex = -1;
ResourceHolder<FloatBuffer> holder;
FloatBuffer buffer;
final int div = Integer.numberOfTrailingZeros(sizePer);
final int rem = sizePer - 1;
final boolean powerOf2 = sizePer == (1 << div);
if(powerOf2) {
return new CompressedIndexedFloats() {
@Override
public float get(int index)
{
// optimize division and remainder for powers of 2
final int bufferNum = index >> div;
@Override
public int size()
{
return totalSize;
}
@Override
public float get(int index)
{
int bufferNum = index / sizePer;
int bufferIndex = index % sizePer;
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
return buffer.get(buffer.position() + bufferIndex);
}
@Override
public void fill(int index, float[] toFill)
{
if (totalSize - index < toFill.length) {
throw new IndexOutOfBoundsException(
String.format(
"Cannot fill array of size[%,d] at index[%,d]. Max size[%,d]", toFill.length, index, totalSize
)
);
}
int bufferNum = index / sizePer;
int bufferIndex = index % sizePer;
int leftToFill = toFill.length;
while (leftToFill > 0) {
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
buffer.mark();
buffer.position(buffer.position() + bufferIndex);
final int numToGet = Math.min(buffer.remaining(), leftToFill);
buffer.get(toFill, toFill.length - leftToFill, numToGet);
buffer.reset();
leftToFill -= numToGet;
++bufferNum;
bufferIndex = 0;
final int bufferIndex = index & rem;
return buffer.get(buffer.position() + bufferIndex);
}
}
private void loadBuffer(int bufferNum)
{
CloseQuietly.close(holder);
holder = baseFloatBuffers.get(bufferNum);
buffer = holder.get();
currIndex = bufferNum;
}
@Override
public String toString()
{
return "CompressedFloatsIndexedSupplier_Anonymous{" +
"currIndex=" + currIndex +
", sizePer=" + sizePer +
", numChunks=" + baseFloatBuffers.size() +
", totalSize=" + totalSize +
'}';
}
@Override
public void close() throws IOException
{
Closeables.close(holder, false);
}
};
};
} else {
return new CompressedIndexedFloats();
}
}
public long getSerializedSize()
@ -185,11 +127,6 @@ public class CompressedFloatsIndexedSupplier implements Supplier<IndexedFloats>
return baseFloatBuffers;
}
public static int numFloatsInBuffer(int numFloatsInChunk)
{
return MAX_FLOATS_IN_BUFFER - (MAX_FLOATS_IN_BUFFER % numFloatsInChunk);
}
public static CompressedFloatsIndexedSupplier fromByteBuffer(ByteBuffer buffer, ByteOrder order)
{
byte versionFromBuffer = buffer.get();
@ -245,7 +182,7 @@ public class CompressedFloatsIndexedSupplier implements Supplier<IndexedFloats>
)
{
Preconditions.checkArgument(
chunkFactor * Floats.BYTES <= 0xffff, "Chunks must be <= 64k bytes. chunkFactor was[%s]", chunkFactor
chunkFactor <= MAX_FLOATS_IN_BUFFER, "Chunks must be <= 64k bytes. chunkFactor was[%s]", chunkFactor
);
return new CompressedFloatsIndexedSupplier(
@ -294,4 +231,85 @@ public class CompressedFloatsIndexedSupplier implements Supplier<IndexedFloats>
);
}
private class CompressedIndexedFloats implements IndexedFloats
{
int currIndex = -1;
ResourceHolder<FloatBuffer> holder;
FloatBuffer buffer;
@Override
public int size()
{
return totalSize;
}
@Override
public float get(final int index)
{
// division and remainder are optimized by the compiler, so keep them together
final int bufferNum = index / sizePer;
final int bufferIndex = index % sizePer;
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
return buffer.get(buffer.position() + bufferIndex);
}
@Override
public void fill(int index, float[] toFill)
{
if (totalSize - index < toFill.length) {
throw new IndexOutOfBoundsException(
String.format(
"Cannot fill array of size[%,d] at index[%,d]. Max size[%,d]", toFill.length, index, totalSize
)
);
}
int bufferNum = index / sizePer;
int bufferIndex = index % sizePer;
int leftToFill = toFill.length;
while (leftToFill > 0) {
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
buffer.mark();
buffer.position(buffer.position() + bufferIndex);
final int numToGet = Math.min(buffer.remaining(), leftToFill);
buffer.get(toFill, toFill.length - leftToFill, numToGet);
buffer.reset();
leftToFill -= numToGet;
++bufferNum;
bufferIndex = 0;
}
}
protected void loadBuffer(int bufferNum)
{
CloseQuietly.close(holder);
holder = baseFloatBuffers.get(bufferNum);
buffer = holder.get();
currIndex = bufferNum;
}
@Override
public String toString()
{
return "CompressedFloatsIndexedSupplier_Anonymous{" +
"currIndex=" + currIndex +
", sizePer=" + sizePer +
", numChunks=" + baseFloatBuffers.size() +
", totalSize=" + totalSize +
'}';
}
@Override
public void close() throws IOException
{
Closeables.close(holder, false);
}
}
}
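
The power-of-two fast path in get() above relies on two identities that hold for any non-negative index: index / 2^k == index >> k and index % 2^k == index & (2^k - 1). A standalone check (run with -ea), not part of the patch:

public class PowerOfTwoMathCheck
{
  public static void main(String[] args)
  {
    final int sizePer = 8192;                                // 2^13
    final int div = Integer.numberOfTrailingZeros(sizePer);  // 13
    final int rem = sizePer - 1;                             // 0x1FFF, the low-bit mask
    for (int index = 0; index < 100000; index++) {
      assert index / sizePer == index >> div;
      assert index % sizePer == (index & rem);
    }
    System.out.println("shift/mask agrees with div/mod for non-negative indexes");
  }
}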


@ -28,6 +28,7 @@ import com.metamx.common.IAE;
import com.metamx.common.guava.CloseQuietly;
import io.druid.collections.ResourceHolder;
import io.druid.collections.StupidResourceHolder;
import io.druid.segment.CompressedPools;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -42,6 +43,8 @@ public class CompressedLongsIndexedSupplier implements Supplier<IndexedLongs>
{
public static final byte LZF_VERSION = 0x1;
public static final byte version = 0x2;
public static final int MAX_LONGS_IN_BUFFER = CompressedPools.BUFFER_SIZE / Longs.BYTES;
private final int totalSize;
private final int sizePer;
@ -69,99 +72,28 @@ public class CompressedLongsIndexedSupplier implements Supplier<IndexedLongs>
@Override
public IndexedLongs get()
{
return new IndexedLongs()
{
int currIndex = -1;
ResourceHolder<LongBuffer> holder;
LongBuffer buffer;
final int div = Integer.numberOfTrailingZeros(sizePer);
final int rem = sizePer - 1;
final boolean powerOf2 = sizePer == (1 << div);
if(powerOf2) {
return new CompressedIndexedLongs() {
@Override
public long get(int index)
{
// optimize division and remainder for powers of 2
final int bufferNum = index >> div;
@Override
public int size()
{
return totalSize;
}
@Override
public long get(int index)
{
int bufferNum = index / sizePer;
int bufferIndex = index % sizePer;
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
return buffer.get(buffer.position() + bufferIndex);
}
@Override
public void fill(int index, long[] toFill)
{
if (totalSize - index < toFill.length) {
throw new IndexOutOfBoundsException(
String.format(
"Cannot fill array of size[%,d] at index[%,d]. Max size[%,d]", toFill.length, index, totalSize
)
);
}
int bufferNum = index / sizePer;
int bufferIndex = index % sizePer;
int leftToFill = toFill.length;
while (leftToFill > 0) {
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
buffer.mark();
buffer.position(buffer.position() + bufferIndex);
final int numToGet = Math.min(buffer.remaining(), leftToFill);
buffer.get(toFill, toFill.length - leftToFill, numToGet);
buffer.reset();
leftToFill -= numToGet;
++bufferNum;
bufferIndex = 0;
final int bufferIndex = index & rem;
return buffer.get(buffer.position() + bufferIndex);
}
}
private void loadBuffer(int bufferNum)
{
CloseQuietly.close(holder);
holder = baseLongBuffers.get(bufferNum);
buffer = holder.get();
currIndex = bufferNum;
}
@Override
public int binarySearch(long key)
{
throw new UnsupportedOperationException();
}
@Override
public int binarySearch(long key, int from, int to)
{
throw new UnsupportedOperationException();
}
@Override
public String toString()
{
return "CompressedLongsIndexedSupplier_Anonymous{" +
"currIndex=" + currIndex +
", sizePer=" + sizePer +
", numChunks=" + baseLongBuffers.size() +
", totalSize=" + totalSize +
'}';
}
@Override
public void close() throws IOException
{
Closeables.close(holder, false);
}
};
};
} else {
return new CompressedIndexedLongs();
}
}
public long getSerializedSize()
@ -227,7 +159,7 @@ public class CompressedLongsIndexedSupplier implements Supplier<IndexedLongs>
public static CompressedLongsIndexedSupplier fromLongBuffer(LongBuffer buffer, final ByteOrder byteOrder, CompressedObjectStrategy.CompressionStrategy compression)
{
return fromLongBuffer(buffer, 0xFFFF / Longs.BYTES, byteOrder, compression);
return fromLongBuffer(buffer, MAX_LONGS_IN_BUFFER, byteOrder, compression);
}
public static CompressedLongsIndexedSupplier fromLongBuffer(
@ -235,7 +167,7 @@ public class CompressedLongsIndexedSupplier implements Supplier<IndexedLongs>
)
{
Preconditions.checkArgument(
chunkFactor * Longs.BYTES <= 0xffff, "Chunks must be <= 64k bytes. chunkFactor was[%s]", chunkFactor
chunkFactor <= MAX_LONGS_IN_BUFFER, "Chunks must be <= 64k bytes. chunkFactor was[%s]", chunkFactor
);
return new CompressedLongsIndexedSupplier(
@ -284,4 +216,97 @@ public class CompressedLongsIndexedSupplier implements Supplier<IndexedLongs>
);
}
private class CompressedIndexedLongs implements IndexedLongs
{
int currIndex = -1;
ResourceHolder<LongBuffer> holder;
LongBuffer buffer;
@Override
public int size()
{
return totalSize;
}
@Override
public long get(int index)
{
final int bufferNum = index / sizePer;
final int bufferIndex = index % sizePer;
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
return buffer.get(buffer.position() + bufferIndex);
}
@Override
public void fill(int index, long[] toFill)
{
if (totalSize - index < toFill.length) {
throw new IndexOutOfBoundsException(
String.format(
"Cannot fill array of size[%,d] at index[%,d]. Max size[%,d]", toFill.length, index, totalSize
)
);
}
int bufferNum = index / sizePer;
int bufferIndex = index % sizePer;
int leftToFill = toFill.length;
while (leftToFill > 0) {
if (bufferNum != currIndex) {
loadBuffer(bufferNum);
}
buffer.mark();
buffer.position(buffer.position() + bufferIndex);
final int numToGet = Math.min(buffer.remaining(), leftToFill);
buffer.get(toFill, toFill.length - leftToFill, numToGet);
buffer.reset();
leftToFill -= numToGet;
++bufferNum;
bufferIndex = 0;
}
}
protected void loadBuffer(int bufferNum)
{
CloseQuietly.close(holder);
holder = baseLongBuffers.get(bufferNum);
buffer = holder.get();
currIndex = bufferNum;
}
@Override
public int binarySearch(long key)
{
throw new UnsupportedOperationException();
}
@Override
public int binarySearch(long key, int from, int to)
{
throw new UnsupportedOperationException();
}
@Override
public String toString()
{
return "CompressedLongsIndexedSupplier_Anonymous{" +
"currIndex=" + currIndex +
", sizePer=" + sizePer +
", numChunks=" + baseLongBuffers.size() +
", totalSize=" + totalSize +
'}';
}
@Override
public void close() throws IOException
{
Closeables.close(holder, false);
}
}
}
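
Why this change makes the fast path reachable: the old chunk sizes were derived from 0xFFFF (65535) and so fell one element short of a power of two, while the new ones divide CompressedPools.BUFFER_SIZE evenly. Assuming BUFFER_SIZE is 64KB (0x10000; the constant itself is not shown in this diff), the arithmetic works out as below.

public class ChunkSizeArithmetic
{
  public static void main(String[] args)
  {
    final int bufferSize = 0x10000;       // assumed value of CompressedPools.BUFFER_SIZE
    System.out.println(0xFFFF / 8);       // 8191  - old long chunk, not a power of two
    System.out.println(bufferSize / 8);   // 8192  - new long chunk, 2^13
    System.out.println(0xFFFF / 4);       // 16383 - old float chunk, not a power of two
    System.out.println(bufferSize / 4);   // 16384 - new float chunk, 2^14
    System.out.println(Integer.bitCount(bufferSize / 8) == 1); // true -> shift/mask applies
  }
}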


@ -25,6 +25,7 @@ import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import io.druid.collections.ResourceHolder;
import io.druid.collections.StupidResourceHolder;
import io.druid.segment.CompressedPools;
import java.io.IOException;
import java.io.OutputStream;
@ -39,11 +40,10 @@ public class CompressedLongsSupplierSerializer
IOPeon ioPeon, final String filenameBase, final ByteOrder order, final CompressedObjectStrategy.CompressionStrategy compression
) throws IOException
{
final int sizePer = 0xFFFF / Longs.BYTES;
final CompressedLongsSupplierSerializer retVal = new CompressedLongsSupplierSerializer(
sizePer,
CompressedLongsIndexedSupplier.MAX_LONGS_IN_BUFFER,
new GenericIndexedWriter<ResourceHolder<LongBuffer>>(
ioPeon, filenameBase, CompressedLongBufferObjectStrategy.getBufferForOrder(order, compression, sizePer)
ioPeon, filenameBase, CompressedLongBufferObjectStrategy.getBufferForOrder(order, compression, CompressedLongsIndexedSupplier.MAX_LONGS_IN_BUFFER)
),
compression
);


@ -0,0 +1,102 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment.data;
import com.google.common.collect.Ordering;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.WrappedImmutableConciseBitmap;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.nio.ByteBuffer;
/**
*/
public class ConciseBitmapSerdeFactory implements BitmapSerdeFactory
{
private static final ObjectStrategy<ImmutableBitmap> objectStrategy = new ImmutableConciseSetObjectStrategy();
private static final BitmapFactory bitmapFactory = new ConciseBitmapFactory();
@Override
public ObjectStrategy<ImmutableBitmap> getObjectStrategy()
{
return objectStrategy;
}
@Override
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
private static Ordering<WrappedImmutableConciseBitmap> conciseComparator = new Ordering<WrappedImmutableConciseBitmap>()
{
@Override
public int compare(
WrappedImmutableConciseBitmap conciseSet, WrappedImmutableConciseBitmap conciseSet1
)
{
if (conciseSet.size() == 0 && conciseSet1.size() == 0) {
return 0;
}
if (conciseSet.size() == 0) {
return -1;
}
if (conciseSet1.size() == 0) {
return 1;
}
return conciseSet.compareTo(conciseSet1);
}
}.nullsFirst();
private static class ImmutableConciseSetObjectStrategy
implements ObjectStrategy<ImmutableBitmap>
{
@Override
public Class<ImmutableBitmap> getClazz()
{
return ImmutableBitmap.class;
}
@Override
public WrappedImmutableConciseBitmap fromByteBuffer(ByteBuffer buffer, int numBytes)
{
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer();
readOnlyBuffer.limit(readOnlyBuffer.position() + numBytes);
return new WrappedImmutableConciseBitmap(new ImmutableConciseSet(readOnlyBuffer));
}
@Override
public byte[] toBytes(ImmutableBitmap val)
{
if (val == null || val.size() == 0) {
return new byte[]{};
}
return val.toBytes();
}
@Override
public int compare(ImmutableBitmap o1, ImmutableBitmap o2)
{
return conciseComparator.compare((WrappedImmutableConciseBitmap) o1, (WrappedImmutableConciseBitmap) o2);
}
}
}
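
A round-trip sketch through the strategy above, placed in io.druid.segment.data for brevity. Everything except makeImmutableBitmap(MutableBitmap), which is assumed from the metamx bitmap API, appears in this diff.

import java.nio.ByteBuffer;

import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;

public class ConciseRoundTripSketch
{
  public static void main(String[] args)
  {
    BitmapSerdeFactory serde = new ConciseBitmapSerdeFactory();
    MutableBitmap mutable = serde.getBitmapFactory().makeEmptyMutableBitmap();
    mutable.add(1);
    mutable.add(100);
    ImmutableBitmap bitmap = serde.getBitmapFactory().makeImmutableBitmap(mutable); // assumed
    byte[] bytes = serde.getObjectStrategy().toBytes(bitmap);
    ImmutableBitmap back =
        serde.getObjectStrategy().fromByteBuffer(ByteBuffer.wrap(bytes), bytes.length);
    System.out.println(back.size()); // 2
  }
}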


@ -1,146 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment.data;
import com.google.common.collect.Ordering;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import it.uniroma3.mat.extendedset.intset.IntSet;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.Iterator;
/**
*/
public class ConciseCompressedIndexedInts implements IndexedInts, Comparable<ConciseCompressedIndexedInts>
{
public static ObjectStrategy<ImmutableConciseSet> objectStrategy =
new ImmutableConciseSetObjectStrategy();
private static Ordering<ImmutableConciseSet> comparator = new Ordering<ImmutableConciseSet>()
{
@Override
public int compare(
@Nullable ImmutableConciseSet conciseSet, @Nullable ImmutableConciseSet conciseSet1
)
{
if (conciseSet.size() == 0 && conciseSet1.size() == 0) {
return 0;
}
if (conciseSet.size() == 0) {
return -1;
}
if (conciseSet1.size() == 0) {
return 1;
}
return conciseSet.compareTo(conciseSet1);
}
}.nullsFirst();
private final ImmutableConciseSet immutableConciseSet;
public ConciseCompressedIndexedInts(ImmutableConciseSet conciseSet)
{
this.immutableConciseSet = conciseSet;
}
@Override
public int compareTo(ConciseCompressedIndexedInts conciseCompressedIndexedInts)
{
return immutableConciseSet.compareTo(conciseCompressedIndexedInts.getImmutableConciseSet());
}
@Override
public int size()
{
return immutableConciseSet.size();
}
@Override
public int get(int index)
{
throw new UnsupportedOperationException("This is really slow, so it's just not supported.");
}
public ImmutableConciseSet getImmutableConciseSet()
{
return immutableConciseSet;
}
@Override
public Iterator<Integer> iterator()
{
return new Iterator<Integer>()
{
IntSet.IntIterator baseIterator = immutableConciseSet.iterator();
@Override
public boolean hasNext()
{
return baseIterator.hasNext();
}
@Override
public Integer next()
{
return baseIterator.next();
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
};
}
private static class ImmutableConciseSetObjectStrategy
implements ObjectStrategy<ImmutableConciseSet>
{
@Override
public Class<? extends ImmutableConciseSet> getClazz()
{
return ImmutableConciseSet.class;
}
@Override
public ImmutableConciseSet fromByteBuffer(ByteBuffer buffer, int numBytes)
{
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer();
readOnlyBuffer.limit(readOnlyBuffer.position() + numBytes);
return new ImmutableConciseSet(readOnlyBuffer);
}
@Override
public byte[] toBytes(ImmutableConciseSet val)
{
if (val == null || val.size() == 0) {
return new byte[]{};
}
return val.toBytes();
}
@Override
public int compare(ImmutableConciseSet o1, ImmutableConciseSet o2)
{
return comparator.compare(o1, o2);
}
}
}


@ -20,6 +20,7 @@
package io.druid.segment.data;
import com.google.common.collect.Ordering;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.spatial.ImmutableRTree;
import java.nio.ByteBuffer;
@ -28,9 +29,6 @@ import java.nio.ByteBuffer;
*/
public class IndexedRTree implements Comparable<IndexedRTree>
{
public static ObjectStrategy<ImmutableRTree> objectStrategy =
new ImmutableRTreeObjectStrategy();
private static Ordering<ImmutableRTree> comparator = new Ordering<ImmutableRTree>()
{
@Override
@ -69,9 +67,16 @@ public class IndexedRTree implements Comparable<IndexedRTree>
return immutableRTree;
}
private static class ImmutableRTreeObjectStrategy
public static class ImmutableRTreeObjectStrategy
implements ObjectStrategy<ImmutableRTree>
{
private final BitmapFactory bitmapFactory;
public ImmutableRTreeObjectStrategy(BitmapFactory bitmapFactory)
{
this.bitmapFactory = bitmapFactory;
}
@Override
public Class<? extends ImmutableRTree> getClazz()
{
@ -81,10 +86,9 @@ public class IndexedRTree implements Comparable<IndexedRTree>
@Override
public ImmutableRTree fromByteBuffer(ByteBuffer buffer, int numBytes)
{
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer();
readOnlyBuffer.limit(readOnlyBuffer.position() + numBytes);
return new ImmutableRTree(readOnlyBuffer);
return new ImmutableRTree(readOnlyBuffer, bitmapFactory);
}
@Override


@ -0,0 +1,103 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment.data;
import com.google.common.collect.Ordering;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.RoaringBitmapFactory;
import com.metamx.collections.bitmap.WrappedImmutableRoaringBitmap;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import java.nio.ByteBuffer;
/**
*/
public class RoaringBitmapSerdeFactory implements BitmapSerdeFactory
{
private static final ObjectStrategy<ImmutableBitmap> objectStrategy = new ImmutableRoaringBitmapObjectStrategy();
private static final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
@Override
public ObjectStrategy<ImmutableBitmap> getObjectStrategy()
{
return objectStrategy;
}
@Override
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
private static Ordering<WrappedImmutableRoaringBitmap> roaringComparator = new Ordering<WrappedImmutableRoaringBitmap>()
{
@Override
public int compare(
WrappedImmutableRoaringBitmap set1, WrappedImmutableRoaringBitmap set2
)
{
if (set1.size() == 0 && set2.size() == 0) {
return 0;
}
if (set1.size() == 0) {
return -1;
}
if (set2.size() == 0) {
return 1;
}
return set1.compareTo(set2);
}
}.nullsFirst();
private static class ImmutableRoaringBitmapObjectStrategy
implements ObjectStrategy<ImmutableBitmap>
{
@Override
public Class<ImmutableBitmap> getClazz()
{
return ImmutableBitmap.class;
}
@Override
public ImmutableBitmap fromByteBuffer(ByteBuffer buffer, int numBytes)
{
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer();
readOnlyBuffer.limit(readOnlyBuffer.position() + numBytes);
return new WrappedImmutableRoaringBitmap(new ImmutableRoaringBitmap(readOnlyBuffer));
}
@Override
public byte[] toBytes(ImmutableBitmap val)
{
if (val == null || val.size() == 0) {
return new byte[]{};
}
return val.toBytes();
}
@Override
public int compare(ImmutableBitmap o1, ImmutableBitmap o2)
{
return roaringComparator.compare((WrappedImmutableRoaringBitmap) o1, (WrappedImmutableRoaringBitmap) o2);
}
}
}


@ -20,11 +20,11 @@
package io.druid.segment.filter;
import com.google.common.collect.Lists;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.util.List;
@ -42,18 +42,18 @@ public class AndFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
{
if (filters.size() == 1) {
return filters.get(0).goConcise(selector);
return filters.get(0).getBitmapIndex(selector);
}
List<ImmutableConciseSet> conciseSets = Lists.newArrayList();
List<ImmutableBitmap> bitmaps = Lists.newArrayList();
for (int i = 0; i < filters.size(); i++) {
conciseSets.add(filters.get(i).goConcise(selector));
bitmaps.add(filters.get(i).getBitmapIndex(selector));
}
return ImmutableConciseSet.intersection(conciseSets);
return selector.getBitmapFactory().intersection(bitmaps);
}
@Override

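All of the filter rewrites in this patch reduce boolean query logic to BitmapFactory set algebra in place of static ImmutableConciseSet calls. A self-contained sketch; bitmapOf is a hypothetical helper, and the intersection/union signatures follow the calls shown in this diff.

import java.util.Arrays;

import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;

public class FilterAlgebraSketch
{
  private static ImmutableBitmap bitmapOf(BitmapFactory factory, int... rows)
  {
    MutableBitmap mutable = factory.makeEmptyMutableBitmap();
    for (int row : rows) {
      mutable.add(row);
    }
    return factory.makeImmutableBitmap(mutable); // assumed factory method
  }

  public static void main(String[] args)
  {
    BitmapFactory factory = new ConciseBitmapFactory();
    ImmutableBitmap a = bitmapOf(factory, 0, 1, 2);
    ImmutableBitmap b = bitmapOf(factory, 1, 2, 3);
    System.out.println(factory.intersection(Arrays.asList(a, b)).size()); // 2 -> AND
    System.out.println(factory.union(Arrays.asList(a, b)).size());        // 4 -> OR
  }
}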

@ -21,13 +21,13 @@ package io.druid.segment.filter;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.common.guava.FunctionalIterable;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.data.Indexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import javax.annotation.Nullable;
@ -48,23 +48,23 @@ class DimensionPredicateFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(final BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector)
{
Indexed<String> dimValues = selector.getDimensionValues(dimension);
if (dimValues == null || dimValues.size() == 0 || predicate == null) {
return new ImmutableConciseSet();
return selector.getBitmapFactory().makeEmptyImmutableBitmap();
}
return ImmutableConciseSet.union(
return selector.getBitmapFactory().union(
FunctionalIterable.create(dimValues)
.filter(predicate)
.transform(
new Function<String, ImmutableConciseSet>()
new Function<String, ImmutableBitmap>()
{
@Override
public ImmutableConciseSet apply(@Nullable String input)
public ImmutableBitmap apply(@Nullable String input)
{
return selector.getConciseInvertedIndex(dimension, input);
return selector.getBitmapIndex(dimension, input);
}
}
)


@ -20,13 +20,13 @@
package io.druid.segment.filter;
import com.google.common.collect.Lists;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.extraction.DimExtractionFn;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.data.Indexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.util.List;
@ -34,8 +34,6 @@ import java.util.List;
*/
public class ExtractionFilter implements Filter
{
private static final int MAX_SIZE = 50000;
private final String dimension;
private final String value;
private final DimExtractionFn fn;
@ -67,9 +65,9 @@ public class ExtractionFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
{
return new OrFilter(makeFilters(selector)).goConcise(selector);
return new OrFilter(makeFilters(selector)).getBitmapIndex(selector);
}
@Override


@ -21,13 +21,13 @@ package io.druid.segment.filter;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.common.guava.FunctionalIterable;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.data.Indexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
@ -46,39 +46,42 @@ public class JavaScriptFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(final BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector)
{
final Context cx = Context.enter();
try {
final Indexed<String> dimValues = selector.getDimensionValues(dimension);
ImmutableConciseSet conciseSet;
ImmutableBitmap bitmap;
if (dimValues == null) {
conciseSet = new ImmutableConciseSet();
bitmap = selector.getBitmapFactory().makeEmptyImmutableBitmap();
} else {
conciseSet = ImmutableConciseSet.union(
bitmap = selector.getBitmapFactory().union(
FunctionalIterable.create(dimValues)
.filter(new Predicate<String>()
{
@Override
public boolean apply(@Nullable String input)
{
return predicate.applyInContext(cx, input);
}
})
.transform(
new com.google.common.base.Function<String, ImmutableConciseSet>()
{
@Override
public ImmutableConciseSet apply(@Nullable String input)
{
return selector.getConciseInvertedIndex(dimension, input);
}
}
)
.filter(
new Predicate<String>()
{
@Override
public boolean apply(@Nullable String input)
{
return predicate.applyInContext(cx, input);
}
}
)
.transform(
new com.google.common.base.Function<String, ImmutableBitmap>()
{
@Override
public ImmutableBitmap apply(@Nullable String input)
{
return selector.getBitmapIndex(dimension, input);
}
}
)
);
}
return conciseSet;
} finally {
return bitmap;
}
finally {
Context.exit();
}
}
@ -107,7 +110,8 @@ public class JavaScriptFilter implements Filter
scope = cx.initStandardObjects();
fnApply = cx.compileFunction(scope, script, "script", 1, null);
} finally {
}
finally {
Context.exit();
}
}
@ -119,7 +123,8 @@ public class JavaScriptFilter implements Filter
final Context cx = Context.enter();
try {
return applyInContext(cx, input);
} finally {
}
finally {
Context.exit();
}


@ -19,11 +19,11 @@
package io.druid.segment.filter;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
/**
*/
@ -39,10 +39,10 @@ public class NotFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
{
return ImmutableConciseSet.complement(
baseFilter.goConcise(selector),
return selector.getBitmapFactory().complement(
baseFilter.getBitmapIndex(selector),
selector.getNumRows()
);
}
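
complement() takes the row count so the negation is bounded by the segment's last row rather than by the bitmap's internal word size. A small sketch under the same assumed factory API:

import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;

public class NotFilterSketch
{
  public static void main(String[] args)
  {
    BitmapFactory factory = new ConciseBitmapFactory();
    MutableBitmap mutable = factory.makeEmptyMutableBitmap();
    mutable.add(1);
    ImmutableBitmap matched = factory.makeImmutableBitmap(mutable); // assumed factory method
    // 5 rows total: NOT {1} must be {0, 2, 3, 4}, hence the explicit bound
    ImmutableBitmap notMatched = factory.complement(matched, 5);
    System.out.println(notMatched.size()); // 4
  }
}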


@ -20,11 +20,11 @@
package io.druid.segment.filter;
import com.google.common.collect.Lists;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.util.List;
@ -46,18 +46,18 @@ public class OrFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
{
if (filters.size() == 1) {
return filters.get(0).goConcise(selector);
return filters.get(0).getBitmapIndex(selector);
}
List<ImmutableConciseSet> conciseSets = Lists.newArrayList();
List<ImmutableBitmap> bitmaps = Lists.newArrayList();
for (int i = 0; i < filters.size(); i++) {
conciseSets.add(filters.get(i).goConcise(selector));
bitmaps.add(filters.get(i).getBitmapIndex(selector));
}
return ImmutableConciseSet.union(conciseSets);
return selector.getBitmapFactory().union(bitmaps);
}
@Override


@ -19,11 +19,11 @@
package io.druid.segment.filter;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
/**
*/
@ -42,9 +42,9 @@ public class SelectorFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
{
return selector.getConciseInvertedIndex(dimension, value);
return selector.getBitmapIndex(dimension, value);
}
@Override


@ -18,12 +18,12 @@
*/
package io.druid.segment.filter;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.spatial.search.Bound;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
/**
*/
@ -42,9 +42,13 @@ public class SpatialFilter implements Filter
}
@Override
public ImmutableConciseSet goConcise(final BitmapIndexSelector selector)
public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector)
{
return ImmutableConciseSet.union(selector.getSpatialIndex(dimension).search(bound));
Iterable<ImmutableBitmap> search = selector.getSpatialIndex(dimension).search(bound);
for (ImmutableBitmap immutableBitmap : search) {
System.out.println(immutableBitmap);
}
return selector.getBitmapFactory().union(search);
}
@Override


@ -21,6 +21,8 @@ package io.druid.segment.incremental;
import com.google.common.base.Function;
import com.google.common.collect.Maps;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.common.guava.FunctionalIterable;
import com.metamx.common.logger.Logger;
import io.druid.segment.IndexableAdapter;
@ -31,9 +33,8 @@ import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.data.IndexedIterable;
import io.druid.segment.data.ListIndexed;
import it.uniroma3.mat.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.IntSet;
import org.joda.time.Interval;
import org.roaringbitmap.IntIterator;
import javax.annotation.Nullable;
import java.util.Iterator;
@ -46,10 +47,10 @@ public class IncrementalIndexAdapter implements IndexableAdapter
private static final Logger log = new Logger(IncrementalIndexAdapter.class);
private final Interval dataInterval;
private final IncrementalIndex index;
private final Map<String, Map<String, ConciseSet>> invertedIndexes;
private final Map<String, Map<String, MutableBitmap>> invertedIndexes;
public IncrementalIndexAdapter(
Interval dataInterval, IncrementalIndex index
Interval dataInterval, IncrementalIndex index, BitmapFactory bitmapFactory
)
{
this.dataInterval = dataInterval;
@ -58,7 +59,7 @@ public class IncrementalIndexAdapter implements IndexableAdapter
this.invertedIndexes = Maps.newHashMap();
for (String dimension : index.getDimensions()) {
invertedIndexes.put(dimension, Maps.<String, ConciseSet>newHashMap());
invertedIndexes.put(dimension, Maps.<String, MutableBitmap>newHashMap());
}
int rowNum = 0;
@ -67,10 +68,10 @@ public class IncrementalIndexAdapter implements IndexableAdapter
for (String dimension : index.getDimensions()) {
int dimIndex = index.getDimensionIndex(dimension);
Map<String, ConciseSet> conciseSets = invertedIndexes.get(dimension);
Map<String, MutableBitmap> bitmapIndexes = invertedIndexes.get(dimension);
if (conciseSets == null || dims == null) {
log.error("conciseSets and dims are null!");
if (bitmapIndexes == null || dims == null) {
log.error("bitmapIndexes and dims are null!");
continue;
}
if (dimIndex >= dims.length || dims[dimIndex] == null) {
@ -78,15 +79,15 @@ public class IncrementalIndexAdapter implements IndexableAdapter
}
for (String dimValue : dims[dimIndex]) {
ConciseSet conciseSet = conciseSets.get(dimValue);
MutableBitmap mutableBitmap = bitmapIndexes.get(dimValue);
if (conciseSet == null) {
conciseSet = new ConciseSet();
conciseSets.put(dimValue, conciseSet);
if (mutableBitmap == null) {
mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
bitmapIndexes.put(dimValue, mutableBitmap);
}
try {
conciseSet.add(rowNum);
mutableBitmap.add(rowNum);
}
catch (Exception e) {
log.info(e.toString());
@ -220,17 +221,17 @@ public class IncrementalIndexAdapter implements IndexableAdapter
}
@Override
public IndexedInts getInverteds(String dimension, String value)
public IndexedInts getBitmapIndex(String dimension, String value)
{
Map<String, ConciseSet> dimInverted = invertedIndexes.get(dimension);
Map<String, MutableBitmap> dimInverted = invertedIndexes.get(dimension);
if (dimInverted == null) {
return new EmptyIndexedInts();
}
final ConciseSet conciseSet = dimInverted.get(value);
final MutableBitmap bitmapIndex = dimInverted.get(value);
if (conciseSet == null) {
if (bitmapIndex == null) {
return new EmptyIndexedInts();
}
@ -239,7 +240,7 @@ public class IncrementalIndexAdapter implements IndexableAdapter
@Override
public int size()
{
return conciseSet.size();
return bitmapIndex.size();
}
@Override
@ -253,7 +254,7 @@ public class IncrementalIndexAdapter implements IndexableAdapter
{
return new Iterator<Integer>()
{
IntSet.IntIterator baseIter = conciseSet.iterator();
IntIterator baseIter = bitmapIndex.iterator();
@Override
public boolean hasNext()

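The adapter's build loop above amounts to one MutableBitmap per (dimension, value) pair, with each row number added as the rows are scanned. A standalone sketch of that shape for a single dimension, using only calls that appear in this diff:

import java.util.HashMap;
import java.util.Map;

import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ConciseBitmapFactory;
import com.metamx.collections.bitmap.MutableBitmap;

public class InvertedIndexSketch
{
  public static void main(String[] args)
  {
    BitmapFactory bitmapFactory = new ConciseBitmapFactory();
    String[][] rows = {{"a"}, {"a", "b"}, {"b"}}; // three rows of one dimension
    Map<String, MutableBitmap> invertedIndex = new HashMap<String, MutableBitmap>();
    for (int rowNum = 0; rowNum < rows.length; rowNum++) {
      for (String dimValue : rows[rowNum]) {
        MutableBitmap mutableBitmap = invertedIndex.get(dimValue);
        if (mutableBitmap == null) {
          mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
          invertedIndex.put(dimValue, mutableBitmap);
        }
        mutableBitmap.add(rowNum);
      }
    }
    System.out.println(invertedIndex.get("a").size()); // 2 -> rows {0, 1}
    System.out.println(invertedIndex.get("b").size()); // 2 -> rows {1, 2}
  }
}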

@ -20,23 +20,26 @@
package io.druid.segment.serde;
import com.google.common.base.Supplier;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.segment.column.BitmapIndex;
import io.druid.segment.data.GenericIndexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
/**
*/
*/
public class BitmapIndexColumnPartSupplier implements Supplier<BitmapIndex>
{
private static final ImmutableConciseSet EMPTY_SET = new ImmutableConciseSet();
private final GenericIndexed<ImmutableConciseSet> bitmaps;
private final BitmapFactory bitmapFactory;
private final GenericIndexed<ImmutableBitmap> bitmaps;
private final GenericIndexed<String> dictionary;
public BitmapIndexColumnPartSupplier(
GenericIndexed<ImmutableConciseSet> bitmaps,
BitmapFactory bitmapFactory,
GenericIndexed<ImmutableBitmap> bitmaps,
GenericIndexed<String> dictionary
) {
)
{
this.bitmapFactory = bitmapFactory;
this.bitmaps = bitmaps;
this.dictionary = dictionary;
}
@ -65,22 +68,28 @@ public class BitmapIndexColumnPartSupplier implements Supplier<BitmapIndex>
}
@Override
public ImmutableConciseSet getConciseSet(String value)
public BitmapFactory getBitmapFactory()
{
final int index = dictionary.indexOf(value);
return getConciseSet(index);
return bitmapFactory;
}
@Override
public ImmutableConciseSet getConciseSet(int idx)
public ImmutableBitmap getBitmap(String value)
{
final int index = dictionary.indexOf(value);
return getBitmap(index);
}
@Override
public ImmutableBitmap getBitmap(int idx)
{
if (idx < 0) {
return EMPTY_SET;
return bitmapFactory.makeEmptyImmutableBitmap();
}
final ImmutableConciseSet bitmap = bitmaps.get(idx);
return bitmap == null ? EMPTY_SET : bitmap;
final ImmutableBitmap bitmap = bitmaps.get(idx);
return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap;
}
};
}


@ -22,18 +22,19 @@ package io.druid.segment.serde;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.primitives.Ints;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.common.IAE;
import io.druid.segment.column.ColumnBuilder;
import io.druid.segment.column.ColumnConfig;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.BitmapSerde;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.ByteBufferSerializer;
import io.druid.segment.data.ConciseCompressedIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.IndexedRTree;
import io.druid.segment.data.VSizeIndexed;
import io.druid.segment.data.VSizeIndexedInts;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -44,11 +45,12 @@ import java.nio.channels.WritableByteChannel;
public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
{
private final boolean isSingleValued;
private final BitmapSerdeFactory bitmapSerdeFactory;
private final GenericIndexed<String> dictionary;
private final VSizeIndexedInts singleValuedColumn;
private final VSizeIndexed multiValuedColumn;
private final GenericIndexed<ImmutableConciseSet> bitmaps;
private final GenericIndexed<ImmutableBitmap> bitmaps;
private final ImmutableRTree spatialIndex;
private final long size;
@ -57,11 +59,14 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
GenericIndexed<String> dictionary,
VSizeIndexedInts singleValCol,
VSizeIndexed multiValCol,
GenericIndexed<ImmutableConciseSet> bitmaps,
BitmapSerdeFactory bitmapSerdeFactory,
GenericIndexed<ImmutableBitmap> bitmaps,
ImmutableRTree spatialIndex
)
{
this.isSingleValued = multiValCol == null;
this.bitmapSerdeFactory = bitmapSerdeFactory;
this.dictionary = dictionary;
this.singleValuedColumn = singleValCol;
this.multiValuedColumn = multiValCol;
@ -86,10 +91,14 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
@JsonCreator
public DictionaryEncodedColumnPartSerde(
@JsonProperty("isSingleValued") boolean isSingleValued
@JsonProperty("isSingleValued") boolean isSingleValued,
@JsonProperty("bitmapSerdeFactory") BitmapSerdeFactory bitmapSerdeFactory
)
{
this.isSingleValued = isSingleValued;
this.bitmapSerdeFactory = bitmapSerdeFactory == null
? new BitmapSerde.LegacyBitmapSerdeFactory()
: bitmapSerdeFactory;
this.dictionary = null;
this.singleValuedColumn = null;
@ -105,6 +114,12 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
return isSingleValued;
}
@JsonProperty
public BitmapSerdeFactory getBitmapSerdeFactory()
{
return bitmapSerdeFactory;
}
@Override
public long numBytes()
{
@ -135,7 +150,11 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
}
if (spatialIndex != null) {
ByteBufferSerializer.writeToChannel(spatialIndex, IndexedRTree.objectStrategy, channel);
ByteBufferSerializer.writeToChannel(
spatialIndex,
new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory()),
channel
);
}
}
@ -153,23 +172,43 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
singleValuedColumn = VSizeIndexedInts.readFromByteBuffer(buffer);
multiValuedColumn = null;
builder.setHasMultipleValues(false)
.setDictionaryEncodedColumn(new DictionaryEncodedColumnSupplier(dictionary, singleValuedColumn, null, columnConfig.columnCacheSizeBytes()));
.setDictionaryEncodedColumn(
new DictionaryEncodedColumnSupplier(
dictionary,
singleValuedColumn,
null,
columnConfig.columnCacheSizeBytes()
)
);
} else {
singleValuedColumn = null;
multiValuedColumn = VSizeIndexed.readFromByteBuffer(buffer);
builder.setHasMultipleValues(true)
.setDictionaryEncodedColumn(new DictionaryEncodedColumnSupplier(dictionary, null, multiValuedColumn, columnConfig.columnCacheSizeBytes()));
.setDictionaryEncodedColumn(
new DictionaryEncodedColumnSupplier(
dictionary,
null,
multiValuedColumn,
columnConfig.columnCacheSizeBytes()
)
);
}
GenericIndexed<ImmutableConciseSet> bitmaps = GenericIndexed.read(
buffer, ConciseCompressedIndexedInts.objectStrategy
GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.read(
buffer, bitmapSerdeFactory.getObjectStrategy()
);
builder.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
bitmapSerdeFactory.getBitmapFactory(),
bitmaps,
dictionary
)
);
builder.setBitmapIndex(new BitmapIndexColumnPartSupplier(bitmaps, dictionary));
ImmutableRTree spatialIndex = null;
if (buffer.hasRemaining()) {
spatialIndex = ByteBufferSerializer.read(
buffer, IndexedRTree.objectStrategy
buffer, new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory())
);
builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(spatialIndex));
}
@ -178,6 +217,7 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
dictionary,
singleValuedColumn,
multiValuedColumn,
bitmapSerdeFactory,
bitmaps,
spatialIndex
);

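The null check in the @JsonCreator constructor above is the backwards-compatibility hook: column metadata written before bitmapSerdeFactory existed binds the property as null and silently gets the legacy (concise) serde. A sketch calling the public creator constructor directly:

public class LegacyFallbackSketch
{
  public static void main(String[] args)
  {
    // old-style metadata carries no serde factory, so the creator sees null
    DictionaryEncodedColumnPartSerde serde =
        new DictionaryEncodedColumnPartSerde(true, null);
    System.out.println(serde.getBitmapSerdeFactory().getClass().getSimpleName());
    // -> LegacyBitmapSerdeFactory (concise bitmaps)
  }
}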

@ -9,6 +9,7 @@ import com.metamx.common.guava.Sequences;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.timeseries.TimeseriesQuery;
import io.druid.query.timeseries.TimeseriesQueryQueryToolChest;
import io.druid.query.timeseries.TimeseriesResultValue;
import io.druid.segment.SegmentMissingException;
import org.joda.time.DateTime;
@ -63,14 +64,21 @@ public class RetryQueryRunnerTest
return Sequences.empty();
}
},
(QueryToolChest) new TimeseriesQueryQueryToolChest(
new QueryConfig()
),
new RetryQueryRunnerConfig()
{
private int numTries = 0;
private boolean returnPartialResults = true;
@Override
public int getNumTries() {
return 0;
}
public int numTries() { return numTries; }
public boolean returnPartialResults() { return returnPartialResults; }
@Override
public boolean isReturnPartialResults()
{
return true;
}
},
jsonMapper
);
@ -128,12 +136,15 @@ public class RetryQueryRunnerTest
}
}
},
(QueryToolChest) new TimeseriesQueryQueryToolChest(
new QueryConfig()
),
new RetryQueryRunnerConfig()
{
private int numTries = 1;
private boolean returnPartialResults = true;
public int numTries() { return numTries; }
public int getNumTries() { return numTries; }
public boolean returnPartialResults() { return returnPartialResults; }
},
@ -192,12 +203,15 @@ public class RetryQueryRunnerTest
}
}
},
(QueryToolChest) new TimeseriesQueryQueryToolChest(
new QueryConfig()
),
new RetryQueryRunnerConfig()
{
private int numTries = 4;
private boolean returnPartialResults = true;
public int numTries() { return numTries; }
public int getNumTries() { return numTries; }
public boolean returnPartialResults() { return returnPartialResults; }
},
@ -241,12 +255,15 @@ public class RetryQueryRunnerTest
return Sequences.empty();
}
},
(QueryToolChest) new TimeseriesQueryQueryToolChest(
new QueryConfig()
),
new RetryQueryRunnerConfig()
{
private int numTries = 1;
private boolean returnPartialResults = false;
public int numTries() { return numTries; }
public int getNumTries() { return numTries; }
public boolean returnPartialResults() { return returnPartialResults; }
},

Some files were not shown because too many files have changed in this diff.