mirror of https://github.com/apache/druid.git

more docs about how to use different versions of hadoop in druid

parent 6192602893
commit 4862852b43
@@ -6,7 +6,7 @@ There are two choices for batch data ingestion to your Druid cluster, you can us
 Which should I use?
 -------------------
 
-The [Indexing service](Indexing-service.html) is a node that can run as part of your Druid cluster and can accomplish a number of different types of indexing tasks. Even if all you care about is batch indexing, it provides for the encapsulation of things like the [database](MySQL.html) that is used for segment metadata and other things, so that your indexing tasks do not need to include such information. Long-term, the indexing service is going to be the preferred method of ingesting data.
+The [Indexing service](Indexing-service.html) is a node that can run as part of your Druid cluster and can accomplish a number of different types of indexing tasks. Even if all you care about is batch indexing, it provides for the encapsulation of things like the [database](MySQL.html) that is used for segment metadata and other things, so that your indexing tasks do not need to include such information. The indexing service was created so that external systems can programmatically interact with it and run periodic indexing tasks. Long-term, the indexing service is going to be the preferred method of ingesting data.
 
 The `HadoopDruidIndexer` runs Hadoop jobs in order to separate and index data segments. It takes advantage of Hadoop as a job scheduling and distributed job execution platform. It is a simple method if you already have Hadoop running and don’t want to spend the time configuring and deploying the [Indexing service](Indexing-service.html) just yet.
 
@@ -229,6 +229,7 @@ The schema of the Hadoop Index Task contains a task "type" and a Hadoop Index Co
 |--------|-----------|---------|
 |type|This should be "index_hadoop".|yes|
 |config|A Hadoop Index Config.|yes|
+|hadoopCoordinates|The Maven `<groupId>:<artifactId>:<version>` of Hadoop to use. The default is "org.apache.hadoop:hadoop-core:1.0.3".|no|
 
 The Hadoop Index Config submitted as part of a Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer`, except that three fields must be omitted: `segmentOutputPath`, `workingPath`, and `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
 
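For concreteness, a minimal sketch of a Hadoop Index Task payload that overrides the Hadoop version. The `type` value and property names come from the table above; the datasource name, the alternate coordinates, and the truncated `config` body are illustrative only — a real `config` must contain a full Hadoop Index Config, minus the three fields noted above.

```json
{
  "type": "index_hadoop",
  "hadoopCoordinates": "org.apache.hadoop:hadoop-client:2.3.0",
  "config": {
    "dataSource": "example_datasource"
  }
}
```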
@@ -311,6 +311,8 @@ After the task is completed, the segment should be assigned to your historical n
 
 Next Steps
 ----------
+We demonstrated using the indexing service as a way to ingest data into Druid. Previous versions of Druid used the [HadoopDruidIndexer](Batch-ingestion.html) to ingest batch data. The `HadoopDruidIndexer` still remains a valid option for batch ingestion; however, we recommend using the indexing service as the preferred method of getting batch data into Druid.
+
 For more information on querying, check out this [tutorial](Tutorial%3A-All-About-Queries.html).
 
 Additional Information
@@ -19,27 +19,17 @@
 
 package io.druid.indexing.common.task;
 
-import com.fasterxml.jackson.annotation.JacksonInject;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
-import com.google.inject.Binder;
-import com.google.inject.Inject;
-import com.google.inject.Injector;
-import com.google.inject.Key;
-import com.google.inject.Module;
 import com.metamx.common.logger.Logger;
 import io.druid.common.utils.JodaUtils;
-import io.druid.guice.JsonConfigProvider;
-import io.druid.guice.annotations.Self;
 import io.druid.indexer.HadoopDruidIndexerConfig;
 import io.druid.indexer.HadoopDruidIndexerConfigBuilder;
 import io.druid.indexer.HadoopDruidIndexerJob;
@@ -49,7 +39,6 @@ import io.druid.indexing.common.TaskStatus;
 import io.druid.indexing.common.TaskToolbox;
 import io.druid.indexing.common.actions.SegmentInsertAction;
 import io.druid.initialization.Initialization;
-import io.druid.server.DruidNode;
 import io.druid.server.initialization.ExtensionsConfig;
 import io.druid.timeline.DataSegment;
 import io.tesla.aether.internal.DefaultTeslaAether;
@@ -67,12 +56,10 @@ public class HadoopIndexTask extends AbstractTask
   private static final Logger log = new Logger(HadoopIndexTask.class);
   private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3";
 
-  private static final Injector injector;
   private static final ExtensionsConfig extensionsConfig;
 
   static {
-    injector = Initialization.makeStartupInjector();
-    extensionsConfig = injector.getInstance(ExtensionsConfig.class);
+    extensionsConfig = Initialization.makeStartupInjector().getInstance(ExtensionsConfig.class);
   }
 
   @JsonIgnore
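A side note on the `hadoopCoordinates` property documented above: this diff does not show how the task consumes it, but a self-contained sketch of the usual Jackson pattern for an optional JSON property with a compile-time default might look like the following. The class name and constructor shape are hypothetical; only the default coordinate string is taken from the docs and the `defaultHadoopCoordinates` field above.

```java
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

// Hypothetical example class; not the code from the diff above.
public class HadoopCoordinatesExample
{
  // Default Maven coordinates, matching the docs and the field in HadoopIndexTask.
  private static final String DEFAULT_HADOOP_COORDINATES = "org.apache.hadoop:hadoop-core:1.0.3";

  private final String hadoopCoordinates;

  @JsonCreator
  public HadoopCoordinatesExample(
      @JsonProperty("hadoopCoordinates") String hadoopCoordinates
  )
  {
    // If the task JSON omits "hadoopCoordinates", Jackson passes null; fall back to the default.
    this.hadoopCoordinates = hadoopCoordinates == null ? DEFAULT_HADOOP_COORDINATES : hadoopCoordinates;
  }

  @JsonProperty
  public String getHadoopCoordinates()
  {
    return hadoopCoordinates;
  }
}
```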