diff --git a/docs/content/Batch-ingestion.md b/docs/content/Batch-ingestion.md
index e3efaab8c65..d9b6ebfefca 100644
--- a/docs/content/Batch-ingestion.md
+++ b/docs/content/Batch-ingestion.md
@@ -6,7 +6,7 @@ There are two choices for batch data ingestion to your Druid cluster, you can us
 Which should I use?
 -------------------
 
-The [Indexing service](Indexing-service.html) is a node that can run as part of your Druid cluster and can accomplish a number of different types of indexing tasks. Even if all you care about is batch indexing, it provides for the encapsulation of things like the [database](MySQL.html) that is used for segment metadata and other things, so that your indexing tasks do not need to include such information. Long-term, the indexing service is going to be the preferred method of ingesting data.
+The [Indexing service](Indexing-service.html) is a node that can run as part of your Druid cluster and can accomplish a number of different types of indexing tasks. Even if all you care about is batch indexing, it encapsulates details such as the [database](MySQL.html) used for segment metadata, so that your indexing tasks do not need to include that information. The indexing service was created so that external systems could programmatically interact with it and run periodic indexing tasks. Long-term, the indexing service is going to be the preferred method of ingesting data.
 
 The `HadoopDruidIndexer` runs Hadoop jobs in order to separate and index data segments. It takes advantage of Hadoop as a job scheduling and distributed job execution platform. It is a simple method if you already have Hadoop running and don’t want to spend the time configuring and deploying the [Indexing service](Indexing-service.html) just yet.
 
@@ -229,6 +229,7 @@ The schema of the Hadoop Index Task contains a task "type" and a Hadoop Index Co
 |--------|-----------|---------|
 |type|This should be "index_hadoop".|yes|
 |config|A Hadoop Index Config.|yes|
+|hadoopCoordinates|The Maven `groupId:artifactId:version` coordinates of Hadoop to use. The default is "org.apache.hadoop:hadoop-core:1.0.3".|no|
 
 The Hadoop Index Config submitted as part of a Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
 
diff --git a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
index e12d551939d..e57aab5821e 100644
--- a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
+++ b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
@@ -311,6 +311,8 @@ After the task is completed, the segment should be assigned to your historical n
 Next Steps
 ----------
 
+We demonstrated using the indexing service as a way to ingest data into Druid. Previous versions of Druid used the [HadoopDruidIndexer](Batch-ingestion.html) to ingest batch data. The `HadoopDruidIndexer` remains a valid option for batch ingestion; however, we recommend using the indexing service as the preferred method of getting batch data into Druid.
+
 For more information on querying, check out this [tutorial](Tutorial%3A-All-About-Queries.html).
 
 Additional Information
diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java
index b9573a03bd4..f636db08d1b 100644
--- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java
+++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java
@@ -19,27 +19,17 @@
 package io.druid.indexing.common.task;
 
-import com.fasterxml.jackson.annotation.JacksonInject;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
-import com.google.inject.Binder;
-import com.google.inject.Inject;
-import com.google.inject.Injector;
-import com.google.inject.Key;
-import com.google.inject.Module;
 import com.metamx.common.logger.Logger;
 import io.druid.common.utils.JodaUtils;
-import io.druid.guice.JsonConfigProvider;
-import io.druid.guice.annotations.Self;
 import io.druid.indexer.HadoopDruidIndexerConfig;
 import io.druid.indexer.HadoopDruidIndexerConfigBuilder;
 import io.druid.indexer.HadoopDruidIndexerJob;
@@ -49,7 +39,6 @@ import io.druid.indexing.common.TaskStatus;
 import io.druid.indexing.common.TaskToolbox;
 import io.druid.indexing.common.actions.SegmentInsertAction;
 import io.druid.initialization.Initialization;
-import io.druid.server.DruidNode;
 import io.druid.server.initialization.ExtensionsConfig;
 import io.druid.timeline.DataSegment;
 import io.tesla.aether.internal.DefaultTeslaAether;
@@ -67,12 +56,10 @@ public class HadoopIndexTask extends AbstractTask
   private static final Logger log = new Logger(HadoopIndexTask.class);
   private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3";
 
-  private static final Injector injector;
   private static final ExtensionsConfig extensionsConfig;
 
   static {
-    injector = Initialization.makeStartupInjector();
-    extensionsConfig = injector.getInstance(ExtensionsConfig.class);
+    extensionsConfig = Initialization.makeStartupInjector().getInstance(ExtensionsConfig.class);
   }
 
   @JsonIgnore
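
A note on the new `hadoopCoordinates` field: the docs change above only adds the table row, so here is a minimal sketch of a Hadoop Index Task spec that sets it. The `type` value, the required `config`, the `hadoopCoordinates` default, and the rule that `segmentOutputPath`, `workingPath`, and `updaterJobSpec` must be omitted all come from the docs above; everything inside `config` (`dataSource`, `pathSpec`, `granularitySpec`, and their values) is a hypothetical placeholder, not a complete or authoritative Hadoop Index Config.

```json
{
  "type": "index_hadoop",
  "hadoopCoordinates": "org.apache.hadoop:hadoop-core:1.0.3",
  "config": {
    "dataSource": "example_datasource",
    "pathSpec": {
      "type": "static",
      "paths": "hdfs://namenode:9000/example/data.json"
    },
    "granularitySpec": {
      "type": "uniform",
      "gran": "DAY",
      "intervals": ["2013-08-31/2013-09-01"]
    }
  }
}
```

Omitting `hadoopCoordinates` entirely is equivalent to the spec above, since "org.apache.hadoop:hadoop-core:1.0.3" is the documented default.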
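On the Java side, the `HadoopIndexTask` hunk above is a small cleanup: instead of storing a static `Injector` whose only purpose is to produce one `ExtensionsConfig`, the patched code chains the two calls and keeps only the result. Below is a minimal self-contained sketch of that pattern, using plain Guice and a hypothetical `ExampleConfig` class in place of Druid's `Initialization` and `ExtensionsConfig`.

```java
import com.google.inject.Guice;

public class StaticInitExample
{
  // Hypothetical stand-in for Druid's ExtensionsConfig.
  public static class ExampleConfig
  {
  }

  // Before the patch, the class did the equivalent of:
  //
  //   private static final Injector injector;
  //   private static final ExampleConfig config;
  //   static {
  //     injector = Guice.createInjector();
  //     config = injector.getInstance(ExampleConfig.class);
  //   }
  //
  // which pins the Injector in a static field for the life of the JVM even
  // though it is consulted exactly once.

  // After the patch: build the injector, pull out the one object that is
  // actually needed, and let the injector itself become unreachable.
  private static final ExampleConfig config =
      Guice.createInjector().getInstance(ExampleConfig.class);

  public static void main(String[] args)
  {
    System.out.println("Loaded config: " + config);
  }
}
```

Besides letting the now-unused `com.google.inject.*` imports be dropped, the chained form makes clear at a glance that the injector has no other consumers in the class.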