Merge pull request #2457 from bjozet/docs/fixes

Default value for maxRowsInMemory
2016-03-01 07:43:26 -08:00 · 2016-03-01 07:43:26 -08:00 · 9340cae985
parent 31b502773a 2462c82c0e
commit 9340cae985
8 changed files with 8 additions and 8 deletions
--- a/docs/content/configuration/indexing-service.md
+++ b/docs/content/configuration/indexing-service.md
@ -279,7 +279,7 @@ Additional peon configs include:
 |`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`|
 |`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/tasks`|
 |`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.3.0|
-|`druid.indexer.task.defaultRowFlushBoundary`|Highest row count before persisting to disk. Used for indexing generating tasks.|50000|
+|`druid.indexer.task.defaultRowFlushBoundary`|Highest row count before persisting to disk. Used for indexing generating tasks.|75000|
 |`druid.indexer.task.directoryLockTimeout`|Wait this long for zombie peons to exit before giving up on their replacements.|PT10M|
 |`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M|
 |`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
--- a/docs/content/ingestion/batch-ingestion.md
+++ b/docs/content/ingestion/batch-ingestion.md
@ -154,7 +154,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
 |workingPath|String|The working path to use for intermediate results (results between Hadoop jobs).|no (default == '/tmp/druid-indexing')|
 |version|String|The version of created segments.|no (default == datetime that indexing starts at)|
 |partitionsSpec|Object|A specification of how to partition each time bucket into segments, absence of this property means no partitioning will occur.More details below.|no (default == 'hashed')|
-|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size.|no (default == 5 million)|
+|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size.|no (default == 75000)|
 |leaveIntermediate|Boolean|Leave behind intermediate files (for debugging) in the workingPath when a job completes, whether it passes or fails.|no (default == false)|
 |cleanupOnFailure|Boolean|Clean up intermediate files when a job fails (unless leaveIntermediate is on).|no (default == true)|
 |overwriteFiles|Boolean|Override existing files found during indexing.|no (default == false)|
--- a/docs/content/ingestion/stream-pull.md
+++ b/docs/content/ingestion/stream-pull.md
@ -142,7 +142,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
 |Field|Type|Description|Required|
 |-----|----|-----------|--------|
 |type|String|This should always be 'realtime'.|no|
-|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|no (default == 500000)|
+|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|no (default == 75000)|
 |windowPeriod|ISO 8601 Period String|The amount of lag time to allow events. This is configured with a 10 minute window, meaning that any event more than 10 minutes ago will be thrown away and not included in the segment generated by the realtime server.|no (default == PT10m)|
 |intermediatePersistPeriod|ISO8601 Period String|The period that determines the rate at which intermediate persists occur. These persists determine how often commits happen against the incoming realtime stream. If the realtime data loading process is interrupted at time T, it should be restarted to re-read data that arrived at T minus this period.|no (default == PT10m)|
 |basePersistDirectory|String|The directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.|no (default == java tmp dir)|
--- a/docs/content/ingestion/tasks.md
+++ b/docs/content/ingestion/tasks.md
@ -110,7 +110,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
 |--------|-----------|-------|---------|
 |type|The task type, this should always be "index".|None.|yes|
 |targetPartitionSize|Used in sharding. Determines how many rows are in each segment. Set this to -1 to use numShards instead for sharding.|5000000|no|
-|rowFlushBoundary|Used in determining when intermediate persist should occur to disk.|500000|no|
+|rowFlushBoundary|Used in determining when intermediate persist should occur to disk.|75000|no|
 |numShards|Directly specify the number of shards to create. You can skip the intermediate persist step if you specify the number of shards you want and set targetPartitionSize=-1.|null|no|
 |indexSpec|defines segment storage format options to be used at indexing time, see [IndexSpec](#indexspec)|null|no|

--- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java
@ -41,7 +41,7 @@ public class HadoopTuningConfig implements TuningConfig
  private static final PartitionsSpec DEFAULT_PARTITIONS_SPEC = HashedPartitionsSpec.makeDefaultHashedPartitionsSpec();
  private static final Map<DateTime, List<HadoopyShardSpec>> DEFAULT_SHARD_SPECS = ImmutableMap.of();
  private static final IndexSpec DEFAULT_INDEX_SPEC = new IndexSpec();
-  private static final int DEFAULT_ROW_FLUSH_BOUNDARY = 80000;
+  private static final int DEFAULT_ROW_FLUSH_BOUNDARY = 75000;
  private static final boolean DEFAULT_USE_COMBINER = false;
  private static final Boolean DEFAULT_BUILD_V9_DIRECTLY = Boolean.FALSE;
  private static final int DEFAULT_NUM_BACKGROUND_PERSIST_THREADS = 0;
--- a/indexing-service/src/main/java/io/druid/indexing/common/config/TaskConfig.java
+++ b/indexing-service/src/main/java/io/druid/indexing/common/config/TaskConfig.java
@ -78,7 +78,7 @@ public class TaskConfig
    this.baseTaskDir = new File(defaultDir(baseTaskDir, "persistent/task"));
    // This is usually on HDFS or similar, so we can't use java.io.tmpdir
    this.hadoopWorkingPath = hadoopWorkingPath == null ? "/tmp/druid-indexing" : hadoopWorkingPath;
-    this.defaultRowFlushBoundary = defaultRowFlushBoundary == null ? 500000 : defaultRowFlushBoundary;
+    this.defaultRowFlushBoundary = defaultRowFlushBoundary == null ? 75000 : defaultRowFlushBoundary;
    this.defaultHadoopCoordinates = defaultHadoopCoordinates == null
                                    ? DEFAULT_DEFAULT_HADOOP_COORDINATES
                                    : defaultHadoopCoordinates;
--- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java
+++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java
@ -499,7 +499,7 @@ public class IndexTask extends AbstractFixedIntervalTask
  public static class IndexTuningConfig implements TuningConfig
  {
    private static final int DEFAULT_TARGET_PARTITION_SIZE = 5000000;
-    private static final int DEFAULT_ROW_FLUSH_BOUNDARY = 500000;
+    private static final int DEFAULT_ROW_FLUSH_BOUNDARY = 75000;
    private static final IndexSpec DEFAULT_INDEX_SPEC = new IndexSpec();
    private static final Boolean DEFAULT_BUILD_V9_DIRECTLY = Boolean.FALSE;

--- a/server/src/main/java/io/druid/segment/indexing/RealtimeTuningConfig.java
+++ b/server/src/main/java/io/druid/segment/indexing/RealtimeTuningConfig.java
@ -37,7 +37,7 @@ import java.io.File;
 */
 public class RealtimeTuningConfig implements TuningConfig
 {
-  private static final int defaultMaxRowsInMemory = 500000;
+  private static final int defaultMaxRowsInMemory = 75000;
  private static final Period defaultIntermediatePersistPeriod = new Period("PT10M");
  private static final Period defaultWindowPeriod = new Period("PT10M");
  private static final File defaultBasePersistDirectory = Files.createTempDir();