From aff4a05ca385adbd4e3e0986aa18be155fa2b01d Mon Sep 17 00:00:00 2001 From: Fangjin Yang Date: Mon, 7 Jan 2013 13:41:26 -0800 Subject: [PATCH 01/12] db based configs for indexer workers --- .../java/com/metamx/druid/db/DbConnector.java | 12 + .../merger/coordinator/RemoteTaskRunner.java | 23 +- .../config/EC2AutoScalingStrategyConfig.java | 17 -- .../config/RemoteTaskRunnerConfig.java | 7 - .../config/WorkerSetupManagerConfig.java | 17 ++ .../http/IndexerCoordinatorNode.java | 50 +++- .../http/IndexerCoordinatorResource.java | 28 ++- .../http/IndexerCoordinatorServletModule.java | 7 +- .../scaling/EC2AutoScalingStrategy.java | 36 +-- .../merger/coordinator/setup/EC2NodeData.java | 52 +++++ .../coordinator/setup/GalaxyUserData.java | 43 ++++ .../coordinator/setup/WorkerNodeData.java | 14 ++ .../coordinator/setup/WorkerSetupData.java | 52 +++++ .../coordinator/setup/WorkerSetupManager.java | 216 ++++++++++++++++++ .../coordinator/setup/WorkerUserData.java | 14 ++ .../coordinator/RemoteTaskRunnerTest.java | 37 +-- .../scaling/EC2AutoScalingStrategyTest.java | 65 +++--- 17 files changed, 582 insertions(+), 108 deletions(-) create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java create mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java diff --git a/common/src/main/java/com/metamx/druid/db/DbConnector.java b/common/src/main/java/com/metamx/druid/db/DbConnector.java index 99712df22a5..75cf0ba27e7 100644 --- a/common/src/main/java/com/metamx/druid/db/DbConnector.java +++ b/common/src/main/java/com/metamx/druid/db/DbConnector.java @@ -59,6 +59,18 @@ public class DbConnector ); } + public static void createWorkerSetupTable(final DBI dbi, final String workerTableName) + { + createTable( + dbi, + workerTableName, + String.format( + "CREATE table %s (minVersion TINYTEXT NOT NULL, minNumWorkers SMALLINT NOT NULL, nodeData LONGTEXT NOT NULL, userData LONGTEXT NOT NULL)", + workerTableName + ) + ); + } + public static void createTable( final DBI dbi, final String tableName, diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java index 2a235b88d86..addb789762f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java @@ -39,6 +39,7 @@ import com.metamx.druid.merger.common.task.Task; import com.metamx.druid.merger.coordinator.config.RemoteTaskRunnerConfig; import com.metamx.druid.merger.coordinator.scaling.AutoScalingData; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.merger.worker.Worker; import com.metamx.emitter.EmittingLogger; import com.netflix.curator.framework.CuratorFramework; @@ -88,6 +89,7 @@ public class RemoteTaskRunner implements TaskRunner 
private final ScheduledExecutorService scheduledExec; private final RetryPolicyFactory retryPolicyFactory; private final ScalingStrategy strategy; + private final WorkerSetupManager workerSetupManager; // all workers that exist in ZK private final Map<String, WorkerWrapper> zkWorkers = new ConcurrentHashMap<String, WorkerWrapper>(); @@ -109,7 +111,8 @@ public class RemoteTaskRunner implements TaskRunner PathChildrenCache workerPathCache, ScheduledExecutorService scheduledExec, RetryPolicyFactory retryPolicyFactory, - ScalingStrategy strategy + ScalingStrategy strategy, + WorkerSetupManager workerSetupManager ) { this.jsonMapper = jsonMapper; @@ -119,6 +122,7 @@ public class RemoteTaskRunner implements TaskRunner this.scheduledExec = scheduledExec; this.retryPolicyFactory = retryPolicyFactory; this.strategy = strategy; + this.workerSetupManager = workerSetupManager; } @LifecycleStart @@ -169,7 +173,7 @@ public class RemoteTaskRunner implements TaskRunner public void run() { if (currentlyTerminating.isEmpty()) { - if (zkWorkers.size() <= config.getMinNumWorkers()) { + if (zkWorkers.size() <= workerSetupManager.getWorkerSetupData().getMinNumWorkers()) { return; } @@ -388,8 +392,7 @@ public class RemoteTaskRunner implements TaskRunner synchronized (statusLock) { try { if (event.getType().equals(PathChildrenCacheEvent.Type.CHILD_ADDED) || - event.getType().equals(PathChildrenCacheEvent.Type.CHILD_UPDATED)) - { + event.getType().equals(PathChildrenCacheEvent.Type.CHILD_UPDATED)) { final String taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); final TaskStatus taskStatus; @@ -399,7 +402,7 @@ public class RemoteTaskRunner implements TaskRunner event.getData().getData(), TaskStatus.class ); - if(!taskStatus.getId().equals(taskId)) { + if (!taskStatus.getId().equals(taskId)) { // Sanity check throw new ISE( "Worker[%s] status id does not match payload id: %s != %s", @@ -408,7 +411,8 @@ public class RemoteTaskRunner implements TaskRunner taskStatus.getId() ); } - } catch (Exception e) { + } + catch (Exception e) { log.warn(e, "Worker[%s] wrote bogus status for task: %s", worker.getHost(), taskId); retryTask(new CleanupPaths(worker.getHost(), taskId), tasks.get(taskId)); throw Throwables.propagate(e); @@ -446,7 +450,8 @@ public class RemoteTaskRunner implements TaskRunner } } } - } catch(Exception e) { + } + catch (Exception e) { log.makeAlert(e, "Failed to handle new worker status") .addData("worker", worker.getHost()) .addData("znode", event.getData().getPath()) @@ -526,7 +531,9 @@ public class RemoteTaskRunner implements TaskRunner public boolean apply(WorkerWrapper input) { return (!input.isAtCapacity() && - input.getWorker().getVersion().compareTo(config.getMinWorkerVersion()) >= 0); + input.getWorker() + .getVersion() + .compareTo(workerSetupManager.getWorkerSetupData().getMinVersion()) >= 0); } } ) diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java index c364070e313..a8cfcf8df22 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java @@ -26,24 +26,7 @@ import org.skife.config.Default; */ public abstract class EC2AutoScalingStrategyConfig { - @Config("druid.indexer.amiId") - public abstract String getAmiId(); - @Config("druid.indexer.worker.port") @Default("8080") public abstract String getWorkerPort(); - -
@Config("druid.indexer.instanceType") - public abstract String getInstanceType(); - - @Config("druid.indexer.minNumInstancesToProvision") - @Default("1") - public abstract int getMinNumInstancesToProvision(); - - @Config("druid.indexer.maxNumInstancesToProvision") - @Default("1") - public abstract int getMaxNumInstancesToProvision(); - - @Config("druid.indexer.userDataFile") - public abstract String getUserDataFile(); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java index 00b869ea6da..2e20c4ffff2 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java @@ -37,13 +37,6 @@ public abstract class RemoteTaskRunnerConfig extends IndexerZkConfig @Default("2012-01-01T00:55:00.000Z") public abstract DateTime getTerminateResourcesOriginDateTime(); - @Config("druid.indexer.minWorkerVersion") - public abstract String getMinWorkerVersion(); - - @Config("druid.indexer.minNumWorkers") - @Default("1") - public abstract int getMinNumWorkers(); - @Config("druid.indexer.maxWorkerIdleTimeMillisBeforeDeletion") @Default("1") public abstract int getMaxWorkerIdleTimeMillisBeforeDeletion(); diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java new file mode 100644 index 00000000000..ad7444b657e --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java @@ -0,0 +1,17 @@ +package com.metamx.druid.merger.coordinator.config; + +import org.joda.time.Duration; +import org.skife.config.Config; +import org.skife.config.Default; + +/** + */ +public abstract class WorkerSetupManagerConfig +{ + @Config("druid.indexer.workerSetupTable") + public abstract String getWorkerSetupTable(); + + @Config("druid.indexer.poll.duration") + @Default("PT1M") + public abstract Duration getPollDuration(); +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java index 1c11c62cb7b..348a62b9a28 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java @@ -69,6 +69,8 @@ import com.metamx.druid.merger.coordinator.config.RetryPolicyConfig; import com.metamx.druid.merger.coordinator.scaling.EC2AutoScalingStrategy; import com.metamx.druid.merger.coordinator.scaling.NoopScalingStrategy; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; +import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; import com.metamx.druid.realtime.S3SegmentPusher; import com.metamx.druid.realtime.S3SegmentPusherConfig; import com.metamx.druid.realtime.SegmentPusher; @@ -98,6 +100,7 @@ import org.mortbay.jetty.servlet.DefaultServlet; import org.mortbay.jetty.servlet.FilterHolder; import org.mortbay.jetty.servlet.ServletHolder; import org.skife.config.ConfigurationObjectFactory; +import org.skife.jdbi.v2.DBI; import java.net.URL; import java.util.Arrays; @@ -133,6 +136,7 @@ public class 
IndexerCoordinatorNode extends RegisteringNode private CuratorFramework curatorFramework = null; private ScheduledExecutorFactory scheduledExecutorFactory = null; private IndexerZkConfig indexerZkConfig; + private WorkerSetupManager workerSetupManager = null; private TaskRunnerFactory taskRunnerFactory = null; private TaskMaster taskMaster = null; private Server server = null; @@ -160,14 +164,16 @@ public class IndexerCoordinatorNode extends RegisteringNode return this; } - public void setMergerDBCoordinator(MergerDBCoordinator mergerDBCoordinator) + public IndexerCoordinatorNode setMergerDBCoordinator(MergerDBCoordinator mergerDBCoordinator) { this.mergerDBCoordinator = mergerDBCoordinator; + return this; } - public void setTaskQueue(TaskQueue taskQueue) + public IndexerCoordinatorNode setTaskQueue(TaskQueue taskQueue) { this.taskQueue = taskQueue; + return this; } public IndexerCoordinatorNode setMergeDbCoordinator(MergerDBCoordinator mergeDbCoordinator) @@ -182,9 +188,16 @@ public class IndexerCoordinatorNode extends RegisteringNode return this; } - public void setTaskRunnerFactory(TaskRunnerFactory taskRunnerFactory) + public IndexerCoordinatorNode setWorkerSetupManager(WorkerSetupManager workerSetupManager) + { + this.workerSetupManager = workerSetupManager; + return this; + } + + public IndexerCoordinatorNode setTaskRunnerFactory(TaskRunnerFactory taskRunnerFactory) { this.taskRunnerFactory = taskRunnerFactory; + return this; } public void init() throws Exception @@ -202,6 +215,7 @@ public class IndexerCoordinatorNode extends RegisteringNode initializeJacksonSubtypes(); initializeCurator(); initializeIndexerZkConfig(); + initializeWorkerSetupManager(); initializeTaskRunnerFactory(); initializeTaskMaster(); initializeServer(); @@ -220,7 +234,8 @@ public class IndexerCoordinatorNode extends RegisteringNode jsonMapper, config, emitter, - taskQueue + taskQueue, + workerSetupManager ) ); @@ -447,6 +462,27 @@ public class IndexerCoordinatorNode extends RegisteringNode } } + public void initializeWorkerSetupManager() + { + if (workerSetupManager == null) { + final DbConnectorConfig dbConnectorConfig = configFactory.build(DbConnectorConfig.class); + final DBI dbi = new DbConnector(dbConnectorConfig).getDBI(); + final WorkerSetupManagerConfig workerSetupManagerConfig = configFactory.build(WorkerSetupManagerConfig.class); + + DbConnector.createWorkerSetupTable(dbi, workerSetupManagerConfig.getWorkerSetupTable()); + workerSetupManager = new WorkerSetupManager( + dbi, Executors.newScheduledThreadPool( + 1, + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("WorkerSetupManagerExec--%d") + .build() + ), jsonMapper, workerSetupManagerConfig + ); + } + lifecycle.addManagedInstance(workerSetupManager); + } + public void initializeTaskRunnerFactory() { if (taskRunnerFactory == null) { @@ -476,7 +512,8 @@ public class IndexerCoordinatorNode extends RegisteringNode PropUtils.getProperty(props, "com.metamx.aws.secretKey") ) ), - configFactory.build(EC2AutoScalingStrategyConfig.class) + configFactory.build(EC2AutoScalingStrategyConfig.class), + workerSetupManager ); } else if (config.getStrategyImpl().equalsIgnoreCase("noop")) { strategy = new NoopScalingStrategy(); @@ -491,7 +528,8 @@ public class IndexerCoordinatorNode extends RegisteringNode new PathChildrenCache(curatorFramework, indexerZkConfig.getAnnouncementPath(), true), retryScheduledExec, new RetryPolicyFactory(configFactory.build(RetryPolicyConfig.class)), - strategy + strategy, + workerSetupManager ); } }; diff --git 
a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java index 6cf9b0a7c16..e4acd93514f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java @@ -28,6 +28,8 @@ import com.metamx.druid.merger.common.task.MergeTask; import com.metamx.druid.merger.common.task.Task; import com.metamx.druid.merger.coordinator.TaskQueue; import com.metamx.druid.merger.coordinator.config.IndexerCoordinatorConfig; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.service.ServiceEmitter; import javax.ws.rs.Consumes; @@ -48,18 +50,21 @@ public class IndexerCoordinatorResource private final IndexerCoordinatorConfig config; private final ServiceEmitter emitter; private final TaskQueue tasks; + private final WorkerSetupManager workerSetupManager; @Inject public IndexerCoordinatorResource( IndexerCoordinatorConfig config, ServiceEmitter emitter, - TaskQueue tasks + TaskQueue tasks, + WorkerSetupManager workerSetupManager ) throws Exception { this.config = config; this.emitter = emitter; this.tasks = tasks; + this.workerSetupManager = workerSetupManager; } @POST @@ -115,4 +120,25 @@ public class IndexerCoordinatorResource { return Response.ok(ImmutableMap.of("task", taskid)).build(); } + + @GET + @Path("/worker/setup") + @Produces("application/json") + public Response getWorkerSetupData() + { + return Response.ok(workerSetupManager.getWorkerSetupData()).build(); + } + + @POST + @Path("/worker/setup") + @Consumes("application/json") + public Response setWorkerSetupData( + final WorkerSetupData workerSetupData + ) + { + if (!workerSetupManager.setWorkerSetupData(workerSetupData)) { + return Response.status(Response.Status.BAD_REQUEST).build(); + } + return Response.ok().build(); + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java index 9c657bdc292..4cc1df9fa6f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java @@ -22,6 +22,7 @@ package com.metamx.druid.merger.coordinator.http; import com.google.inject.Provides; import com.metamx.druid.merger.coordinator.TaskQueue; import com.metamx.druid.merger.coordinator.config.IndexerCoordinatorConfig; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.service.ServiceEmitter; import com.sun.jersey.guice.JerseyServletModule; import com.sun.jersey.guice.spi.container.servlet.GuiceContainer; @@ -38,18 +39,21 @@ public class IndexerCoordinatorServletModule extends JerseyServletModule private final IndexerCoordinatorConfig indexerCoordinatorConfig; private final ServiceEmitter emitter; private final TaskQueue tasks; + private final WorkerSetupManager workerSetupManager; public IndexerCoordinatorServletModule( ObjectMapper jsonMapper, IndexerCoordinatorConfig indexerCoordinatorConfig, ServiceEmitter emitter, - TaskQueue tasks + TaskQueue tasks, + WorkerSetupManager workerSetupManager ) { this.jsonMapper = jsonMapper; 
this.indexerCoordinatorConfig = indexerCoordinatorConfig; this.emitter = emitter; this.tasks = tasks; + this.workerSetupManager = workerSetupManager; } @Override @@ -60,6 +64,7 @@ public class IndexerCoordinatorServletModule extends JerseyServletModule bind(IndexerCoordinatorConfig.class).toInstance(indexerCoordinatorConfig); bind(ServiceEmitter.class).toInstance(emitter); bind(TaskQueue.class).toInstance(tasks); + bind(WorkerSetupManager.class).toInstance(workerSetupManager); serve("/*").with(GuiceContainer.class); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java index 265fe62287c..eed69ae7f9e 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java @@ -24,19 +24,20 @@ import com.amazonaws.services.ec2.model.DescribeInstancesRequest; import com.amazonaws.services.ec2.model.DescribeInstancesResult; import com.amazonaws.services.ec2.model.Filter; import com.amazonaws.services.ec2.model.Instance; -import com.amazonaws.services.ec2.model.InstanceType; import com.amazonaws.services.ec2.model.Reservation; import com.amazonaws.services.ec2.model.RunInstancesRequest; import com.amazonaws.services.ec2.model.RunInstancesResult; import com.amazonaws.services.ec2.model.TerminateInstancesRequest; import com.google.common.base.Function; import com.google.common.collect.Lists; +import com.metamx.common.ISE; import com.metamx.druid.merger.coordinator.config.EC2AutoScalingStrategyConfig; +import com.metamx.druid.merger.coordinator.setup.EC2NodeData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.EmittingLogger; import org.codehaus.jackson.map.ObjectMapper; -import javax.annotation.Nullable; -import java.io.File; import java.util.List; /** @@ -48,31 +49,40 @@ public class EC2AutoScalingStrategy implements ScalingStrategy private final ObjectMapper jsonMapper; private final AmazonEC2Client amazonEC2Client; private final EC2AutoScalingStrategyConfig config; + private final WorkerSetupManager workerSetupManager; public EC2AutoScalingStrategy( ObjectMapper jsonMapper, AmazonEC2Client amazonEC2Client, - EC2AutoScalingStrategyConfig config + EC2AutoScalingStrategyConfig config, + WorkerSetupManager workerSetupManager ) { this.jsonMapper = jsonMapper; this.amazonEC2Client = amazonEC2Client; this.config = config; + this.workerSetupManager = workerSetupManager; } @Override public AutoScalingData provision() { try { + WorkerSetupData setupData = workerSetupManager.getWorkerSetupData(); + if (!(setupData.getNodeData() instanceof EC2NodeData)) { + throw new ISE("DB misconfiguration! 
Node data is an instance of [%s]", setupData.getNodeData().getClass()); + } + EC2NodeData workerConfig = (EC2NodeData) setupData.getNodeData(); + log.info("Creating new instance(s)..."); RunInstancesResult result = amazonEC2Client.runInstances( new RunInstancesRequest( - config.getAmiId(), - config.getMinNumInstancesToProvision(), - config.getMaxNumInstancesToProvision() + workerConfig.getAmiId(), + workerConfig.getMinInstances(), + workerConfig.getMaxInstances() ) - .withInstanceType(InstanceType.fromValue(config.getInstanceType())) - .withUserData(jsonMapper.writeValueAsString(new File(config.getUserDataFile()))) + .withInstanceType(workerConfig.getInstanceType()) + .withUserData(jsonMapper.writeValueAsString(setupData.getUserData())) ); List instanceIds = Lists.transform( @@ -80,7 +90,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { return input.getInstanceId(); } @@ -95,7 +105,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { return String.format("%s:%s", input.getPrivateIpAddress(), config.getWorkerPort()); } @@ -135,7 +145,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { return input.getInstanceId(); } @@ -150,7 +160,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { return String.format("%s:%s", input.getPrivateIpAddress(), config.getWorkerPort()); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java new file mode 100644 index 00000000000..209444c6731 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java @@ -0,0 +1,52 @@ +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + */ +public class EC2NodeData implements WorkerNodeData +{ + private final String amiId; + private final String instanceType; + private final int minInstances; + private final int maxInstances; + + @JsonCreator + public EC2NodeData( + @JsonProperty("amiId") String amiId, + @JsonProperty("instanceType") String instanceType, + @JsonProperty("minInstances") int minInstances, + @JsonProperty("maxInstances") int maxInstances + ) + { + this.amiId = amiId; + this.instanceType = instanceType; + this.minInstances = minInstances; + this.maxInstances = maxInstances; + } + + @JsonProperty + public String getAmiId() + { + return amiId; + } + + @JsonProperty + public String getInstanceType() + { + return instanceType; + } + + @JsonProperty + public int getMinInstances() + { + return minInstances; + } + + @JsonProperty + public int getMaxInstances() + { + return maxInstances; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java new file mode 100644 index 00000000000..0baa0ddfb9a --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java @@ -0,0 +1,43 @@ 
+package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + */ +public class GalaxyUserData implements WorkerUserData +{ + public final String env; + public final String ver; + public final String type; + + @JsonCreator + public GalaxyUserData( + @JsonProperty("env") String env, + @JsonProperty("ver") String ver, + @JsonProperty("type") String type + ) + { + this.env = env; + this.ver = ver; + this.type = type; + } + + @JsonProperty + public String getEnv() + { + return env; + } + + @JsonProperty + public String getVer() + { + return ver; + } + + @JsonProperty + public String getType() + { + return type; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java new file mode 100644 index 00000000000..8068a4c267b --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java @@ -0,0 +1,14 @@ +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonSubTypes; +import org.codehaus.jackson.annotate.JsonTypeInfo; + +/** + */ +@JsonTypeInfo(use=JsonTypeInfo.Id.NAME, property="type") +@JsonSubTypes(value={ + @JsonSubTypes.Type(name="ec2", value=EC2NodeData.class) +}) +public interface WorkerNodeData +{ +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java new file mode 100644 index 00000000000..224b22167ae --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java @@ -0,0 +1,52 @@ +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + */ +public class WorkerSetupData +{ + private final String minVersion; + private final int minNumWorkers; + private final WorkerNodeData nodeData; + private final WorkerUserData userData; + + @JsonCreator + public WorkerSetupData( + @JsonProperty("minVersion") String minVersion, + @JsonProperty("minNumWorkers") int minNumWorkers, + @JsonProperty("nodeData") WorkerNodeData nodeData, + @JsonProperty("userData") WorkerUserData userData + ) + { + this.minVersion = minVersion; + this.minNumWorkers = minNumWorkers; + this.nodeData = nodeData; + this.userData = userData; + } + + @JsonProperty + public String getMinVersion() + { + return minVersion; + } + + @JsonProperty + public int getMinNumWorkers() + { + return minNumWorkers; + } + + @JsonProperty + public WorkerNodeData getNodeData() + { + return nodeData; + } + + @JsonProperty + public WorkerUserData getUserData() + { + return userData; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java new file mode 100644 index 00000000000..a4ab8fe8d5d --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java @@ -0,0 +1,216 @@ +package com.metamx.druid.merger.coordinator.setup; + +import com.google.common.base.Throwables; +import com.google.common.collect.Lists; +import com.metamx.common.ISE; +import com.metamx.common.concurrent.ScheduledExecutors; +import com.metamx.common.lifecycle.LifecycleStart; +import com.metamx.common.lifecycle.LifecycleStop; 
+import com.metamx.common.logger.Logger; +import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; +import org.apache.commons.collections.MapUtils; +import org.codehaus.jackson.map.ObjectMapper; +import org.joda.time.Duration; +import org.skife.jdbi.v2.DBI; +import org.skife.jdbi.v2.FoldController; +import org.skife.jdbi.v2.Folder3; +import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.StatementContext; +import org.skife.jdbi.v2.tweak.HandleCallback; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicReference; + +/** + */ +public class WorkerSetupManager +{ + private static final Logger log = new Logger(WorkerSetupManager.class); + + private final DBI dbi; + private final ObjectMapper jsonMapper; + private final ScheduledExecutorService exec; + private final WorkerSetupManagerConfig config; + + private final Object lock = new Object(); + + private volatile AtomicReference<WorkerSetupData> workerSetupData = new AtomicReference<WorkerSetupData>(null); + private volatile boolean started = false; + + public WorkerSetupManager( + DBI dbi, + ScheduledExecutorService exec, + ObjectMapper jsonMapper, + WorkerSetupManagerConfig config + ) + { + this.dbi = dbi; + this.exec = exec; + this.jsonMapper = jsonMapper; + this.config = config; + } + + @LifecycleStart + public void start() + { + synchronized (lock) { + if (started) { + return; + } + + ScheduledExecutors.scheduleWithFixedDelay( + exec, + new Duration(0), + config.getPollDuration(), + new Runnable() + { + @Override + public void run() + { + poll(); + } + } + ); + + started = true; + } + } + + @LifecycleStop + public void stop() + { + synchronized (lock) { + if (!started) { + return; + } + + started = false; + } + } + + public void poll() + { + try { + List<WorkerSetupData> setupDataList = dbi.withHandle( + new HandleCallback<List<WorkerSetupData>>() + { + @Override + public List<WorkerSetupData> withHandle(Handle handle) throws Exception + { + return handle.createQuery( + String.format( + "SELECT minVersion, minNumWorkers, nodeData, userData FROM %s", + config.getWorkerSetupTable() + ) + ).fold( + Lists.<WorkerSetupData>newArrayList(), + new Folder3<ArrayList<WorkerSetupData>, Map<String, Object>>() + { + @Override + public ArrayList<WorkerSetupData> fold( + ArrayList<WorkerSetupData> workerNodeConfigurations, + Map<String, Object> stringObjectMap, + FoldController foldController, + StatementContext statementContext + ) throws SQLException + { + try { + // the DB driver lowercases the column names in stringObjectMap, so Jackson serde of the raw map may fail; rebuild the object field by field instead + workerNodeConfigurations.add( + new WorkerSetupData( + MapUtils.getString(stringObjectMap, "minVersion"), + MapUtils.getInteger(stringObjectMap, "minNumWorkers"), + jsonMapper.readValue( + MapUtils.getString(stringObjectMap, "nodeData"), + WorkerNodeData.class + ), + jsonMapper.readValue( + MapUtils.getString(stringObjectMap, "userData"), + WorkerUserData.class + ) + ) + ); + return workerNodeConfigurations; + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + } + ); + } + } + ); + + if (setupDataList.isEmpty()) { + throw new ISE("WTF?! No configuration found for worker nodes!"); + } else if (setupDataList.size() != 1) { + throw new ISE("WTF?!
Found more than one configuration for worker nodes"); + } + + workerSetupData.set(setupDataList.get(0)); + } + catch (Exception e) { + log.error(e, "Exception while polling for worker setup data!"); + } + } + + @SuppressWarnings("unchecked") + public WorkerSetupData getWorkerSetupData() + { + synchronized (lock) { + if (!started) { + throw new ISE("Must start WorkerSetupManager first!"); + } + + return workerSetupData.get(); + } + } + + public boolean setWorkerSetupData(final WorkerSetupData value) + { + synchronized (lock) { + try { + if (!started) { + throw new ISE("Must start WorkerSetupManager first!"); + } + + dbi.withHandle( + new HandleCallback<Void>() + { + @Override + public Void withHandle(Handle handle) throws Exception + { + handle.createStatement(String.format("DELETE FROM %s", config.getWorkerSetupTable())).execute(); + handle.createStatement( + String.format( + "INSERT INTO %s (minVersion, minNumWorkers, nodeData, userData) VALUES (:minVersion, :minNumWorkers, :nodeData, :userData)", + config.getWorkerSetupTable() + ) + ) + .bind("minVersion", value.getMinVersion()) + .bind("minNumWorkers", value.getMinNumWorkers()) + .bind("nodeData", jsonMapper.writeValueAsString(value.getNodeData())) + .bind("userData", jsonMapper.writeValueAsString(value.getUserData())) + .execute(); + + return null; + } + } + ); + + workerSetupData.set(value); + } + catch (Exception e) { + log.error(e, "Exception updating worker config"); + return false; + } + } + + return true; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java new file mode 100644 index 00000000000..80857fb58d1 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java @@ -0,0 +1,14 @@ +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonSubTypes; +import org.codehaus.jackson.annotate.JsonTypeInfo; + +/** + */ +@JsonTypeInfo(use=JsonTypeInfo.Id.NAME, property="classType") +@JsonSubTypes(value={ + @JsonSubTypes.Type(name="galaxy", value=GalaxyUserData.class) +}) +public interface WorkerUserData +{ +} diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java index eb10731abd9..3282ca5dd3a 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java @@ -17,6 +17,8 @@ import com.metamx.druid.merger.coordinator.config.RemoteTaskRunnerConfig; import com.metamx.druid.merger.coordinator.config.RetryPolicyConfig; import com.metamx.druid.merger.coordinator.scaling.AutoScalingData; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.merger.worker.TaskMonitor; import com.metamx.druid.merger.worker.Worker; import com.metamx.druid.merger.worker.WorkerCuratorCoordinator; @@ -62,6 +64,7 @@ public class RemoteTaskRunnerTest private PathChildrenCache pathChildrenCache; private RemoteTaskRunner remoteTaskRunner; private TaskMonitor taskMonitor; + private WorkerSetupManager workerSetupManager; private ScheduledExecutorService scheduledExec; private private Worker worker1; - @Before
public void setUp() throws Exception { @@ -141,9 +143,10 @@ public class RemoteTaskRunnerTest { remoteTaskRunner.run(task1, new TaskContext(new DateTime().toString(), Sets.newHashSet()), null); try { - remoteTaskRunner.run(task1, new TaskContext(new DateTime().toString(), Sets.newHashSet()), null); - fail("ISE expected"); - } catch (ISE expected) { + remoteTaskRunner.run(task1, new TaskContext(new DateTime().toString(), Sets.newHashSet()), null); + fail("ISE expected"); + } + catch (ISE expected) { } } @@ -333,6 +336,17 @@ public class RemoteTaskRunnerTest private void makeRemoteTaskRunner() throws Exception { scheduledExec = EasyMock.createMock(ScheduledExecutorService.class); + workerSetupManager = EasyMock.createMock(WorkerSetupManager.class); + + EasyMock.expect(workerSetupManager.getWorkerSetupData()).andReturn( + new WorkerSetupData( + "0", + 0, + null, + null + ) + ); + EasyMock.replay(workerSetupManager); remoteTaskRunner = new RemoteTaskRunner( jsonMapper, @@ -341,7 +355,8 @@ public class RemoteTaskRunnerTest pathChildrenCache, scheduledExec, new RetryPolicyFactory(new TestRetryPolicyConfig()), - new TestScalingStrategy() + new TestScalingStrategy(), + workerSetupManager ); // Create a single worker and wait for things to be ready @@ -405,18 +420,6 @@ public class RemoteTaskRunnerTest return null; } - @Override - public String getMinWorkerVersion() - { - return "0"; - } - - @Override - public int getMinNumWorkers() - { - return 0; - } - @Override public int getMaxWorkerIdleTimeMillisBeforeDeletion() { diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java index 958a2c1d836..2cbdfe83efe 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java @@ -29,6 +29,10 @@ import com.amazonaws.services.ec2.model.RunInstancesResult; import com.amazonaws.services.ec2.model.TerminateInstancesRequest; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.merger.coordinator.config.EC2AutoScalingStrategyConfig; +import com.metamx.druid.merger.coordinator.setup.EC2NodeData; +import com.metamx.druid.merger.coordinator.setup.GalaxyUserData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import org.easymock.EasyMock; import org.junit.After; import org.junit.Assert; @@ -52,6 +56,7 @@ public class EC2AutoScalingStrategyTest private Reservation reservation; private Instance instance; private EC2AutoScalingStrategy strategy; + private WorkerSetupManager workerSetupManager; @Before public void setUp() throws Exception { @@ -60,6 +65,7 @@ public class EC2AutoScalingStrategyTest runInstancesResult = EasyMock.createMock(RunInstancesResult.class); describeInstancesResult = EasyMock.createMock(DescribeInstancesResult.class); reservation = EasyMock.createMock(Reservation.class); + workerSetupManager = EasyMock.createMock(WorkerSetupManager.class); instance = new Instance() .withInstanceId(INSTANCE_ID) @@ -69,44 +75,16 @@ public class EC2AutoScalingStrategyTest strategy = new EC2AutoScalingStrategy( new DefaultObjectMapper(), - amazonEC2Client, new EC2AutoScalingStrategyConfig() - { - @Override - public String getAmiId() - { - return AMI_ID; - } - - @Override - public String getWorkerPort() - { -
return "8080"; - } - - @Override - public String getInstanceType() - { - return "t1.micro"; - } - - @Override - public int getMinNumInstancesToProvision() - { - return 1; - } - - @Override - public int getMaxNumInstancesToProvision() - { - return 1; - } - - @Override - public String getUserDataFile() - { - return ""; - } - } + amazonEC2Client, + new EC2AutoScalingStrategyConfig() + { + @Override + public String getWorkerPort() + { + return "8080"; + } + }, + workerSetupManager ); } @@ -117,11 +95,22 @@ public class EC2AutoScalingStrategyTest EasyMock.verify(runInstancesResult); EasyMock.verify(describeInstancesResult); EasyMock.verify(reservation); + EasyMock.verify(workerSetupManager); } @Test public void testScale() { + EasyMock.expect(workerSetupManager.getWorkerSetupData()).andReturn( + new WorkerSetupData( + "0", + 0, + new EC2NodeData(AMI_ID, INSTANCE_ID, 1, 1), + new GalaxyUserData("env", "ver", "type") + ) + ); + EasyMock.replay(workerSetupManager); + EasyMock.expect(amazonEC2Client.runInstances(EasyMock.anyObject(RunInstancesRequest.class))).andReturn( runInstancesResult ); From 79e9ff94a67753dfb0619e040d7965b1dba1fa27 Mon Sep 17 00:00:00 2001 From: Fangjin Yang Date: Mon, 7 Jan 2013 14:07:47 -0800 Subject: [PATCH 02/12] add missing headers --- .../config/WorkerSetupManagerConfig.java | 19 +++++++++++++++++++ .../coordinator/scaling/AutoScalingData.java | 19 +++++++++++++++++++ .../scaling/NoopScalingStrategy.java | 19 +++++++++++++++++++ .../merger/coordinator/setup/EC2NodeData.java | 19 +++++++++++++++++++ .../coordinator/setup/GalaxyUserData.java | 19 +++++++++++++++++++ .../coordinator/setup/WorkerNodeData.java | 19 +++++++++++++++++++ .../coordinator/setup/WorkerSetupData.java | 19 +++++++++++++++++++ .../coordinator/setup/WorkerSetupManager.java | 19 +++++++++++++++++++ .../coordinator/setup/WorkerUserData.java | 19 +++++++++++++++++++ 9 files changed, 171 insertions(+) diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java index ad7444b657e..97368c9f77e 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + package com.metamx.druid.merger.coordinator.config; import org.joda.time.Duration; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java index 6cce08f8731..5a1bb4980e5 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.scaling; import java.util.List; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java index 67eb99293e4..d723df537dc 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.scaling; import com.metamx.emitter.EmittingLogger; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java index 209444c6731..d1531f7974a 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonCreator; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java index 0baa0ddfb9a..21e09f5fd40 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonCreator; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java index 8068a4c267b..b7cec6d68d0 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonSubTypes; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java index 224b22167ae..f54818f8437 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonCreator; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java index a4ab8fe8d5d..03bdd770671 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.setup; import com.google.common.base.Throwables; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java index 80857fb58d1..4a42c9b3bac 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonSubTypes; From 4c2da933895477110d50a23bcfcb858fe2b13776 Mon Sep 17 00:00:00 2001 From: Fangjin Yang Date: Wed, 9 Jan 2013 14:51:35 -0800 Subject: [PATCH 03/12] bug fixes with ec2 auto scaling --- .../java/com/metamx/druid/db/DbConnector.java | 2 +- .../merger/coordinator/RemoteTaskRunner.java | 23 ++++---- .../http/IndexerCoordinatorNode.java | 6 +- .../scaling/EC2AutoScalingStrategy.java | 57 +++++++++++++------ .../scaling/NoopScalingStrategy.java | 7 +++ .../coordinator/scaling/ScalingStrategy.java | 7 +++ .../merger/coordinator/setup/EC2NodeData.java | 22 ++++++- .../coordinator/setup/GalaxyUserData.java | 10 ++-- .../coordinator/setup/WorkerSetupData.java | 22 ++++++- .../coordinator/setup/WorkerSetupManager.java | 16 +++++- .../coordinator/RemoteTaskRunnerTest.java | 8 +++ .../scaling/EC2AutoScalingStrategyTest.java | 9 ++- pom.xml | 2 +- 13 files changed, 146 insertions(+), 45 deletions(-) diff --git a/common/src/main/java/com/metamx/druid/db/DbConnector.java b/common/src/main/java/com/metamx/druid/db/DbConnector.java index 75cf0ba27e7..b918af2b2b5 100644 --- a/common/src/main/java/com/metamx/druid/db/DbConnector.java +++ b/common/src/main/java/com/metamx/druid/db/DbConnector.java @@ -65,7 +65,7 @@ public class DbConnector dbi, workerTableName, String.format( - "CREATE table %s (minVersion TINYTEXT NOT NULL, minNumWorkers SMALLINT NOT NULL, nodeData LONGTEXT NOT NULL, userData LONGTEXT NOT NULL)", + "CREATE table %s (minVersion TINYTEXT NOT NULL, minNumWorkers SMALLINT NOT NULL, nodeData LONGTEXT NOT NULL, userData LONGTEXT NOT NULL, securityGroupIds LONGTEXT NOT NULL, keyName TINYTEXT NOT NULL)", workerTableName ) ); diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java index addb789762f..0e8927cb0b9 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java @@ -54,6 +54,7 @@ import org.joda.time.Duration; import org.joda.time.Period; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -148,7 +149,7 @@ public class RemoteTaskRunner implements TaskRunner Worker.class ); log.info("Worker[%s] removed!", worker.getHost()); - removeWorker(worker.getHost()); + removeWorker(worker); } } } @@ -222,7 +223,7 @@ public class RemoteTaskRunner implements TaskRunner } log.info( - "[%s] still terminating. Wait for all nodes to terminate before trying again.", + "%s still terminating. 
Wait for all nodes to terminate before trying again.", + "%s still terminating. Wait for all nodes to terminate before trying again.", currentlyTerminating ); } @@ -372,7 +373,7 @@ public class RemoteTaskRunner implements TaskRunner private void addWorker(final Worker worker) { try { - currentlyProvisioning.remove(worker.getHost()); + currentlyProvisioning.removeAll(strategy.ipLookup(Arrays.asList(worker.getIp()))); final String workerStatusPath = JOINER.join(config.getStatusPath(), worker.getHost()); final PathChildrenCache statusCache = new PathChildrenCache(cf, workerStatusPath, true); @@ -483,22 +484,22 @@ public class RemoteTaskRunner implements TaskRunner * When an ephemeral worker node disappears from ZK, we have to make sure there are no tasks still assigned * to the worker. If tasks remain, they are retried. * - * @param workerId - id of the removed worker + * @param worker - the removed worker */ - private void removeWorker(final String workerId) + private void removeWorker(final Worker worker) { - currentlyTerminating.remove(workerId); + currentlyTerminating.remove(worker.getHost()); - WorkerWrapper workerWrapper = zkWorkers.get(workerId); + WorkerWrapper workerWrapper = zkWorkers.get(worker.getHost()); if (workerWrapper != null) { try { Set<String> tasksToRetry = Sets.newHashSet(workerWrapper.getRunningTasks()); - tasksToRetry.addAll(cf.getChildren().forPath(JOINER.join(config.getTaskPath(), workerId))); + tasksToRetry.addAll(cf.getChildren().forPath(JOINER.join(config.getTaskPath(), worker.getHost()))); for (String taskId : tasksToRetry) { TaskWrapper taskWrapper = tasks.get(taskId); if (taskWrapper != null) { - retryTask(new CleanupPaths(workerId, taskId), tasks.get(taskId)); + retryTask(new CleanupPaths(worker.getHost(), taskId), tasks.get(taskId)); } } } catch (Exception e) { log.error(e, "Failed to cleanly remove worker[%s]", worker.getHost()); } } - zkWorkers.remove(workerId); + zkWorkers.remove(worker.getHost()); } private WorkerWrapper findWorkerForTask() @@ -558,7 +559,7 @@ public class RemoteTaskRunner implements TaskRunner } log.info( - "[%s] still provisioning. Wait for all provisioned nodes to complete before requesting new worker.", + "%s still provisioning.
Wait for all provisioned nodes to complete before requesting new worker.", currentlyProvisioning ); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java index 5248b30712e..15d76cf2fec 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java @@ -47,6 +47,9 @@ import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerConfig; import com.metamx.druid.initialization.ServiceDiscoveryConfig; import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.loading.S3SegmentPusher; +import com.metamx.druid.loading.S3SegmentPusherConfig; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.merger.common.TaskToolbox; import com.metamx.druid.merger.common.config.IndexerZkConfig; import com.metamx.druid.merger.common.index.StaticS3FirehoseFactory; @@ -70,9 +73,6 @@ import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; import com.metamx.druid.merger.coordinator.scaling.EC2AutoScalingStrategy; import com.metamx.druid.merger.coordinator.scaling.NoopScalingStrategy; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; -import com.metamx.druid.loading.S3SegmentPusher; -import com.metamx.druid.loading.S3SegmentPusherConfig; -import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.utils.PropUtils; import com.metamx.emitter.EmittingLogger; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java index eed69ae7f9e..35f9d7d25cb 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java @@ -36,6 +36,7 @@ import com.metamx.druid.merger.coordinator.setup.EC2NodeData; import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.EmittingLogger; +import org.apache.commons.codec.binary.Base64; import org.codehaus.jackson.map.ObjectMapper; @@ -82,7 +83,15 @@ public class EC2AutoScalingStrategy implements ScalingStrategy workerConfig.getMaxInstances() ) .withInstanceType(workerConfig.getInstanceType()) - .withUserData(jsonMapper.writeValueAsString(setupData.getUserData())) + .withSecurityGroupIds(workerConfig.getSecurityGroupIds()) + .withKeyName(workerConfig.getKeyName()) + .withUserData( + Base64.encodeBase64String( + jsonMapper.writeValueAsBytes( + setupData.getUserData() + ) + ) + ) ); List<String> instanceIds = Lists.transform( @@ -107,7 +116,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy @Override public String apply(Instance input) { - return String.format("%s:%s", input.getPrivateIpAddress(), config.getWorkerPort()); + return input.getInstanceId(); } } ), @@ -127,7 +136,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy DescribeInstancesResult result = amazonEC2Client.describeInstances( new DescribeInstancesRequest() .withFilters( - new Filter("private-ip-address", nodeIds) + new Filter("instance-id",
nodeIds) ) ); @@ -139,19 +148,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy try { log.info("Terminating instance[%s]", instances); amazonEC2Client.terminateInstances( - new TerminateInstancesRequest( - Lists.transform( - instances, - new Function<Instance, String>() - { - @Override - public String apply(Instance input) - { - return input.getInstanceId(); - } - } - ) - ) + new TerminateInstancesRequest(nodeIds) ); return new AutoScalingData( @@ -175,4 +172,32 @@ public class EC2AutoScalingStrategy implements ScalingStrategy return null; } + + @Override + public List<String> ipLookup(List<String> ips) + { + DescribeInstancesResult result = amazonEC2Client.describeInstances( + new DescribeInstancesRequest() + .withFilters( + new Filter("private-ip-address", ips) + ) + ); + + List<Instance> instances = Lists.newArrayList(); + for (Reservation reservation : result.getReservations()) { + instances.addAll(reservation.getInstances()); + } + + return Lists.transform( + instances, + new Function<Instance, String>() + { + @Override + public String apply(Instance input) + { + return input.getInstanceId(); + } + } + ); + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java index d723df537dc..2b412ca6202 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java @@ -43,4 +43,11 @@ public class NoopScalingStrategy implements ScalingStrategy log.info("If I were a real strategy I'd terminate %s now", nodeIds); return null; } + + @Override + public List<String> ipLookup(List<String> ips) + { + log.info("I'm not a real strategy so I'm returning what I got %s", ips); + return ips; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java index 9b7da8fb3a4..52105451ed3 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java @@ -28,4 +28,11 @@ public interface ScalingStrategy public AutoScalingData provision(); public AutoScalingData terminate(List<String> nodeIds); + + /** + * Provides a lookup from ip addresses to node ids. + * + * @param ips the private ip addresses to look up + * @return the node ids corresponding to the given ip addresses + */ + public List<String> ipLookup(List<String> ips); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java index d1531f7974a..3e986f94d56 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java @@ -22,6 +22,8 @@ package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; +import java.util.List; + /** */ public class EC2NodeData implements WorkerNodeData @@ -30,19 +32,25 @@ public class EC2NodeData implements WorkerNodeData private final String amiId; private final String instanceType; private final int minInstances; private final int maxInstances; + private final List<String> securityGroupIds; + private final String keyName; @JsonCreator public EC2NodeData( @JsonProperty("amiId") String amiId, @JsonProperty("instanceType") String instanceType, @JsonProperty("minInstances") int minInstances, -
@JsonProperty("maxInstances") int maxInstances + @JsonProperty("maxInstances") int maxInstances, + @JsonProperty("securityGroupIds") List securityGroupIds, + @JsonProperty("keyName") String keyName ) { this.amiId = amiId; this.instanceType = instanceType; this.minInstances = minInstances; this.maxInstances = maxInstances; + this.securityGroupIds = securityGroupIds; + this.keyName = keyName; } @JsonProperty @@ -68,4 +76,16 @@ public class EC2NodeData implements WorkerNodeData { return maxInstances; } + + @JsonProperty + public List getSecurityGroupIds() + { + return securityGroupIds; + } + + @JsonProperty + public String getKeyName() + { + return keyName; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java index 21e09f5fd40..046a8b55f32 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java @@ -27,18 +27,18 @@ import org.codehaus.jackson.annotate.JsonProperty; public class GalaxyUserData implements WorkerUserData { public final String env; - public final String ver; + public final String version; public final String type; @JsonCreator public GalaxyUserData( @JsonProperty("env") String env, - @JsonProperty("ver") String ver, + @JsonProperty("version") String version, @JsonProperty("type") String type ) { this.env = env; - this.ver = ver; + this.version = version; this.type = type; } @@ -49,9 +49,9 @@ public class GalaxyUserData implements WorkerUserData } @JsonProperty - public String getVer() + public String getVersion() { - return ver; + return version; } @JsonProperty diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java index f54818f8437..d0173ef06b8 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java @@ -22,6 +22,8 @@ package com.metamx.druid.merger.coordinator.setup; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; +import java.util.List; + /** */ public class WorkerSetupData @@ -30,19 +32,25 @@ public class WorkerSetupData private final int minNumWorkers; private final WorkerNodeData nodeData; private final WorkerUserData userData; + private final List securityGroupIds; + private final String keyName; @JsonCreator public WorkerSetupData( @JsonProperty("minVersion") String minVersion, @JsonProperty("minNumWorkers") int minNumWorkers, @JsonProperty("nodeData") WorkerNodeData nodeData, - @JsonProperty("userData") WorkerUserData userData + @JsonProperty("userData") WorkerUserData userData, + @JsonProperty("securityGroupIds") List securityGroupIds, + @JsonProperty("keyName") String keyName ) { this.minVersion = minVersion; this.minNumWorkers = minNumWorkers; this.nodeData = nodeData; this.userData = userData; + this.securityGroupIds = securityGroupIds; + this.keyName = keyName; } @JsonProperty @@ -68,4 +76,16 @@ public class WorkerSetupData { return userData; } + + @JsonProperty + public List getSecurityGroupIds() + { + return securityGroupIds; + } + + @JsonProperty + public String getKeyName() + { + return keyName; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java 
b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java index 03bdd770671..42f1a880eda 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java @@ -29,6 +29,7 @@ import com.metamx.common.logger.Logger; import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; import org.apache.commons.collections.MapUtils; import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.type.TypeReference; import org.joda.time.Duration; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.FoldController; @@ -122,7 +123,7 @@ public class WorkerSetupManager { return handle.createQuery( String.format( - "SELECT minVersion, minNumWorkers, nodeData, userData FROM %s", + "SELECT minVersion, minNumWorkers, nodeData, userData, securityGroupIds, keyName FROM %s", config.getWorkerSetupTable() ) ).fold( @@ -150,7 +151,14 @@ public class WorkerSetupManager jsonMapper.readValue( MapUtils.getString(stringObjectMap, "userData"), WorkerUserData.class - ) + ), + (List<String>) jsonMapper.readValue( + MapUtils.getString(stringObjectMap, "securityGroupIds"), + new TypeReference<List<String>>() + { + } + ), + MapUtils.getString(stringObjectMap, "keyName") ) ); return workerNodeConfigurations; @@ -207,7 +215,7 @@ public class WorkerSetupManager handle.createStatement(String.format("DELETE FROM %s", config.getWorkerSetupTable())).execute(); handle.createStatement( String.format( - "INSERT INTO %s (minVersion, minNumWorkers, nodeData, userData) VALUES (:minVersion, :minNumWorkers, :nodeData, :userData)", + "INSERT INTO %s (minVersion, minNumWorkers, nodeData, userData, securityGroupIds, keyName) VALUES (:minVersion, :minNumWorkers, :nodeData, :userData, :securityGroupIds, :keyName)", config.getWorkerSetupTable() ) ) @@ -215,6 +223,8 @@ .bind("minNumWorkers", value.getMinNumWorkers()) .bind("nodeData", jsonMapper.writeValueAsString(value.getNodeData())) .bind("userData", jsonMapper.writeValueAsString(value.getUserData())) + .bind("securityGroupIds", jsonMapper.writeValueAsString(value.getSecurityGroupIds())) + .bind("keyName", jsonMapper.writeValueAsString(value.getKeyName())) .execute(); return null; diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java index 3282ca5dd3a..edf3499ff74 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java @@ -343,6 +343,8 @@ public class RemoteTaskRunnerTest "0", 0, null, + null, + Lists.newArrayList(), null ) ); @@ -404,6 +406,12 @@ { return null; } + + @Override + public List<String> ipLookup(List<String> ips) + { + return ips; + } } private static class TestRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java index 2cbdfe83efe..2660a22952e 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java @@ -27,6 +27,7 @@ import
com.amazonaws.services.ec2.model.Reservation; import com.amazonaws.services.ec2.model.RunInstancesRequest; import com.amazonaws.services.ec2.model.RunInstancesResult; import com.amazonaws.services.ec2.model.TerminateInstancesRequest; +import com.google.common.collect.Lists; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.merger.coordinator.config.EC2AutoScalingStrategyConfig; import com.metamx.druid.merger.coordinator.setup.EC2NodeData; @@ -105,8 +106,10 @@ public class EC2AutoScalingStrategyTest new WorkerSetupData( "0", 0, - new EC2NodeData(AMI_ID, INSTANCE_ID, 1, 1), - new GalaxyUserData("env", "ver", "type") + new EC2NodeData(AMI_ID, INSTANCE_ID, 1, 1, Lists.newArrayList(), "foo"), + new GalaxyUserData("env", "version", "type"), + Arrays.asList("foo"), + "foo2" ) ); EasyMock.replay(workerSetupManager); @@ -133,7 +136,7 @@ public class EC2AutoScalingStrategyTest Assert.assertEquals(created.getNodeIds().size(), 1); Assert.assertEquals(created.getNodes().size(), 1); - Assert.assertEquals(String.format("%s:8080", IP), created.getNodeIds().get(0)); + Assert.assertEquals("theInstance", created.getNodeIds().get(0)); AutoScalingData deleted = strategy.terminate(Arrays.asList("dummyHost")); diff --git a/pom.xml b/pom.xml index 5a165a16ba2..898fc197b93 100644 --- a/pom.xml +++ b/pom.xml @@ -84,7 +84,7 @@ commons-codec commons-codec - 1.3 + 1.7 commons-httpclient From d1f4317af770e854c56ccaf46d639d6aca2fdeba Mon Sep 17 00:00:00 2001 From: Fangjin Yang Date: Mon, 14 Jan 2013 14:55:04 -0800 Subject: [PATCH 04/12] updates to autoscaling config based on code review comments --- .../java/com/metamx/druid/db/DbConnector.java | 2 +- .../merger/coordinator/RemoteTaskRunner.java | 7 ++-- .../config/RemoteTaskRunnerConfig.java | 2 +- .../scaling/EC2AutoScalingStrategy.java | 39 ++++++++++++------- .../coordinator/scaling/ScalingStrategy.java | 2 +- .../merger/coordinator/setup/EC2NodeData.java | 2 +- .../coordinator/setup/GalaxyUserData.java | 2 +- .../coordinator/setup/WorkerNodeData.java | 33 ---------------- .../coordinator/setup/WorkerSetupData.java | 30 +++----------- .../coordinator/setup/WorkerSetupManager.java | 32 +++------------ .../coordinator/setup/WorkerUserData.java | 33 ---------------- .../merger/worker/config/WorkerConfig.java | 3 +- .../coordinator/RemoteTaskRunnerTest.java | 2 - .../scaling/EC2AutoScalingStrategyTest.java | 6 +-- 14 files changed, 50 insertions(+), 145 deletions(-) delete mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java delete mode 100644 merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java diff --git a/common/src/main/java/com/metamx/druid/db/DbConnector.java b/common/src/main/java/com/metamx/druid/db/DbConnector.java index b918af2b2b5..45a0b937964 100644 --- a/common/src/main/java/com/metamx/druid/db/DbConnector.java +++ b/common/src/main/java/com/metamx/druid/db/DbConnector.java @@ -65,7 +65,7 @@ public class DbConnector dbi, workerTableName, String.format( - "CREATE table %s (minVersion TINYTEXT NOT NULL, minNumWorkers SMALLINT NOT NULL, nodeData LONGTEXT NOT NULL, userData LONGTEXT NOT NULL, securityGroupIds LONGTEXT NOT NULL, keyName TINYTEXT NOT NULL)", + "CREATE table %s (config LONGTEXT NOT NULL)", workerTableName ) ); diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java index 0e8927cb0b9..5537a6b6420 100644 --- 
a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java @@ -53,7 +53,6 @@ import org.joda.time.DateTime; import org.joda.time.Duration; import org.joda.time.Period; -import javax.annotation.Nullable; import java.util.Arrays; import java.util.Comparator; import java.util.List; @@ -185,7 +184,7 @@ public class RemoteTaskRunner implements TaskRunner new Predicate() { @Override - public boolean apply(@Nullable WorkerWrapper input) + public boolean apply(WorkerWrapper input) { return input.getRunningTasks().isEmpty() && System.currentTimeMillis() - input.getLastCompletedTaskTime().getMillis() @@ -201,9 +200,9 @@ public class RemoteTaskRunner implements TaskRunner new Function() { @Override - public String apply(@Nullable WorkerWrapper input) + public String apply(WorkerWrapper input) { - return input.getWorker().getHost(); + return input.getWorker().getIp(); } } ) diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java index 2e20c4ffff2..c9badf7ef88 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java @@ -38,7 +38,7 @@ public abstract class RemoteTaskRunnerConfig extends IndexerZkConfig public abstract DateTime getTerminateResourcesOriginDateTime(); @Config("druid.indexer.maxWorkerIdleTimeMillisBeforeDeletion") - @Default("1") + @Default("10000") public abstract int getMaxWorkerIdleTimeMillisBeforeDeletion(); @Config("druid.indexer.maxScalingDuration") diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java index 35f9d7d25cb..8d51da61afd 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java @@ -30,7 +30,6 @@ import com.amazonaws.services.ec2.model.RunInstancesResult; import com.amazonaws.services.ec2.model.TerminateInstancesRequest; import com.google.common.base.Function; import com.google.common.collect.Lists; -import com.metamx.common.ISE; import com.metamx.druid.merger.coordinator.config.EC2AutoScalingStrategyConfig; import com.metamx.druid.merger.coordinator.setup.EC2NodeData; import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; @@ -39,6 +38,7 @@ import com.metamx.emitter.EmittingLogger; import org.apache.commons.codec.binary.Base64; import org.codehaus.jackson.map.ObjectMapper; +import javax.annotation.Nullable; import java.util.List; /** @@ -70,10 +70,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy { try { WorkerSetupData setupData = workerSetupManager.getWorkerSetupData(); - if (!(setupData.getNodeData() instanceof EC2NodeData)) { - throw new ISE("DB misconfiguration! 
Node data is an instance of [%s]", setupData.getNodeData().getClass()); - } - EC2NodeData workerConfig = (EC2NodeData) setupData.getNodeData(); + EC2NodeData workerConfig = setupData.getNodeData(); log.info("Creating new instance(s)..."); RunInstancesResult result = amazonEC2Client.runInstances( @@ -131,12 +128,12 @@ public class EC2AutoScalingStrategy implements ScalingStrategy } @Override - public AutoScalingData terminate(List nodeIds) + public AutoScalingData terminate(List ids) { DescribeInstancesResult result = amazonEC2Client.describeInstances( new DescribeInstancesRequest() .withFilters( - new Filter("instance-id", nodeIds) + new Filter("private-ip-address", ids) ) ); @@ -148,18 +145,30 @@ public class EC2AutoScalingStrategy implements ScalingStrategy try { log.info("Terminating instance[%s]", instances); amazonEC2Client.terminateInstances( - new TerminateInstancesRequest(nodeIds) + new TerminateInstancesRequest( + Lists.transform( + instances, + new Function() + { + @Override + public String apply(Instance input) + { + return input.getInstanceId(); + } + } + ) + ) ); return new AutoScalingData( Lists.transform( - instances, - new Function() + ids, + new Function() { @Override - public String apply(Instance input) + public String apply(@Nullable String input) { - return String.format("%s:%s", input.getPrivateIpAddress(), config.getWorkerPort()); + return String.format("%s:%s", input, config.getWorkerPort()); } } ), @@ -188,7 +197,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy instances.addAll(reservation.getInstances()); } - return Lists.transform( + List retVal = Lists.transform( instances, new Function() { @@ -199,5 +208,9 @@ public class EC2AutoScalingStrategy implements ScalingStrategy } } ); + + log.info("Performing lookup: %s --> %s", ips, retVal); + + return retVal; } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java index 52105451ed3..150de1357e0 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java @@ -27,7 +27,7 @@ public interface ScalingStrategy { public AutoScalingData provision(); - public AutoScalingData terminate(List nodeIds); + public AutoScalingData terminate(List ids); /** * Provides a lookup of ip addresses to node ids diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java index 3e986f94d56..8d302df25f6 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java @@ -26,7 +26,7 @@ import java.util.List; /** */ -public class EC2NodeData implements WorkerNodeData +public class EC2NodeData { private final String amiId; private final String instanceType; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java index 046a8b55f32..876a2635273 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java @@ -24,7 +24,7 @@ import org.codehaus.jackson.annotate.JsonProperty; /** */ -public class 
GalaxyUserData implements WorkerUserData +public class GalaxyUserData { public final String env; public final String version; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java deleted file mode 100644 index b7cec6d68d0..00000000000 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerNodeData.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.merger.coordinator.setup; - -import org.codehaus.jackson.annotate.JsonSubTypes; -import org.codehaus.jackson.annotate.JsonTypeInfo; - -/** - */ -@JsonTypeInfo(use=JsonTypeInfo.Id.NAME, property="type") -@JsonSubTypes(value={ - @JsonSubTypes.Type(name="ec2", value=EC2NodeData.class) -}) -public interface WorkerNodeData -{ -} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java index d0173ef06b8..8395fa2d6c8 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java @@ -30,27 +30,21 @@ public class WorkerSetupData { private final String minVersion; private final int minNumWorkers; - private final WorkerNodeData nodeData; - private final WorkerUserData userData; - private final List securityGroupIds; - private final String keyName; + private final EC2NodeData nodeData; + private final GalaxyUserData userData; @JsonCreator public WorkerSetupData( @JsonProperty("minVersion") String minVersion, @JsonProperty("minNumWorkers") int minNumWorkers, - @JsonProperty("nodeData") WorkerNodeData nodeData, - @JsonProperty("userData") WorkerUserData userData, - @JsonProperty("securityGroupIds") List securityGroupIds, - @JsonProperty("keyName") String keyName + @JsonProperty("nodeData") EC2NodeData nodeData, + @JsonProperty("userData") GalaxyUserData userData ) { this.minVersion = minVersion; this.minNumWorkers = minNumWorkers; this.nodeData = nodeData; this.userData = userData; - this.securityGroupIds = securityGroupIds; - this.keyName = keyName; } @JsonProperty @@ -66,26 +60,14 @@ public class WorkerSetupData } @JsonProperty - public WorkerNodeData getNodeData() + public EC2NodeData getNodeData() { return nodeData; } @JsonProperty - public WorkerUserData getUserData() + public GalaxyUserData getUserData() { return userData; } - - @JsonProperty - public List getSecurityGroupIds() - { - return securityGroupIds; - } - - @JsonProperty - public String getKeyName() - { - return keyName; - } } diff --git 
a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java index 42f1a880eda..baa56290af7 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java @@ -123,7 +123,7 @@ public class WorkerSetupManager { return handle.createQuery( String.format( - "SELECT minVersion, minNumWorkers, nodeData, userData, securityGroupIds, keyName FROM %s", + "SELECT config FROM %s", config.getWorkerSetupTable() ) ).fold( @@ -141,24 +141,9 @@ public class WorkerSetupManager try { // stringObjectMap lowercases and jackson may fail serde workerNodeConfigurations.add( - new WorkerSetupData( - MapUtils.getString(stringObjectMap, "minVersion"), - MapUtils.getInteger(stringObjectMap, "minNumWorkers"), - jsonMapper.readValue( - MapUtils.getString(stringObjectMap, "nodeData"), - WorkerNodeData.class - ), - jsonMapper.readValue( - MapUtils.getString(stringObjectMap, "userData"), - WorkerUserData.class - ), - (List) jsonMapper.readValue( - MapUtils.getString(stringObjectMap, "securityGroupIds"), - new TypeReference>() - { - } - ), - MapUtils.getString(stringObjectMap, "keyName") + jsonMapper.readValue( + MapUtils.getString(stringObjectMap, "config"), + WorkerSetupData.class ) ); return workerNodeConfigurations; @@ -215,16 +200,11 @@ public class WorkerSetupManager handle.createStatement(String.format("DELETE FROM %s", config.getWorkerSetupTable())).execute(); handle.createStatement( String.format( - "INSERT INTO %s (minVersion, minNumWorkers, nodeData, userData, securityGroupIds, keyName) VALUES (:minVersion, :minNumWorkers, :nodeData, :userData, :securityGroupIds, :keyName)", + "INSERT INTO %s (config) VALUES (:config)", config.getWorkerSetupTable() ) ) - .bind("minVersion", value.getMinVersion()) - .bind("minNumWorkers", value.getMinNumWorkers()) - .bind("nodeData", jsonMapper.writeValueAsString(value.getNodeData())) - .bind("userData", jsonMapper.writeValueAsString(value.getUserData())) - .bind("securityGroupIds", jsonMapper.writeValueAsString(value.getSecurityGroupIds())) - .bind("keyName", jsonMapper.writeValueAsString(value.getKeyName())) + .bind("config", jsonMapper.writeValueAsString(value)) .execute(); return null; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java deleted file mode 100644 index 4a42c9b3bac..00000000000 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerUserData.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.merger.coordinator.setup; - -import org.codehaus.jackson.annotate.JsonSubTypes; -import org.codehaus.jackson.annotate.JsonTypeInfo; - -/** - */ -@JsonTypeInfo(use=JsonTypeInfo.Id.NAME, property="classType") -@JsonSubTypes(value={ - @JsonSubTypes.Type(name="galaxy", value=GalaxyUserData.class) -}) -public interface WorkerUserData -{ -} diff --git a/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java b/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java index 5b5f3a0a6e7..4689acef261 100644 --- a/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java @@ -41,6 +41,7 @@ public abstract class WorkerConfig public int getCapacity() { - return Runtime.getRuntime().availableProcessors() - 1; + return 1; + //return Runtime.getRuntime().availableProcessors() - 1; } } diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java index edf3499ff74..1e31efa121c 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java @@ -343,8 +343,6 @@ public class RemoteTaskRunnerTest "0", 0, null, - null, - Lists.newArrayList(), null ) ); diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java index 2660a22952e..c3aa8378b07 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java @@ -107,9 +107,7 @@ public class EC2AutoScalingStrategyTest "0", 0, new EC2NodeData(AMI_ID, INSTANCE_ID, 1, 1, Lists.newArrayList(), "foo"), - new GalaxyUserData("env", "version", "type"), - Arrays.asList("foo"), - "foo2" + new GalaxyUserData("env", "version", "type") ) ); EasyMock.replay(workerSetupManager); @@ -138,7 +136,7 @@ Assert.assertEquals(created.getNodes().size(), 1); Assert.assertEquals("theInstance", created.getNodeIds().get(0)); - AutoScalingData deleted = strategy.terminate(Arrays.asList("dummyHost")); + AutoScalingData deleted = strategy.terminate(Arrays.asList("dummyIP")); Assert.assertEquals(deleted.getNodeIds().size(), 1); Assert.assertEquals(deleted.getNodes().size(), 1); From 8b31d8db9f93ec1a2e3441e77c367862c037720c Mon Sep 17 00:00:00 2001 From: Eric Tschetter Date: Wed, 16 Jan 2013 10:01:46 -0600 Subject: [PATCH 05/12] 1) Adjust IndexMerger to convert the indexes it creates from the old format to the new. This is done quite sub-optimally, but it will work for now...
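In outline, the change below has IndexMerger write the old v8 column files into a "v8-tmp" subdirectory of the requested output directory, rewrite that directory into the v9 format, and then delete the intermediate copy. The following is a minimal sketch of that flow, not the patch itself; it assumes commons-io's FileUtils and the now-static IndexIO.DefaultIndexIOHandler.convertV8toV9() used in the diff, and writeV8Index is a hypothetical stand-in for IndexMerger's existing v8 serialization logic.

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import com.metamx.druid.index.v1.IndexIO;

public class V8ToV9MergeSketch
{
  public static File mergeAndConvert(File outDir) throws IOException
  {
    final File v8OutDir = new File(outDir, "v8-tmp");
    // 1) Write index.drd, dimension/metric columns and inverted.drd in the old v8 layout.
    writeV8Index(v8OutDir);
    // 2) Rewrite the v8 directory into the v9 format at the requested location.
    IndexIO.DefaultIndexIOHandler.convertV8toV9(v8OutDir, outDir);
    // 3) Drop the intermediate v8 copy.
    FileUtils.deleteDirectory(v8OutDir);
    return outDir;
  }

  private static void writeV8Index(File v8OutDir) throws IOException
  {
    // Hypothetical: stands in for IndexMerger's existing v8 serialization steps.
  }
}

The post-pass is what makes this "quite sub-optimal": every merged segment is serialized twice and occupies disk twice until the temp directory is removed.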
--- .../com/metamx/druid/index/v1/IndexIO.java | 2 +- .../metamx/druid/index/v1/IndexMerger.java | 44 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java index 2afcbed6446..6836b9233c5 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java @@ -332,7 +332,7 @@ public class IndexIO throw new UnsupportedOperationException("Shouldn't ever happen in a cluster that is not owned by MMX."); } - public void convertV8toV9(File v8Dir, File v9Dir) throws IOException + public static void convertV8toV9(File v8Dir, File v9Dir) throws IOException { log.info("Converting v8[%s] to v9[%s]", v8Dir, v9Dir); diff --git a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java index 10613561daa..6f9892ee805 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java +++ b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java @@ -384,7 +384,6 @@ public class IndexMerger final Function>, Iterable> rowMergerFn ) throws IOException { - // TODO: make v9 index, complain to Eric when you see this, cause he should be doing it. Map metricTypes = Maps.newTreeMap(Ordering.natural().nullsFirst()); for (IndexableAdapter adapter : indexes) { for (String metric : adapter.getAvailableMetrics()) { @@ -392,11 +391,12 @@ public class IndexMerger } } final Interval dataInterval; + File v8OutDir = new File(outDir, "v8-tmp"); /************* Main index.drd file **************/ progress.progress(); long startTime = System.currentTimeMillis(); - File indexFile = new File(outDir, "index.drd"); + File indexFile = new File(v8OutDir, "index.drd"); FileOutputStream fileOutputStream = null; FileChannel channel = null; @@ -426,7 +426,7 @@ public class IndexMerger fileOutputStream = null; } IndexIO.checkFileSize(indexFile); - log.info("outDir[%s] completed index.drd in %,d millis.", outDir, System.currentTimeMillis() - startTime); + log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); /************* Setup Dim Conversions **************/ progress.progress(); @@ -499,7 +499,7 @@ public class IndexMerger } dimensionCardinalities.put(dimension, count); - FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(outDir, dimension), true); + FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, dimension), true); dimOuts.add(dimOut); writer.close(); @@ -514,7 +514,7 @@ public class IndexMerger ioPeon.cleanup(); } - log.info("outDir[%s] completed dim conversions in %,d millis.", outDir, System.currentTimeMillis() - startTime); + log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); /************* Walk through data sets and merge them *************/ progress.progress(); @@ -595,7 +595,7 @@ public class IndexMerger String metric = entry.getKey(); String typeName = entry.getValue(); if ("float".equals(typeName)) { - metWriters.add(new FloatMetricColumnSerializer(metric, outDir, ioPeon)); + metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon)); } else { ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName); @@ -603,7 +603,7 @@ public class IndexMerger throw new ISE("Unknown type[%s]", typeName); } - 
metWriters.add(new ComplexMetricColumnSerializer(metric, outDir, ioPeon, serde)); + metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde)); } } for (MetricColumnSerializer metWriter : metWriters) { @@ -650,7 +650,7 @@ public class IndexMerger if ((++rowCount % 500000) == 0) { log.info( - "outDir[%s] walked 500,000/%,d rows in %,d millis.", outDir, rowCount, System.currentTimeMillis() - time + "outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time ); time = System.currentTimeMillis(); } @@ -660,13 +660,13 @@ public class IndexMerger rowNumConversion.rewind(); } - final File littleEndianFile = IndexIO.makeTimeFile(outDir, ByteOrder.LITTLE_ENDIAN); + final File littleEndianFile = IndexIO.makeTimeFile(v8OutDir, ByteOrder.LITTLE_ENDIAN); littleEndianFile.delete(); OutputSupplier out = Files.newOutputStreamSupplier(littleEndianFile, true); littleEndianTimeWriter.closeAndConsolidate(out); IndexIO.checkFileSize(littleEndianFile); - final File bigEndianFile = IndexIO.makeTimeFile(outDir, ByteOrder.BIG_ENDIAN); + final File bigEndianFile = IndexIO.makeTimeFile(v8OutDir, ByteOrder.BIG_ENDIAN); bigEndianFile.delete(); out = Files.newOutputStreamSupplier(bigEndianFile, true); bigEndianTimeWriter.closeAndConsolidate(out); @@ -684,7 +684,7 @@ public class IndexMerger ioPeon.cleanup(); log.info( "outDir[%s] completed walk through of %,d rows in %,d millis.", - outDir, + v8OutDir, rowCount, System.currentTimeMillis() - startTime ); @@ -692,7 +692,7 @@ public class IndexMerger /************ Create Inverted Indexes *************/ startTime = System.currentTimeMillis(); - final File invertedFile = new File(outDir, "inverted.drd"); + final File invertedFile = new File(v8OutDir, "inverted.drd"); Files.touch(invertedFile); out = Files.newOutputStreamSupplier(invertedFile, true); for (int i = 0; i < mergedDimensions.size(); ++i) { @@ -725,10 +725,7 @@ public class IndexMerger } ConciseSet bitset = new ConciseSet(); - for (Integer row : CombiningIterable.createSplatted( - convertedInverteds, - Ordering.natural().nullsFirst() - )) { + for (Integer row : CombiningIterable.createSplatted(convertedInverteds, Ordering.natural().nullsFirst())) { if (row != INVALID_ROW) { bitset.add(row); } @@ -744,7 +741,7 @@ public class IndexMerger log.info("Completed dimension[%s] in %,d millis.", dimension, System.currentTimeMillis() - dimStartTime); } - log.info("outDir[%s] completed inverted.drd in %,d millis.", outDir, System.currentTimeMillis() - startTime); + log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); final ArrayList expectedFiles = Lists.newArrayList( Iterables.concat( @@ -759,18 +756,18 @@ public class IndexMerger Map files = Maps.newLinkedHashMap(); for (String fileName : expectedFiles) { - files.put(fileName, new File(outDir, fileName)); + files.put(fileName, new File(v8OutDir, fileName)); } - File smooshDir = new File(outDir, "smoosher"); + File smooshDir = new File(v8OutDir, "smoosher"); smooshDir.mkdir(); - for (Map.Entry entry : Smoosh.smoosh(outDir, smooshDir, files).entrySet()) { + for (Map.Entry entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) { entry.getValue().delete(); } for (File file : smooshDir.listFiles()) { - Files.move(file, new File(outDir, file.getName())); + Files.move(file, new File(v8OutDir, file.getName())); } if (!smooshDir.delete()) { @@ -780,12 +777,15 @@ public class IndexMerger createIndexDrdFile( IndexIO.CURRENT_VERSION_ID, - outDir, + 
v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.stringStrategy), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.stringStrategy), dataInterval ); + IndexIO.DefaultIndexIOHandler.convertV8toV9(v8OutDir, outDir); + FileUtils.deleteDirectory(v8OutDir); + return outDir; } From 6fc350bfbac649060546f1802cd5c2fc541411a9 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 16 Jan 2013 11:30:24 -0800 Subject: [PATCH 06/12] YeOldePlumberSchool: Populate dimension metadata in segment descriptor --- .../druid/merger/common/index/YeOldePlumberSchool.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java index a496f560970..77ce51ccb73 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java @@ -21,6 +21,7 @@ package com.metamx.druid.merger.common.index; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -138,7 +139,13 @@ public class YeOldePlumberSchool implements PlumberSchool IndexMerger.mergeMMapped(indexes, schema.getAggregators(), fileToUpload); } - final DataSegment segmentToUpload = theSink.getSegment().withVersion(version); + // Map merged segment so we can extract dimensions + final MMappedIndex mappedSegment = IndexIO.mapDir(fileToUpload); + + final DataSegment segmentToUpload = theSink.getSegment() + .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())) + .withVersion(version); + segmentPusher.push(fileToUpload, segmentToUpload); log.info( From c8cb96b00654d9de38e9bf4e28675984b20ae57f Mon Sep 17 00:00:00 2001 From: Eric Tschetter Date: Wed, 16 Jan 2013 17:10:33 -0600 Subject: [PATCH 07/12] 1) Remove vast majority of usages of IndexIO.mapDir() and deprecated it. 
IndexIO.loadIndex() is the new IndexIO.mapDir() 2) Fix bug with IndexMerger and null columns 3) Add QueryableIndexIndexableAdapter so that QueryableIndexes can be merged 4) Adjust twitter example to have multiple values for each hash tag 5) Adjusted GroupByQueryEngine to just drop dimensions that don't exist instead of throwing an NPE --- examples/twitter/group_by_query.body | 16 +- .../TwitterSpritzerFirehoseFactory.java | 47 +-- examples/twitter/twitter_realtime.spec | 4 +- .../metamx/druid/index/QueryableIndex.java | 1 + .../druid/index/SimpleQueryableIndex.java | 6 + .../com/metamx/druid/index/column/Column.java | 1 + .../druid/index/column/ComplexColumn.java | 1 + .../druid/index/column/ComplexColumnImpl.java | 12 +- .../index/column/DictionaryEncodedColumn.java | 1 + .../druid/index/column/FloatColumn.java | 13 +- .../druid/index/column/GenericColumn.java | 2 + .../index/column/IndexedComplexColumn.java | 10 +- .../column/IndexedFloatsGenericColumn.java | 12 + .../column/IndexedLongsGenericColumn.java | 12 + .../metamx/druid/index/column/LongColumn.java | 13 +- .../druid/index/column/SimpleColumn.java | 14 + .../column/SimpleDictionaryEncodedColumn.java | 8 +- .../index/column/StringMultiValueColumn.java | 12 + .../serde/ComplexColumnPartSupplier.java | 5 +- .../v1/CompressedFloatsIndexedSupplier.java | 5 + .../druid/index/v1/IncrementalIndex.java | 2 +- .../com/metamx/druid/index/v1/IndexIO.java | 65 ++++- .../druid/indexer/IndexGeneratorJob.java | 8 +- .../common/index/YeOldePlumberSchool.java | 10 +- .../druid/merger/common/task/AppendTask.java | 6 +- .../merger/common/task/DefaultMergeTask.java | 10 +- .../druid/realtime/RealtimePlumberSchool.java | 23 +- .../v1/ComplexMetricColumnSerializer.java | 16 +- .../index/v1/FloatMetricColumnSerializer.java | 35 +-- .../metamx/druid/index/v1/IndexMerger.java | 61 ++-- .../index/v1/MMappedIndexQueryableIndex.java | 8 +- .../v1/QueryableIndexIndexableAdapter.java | 267 ++++++++++++++++++ .../druid/query/group/GroupByQueryEngine.java | 5 +- .../metamx/druid/index/v1/EmptyIndexTest.java | 12 +- .../druid/index/v1/IndexMergerTest.java | 27 +- .../com/metamx/druid/index/v1/TestIndex.java | 4 +- 36 files changed, 570 insertions(+), 184 deletions(-) create mode 100644 server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java diff --git a/examples/twitter/group_by_query.body b/examples/twitter/group_by_query.body index 4ea7806e94c..e0607aa1554 100644 --- a/examples/twitter/group_by_query.body +++ b/examples/twitter/group_by_query.body @@ -2,19 +2,11 @@ "queryType": "groupBy", "dataSource": "twitterstream", "granularity": "all", - "dimensions": ["lang"], + "dimensions": ["lang", "utc_offset"], "aggregations":[ - { "type": "count", "name": "rows"}, - { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}, - - { "type": "max", "fieldName": "max_statuses_count", "name": "theMaxStatusesCount"}, - { "type": "max", "fieldName": "max_retweet_count", "name": "theMaxRetweetCount"}, - - { "type": "max", "fieldName": "max_friends_count", "name": "theMaxFriendsCount"}, - { "type": "max", "fieldName": "max_follower_count", "name": "theMaxFollowerCount"}, - - { "type": "doubleSum", "fieldName": "total_statuses_count", "name": "total_tweets_all_time"} - + { "type": "count", "name": "rows"}, + { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"} ], + "filter": { "type": "selector", "dimension": "lang", "value": "en" }, "intervals":["2012-10-01T00:00/2020-01-01T00"] } diff --git 
a/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java b/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java index 826218afbd8..992cd239487 100644 --- a/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java +++ b/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java @@ -1,25 +1,34 @@ package druid.examples.twitter; +import com.google.common.collect.Lists; import com.metamx.common.logger.Logger; import com.metamx.druid.input.InputRow; import com.metamx.druid.input.MapBasedInputRow; import com.metamx.druid.realtime.Firehose; import com.metamx.druid.realtime.FirehoseFactory; import org.codehaus.jackson.annotate.JsonCreator; -import org.codehaus.jackson.annotate.JsonTypeName; import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.ObjectMapper; -import twitter4j.*; +import org.codehaus.jackson.annotate.JsonTypeName; +import twitter4j.ConnectionLifeCycleListener; +import twitter4j.HashtagEntity; +import twitter4j.Status; +import twitter4j.StatusDeletionNotice; +import twitter4j.StatusListener; +import twitter4j.TwitterStream; +import twitter4j.TwitterStreamFactory; +import twitter4j.User; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; -import static java.lang.Thread.*; +import static java.lang.Thread.sleep; /** @@ -241,30 +250,26 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory { } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } - //log.info("twitterStatus: "+ status.getCreatedAt() + " @" + status.getUser().getScreenName() + " - " + status.getText());//DEBUG - - // theMap.put("twid", status.getUser().getScreenName()); - // theMap.put("msg", status.getText()); // ToDo: verify encoding HashtagEntity[] hts = status.getHashtagEntities(); if (hts != null && hts.length > 0) { - // ToDo: get all the hash tags instead of just the first one - theMap.put("htags", hts[0].getText()); - } else { - theMap.put("htags", null); + List hashTags = Lists.newArrayListWithExpectedSize(hts.length); + for (HashtagEntity ht : hts) { + hashTags.add(ht.getText()); + } + + theMap.put("htags", Arrays.asList(hashTags.get(0))); } long retweetCount = status.getRetweetCount(); theMap.put("retweet_count", retweetCount); - User u = status.getUser(); - if (u != null) { - theMap.put("follower_count", u.getFollowersCount()); - theMap.put("friends_count", u.getFriendsCount()); - theMap.put("lang", u.getLang()); - theMap.put("utc_offset", u.getUtcOffset()); // resolution in seconds, -1 if not available? - theMap.put("statuses_count", u.getStatusesCount()); - } else { - log.error("status.getUser() is null"); + User user = status.getUser(); + if (user != null) { + theMap.put("follower_count", user.getFollowersCount()); + theMap.put("friends_count", user.getFriendsCount()); + theMap.put("lang", user.getLang()); + theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available? 
+ theMap.put("statuses_count", user.getStatusesCount()); } return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap); diff --git a/examples/twitter/twitter_realtime.spec b/examples/twitter/twitter_realtime.spec index 14d34421c6b..00b1707028d 100644 --- a/examples/twitter/twitter_realtime.spec +++ b/examples/twitter/twitter_realtime.spec @@ -31,8 +31,8 @@ "firehose": { "type": "twitzer", - "maxEventCount": 50000, - "maxRunMinutes": 10 + "maxEventCount": 500000, + "maxRunMinutes": 120 }, "plumber": { diff --git a/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java b/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java index 0c4f57cf9b9..82cee9e54dd 100644 --- a/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java +++ b/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java @@ -28,6 +28,7 @@ import org.joda.time.Interval; public interface QueryableIndex extends ColumnSelector { public Interval getDataInterval(); + public int getNumRows(); public Indexed getColumnNames(); public Indexed getAvailableDimensions(); } diff --git a/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java b/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java index 7e8a6ba7635..2f60b73adc6 100644 --- a/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java +++ b/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java @@ -56,6 +56,12 @@ public class SimpleQueryableIndex implements QueryableIndex return dataInterval; } + @Override + public int getNumRows() + { + return timeColumn.getLength(); + } + @Override public Indexed getColumnNames() { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/Column.java b/index-common/src/main/java/com/metamx/druid/index/column/Column.java index 5702b42206e..fa418a3398a 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/Column.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/Column.java @@ -25,6 +25,7 @@ public interface Column { public ColumnCapabilities getCapabilities(); + public int getLength(); public DictionaryEncodedColumn getDictionaryEncoding(); public RunLengthColumn getRunLengthColumn(); public GenericColumn getGenericColumn(); diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java index 7d21987f6f9..f7cfb706e44 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java @@ -26,5 +26,6 @@ import java.io.Closeable; public interface ComplexColumn extends Closeable { public Class getClazz(); + public String getTypeName(); public Object getRowValue(int rowNum); } diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java index 8f9a302adc5..46f665c57c6 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java @@ -29,10 +29,12 @@ public class ComplexColumnImpl extends AbstractColumn .setType(ValueType.COMPLEX); private final Indexed column; + private final String typeName; - public ComplexColumnImpl(Indexed column) + public ComplexColumnImpl(String typeName, Indexed column) { this.column = column; + 
this.typeName = typeName; } @Override @@ -41,9 +43,15 @@ public class ComplexColumnImpl extends AbstractColumn return CAPABILITIES; } + @Override + public int getLength() + { + return column.size(); + } + @Override public ComplexColumn getComplexColumn() { - return new IndexedComplexColumn(column); + return new IndexedComplexColumn(typeName, column); } } diff --git a/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java index e2bd177a25c..9301734f970 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java @@ -26,6 +26,7 @@ import com.metamx.druid.kv.IndexedInts; public interface DictionaryEncodedColumn { public int size(); + public boolean hasMultipleValues(); public int getSingleValueRow(int rowNum); public IndexedInts getMultiValueRow(int rowNum); public String lookupName(int id); diff --git a/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java index 491baa3c41d..44ffd7e970a 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java @@ -19,8 +19,7 @@ package com.metamx.druid.index.column; -import com.google.common.base.Supplier; -import com.metamx.druid.kv.IndexedFloats; +import com.metamx.druid.index.v1.CompressedFloatsIndexedSupplier; /** */ @@ -29,9 +28,9 @@ public class FloatColumn extends AbstractColumn private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() .setType(ValueType.FLOAT); - private final Supplier column; + private final CompressedFloatsIndexedSupplier column; - public FloatColumn(Supplier column) + public FloatColumn(CompressedFloatsIndexedSupplier column) { this.column = column; } @@ -42,6 +41,12 @@ public class FloatColumn extends AbstractColumn return CAPABILITIES; } + @Override + public int getLength() + { + return column.size(); + } + @Override public GenericColumn getGenericColumn() { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java index 58ea4b86d98..c41b4906bd1 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java @@ -30,6 +30,8 @@ import java.io.Closeable; public interface GenericColumn extends Closeable { public int size(); + public ValueType getType(); + public boolean hasMultipleValues(); public String getStringSingleValueRow(int rowNum); public Indexed getStringMultiValueRow(int rowNum); diff --git a/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java index 192734aeec2..bafb6977dd0 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java @@ -28,12 +28,14 @@ import java.io.IOException; public class IndexedComplexColumn implements ComplexColumn { private final Indexed column; + private final String typeName; public IndexedComplexColumn( - Indexed column + String typeName, Indexed column ) { this.column = 
column; + this.typeName = typeName; } @Override public Class getClazz() @@ -41,6 +43,12 @@ public class IndexedComplexColumn implements ComplexColumn return column.getClazz(); } + @Override + public String getTypeName() + { + return typeName; + } + @Override public Object getRowValue(int rowNum) { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java index 815ff84c583..423b046e106 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java @@ -43,6 +43,18 @@ public class IndexedFloatsGenericColumn implements GenericColumn return column.size(); } + @Override + public ValueType getType() + { + return ValueType.FLOAT; + } + + @Override + public boolean hasMultipleValues() + { + return false; + } + @Override public String getStringSingleValueRow(int rowNum) { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java index 01ff6063870..0e96a63924b 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java @@ -43,6 +43,18 @@ public class IndexedLongsGenericColumn implements GenericColumn return column.size(); } + @Override + public ValueType getType() + { + return ValueType.LONG; + } + + @Override + public boolean hasMultipleValues() + { + return false; + } + @Override public String getStringSingleValueRow(int rowNum) { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java index 76d74ba377e..1ec297ea61c 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java @@ -19,8 +19,7 @@ package com.metamx.druid.index.column; -import com.google.common.base.Supplier; -import com.metamx.druid.kv.IndexedLongs; +import com.metamx.druid.index.v1.CompressedLongsIndexedSupplier; /** */ @@ -29,9 +28,9 @@ public class LongColumn extends AbstractColumn private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() .setType(ValueType.LONG); - private final Supplier column; + private final CompressedLongsIndexedSupplier column; - public LongColumn(Supplier column) + public LongColumn(CompressedLongsIndexedSupplier column) { this.column = column; } @@ -42,6 +41,12 @@ public class LongColumn extends AbstractColumn return CAPABILITIES; } + @Override + public int getLength() + { + return column.size(); + } + @Override public GenericColumn getGenericColumn() { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java index 2d3fae51abf..a3884203227 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java @@ -20,6 +20,7 @@ package com.metamx.druid.index.column; import com.google.common.base.Supplier; +import com.google.common.io.Closeables; /** */ @@ -55,6 +56,19 @@ class SimpleColumn implements Column return capabilities; } + @Override + 
public int getLength() + { + GenericColumn column = null; + try { + column = genericColumn.get(); + return column.size(); + } + finally { + Closeables.closeQuietly(column); + } + } + @Override public DictionaryEncodedColumn getDictionaryEncoding() { diff --git a/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java index 87bc5bb6a2d..7a28a53b0af 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java @@ -46,7 +46,13 @@ public class SimpleDictionaryEncodedColumn implements DictionaryEncodedColumn @Override public int size() { - return column == null ? multiValueColumn.size() : column.size(); + return hasMultipleValues() ? multiValueColumn.size() : column.size(); + } + + @Override + public boolean hasMultipleValues() + { + return column == null; } @Override diff --git a/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java index 7d52e42ba4f..053bcee1956 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java @@ -55,6 +55,12 @@ public class StringMultiValueColumn extends AbstractColumn return CAPABILITIES; } + @Override + public int getLength() + { + return column.size(); + } + @Override public DictionaryEncodedColumn getDictionaryEncoding() { @@ -66,6 +72,12 @@ public class StringMultiValueColumn extends AbstractColumn return column.size(); } + @Override + public boolean hasMultipleValues() + { + return true; + } + @Override public int getSingleValueRow(int rowNum) { diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java index cb1a4bea395..4a4cfce4f77 100644 --- a/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java +++ b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java @@ -32,8 +32,7 @@ public class ComplexColumnPartSupplier implements Supplier private final String typeName; public ComplexColumnPartSupplier( - final GenericIndexed complexType, - final String typeName + final String typeName, final GenericIndexed complexType ) { this.complexType = complexType; this.typeName = typeName; @@ -42,6 +41,6 @@ public class ComplexColumnPartSupplier implements Supplier @Override public ComplexColumn get() { - return new IndexedComplexColumn(complexType); + return new IndexedComplexColumn(typeName, complexType); } } diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java b/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java index 2ff8f747adc..1def2af031f 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java @@ -59,6 +59,11 @@ public class CompressedFloatsIndexedSupplier implements Supplier this.baseFloatBuffers = baseFloatBuffers; } + public int size() + { + return totalSize; + } + @Override public IndexedFloats get() { diff --git 
a/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java b/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java index 31ed9efbbed..624d6d4b375 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java @@ -524,7 +524,7 @@ public class IncrementalIndex implements Iterable<Row> public String get(String value) { - return poorMansInterning.get(value); + return value == null ? null : poorMansInterning.get(value); } public int getId(String value) diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java index 6836b9233c5..afedbb6a742 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java @@ -21,6 +21,7 @@ package com.metamx.druid.index.v1; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -58,6 +59,7 @@ import com.metamx.druid.kv.IndexedIterable; import com.metamx.druid.kv.VSizeIndexed; import com.metamx.druid.kv.VSizeIndexedInts; import com.metamx.druid.utils.SerializerUtils; +import it.uniroma3.mat.extendedset.intset.ConciseSet; import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import org.codehaus.jackson.map.ObjectMapper; import org.joda.time.Interval; @@ -70,6 +72,7 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.AbstractList; +import java.util.Arrays; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -99,7 +102,7 @@ public class IndexIO private static final Logger log = new Logger(IndexIO.class); private static final SerializerUtils serializerUtils = new SerializerUtils(); - private static final ByteOrder BYTE_ORDER = ByteOrder.nativeOrder(); + public static final ByteOrder BYTE_ORDER = ByteOrder.nativeOrder(); // This should really be provided by DI, should be changed once we switch around to using a DI framework private static final ObjectMapper mapper = new DefaultObjectMapper(); @@ -120,6 +123,7 @@ public class IndexIO return handler.canBeMapped(inDir); } + @Deprecated public static MMappedIndex mapDir(final File inDir) throws IOException { init(); @@ -383,22 +387,70 @@ public class IndexIO serializerUtils.writeString(nameBAOS, dimension); outParts.add(ByteBuffer.wrap(nameBAOS.toByteArray())); - final GenericIndexed<String> dictionary = GenericIndexed.read( + GenericIndexed<String> dictionary = GenericIndexed.read( dimBuffer, GenericIndexed.stringStrategy ); + VSizeIndexedInts singleValCol = null; VSizeIndexed multiValCol = VSizeIndexed.readFromByteBuffer(dimBuffer.asReadOnlyBuffer()); + GenericIndexed<ImmutableConciseSet> bitmaps = bitmapIndexes.get(dimension); + boolean onlyOneValue = true; - for (VSizeIndexedInts rowValue : multiValCol) { + ConciseSet nullsSet = null; + for (int i = 0; i < multiValCol.size(); ++i) { + VSizeIndexedInts rowValue = multiValCol.get(i); if (!onlyOneValue) { break; } if (rowValue.size() > 1) { onlyOneValue = false; } + if (rowValue.size() == 0) { + if (nullsSet == null) { + nullsSet = new ConciseSet(); + } + nullsSet.add(i); + } } if (onlyOneValue) { + log.info("Dimension[%s] is single value, converting...", dimension); + final boolean bumpedDictionary; + if
(nullsSet != null) { + log.info("Dimension[%s] has null rows.", dimension); + final ImmutableConciseSet theNullSet = ImmutableConciseSet.newImmutableFromMutable(nullsSet); + + if (dictionary.get(0) != null) { + log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension); + bumpedDictionary = true; + final List<String> nullList = Lists.newArrayList(); + nullList.add(null); + + dictionary = GenericIndexed.fromIterable( + Iterables.concat(nullList, dictionary), + GenericIndexed.stringStrategy + ); + + bitmaps = GenericIndexed.fromIterable( + Iterables.concat(Arrays.asList(theNullSet), bitmaps), + ConciseCompressedIndexedInts.objectStrategy + ); + } + else { + bumpedDictionary = false; + bitmaps = GenericIndexed.fromIterable( + Iterables.concat( + Arrays.asList(ImmutableConciseSet.union(theNullSet, bitmaps.get(0))), + Iterables.skip(bitmaps, 1) + ), + ConciseCompressedIndexedInts.objectStrategy + ); + } + } + else { + bumpedDictionary = false; + } + final VSizeIndexed finalMultiValCol = multiValCol; singleValCol = VSizeIndexedInts.fromList( new AbstractList<Integer>() { @Override public Integer get(int index) { - return finalMultiValCol.get(index).get(0); + final VSizeIndexedInts ints = finalMultiValCol.get(index); + return ints.size() == 0 ? 0 : ints.get(0) + (bumpedDictionary ? 1 : 0); } @Override @@ -406,7 +458,8 @@ public class IndexIO } builder.addSerde( - new DictionaryEncodedColumnPartSerde(dictionary, singleValCol, multiValCol, bitmapIndexes.get(dimension)) + new DictionaryEncodedColumnPartSerde(dictionary, singleValCol, multiValCol, bitmaps) ); final ColumnDescriptor serdeficator = builder.build(); @@ -587,7 +640,7 @@ public class IndexIO .setType(ValueType.COMPLEX) .setComplexColumn( new ComplexColumnPartSupplier( - (GenericIndexed) metricHolder.complexType, metricHolder.getTypeName() + metricHolder.getTypeName(), (GenericIndexed) metricHolder.complexType ) ) .build() diff --git a/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java b/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java index 34d743fc9be..28dacd1ca9a 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java @@ -38,10 +38,10 @@ import com.metamx.common.parsers.Parser; import com.metamx.common.parsers.ParserUtils; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IncrementalIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; import com.metamx.druid.indexer.rollup.DataRollupSpec; import com.metamx.druid.input.MapBasedInputRow; import org.apache.commons.io.FileUtils; @@ -359,7 +359,7 @@ public class IndexGeneratorJob implements Jobby log.info("%,d lines completed.", lineCount); - List<MMappedIndex> indexes = Lists.newArrayListWithCapacity(indexCount); + List<QueryableIndex> indexes = Lists.newArrayListWithCapacity(indexCount); final File mergedBase; if (toMerge.size() == 0) { @@ -389,9 +389,9 @@ public class IndexGeneratorJob implements Jobby toMerge.add(finalFile); for (File file : toMerge) { - indexes.add(IndexIO.mapDir(file)); + indexes.add(IndexIO.loadIndex(file)); } - mergedBase = IndexMerger.mergeMMapped( + mergedBase = IndexMerger.mergeQueryableIndex( indexes, aggs, new File(baseFlushFile, "merged"), new IndexMerger.ProgressIndicator() { @Override
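The single-value conversion in the IndexIO hunk above rests on one invariant: if any row is empty (null), a null entry must occupy dictionary id 0, and when the dictionary had to be expanded to add that entry, every pre-existing id shifts up by one. A minimal standalone sketch of the remapping rule (hypothetical class and method names, not code from the patch):

class DictionaryBumpSketch
{
  // Mirrors the AbstractList shim above: empty rows resolve to the null
  // entry at id 0; non-empty rows keep their first value, offset by one
  // when a null entry was prepended to the dictionary (bumpedDictionary).
  static int singleValueId(int[] rowValues, boolean bumpedDictionary)
  {
    if (rowValues.length == 0) {
      return 0;
    }
    return rowValues[0] + (bumpedDictionary ? 1 : 0);
  }
}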
diff --git a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java index a496f560970..345af0207ba 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java @@ -27,16 +27,16 @@ import com.google.common.collect.Sets; import com.metamx.common.logger.Logger; import com.metamx.druid.Query; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.realtime.FireDepartmentMetrics; import com.metamx.druid.realtime.FireHydrant; import com.metamx.druid.realtime.Plumber; import com.metamx.druid.realtime.PlumberSchool; import com.metamx.druid.realtime.Schema; -import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.realtime.Sink; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -129,13 +129,13 @@ public class YeOldePlumberSchool implements PlumberSchool } else if(spilled.size() == 1) { fileToUpload = Iterables.getOnlyElement(spilled); } else { - List indexes = Lists.newArrayList(); + List indexes = Lists.newArrayList(); for (final File oneSpill : spilled) { - indexes.add(IndexIO.mapDir(oneSpill)); + indexes.add(IndexIO.loadIndex(oneSpill)); } fileToUpload = new File(tmpSegmentDir, "merged"); - IndexMerger.mergeMMapped(indexes, schema.getAggregators(), fileToUpload); + IndexMerger.mergeQueryableIndex(indexes, schema.getAggregators(), fileToUpload); } final DataSegment segmentToUpload = theSink.getSegment().withVersion(version); diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java index f9898070aa1..f1153e5c43c 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java @@ -30,7 +30,7 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; import com.metamx.druid.index.v1.IndexableAdapter; -import com.metamx.druid.index.v1.MMappedIndexAdapter; +import com.metamx.druid.index.v1.QueryableIndexIndexableAdapter; import com.metamx.druid.index.v1.Rowboat; import com.metamx.druid.index.v1.RowboatFilteringIndexAdapter; import org.codehaus.jackson.annotate.JsonCreator; @@ -90,8 +90,8 @@ public class AppendTask extends MergeTask for (final SegmentToMergeHolder holder : segmentsToMerge) { adapters.add( new RowboatFilteringIndexAdapter( - new MMappedIndexAdapter( - IndexIO.mapDir(holder.getFile()) + new QueryableIndexIndexableAdapter( + IndexIO.loadIndex(holder.getFile()) ), new Predicate() { diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java index 6dfc95c2271..e17db4b980e 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java @@ -25,9 +25,9 @@ import com.google.common.collect.ImmutableList; import 
com.google.common.collect.Lists; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -57,16 +57,16 @@ public class DefaultMergeTask extends MergeTask public File merge(final Map segments, final File outDir) throws Exception { - return IndexMerger.mergeMMapped( + return IndexMerger.mergeQueryableIndex( Lists.transform( ImmutableList.copyOf(segments.values()), - new Function() + new Function() { @Override - public MMappedIndex apply(@Nullable File input) + public QueryableIndex apply(@Nullable File input) { try { - return IndexIO.mapDir(input); + return IndexIO.loadIndex(input); } catch (Exception e) { throw Throwables.propagate(e); diff --git a/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java b/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java index 603b9ee1a36..02bd6acbb4c 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java @@ -308,30 +308,21 @@ public class RealtimePlumberSchool implements PlumberSchool final File mergedFile; try { - List indexes = Lists.newArrayList(); + List indexes = Lists.newArrayList(); for (FireHydrant fireHydrant : sink) { Segment segment = fireHydrant.getSegment(); final QueryableIndex queryableIndex = segment.asQueryableIndex(); - if (queryableIndex instanceof MMappedIndexQueryableIndex) { - log.info("Adding hydrant[%s]", fireHydrant); - indexes.add(((MMappedIndexQueryableIndex) queryableIndex).getIndex()); - } - else { - log.makeAlert("[%s] Failure to merge-n-push", schema.getDataSource()) - .addData("type", "Unknown segment type") - .addData("adapterClass", segment.getClass().toString()) - .emit(); - return; - } + log.info("Adding hydrant[%s]", fireHydrant); + indexes.add(queryableIndex); } - mergedFile = IndexMerger.mergeMMapped( + mergedFile = IndexMerger.mergeQueryableIndex( indexes, schema.getAggregators(), new File(computePersistDir(schema, interval), "merged") ); - MMappedIndex index = IndexIO.mapDir(mergedFile); + QueryableIndex index = IndexIO.loadIndex(mergedFile); DataSegment segment = segmentPusher.push( mergedFile, @@ -503,9 +494,7 @@ public class RealtimePlumberSchool implements PlumberSchool new File(computePersistDir(schema, interval), String.valueOf(indexToPersist.getCount())) ); - indexToPersist.swapSegment( - new QueryableIndexSegment(null, new MMappedIndexQueryableIndex(IndexIO.mapDir(persistedFile))) - ); + indexToPersist.swapSegment(new QueryableIndexSegment(null, IndexIO.loadIndex(persistedFile))); return numRows; } diff --git a/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java b/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java index 526877176a9..d09581c1c5b 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java +++ b/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java @@ -19,7 +19,6 @@ package com.metamx.druid.index.v1; -import com.google.common.io.ByteStreams; import com.google.common.io.Files; import com.metamx.druid.index.v1.serde.ComplexMetricSerde; import com.metamx.druid.kv.FlattenedArrayWriter; @@ -27,7 
+26,6 @@ import com.metamx.druid.kv.IOPeon; import java.io.File; import java.io.IOException; -import java.nio.ByteOrder; /** */ @@ -75,18 +73,12 @@ public class ComplexMetricColumnSerializer implements MetricColumnSerializer { writer.close(); - final File littleEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.LITTLE_ENDIAN); - littleEndianFile.delete(); + final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + outFile.delete(); MetricHolder.writeComplexMetric( - Files.newOutputStreamSupplier(littleEndianFile, true), metricName, serde.getTypeName(), writer - ); - IndexIO.checkFileSize(littleEndianFile); - - final File bigEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.BIG_ENDIAN); - ByteStreams.copy( - Files.newInputStreamSupplier(littleEndianFile), - Files.newOutputStreamSupplier(bigEndianFile, false) + Files.newOutputStreamSupplier(outFile, true), metricName, serde.getTypeName(), writer ); + IndexIO.checkFileSize(outFile); writer = null; } diff --git a/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java b/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java index 8207897e502..20ec5a4d30d 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java +++ b/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java @@ -24,7 +24,6 @@ import com.metamx.druid.kv.IOPeon; import java.io.File; import java.io.IOException; -import java.nio.ByteOrder; /** */ @@ -34,8 +33,7 @@ public class FloatMetricColumnSerializer implements MetricColumnSerializer private final IOPeon ioPeon; private final File outDir; - private CompressedFloatsSupplierSerializer littleMetricsWriter; - private CompressedFloatsSupplierSerializer bigEndianMetricsWriter; + private CompressedFloatsSupplierSerializer writer; public FloatMetricColumnSerializer( String metricName, @@ -51,43 +49,30 @@ public class FloatMetricColumnSerializer implements MetricColumnSerializer @Override public void open() throws IOException { - littleMetricsWriter = CompressedFloatsSupplierSerializer.create( - ioPeon, String.format("%s_little", metricName), ByteOrder.LITTLE_ENDIAN - ); - bigEndianMetricsWriter = CompressedFloatsSupplierSerializer.create( - ioPeon, String.format("%s_big", metricName), ByteOrder.BIG_ENDIAN + writer = CompressedFloatsSupplierSerializer.create( + ioPeon, String.format("%s_little", metricName), IndexIO.BYTE_ORDER ); - littleMetricsWriter.open(); - bigEndianMetricsWriter.open(); + writer.open(); } @Override public void serialize(Object obj) throws IOException { float val = (obj == null) ? 
0 : ((Number) obj).floatValue(); - littleMetricsWriter.add(val); - bigEndianMetricsWriter.add(val); + writer.add(val); } @Override public void close() throws IOException { - final File littleEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.LITTLE_ENDIAN); - littleEndianFile.delete(); + final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + outFile.delete(); MetricHolder.writeFloatMetric( - Files.newOutputStreamSupplier(littleEndianFile, true), metricName, littleMetricsWriter + Files.newOutputStreamSupplier(outFile, true), metricName, writer ); - IndexIO.checkFileSize(littleEndianFile); + IndexIO.checkFileSize(outFile); - final File bigEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.BIG_ENDIAN); - bigEndianFile.delete(); - MetricHolder.writeFloatMetric( - Files.newOutputStreamSupplier(bigEndianFile, true), metricName, bigEndianMetricsWriter - ); - IndexIO.checkFileSize(bigEndianFile); - - littleMetricsWriter = null; - bigEndianMetricsWriter = null; + writer = null; } } diff --git a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java index 6f9892ee805..4eb8db5fcc7 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java +++ b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java @@ -44,6 +44,7 @@ import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.aggregation.ToLowerCaseAggregatorFactory; import com.metamx.druid.guava.FileOutputSupplier; import com.metamx.druid.guava.GuavaUtils; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.serde.ComplexMetricSerde; import com.metamx.druid.index.v1.serde.ComplexMetrics; import com.metamx.druid.kv.ConciseCompressedIndexedInts; @@ -75,6 +76,7 @@ import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.TreeSet; @@ -139,26 +141,26 @@ public class IndexMerger ); } - public static File mergeMMapped( - List<MMappedIndex> indexes, final AggregatorFactory[] metricAggs, File outDir + public static File mergeQueryableIndex( + List<QueryableIndex> indexes, final AggregatorFactory[] metricAggs, File outDir ) throws IOException { - return mergeMMapped(indexes, metricAggs, outDir, new NoopProgressIndicator()); + return mergeQueryableIndex(indexes, metricAggs, outDir, new NoopProgressIndicator()); } - public static File mergeMMapped( - List<MMappedIndex> indexes, final AggregatorFactory[] metricAggs, File outDir, ProgressIndicator progress + public static File mergeQueryableIndex( + List<QueryableIndex> indexes, final AggregatorFactory[] metricAggs, File outDir, ProgressIndicator progress ) throws IOException { return merge( Lists.transform( indexes, - new Function<MMappedIndex, IndexableAdapter>() + new Function<QueryableIndex, IndexableAdapter>() { @Override - public IndexableAdapter apply(@Nullable final MMappedIndex input) + public IndexableAdapter apply(final QueryableIndex input) { - return new MMappedIndexAdapter(input); + return new QueryableIndexIndexableAdapter(input); } } ), @@ -392,6 +394,7 @@ public class IndexMerger } final Interval dataInterval; File v8OutDir = new File(outDir, "v8-tmp"); + v8OutDir.mkdirs(); /************* Main index.drd file **************/ progress.progress(); @@ -573,15 +576,11 @@ public class IndexMerger Iterable<Rowboat> theRows = rowMergerFn.apply(boats); - CompressedLongsSupplierSerializer littleEndianTimeWriter = CompressedLongsSupplierSerializer.create( - ioPeon,
"little_end_time", ByteOrder.LITTLE_ENDIAN - ); - CompressedLongsSupplierSerializer bigEndianTimeWriter = CompressedLongsSupplierSerializer.create( - ioPeon, "big_end_time", ByteOrder.BIG_ENDIAN + CompressedLongsSupplierSerializer timeWriter = CompressedLongsSupplierSerializer.create( + ioPeon, "little_end_time", IndexIO.BYTE_ORDER ); - littleEndianTimeWriter.open(); - bigEndianTimeWriter.open(); + timeWriter.open(); ArrayList forwardDimWriters = Lists.newArrayListWithCapacity(mergedDimensions.size()); for (String dimension : mergedDimensions) { @@ -621,8 +620,7 @@ for (Rowboat theRow : theRows) { progress.progress(); - littleEndianTimeWriter.add(theRow.getTimestamp()); - bigEndianTimeWriter.add(theRow.getTimestamp()); + timeWriter.add(theRow.getTimestamp()); final Object[] metrics = theRow.getMetrics(); for (int i = 0; i < metrics.length; ++i) { @@ -660,17 +658,11 @@ rowNumConversion.rewind(); } - final File littleEndianFile = IndexIO.makeTimeFile(v8OutDir, ByteOrder.LITTLE_ENDIAN); - littleEndianFile.delete(); - OutputSupplier<FileOutputStream> out = Files.newOutputStreamSupplier(littleEndianFile, true); - littleEndianTimeWriter.closeAndConsolidate(out); - IndexIO.checkFileSize(littleEndianFile); - - final File bigEndianFile = IndexIO.makeTimeFile(v8OutDir, ByteOrder.BIG_ENDIAN); - bigEndianFile.delete(); - out = Files.newOutputStreamSupplier(bigEndianFile, true); - bigEndianTimeWriter.closeAndConsolidate(out); - IndexIO.checkFileSize(bigEndianFile); + final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER); + timeFile.delete(); + OutputSupplier<FileOutputStream> out = Files.newOutputStreamSupplier(timeFile, true); + timeWriter.closeAndConsolidate(out); + IndexIO.checkFileSize(timeFile); for (int i = 0; i < mergedDimensions.size(); ++i) { forwardDimWriters.get(i).close(); @@ -746,11 +738,12 @@ public class IndexMerger final ArrayList<String> expectedFiles = Lists.newArrayList( Iterables.concat( Arrays.asList( - "index.drd", "inverted.drd", "time_BIG_ENDIAN.drd", "time_LITTLE_ENDIAN.drd" + "index.drd", "inverted.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER) ), Iterables.transform(mergedDimensions, GuavaUtils.formatFunction("dim_%s.drd")), - Iterables.transform(mergedMetrics, GuavaUtils.formatFunction("met_%s_LITTLE_ENDIAN.drd")), - Iterables.transform(mergedMetrics, GuavaUtils.formatFunction("met_%s_BIG_ENDIAN.drd")) + Iterables.transform( + mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)) + ) ) ); @@ -791,11 +784,13 @@ public class IndexMerger private static <T extends Comparable> ArrayList<T> mergeIndexed(final List<Iterable<T>> indexedLists) { - TreeSet<T> retVal = Sets.newTreeSet(Ordering.natural().nullsFirst()); + LinkedHashSet<T> retVal = Sets.newLinkedHashSet(); for (Iterable<T> indexedList : indexedLists) { for (T val : indexedList) { - retVal.add(val); + if (val != null) { + retVal.add(val); + } } } diff --git a/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexQueryableIndex.java b/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexQueryableIndex.java index 8aed193edd7..3c9d62d6776 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexQueryableIndex.java +++ b/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexQueryableIndex.java @@ -53,6 +53,12 @@ public class MMappedIndexQueryableIndex implements QueryableIndex return index.getDataInterval(); } + @Override + public int getNumRows() + { + return index.getTimestamps().size(); + } + @Override public Indexed<String> getColumnNames() { @@ -91,7 +97,7 @@ public class
MMappedIndexQueryableIndex implements QueryableIndex return new FloatColumn(metricHolder.floatType); } else { - return new ComplexColumnImpl(metricHolder.getComplexType()); + return new ComplexColumnImpl(metricHolder.getTypeName(), metricHolder.getComplexType()); } } } diff --git a/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java new file mode 100644 index 00000000000..72eddb9141c --- /dev/null +++ b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java @@ -0,0 +1,267 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.v1; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.common.io.Closeables; +import com.metamx.common.ISE; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.column.Column; +import com.metamx.druid.index.column.ComplexColumn; +import com.metamx.druid.index.column.DictionaryEncodedColumn; +import com.metamx.druid.index.column.GenericColumn; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.kv.ArrayBasedIndexedInts; +import com.metamx.druid.kv.ConciseCompressedIndexedInts; +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedInts; +import com.metamx.druid.kv.IndexedIterable; +import com.metamx.druid.kv.ListIndexed; +import org.joda.time.Interval; + +import java.io.Closeable; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +/** +*/ +public class QueryableIndexIndexableAdapter implements IndexableAdapter +{ + private final int numRows; + private final QueryableIndex input; + + public QueryableIndexIndexableAdapter(QueryableIndex input) + { + this.input = input; + numRows = input.getNumRows(); + } + + @Override + public Interval getDataInterval() + { + return input.getDataInterval(); + } + + @Override + public int getNumRows() + { + return numRows; + } + + @Override + public Indexed<String> getAvailableDimensions() + { + return input.getAvailableDimensions(); + } + + @Override + public Indexed<String> getAvailableMetrics() + { + final Set<String> columns = Sets.newLinkedHashSet(input.getColumnNames()); + final HashSet<String> dimensions = Sets.newHashSet(getAvailableDimensions()); + + return new ListIndexed<String>( + Lists.newArrayList(Sets.difference(columns, dimensions)), + String.class + ); + } + + @Override + public Indexed<String> getDimValueLookup(String dimension) + { + final DictionaryEncodedColumn dict = input.getColumn(dimension).getDictionaryEncoding(); + return new Indexed<String>() + { + @Override + public Class<? extends String> getClazz()
+ { + return String.class; + } + + @Override + public int size() + { + return dict.getCardinality(); + } + + @Override + public String get(int index) + { + return dict.lookupName(index); + } + + @Override + public int indexOf(String value) + { + return dict.lookupId(value); + } + + @Override + public Iterator<String> iterator() + { + return IndexedIterable.create(this).iterator(); + } + }; + } + + @Override + public Iterable<Rowboat> getRows() + { + return new Iterable<Rowboat>() + { + @Override + public Iterator<Rowboat> iterator() + { + return new Iterator<Rowboat>() + { + final GenericColumn timestamps = input.getTimeColumn().getGenericColumn(); + final Object[] metrics; + final Map<String, DictionaryEncodedColumn> dimensions; + + final int numMetrics = getAvailableMetrics().size(); + + int currRow = 0; + boolean done = false; + + { + dimensions = Maps.newLinkedHashMap(); + for (String dim : input.getAvailableDimensions()) { + dimensions.put(dim, input.getColumn(dim).getDictionaryEncoding()); + } + + final Indexed<String> availableMetrics = getAvailableMetrics(); + metrics = new Object[availableMetrics.size()]; + for (int i = 0; i < metrics.length; ++i) { + final Column column = input.getColumn(availableMetrics.get(i)); + final ValueType type = column.getCapabilities().getType(); + switch (type) { + case FLOAT: + metrics[i] = column.getGenericColumn(); + break; + case COMPLEX: + metrics[i] = column.getComplexColumn(); + break; + default: + throw new ISE("Cannot handle type[%s]", type); + } + } + } + + @Override + public boolean hasNext() + { + final boolean hasNext = currRow < numRows; + if (!hasNext && !done) { + Closeables.closeQuietly(timestamps); + for (Object metric : metrics) { + if (metric instanceof Closeable) { + Closeables.closeQuietly((Closeable) metric); + } + } + done = true; + } + return hasNext; + } + + @Override + public Rowboat next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + int[][] dims = new int[dimensions.size()][]; + int dimIndex = 0; + for (String dim : dimensions.keySet()) { + final DictionaryEncodedColumn dict = dimensions.get(dim); + final IndexedInts dimVals; + if (dict.hasMultipleValues()) { + dimVals = dict.getMultiValueRow(currRow); + } + else { + dimVals = new ArrayBasedIndexedInts(new int[]{dict.getSingleValueRow(currRow)}); + } + + int[] theVals = new int[dimVals.size()]; + for (int j = 0; j < theVals.length; ++j) { + theVals[j] = dimVals.get(j); + } + + dims[dimIndex++] = theVals; + } + + Object[] metricArray = new Object[numMetrics]; + for (int i = 0; i < metricArray.length; ++i) { + if (metrics[i] instanceof GenericColumn) { + metricArray[i] = ((GenericColumn) metrics[i]).getFloatSingleValueRow(currRow); + } + else if (metrics[i] instanceof ComplexColumn) { + metricArray[i] = ((ComplexColumn) metrics[i]).getRowValue(currRow); + } + } + + final Rowboat retVal = new Rowboat( + timestamps.getLongSingleValueRow(currRow), dims, metricArray, currRow + ); + + ++currRow; + + return retVal; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + @Override + public IndexedInts getInverteds(String dimension, String value) + { + return new ConciseCompressedIndexedInts( + input.getColumn(dimension).getBitmapIndex().getConciseSet(value) + ); + } + + @Override + public String getMetricType(String metric) + { + final Column column = input.getColumn(metric); + + final ValueType type = column.getCapabilities().getType(); + switch (type) { + case FLOAT: + return "float"; + case COMPLEX: + return column.getComplexColumn().getTypeName(); + default: + throw new
ISE("Unknown type[%s]", type); + } + } +} diff --git a/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java index 88cb5840950..788e1adb02a 100644 --- a/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java +++ b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java @@ -277,7 +277,10 @@ public class GroupByQueryEngine dimNames = new String[dimensionSpecs.size()]; for (int i = 0; i < dimensionSpecs.size(); ++i) { final DimensionSpec dimSpec = dimensionSpecs.get(i); - dimensions.add(cursor.makeDimensionSelector(dimSpec.getDimension())); + final DimensionSelector selector = cursor.makeDimensionSelector(dimSpec.getDimension()); + if (selector != null) { + dimensions.add(selector); + } dimNames[i] = dimSpec.getOutputName(); } diff --git a/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java b/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java index 01dd32a3097..55c4b7be6e5 100644 --- a/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java +++ b/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java @@ -23,12 +23,12 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.metamx.druid.QueryGranularity; import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.index.QueryableIndex; import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; import java.io.File; -import java.util.ArrayList; public class EmptyIndexTest { @@ -48,11 +48,11 @@ public class EmptyIndexTest IncrementalIndexAdapter emptyIndexAdapter = new IncrementalIndexAdapter(new Interval("2012-08-01/P3D"), emptyIndex); IndexMerger.merge(Lists.newArrayList(emptyIndexAdapter), new AggregatorFactory[0], tmpDir); - MMappedIndex emptyIndexMMapped = IndexIO.mapDir(tmpDir); + QueryableIndex emptyQueryableIndex = IndexIO.loadIndex(tmpDir); - Assert.assertEquals("getAvailableDimensions", 0, Iterables.size(emptyIndexMMapped.getAvailableDimensions())); - Assert.assertEquals("getAvailableMetrics", 0, Iterables.size(emptyIndexMMapped.getAvailableMetrics())); - Assert.assertEquals("getDataInterval", new Interval("2012-08-01/P3D"), emptyIndexMMapped.getDataInterval()); - Assert.assertEquals("getReadOnlyTimestamps", 0, emptyIndexMMapped.getReadOnlyTimestamps().size()); + Assert.assertEquals("getAvailableDimensions", 0, Iterables.size(emptyQueryableIndex.getAvailableDimensions())); + Assert.assertEquals("getAvailableMetrics", 0, Iterables.size(emptyQueryableIndex.getColumnNames())); + Assert.assertEquals("getDataInterval", new Interval("2012-08-01/P3D"), emptyQueryableIndex.getDataInterval()); + Assert.assertEquals("getReadOnlyTimestamps", 0, emptyQueryableIndex.getTimeColumn().getLength()); } } diff --git a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java index c3afa5eb94e..097762106a2 100644 --- a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java +++ b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java @@ -24,6 +24,7 @@ import com.google.common.collect.Lists; import com.google.common.io.Files; import com.metamx.druid.QueryGranularity; import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.input.MapBasedInputRow; import junit.framework.Assert; import org.apache.commons.io.FileUtils; @@ 
-44,11 +45,11 @@ public class IndexMergerTest final File tempDir = Files.createTempDir(); try { - MMappedIndex index = IndexIO.mapDir(IndexMerger.persist(toPersist, tempDir)); + QueryableIndex index = IndexIO.loadIndex(IndexMerger.persist(toPersist, tempDir)); - Assert.assertEquals(2, index.getTimestamps().size()); + Assert.assertEquals(2, index.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions())); - Assert.assertEquals(0, index.getAvailableMetrics().size()); + Assert.assertEquals(2, index.getColumnNames().size()); } finally { tempDir.delete(); @@ -84,25 +85,25 @@ public class IndexMergerTest final File tempDir2 = Files.createTempDir(); final File mergedDir = Files.createTempDir(); try { - MMappedIndex index1 = IndexIO.mapDir(IndexMerger.persist(toPersist1, tempDir1)); + QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tempDir1)); - Assert.assertEquals(2, index1.getTimestamps().size()); + Assert.assertEquals(2, index1.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index1.getAvailableDimensions())); - Assert.assertEquals(0, index1.getAvailableMetrics().size()); + Assert.assertEquals(2, index1.getColumnNames().size()); - MMappedIndex index2 = IndexIO.mapDir(IndexMerger.persist(toPersist2, tempDir2)); + QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tempDir2)); - Assert.assertEquals(2, index2.getTimestamps().size()); + Assert.assertEquals(2, index2.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index2.getAvailableDimensions())); - Assert.assertEquals(0, index2.getAvailableMetrics().size()); + Assert.assertEquals(2, index2.getColumnNames().size()); - MMappedIndex merged = IndexIO.mapDir( - IndexMerger.mergeMMapped(Arrays.asList(index1, index2), new AggregatorFactory[]{}, mergedDir) + QueryableIndex merged = IndexIO.loadIndex( + IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, mergedDir) ); - Assert.assertEquals(3, merged.getTimestamps().size()); + Assert.assertEquals(3, merged.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(merged.getAvailableDimensions())); - Assert.assertEquals(0, merged.getAvailableMetrics().size()); + Assert.assertEquals(2, merged.getColumnNames().size()); } finally { FileUtils.deleteQuietly(tempDir1); diff --git a/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java b/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java index 6b07bfa5902..164c18a13fc 100644 --- a/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java +++ b/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java @@ -119,8 +119,8 @@ public class TestIndex IndexMerger.persist(bottom, DATA_INTERVAL, bottomFile); mergedRealtime = IndexIO.loadIndex( - IndexMerger.mergeMMapped( - Arrays.asList(IndexIO.mapDir(topFile), IndexIO.mapDir(bottomFile)), + IndexMerger.mergeQueryableIndex( + Arrays.asList(IndexIO.loadIndex(topFile), IndexIO.loadIndex(bottomFile)), METRIC_AGGS, mergedFile ) From 5b1e03530cedc0f3f8ddbbbaf397a90c2aa23594 Mon Sep 17 00:00:00 2001 From: Eric Tschetter Date: Wed, 16 Jan 2013 21:06:57 -0600 Subject: [PATCH 08/12] 1) Fix some bugs found by external test suite --- .../serde/BitmapIndexColumnPartSupplier.java | 7 ++++- .../index/serde/ComplexColumnPartSerde.java | 13 +++++--- .../metamx/druid/index/v1/IndexMerger.java | 7 ++--- 
.../v1/QueryableIndexIndexableAdapter.java | 31 ++++++++++++++++--- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java index b282ab56a5c..6813541ef09 100644 --- a/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java +++ b/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java @@ -51,7 +51,12 @@ public class BitmapIndexColumnPartSupplier implements Supplier<BitmapIndex> { final int index = dictionary.indexOf(value); - return index >= 0 ? bitmaps.get(index) : EMPTY_SET; + if (index < 0) { + return EMPTY_SET; + } + + final ImmutableConciseSet bitmap = bitmaps.get(index); + return bitmap == null ? EMPTY_SET : bitmap; } }; } diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java index 7e425f9f525..760fcbb3f20 100644 --- a/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java +++ b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java @@ -36,25 +36,28 @@ public class ComplexColumnPartSerde implements ColumnPartSerde { @JsonCreator public static ComplexColumnPartSerde createDeserializer( - @JsonProperty("complexType") String complexType + @JsonProperty("typeName") String complexType ) { return new ComplexColumnPartSerde(null, complexType); } private final GenericIndexed column; + private final String typeName; + private final ComplexMetricSerde serde; - public ComplexColumnPartSerde(GenericIndexed column, String complexType) + public ComplexColumnPartSerde(GenericIndexed column, String typeName) { this.column = column; - serde = ComplexMetrics.getSerdeForType(complexType); + this.typeName = typeName; + serde = ComplexMetrics.getSerdeForType(typeName); } @JsonProperty - public GenericIndexed getColumn() + public String getTypeName() { - return column; + return typeName; } @Override diff --git a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java index 4eb8db5fcc7..57828855b38 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java +++ b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java @@ -79,6 +79,7 @@ import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeSet; /** @@ -784,13 +785,11 @@ public class IndexMerger private static <T extends Comparable> ArrayList<T> mergeIndexed(final List<Iterable<T>> indexedLists) { - LinkedHashSet<T> retVal = Sets.newLinkedHashSet(); + Set<T> retVal = Sets.newTreeSet(Ordering.natural().nullsFirst()); for (Iterable<T> indexedList : indexedLists) { for (T val : indexedList) { - if (val != null) { - retVal.add(val); - } + retVal.add(val); } } diff --git a/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java index 72eddb9141c..d05864716af 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java +++ b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java @@ -19,12 +19,14 @@ package com.metamx.druid.index.v1; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Closeables; import com.metamx.common.ISE; import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.column.BitmapIndex; import com.metamx.druid.index.column.Column; import com.metamx.druid.index.column.ComplexColumn; import com.metamx.druid.index.column.DictionaryEncodedColumn; @@ -32,6 +34,7 @@ import com.metamx.druid.index.column.GenericColumn; import com.metamx.druid.index.column.ValueType; import com.metamx.druid.kv.ArrayBasedIndexedInts; import com.metamx.druid.kv.ConciseCompressedIndexedInts; +import com.metamx.druid.kv.EmptyIndexedInts; import com.metamx.druid.kv.Indexed; import com.metamx.druid.kv.IndexedInts; import com.metamx.druid.kv.IndexedIterable; @@ -91,7 +94,18 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter @Override public Indexed<String> getDimValueLookup(String dimension) { - final DictionaryEncodedColumn dict = input.getColumn(dimension).getDictionaryEncoding(); + final Column column = input.getColumn(dimension); + + if (column == null) { + return null; + } + + final DictionaryEncodedColumn dict = column.getDictionaryEncoding(); + + if (dict == null) { + return null; + } + return new Indexed<String>() { @Override @@ -244,9 +258,18 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter @Override public IndexedInts getInverteds(String dimension, String value) { - return new ConciseCompressedIndexedInts( - input.getColumn(dimension).getBitmapIndex().getConciseSet(value) - ); + final Column column = input.getColumn(dimension); + + if (column == null) { + return new EmptyIndexedInts(); + } + + final BitmapIndex bitmaps = column.getBitmapIndex(); + if (bitmaps == null) { + return new EmptyIndexedInts(); + } + + return new ConciseCompressedIndexedInts(bitmaps.getConciseSet(value)); } @Override From 689ce4f8e1536d0d27ff250d44fa2a11e713fa1d Mon Sep 17 00:00:00 2001 From: Eric Tschetter Date: Thu, 17 Jan 2013 13:10:11 -0600 Subject: [PATCH 09/12] 1) Upgrade java-util dependency to include "ruby" time --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 20ca0f1bada..c580894b379 100644 --- a/pom.xml +++ b/pom.xml @@ -68,7 +68,7 @@ <groupId>com.metamx</groupId> <artifactId>java-util</artifactId> - <version>0.18.0</version> + <version>0.19.1</version> <groupId>com.metamx</groupId> From 38b2041ad9f436913db6352e43983f384a914f39 Mon Sep 17 00:00:00 2001 From: Fangjin Yang Date: Thu, 17 Jan 2013 14:56:48 -0800 Subject: [PATCH 10/12] key/value config table --- .../java/com/metamx/druid/db/DbConnector.java | 8 +-- .../config/WorkerSetupManagerConfig.java | 7 +- .../http/IndexerCoordinatorNode.java | 2 +- .../coordinator/setup/WorkerSetupManager.java | 71 ++++++++++--------- .../merger/worker/config/WorkerConfig.java | 3 +- 5 files changed, 47 insertions(+), 44 deletions(-) diff --git a/common/src/main/java/com/metamx/druid/db/DbConnector.java b/common/src/main/java/com/metamx/druid/db/DbConnector.java index 45a0b937964..f3c4c1f13d3 100644 --- a/common/src/main/java/com/metamx/druid/db/DbConnector.java +++ b/common/src/main/java/com/metamx/druid/db/DbConnector.java @@ -59,14 +59,14 @@ public class DbConnector ); } - public static void createWorkerSetupTable(final DBI dbi, final String workerTableName) + public static void createConfigTable(final DBI dbi, final String configTableName) { createTable( dbi, - workerTableName, + configTableName, String.format( - "CREATE table %s (config LONGTEXT NOT NULL)", - workerTableName + "CREATE table %s (name VARCHAR(255) NOT NULL, payload LONGTEXT NOT
NULL, INDEX(name), PRIMARY KEY(name))", + configTableName ) ); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java index 97368c9f77e..16eeb1c3439 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java @@ -27,8 +27,11 @@ import org.skife.config.Default; */ public abstract class WorkerSetupManagerConfig { - @Config("druid.indexer.workerSetupTable") - public abstract String getWorkerSetupTable(); + @Config("druid.indexer.configTable") + public abstract String getConfigTable(); + + @Config("druid.indexer.workerSetupConfigName") + public abstract String getWorkerSetupConfigName(); @Config("druid.indexer.poll.duration") @Default("PT1M") diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java index 15d76cf2fec..3dae4046764 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java @@ -469,7 +469,7 @@ public class IndexerCoordinatorNode extends RegisteringNode final DBI dbi = new DbConnector(dbConnectorConfig).getDBI(); final WorkerSetupManagerConfig workerSetupManagerConfig = configFactory.build(WorkerSetupManagerConfig.class); - DbConnector.createWorkerSetupTable(dbi, workerSetupManagerConfig.getWorkerSetupTable()); + DbConnector.createConfigTable(dbi, workerSetupManagerConfig.getConfigTable()); workerSetupManager = new WorkerSetupManager( dbi, Executors.newScheduledThreadPool( 1, diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java index baa56290af7..5e43e68ae66 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java @@ -29,7 +29,6 @@ import com.metamx.common.logger.Logger; import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; import org.apache.commons.collections.MapUtils; import org.codehaus.jackson.map.ObjectMapper; -import org.codehaus.jackson.type.TypeReference; import org.joda.time.Duration; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.FoldController; @@ -123,37 +122,39 @@ public class WorkerSetupManager { return handle.createQuery( String.format( - "SELECT config FROM %s", - config.getWorkerSetupTable() + "SELECT payload FROM %s WHERE name = :name", + config.getConfigTable() ) - ).fold( - Lists.<WorkerSetupData>newArrayList(), - new Folder3<ArrayList<WorkerSetupData>, Map<String, Object>>() - { - @Override - public ArrayList<WorkerSetupData> fold( - ArrayList<WorkerSetupData> workerNodeConfigurations, - Map<String, Object> stringObjectMap, - FoldController foldController, - StatementContext statementContext - ) throws SQLException - { - try { - // stringObjectMap lowercases and jackson may fail serde - workerNodeConfigurations.add( - jsonMapper.readValue( - MapUtils.getString(stringObjectMap, "config"), - WorkerSetupData.class - ) - ); - return workerNodeConfigurations; - } - catch (Exception e) { - throw Throwables.propagate(e); - } - } - } - ); + ) + .bind("name", config.getWorkerSetupConfigName()) + .fold( +
Lists.<WorkerSetupData>newArrayList(), + new Folder3<ArrayList<WorkerSetupData>, Map<String, Object>>() + { + @Override + public ArrayList<WorkerSetupData> fold( + ArrayList<WorkerSetupData> workerNodeConfigurations, + Map<String, Object> stringObjectMap, + FoldController foldController, + StatementContext statementContext + ) throws SQLException + { + try { + // stringObjectMap lowercases and jackson may fail serde + workerNodeConfigurations.add( + jsonMapper.readValue( + MapUtils.getString(stringObjectMap, "payload"), + WorkerSetupData.class + ) + ); + return workerNodeConfigurations; + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + } + ); } } ); @@ -197,14 +198,14 @@ public class WorkerSetupManager @Override public Void withHandle(Handle handle) throws Exception { - handle.createStatement(String.format("DELETE FROM %s", config.getWorkerSetupTable())).execute(); handle.createStatement( String.format( - "INSERT INTO %s (config) VALUES (:config)", - config.getWorkerSetupTable() + "INSERT INTO %s (name, payload) VALUES (:name, :payload) ON DUPLICATE KEY UPDATE payload = :payload", + config.getConfigTable() ) ) - .bind("config", jsonMapper.writeValueAsString(value)) + .bind("name", config.getWorkerSetupConfigName()) + .bind("payload", jsonMapper.writeValueAsString(value)) .execute(); return null; diff --git a/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java b/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java index 4689acef261..5b5f3a0a6e7 100644 --- a/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java @@ -41,7 +41,6 @@ public abstract class WorkerConfig public int getCapacity() { - return 1; - //return Runtime.getRuntime().availableProcessors() - 1; + return Runtime.getRuntime().availableProcessors() - 1; } } From 71665346667bcd907b1fc349103b2e612c91d347 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 17 Jan 2013 16:03:18 -0800 Subject: [PATCH 11/12] YeOldePlumberSchool: Tweak for IndexIO changes --- .../metamx/druid/merger/common/index/YeOldePlumberSchool.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java index 48d639726ae..c5a2bc11826 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java @@ -140,7 +140,7 @@ public class YeOldePlumberSchool } // Map merged segment so we can extract dimensions - final MMappedIndex mappedSegment = IndexIO.mapDir(fileToUpload); + final QueryableIndex mappedSegment = IndexIO.loadIndex(fileToUpload); final DataSegment segmentToUpload = theSink.getSegment() .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())) From ac31afbce57c2974b044ed8de1a9ed224e2eb9ad Mon Sep 17 00:00:00 2001 From: Fangjin Yang Date: Fri, 18 Jan 2013 16:45:23 -0800 Subject: [PATCH 12/12] remove redundant index for primary key in config table --- common/src/main/java/com/metamx/druid/db/DbConnector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/com/metamx/druid/db/DbConnector.java b/common/src/main/java/com/metamx/druid/db/DbConnector.java index f3c4c1f13d3..73013ce6aa2 100644 --- a/common/src/main/java/com/metamx/druid/db/DbConnector.java +++ b/common/src/main/java/com/metamx/druid/db/DbConnector.java @@
-65,7 +65,7 @@ public class DbConnector dbi, configTableName, String.format( - "CREATE table %s (name VARCHAR(255) NOT NULL, payload LONGTEXT NOT NULL, INDEX(name), PRIMARY KEY(name))", + "CREATE table %s (name VARCHAR(255) NOT NULL, payload LONGTEXT NOT NULL, PRIMARY KEY(name))", configTableName ) );
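After the last three patches, the config table settles into a plain key/value store: name is the primary key, payload holds a JSON blob, and the MySQL-specific ON DUPLICATE KEY UPDATE clause supplies the upsert. A minimal usage sketch against that schema (illustrative "config" table name and standalone helper class, not code from the patches; JDBI v2 calls as already used by WorkerSetupManager):

import org.codehaus.jackson.map.ObjectMapper;
import org.skife.jdbi.v2.DBI;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.tweak.HandleCallback;

public class ConfigTableSketch
{
  // Upserts a JSON payload under the given name. The table name "config"
  // is an assumption for illustration; the patches leave it configurable
  // via druid.indexer.configTable.
  public static void writeConfig(DBI dbi, final ObjectMapper jsonMapper, final String name, final Object value)
  {
    dbi.withHandle(
        new HandleCallback<Void>()
        {
          @Override
          public Void withHandle(Handle handle) throws Exception
          {
            handle.createStatement(
                "INSERT INTO config (name, payload) VALUES (:name, :payload)"
                + " ON DUPLICATE KEY UPDATE payload = :payload"
            )
                  .bind("name", name)
                  .bind("payload", jsonMapper.writeValueAsString(value))
                  .execute();
            return null;
          }
        }
    );
  }
}

Storing one JSON document per name keeps the schema stable as setup options evolve, which is presumably why the dedicated worker-setup table was folded into this generic shape.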