svn merge -c 1407551 FIXES: MAPREDUCE-4266. remove Ant remnants from MR (tgraves via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1407552 13f79535-47bb-0310-9956-ffa450edef68
Robert Joseph Evans 2012-11-09 17:28:58 +00:00
parent 82ccfb726a
commit 80a05764be
659 changed files with 2 additions and 134689 deletions

View File

@@ -440,6 +440,8 @@ Release 0.23.5 - UNRELEASED
MAPREDUCE-4752. Reduce MR AM memory usage through String Interning (Robert
Evans via tgraves)
MAPREDUCE-4266. remove Ant remnants from MR (tgraves via bobby)
OPTIMIZATIONS
BUG FIXES

View File

@@ -1,33 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Contains utilities that are common for the main and contrib builds.
-->
<project name="build-utils">
<!-- Load properties from build properties file, if available -->
<dirname property="build-utils.basedir" file="${ant.file.build-utils}"/>
<property file="${build-utils.basedir}/build.properties"/>
<target name="forrest.check" unless="forrest.home">
<fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line, or set forrest.home in the build properties file." />
</target>
</project>
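For context, the forrest.check guard above only verifies that the property is set; under the old Ant build a documentation run would have been kicked off along these lines (Forrest path and target name are illustrative), or by dropping the property into the build.properties file that the property load above reads:
ant -Dforrest.home=/opt/apache-forrest-0.9 docs
echo "forrest.home=/opt/apache-forrest-0.9" >> build.properties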

File diff suppressed because it is too large

View File

@@ -1,167 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
<info organisation="org.apache.hadoop" module="${ant.project.name}" revision="${version}">
<license name="Apache 2.0"/>
<ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
<description>
Hadoop Core
</description>
</info>
<configurations defaultconfmapping="default">
<!--these match the Maven configurations-->
<conf name="default" extends="master,runtime"/>
<conf name="master" description="contains the artifact but no dependencies"/>
<conf name="compile" description="contains the artifact but no dependencies"/>
<conf name="runtime" description="runtime but not the artifact"/>
<!--
These public configurations contain the core dependencies for running hadoop client or server.
The server is effectively a superset of the client.
-->
<!--Private configurations. -->
<conf name="common" visibility="private" extends="compile" description="common artifacts"/>
<conf name="mapred" visibility="private" extends="compile,runtime" description="Mapred dependent artifacts"/>
<conf name="javadoc" visibility="private" description="artiracts required while performing doc generation" extends="common"/>
<conf name="test" extends="master" visibility="private" description="the classpath needed to run tests"/>
<conf name="package" extends="master" description="the classpath needed for packaging"/>
<conf name="system" extends="test" visibility="private" description="the classpath needed to run system tests"/>
<conf name="test-hdfswithmr" extends="test" visibility="private" description="the classpath needed to run tests"/>
<conf name="releaseaudit" visibility="private" description="Artifacts required for releaseaudit target"/>
<conf name="jdiff" visibility="private" extends="common"/>
<conf name="checkstyle" visibility="private"/>
</configurations>
<publications>
<!--get the artifact from our module name-->
<artifact conf="master"/>
</publications>
<dependencies>
<dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-common"
rev="${hadoop-common.version}" conf="compile->default">
<artifact name="hadoop-common" ext="jar" />
<artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests" />
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-hdfs"
rev="${hadoop-hdfs.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-common-instrumented"
rev="${hadoop-common.version}" conf="system->default"/>
<dependency org="org.apache.hadoop" name="hadoop-hdfs-instrumented"
rev="${hadoop-hdfs.version}" conf="system->default"/>
<dependency org="commons-logging" name="commons-logging"
rev="${commons-logging.version}" conf="compile->master"/>
<dependency org="org.slf4j" name="slf4j-api" rev="${slf4j-api.version}"
conf="compile->master"/>
<dependency org="org.slf4j" name="slf4j-log4j12"
rev="${slf4j-log4j12.version}" conf="mapred->master"/>
<dependency org="org.apache.hadoop" name="hadoop-hdfs"
rev="${hadoop-hdfs.version}" conf="test->default">
<artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-common"
rev="${hadoop-common.version}" conf="test->default">
<artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests" />
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-yarn-server-common"
rev="${yarn.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
rev="${yarn.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common"
rev="${yarn.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-yarn-common"
rev="${yarn.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-examples"
rev="${yarn.version}" conf="compile->default"/>
<dependency org="log4j" name="log4j" rev="${log4j.version}"
conf="compile->master"/>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient"
rev="${yarn.version}" conf="compile->default">
<artifact name="hadoop-mapreduce-client-jobclient" type="tests" ext="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-rumen"
rev="${hadoop-common.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-archives"
rev="${hadoop-common.version}" conf="compile->default"/>
<dependency org="checkstyle" name="checkstyle" rev="${checkstyle.version}"
conf="checkstyle->default"/>
<dependency org="jdiff" name="jdiff" rev="${jdiff.version}"
conf="jdiff->default"/>
<dependency org="xerces" name="xerces" rev="${xerces.version}"
conf="jdiff->default"/>
<dependency org="org.apache.rat" name="apache-rat-tasks"
rev="${rats-lib.version}" conf="releaseaudit->default"/>
<dependency org="commons-lang" name="commons-lang"
rev="${commons-lang.version}" conf="releaseaudit->default"/>
<dependency org="commons-collections" name="commons-collections"
rev="${commons-collections.version}"
conf="releaseaudit->default"/>
<dependency org="org.apache.lucene" name="lucene-core"
rev="${lucene-core.version}" conf="javadoc->default"/>
<dependency org="org.apache.avro" name="avro-compiler" rev="${avro.version}"
conf="compile->master">
<exclude module="ant"/>
<exclude module="jetty"/>
<exclude module="slf4j-simple"/>
</dependency>
<dependency org="org.apache.avro" name="avro" rev="${avro.version}"
conf="compile->default">
<exclude module="ant"/>
<exclude module="jetty"/>
<exclude module="slf4j-simple"/>
</dependency>
<dependency org="junit" name="junit" rev="${junit.version}"
conf="test->default"/>
<dependency org="org.mockito" name="mockito-all" rev="${mockito-all.version}"
conf="test->default"/>
<dependency org="org.vafer" name="jdeb" rev="${jdeb.version}" conf="package->master"/>
<dependency org="org.mortbay.jetty" name="jetty-servlet-tester" rev="${jetty.version}"
conf="test->default"/>
<!-- dependency for rumen anonymization -->
<dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${jackson.version}"
conf="compile->default"/>
<dependency org="org.codehaus.jackson" name="jackson-mapper-asl" rev="${jackson.version}"
conf="compile->default"/>
<!-- dependency addition for the fault injection -->
<dependency org="org.aspectj" name="aspectjrt" rev="${aspectj.version}"
conf="compile->default"/>
<dependency org="org.aspectj" name="aspectjtools" rev="${aspectj.version}"
conf="compile->default"/>
<!-- Exclusions for transitive dependencies pulled in by log4j -->
<exclude org="com.sun.jdmk"/>
<exclude org="com.sun.jmx"/>
<exclude org="javax.jms"/>
<exclude org="javax.mail"/>
<exclude org="org.apache.hadoop" module="avro"/>
<exclude org="org.apache.commons" module="commons-daemon"/>
</dependencies>
</ivy-module>

View File

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred-examples</artifactId>
<packaging>jar</packaging>
<version>@version</version>
<dependencies/>
</project>

View File

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred-instrumented</artifactId>
<packaging>jar</packaging>
<version>@version</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
</dependencies>
</project>

View File

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred-test-instrumented</artifactId>
<packaging>jar</packaging>
<version>@version</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred</artifactId>
<version>@version</version>
</dependency>
</dependencies>
</project>

View File

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred</artifactId>
<packaging>jar</packaging>
<version>@version</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
</dependencies>
</project>

View File

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred-test</artifactId>
<packaging>jar</packaging>
<version>@version</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred</artifactId>
<version>@version</version>
</dependency>
</dependencies>
</project>

View File

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapred-tools</artifactId>
<packaging>jar</packaging>
<version>@version</version>
<dependencies/>
</project>

View File

@@ -1,70 +0,0 @@
<ivysettings>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
see http://www.jayasoft.org/ivy/doc/configuration
-->
<!-- you can override this property to use mirrors
http://repo1.maven.org/maven2/
http://mirrors.dotsrc.org/maven2
http://ftp.ggi-project.org/pub/packages/maven2
http://mirrors.sunsite.dk/maven2
http://public.planetmirror.com/pub/maven2
http://ibiblio.lsu.edu/main/pub/packages/maven2
http://www.ibiblio.net/pub/packages/maven2
-->
<property name="repo.maven.org" value="http://repo1.maven.org/maven2/" override="false"/>
<property name="snapshot.apache.org" value="https://repository.apache.org/content/repositories/snapshots/" override="false"/>
<property name="maven2.pattern" value="[organisation]/[module]/[revision]/[module]-[revision](-[classifier])"/>
<property name="repo.dir" value="${user.home}/.m2/repository"/>
<property name="maven2.pattern.ext" value="${maven2.pattern}.[ext]"/>
<property name="resolvers" value="default" override="false"/>
<property name="force-resolve" value="false" override="false"/>
<settings defaultResolver="${resolvers}"/>
<resolvers>
<ibiblio name="maven2" root="${repo.maven.org}" pattern="${maven2.pattern.ext}" m2compatible="true" checkconsistency="false"/>
<ibiblio name="apache-snapshot" root="${snapshot.apache.org}" m2compatible="true"
checkmodified="true" changingPattern=".*SNAPSHOT" checkconsistency="false"/>
<filesystem name="fs" m2compatible="true" checkconsistency="false" force="${force-resolve}">
<artifact pattern="${repo.dir}/${maven2.pattern.ext}"/>
<ivy pattern="${repo.dir}/[organisation]/[module]/[revision]/[module]-[revision].pom"/>
</filesystem>
<chain name="default" dual="true" checkmodified="true" changingPattern=".*SNAPSHOT">
<resolver ref="apache-snapshot"/>
<resolver ref="maven2"/>
</chain>
<chain name="internal" dual="true">
<resolver ref="fs"/>
<resolver ref="apache-snapshot"/>
<resolver ref="maven2"/>
</chain>
<chain name="external">
<resolver ref="maven2"/>
</chain>
</resolvers>
<modules>
<module organisation="org.apache.hadoop" name="hadoop-*" resolver="${resolvers}"/>
</modules>
</ivysettings>
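Because defaultResolver is driven by the overridable resolvers property, and the module rule above pins every org.apache.hadoop hadoop-* artifact to the same chain, a build could select the internal chain (which checks the local ~/.m2 repository first) straight from the Ant command line; force-resolve is likewise overridable (target name illustrative):
ant -Dresolvers=internal -Dforce-resolve=true compile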

View File

@@ -1,86 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#This properties file lists the versions of the various artifacts used by hadoop and components.
#It drives ivy and the generation of a maven POM
#These are the versions of our dependencies (in alphabetical order)
ant-task.version=2.0.10
#AspectJ dependency for Fault injection
#This property has to be updated synchronously with aop.xml
aspectj.version=1.6.5
avro.version=1.5.2
paranamer.version=2.2
checkstyle.version=4.2
commons-cli.version=1.2
commons-collections.version=3.1
commons-httpclient.version=3.1
commons-lang.version=2.5
commons-logging.version=1.1.1
commons-logging-api.version=1.1
commons-el.version=1.0
commons-fileupload.version=1.2
commons-io.version=1.4
commons-net.version=1.4.1
core.version=3.1.1
coreplugin.version=1.3.2
ftplet-api.version=1.0.0
ftpserver-core.version=1.0.0
ftpserver-deprecated.version=1.0.0-M2
hadoop-common.version=2.0.3-SNAPSHOT
hadoop-hdfs.version=2.0.3-SNAPSHOT
hsqldb.version=1.8.0.10
ivy.version=2.2.0
jasper.version=5.5.12
jdeb.version=0.8
jsp.version=2.1
jsp-api.version=5.5.12
jets3t.version=0.7.1
jetty.version=6.1.14
jetty-util.version=6.1.14
junit.version=4.8.1
jdiff.version=1.0.9
kfs.version=0.3
log4j.version=1.2.16
lucene-core.version=2.3.1
mina-core.version=2.0.0-M5
mockito-all.version=1.8.2
oro.version=2.0.8
rats-lib.version=0.6
servlet.version=4.0.6
servlet-api-2.5.version=6.1.14
servlet-api.version=2.5
slf4j-api.version=1.5.11
slf4j-log4j12.version=1.5.11
wagon-http.version=1.0-beta-2
xmlenc.version=0.52
xerces.version=1.4.4
jackson.version=1.8.8
yarn.version=2.0.3-SNAPSHOT
hadoop-mapreduce.version=2.0.3-SNAPSHOT
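Since these are ordinary Ant properties, a value supplied on the command line wins over the one loaded from this file, so an individual dependency could be bumped for a single build without editing anything (target name illustrative):
ant -Davro.version=<desired Avro version> test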

View File

@@ -1,168 +0,0 @@
### "Gridmix" Benchmark ###
Contents:
0 Overview
1 Getting Started
1.0 Build
1.1 Configure
1.2 Generate test data
2 Running
2.0 General
2.1 Non-Hod cluster
2.2 Hod
2.2.0 Static cluster
2.2.1 Hod cluster
* 0 Overview
The scripts in this package model a cluster workload. The workload is
simulated by generating random data and submitting map/reduce jobs that
mimic observed data-access patterns in user jobs. The full benchmark
generates approximately 2.5TB of (often compressed) input data operated on
by the following simulated jobs:
1) Three stage map/reduce job
Input: 500GB compressed (2TB uncompressed) SequenceFile
(k,v) = (5 words, 100 words)
hadoop-env: FIXCOMPSEQ
Compute1: keep 10% map, 40% reduce
Compute2: keep 100% map, 77% reduce
Input from Compute1
Compute3: keep 116% map, 91% reduce
Input from Compute2
Motivation: Many user workloads are implemented as pipelined map/reduce
jobs, including Pig workloads
2) Large sort of variable key/value size
Input: 500GB compressed (2TB uncompressed) SequenceFile
(k,v) = (5-10 words, 100-10000 words)
hadoop-env: VARCOMPSEQ
Compute: keep 100% map, 100% reduce
Motivation: Processing large, compressed datasets is common.
3) Reference select
Input: 500GB compressed (2TB uncompressed) SequenceFile
(k,v) = (5-10 words, 100-10000 words)
hadoop-env: VARCOMPSEQ
Compute: keep 0.2% map, 5% reduce
1 Reducer
Motivation: Sampling from a large, reference dataset is common.
4) Indirect Read
Input: 500GB compressed (2TB uncompressed) Text
(k,v) = (5 words, 20 words)
hadoop-env: FIXCOMPTEXT
Compute: keep 50% map, 100% reduce
Each map reads 1 input file,
adding additional input files from the output of the
previous iteration for 10 iterations
Motivation: User jobs in the wild will often take input data without
consulting the framework. This simulates an iterative job
whose input data is all "indirect," i.e. given to the
framework sans locality metadata.
5) API text sort (java, pipes, streaming)
Input: 500GB uncompressed Text
(k,v) = (1-10 words, 0-200 words)
hadoop-env: VARINFLTEXT
Compute: keep 100% map, 100% reduce
Motivation: This benchmark should exercise each of the APIs to
map/reduce
Each of these jobs may be run individually or- using the scripts provided-
as a simulation of user activity sized to run in approximately 4 hours on a
480-500 node cluster using Hadoop 0.15.0. The benchmark runs a mix of small,
medium, and large jobs simultaneously, submitting each at fixed intervals.
Notes(1-4): Since input data are compressed, this means that each mapper
outputs a lot more bytes than it reads in, typically causing map output
spills.
* 1 Getting Started
1.0 Build
1) Compile the examples, including the C++ sources:
> ant -Dcompile.c++=yes examples
2) Copy the pipe sort example to a location in the default filesystem
(usually HDFS, default /gridmix/programs)
> $HADOOP_PREFIX/hadoop dfs -mkdir $GRID_MIX_PROG
> $HADOOP_PREFIX/hadoop dfs -put build/c++-examples/$PLATFORM_STR/bin/pipes-sort $GRID_MIX_PROG
1.1 Configure
One must modify hadoop-env to supply the following information:
HADOOP_PREFIX The hadoop install location
GRID_MIX_HOME The location of these scripts
APP_JAR The location of the hadoop example
GRID_MIX_DATA The location of the datasets for these benchmarks
GRID_MIX_PROG The location of the pipe-sort example
Reasonable defaults are provided for all but HADOOP_PREFIX. The datasets used
by each of the respective benchmarks are recorded in the Input::hadoop-env
comment in section 0 and their location may be changed in hadoop-env. Note
that each job expects particular input data and the parameters given to it
must be changed in each script if a different InputFormat, keytype, or
valuetype is desired.
Note that NUM_OF_REDUCERS_FOR_*_JOB properties should be sized to the
cluster on which the benchmarks will be run. The default assumes a large
(450-500 node) cluster.
1.2 Generate test data
Test data is generated using the generateData.sh script. While one may
modify the structure and size of the data generated here, note that many of
the scripts- particularly for medium and small sized jobs- rely not only on
specific InputFormats and key/value types, but also on a particular
structure to the input data. Changing these values will likely be necessary
to run on small and medium-sized clusters, but any modifications must be
informed by an explicit familiarity with the underlying scripts.
It is sufficient to run the script without modification, though it may
require up to 4TB of free space in the default filesystem. Changing the size
of the input data (COMPRESSED_DATA_BYTES, UNCOMPRESSED_DATA_BYTES,
INDIRECT_DATA_BYTES) is safe. A 4x compression ratio for generated, block
compressed data is typical.
* 2 Running
2.0 General
The submissionScripts directory contains the high-level scripts submitting
sized jobs for the gridmix benchmark. Each submits $NUM_OF_*_JOBS_PER_CLASS
instances as specified in the gridmix-env script, where an instance is an
invocation of a script as in $JOBTYPE/$JOBTYPE.$CLASS (e.g.
javasort/text-sort.large). Each instance may submit one or more map/reduce
jobs.
There is a backoff script, submissionScripts/sleep_if_too_busy that can be
modified to define throttling criteria. By default, it simply counts running
java processes.
2.1 Non-Hod cluster
The submissionScripts/allToSameCluster script will invoke each of the other
submission scripts for the gridmix benchmark. Depending on how your cluster
manages job submission, these scripts may require modification. The details
are very context-dependent.
2.2 Hod
Note that there are options in hadoop-env that control jobs submitted through
Hod. One may specify the location of a config (HOD_CONFIG), the number of
nodes to allocate for classes of jobs, and any additional options one wants
to apply. The default includes an example for supplying a Hadoop tarball for
testing platform changes (see Hod documentation).
2.2.0 Static Cluster
> hod --hod.script=submissionScripts/allToSameCluster -m 500
2.2.1 Hod-allocated cluster
> ./submissionScripts/allThroughHod
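Putting sections 1 and 2 together, a plain (non-HOD) run against a single cluster comes down to roughly the following, executed from GRID_MIX_HOME once hadoop-env has been edited and the examples built:
> ./generateData.sh
> ./submissionScripts/allToSameCluster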

View File

@@ -1,90 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/gridmix-env
# Smaller data set is used by default.
COMPRESSED_DATA_BYTES=2147483648
UNCOMPRESSED_DATA_BYTES=536870912
INDIRECT_DATA_BYTES=58720256
# Number of partitions for output data
if [ -z ${NUM_MAPS} ] ; then
NUM_MAPS=100
fi
INDIRECT_DATA_FILES=200
# If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
if [ ! -z ${USE_REAL_DATASET} ] ; then
echo "Using real dataset"
# 2TB data compressing to approx 500GB
COMPRESSED_DATA_BYTES=2147483648000
# 500GB
UNCOMPRESSED_DATA_BYTES=536870912000
# Default approx 70MB per data file, compressed
INDIRECT_DATA_BYTES=58720256000
fi
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
-D mapreduce.randomtextwriter.minwordskey=5 \
-D mapreduce.randomtextwriter.maxwordskey=10 \
-D mapreduce.randomtextwriter.minwordsvalue=100 \
-D mapreduce.randomtextwriter.maxwordsvalue=10000 \
-D mapreduce.output.fileoutputformat.compress=true \
-D mapred.map.output.compression.type=BLOCK \
-outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
${VARCOMPSEQ} &
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
-D mapreduce.randomtextwriter.minwordskey=5 \
-D mapreduce.randomtextwriter.maxwordskey=5 \
-D mapreduce.randomtextwriter.minwordsvalue=100 \
-D mapreduce.randomtextwriter.maxwordsvalue=100 \
-D mapreduce.output.fileoutputformat.compress=true \
-D mapred.map.output.compression.type=BLOCK \
-outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
${FIXCOMPSEQ} &
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${UNCOMPRESSED_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${UNCOMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
-D mapreduce.randomtextwriter.minwordskey=1 \
-D mapreduce.randomtextwriter.maxwordskey=10 \
-D mapreduce.randomtextwriter.minwordsvalue=0 \
-D mapreduce.randomtextwriter.maxwordsvalue=200 \
-D mapreduce.output.fileoutputformat.compress=false \
-outFormat org.apache.hadoop.mapreduce.lib.output.TextOutputFormat \
${VARINFLTEXT} &
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${INDIRECT_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${INDIRECT_DATA_BYTES} / ${INDIRECT_DATA_FILES})) \
-D mapreduce.randomtextwriter.minwordskey=5 \
-D mapreduce.randomtextwriter.maxwordskey=5 \
-D mapreduce.randomtextwriter.minwordsvalue=20 \
-D mapreduce.randomtextwriter.maxwordsvalue=20 \
-D mapreduce.output.fileoutputformat.compress=true \
-D mapred.map.output.compression.type=BLOCK \
-outFormat org.apache.hadoop.mapreduce.lib.output.TextOutputFormat \
${FIXCOMPTEXT} &
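The defaults above generate the smaller dataset; the full-size run described in the README only needs the environment tweaked before launching, since the script merely checks that USE_REAL_DATASET is non-empty and defaults NUM_MAPS to 100 when it is unset (the NUM_MAPS value below is illustrative):
export USE_REAL_DATASET=true
export NUM_MAPS=500
./generateData.sh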

View File

@@ -1,86 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Environment configuration
# Hadoop installation
# set var only if it has not already been set externally
if [ -z "${HADOOP_PREFIX}" ] ; then
export HADOOP_PREFIX=
fi
# Base directory for gridmix install
# set var only if it has not already been set externally
if [ -z "${GRID_MIX_HOME}" ] ; then
export GRID_MIX_HOME=${GRID_DIR}
fi
# Hadoop example jar
# set var only if it has not already been set externally
if [ -z "${EXAMPLE_JAR}" ] ; then
export EXAMPLE_JAR="${HADOOP_PREFIX}/hadoop-*examples.jar"
fi
# Hadoop test jar
# set var only if it has not already been set externally
if [ -z "${APP_JAR}" ] ; then
export APP_JAR="${HADOOP_PREFIX}/hadoop-*test.jar"
fi
# Hadoop streaming jar
# set var only if it has not already been set externally
if [ -z "${STREAM_JAR}" ] ; then
export STREAM_JAR="${HADOOP_PREFIX}/contrib/streaming/hadoop-*streaming.jar"
fi
# Location on default filesystem for writing gridmix data (usually HDFS)
# Default: /gridmix/data
# set var only if it has not already been set externally
if [ -z "${GRID_MIX_DATA}" ] ; then
export GRID_MIX_DATA=/gridmix/data
fi
# Location of executables in default filesystem (usually HDFS)
# Default: /gridmix/programs
# set var only if it has not already been set externally
if [ -z "${GRID_MIX_PROG}" ] ; then
export GRID_MIX_PROG=/gridmix/programs
fi
## Data sources
# Variable length key, value compressed SequenceFile
export VARCOMPSEQ=${GRID_MIX_DATA}/WebSimulationBlockCompressed
# Fixed length key, value compressed SequenceFile
export FIXCOMPSEQ=${GRID_MIX_DATA}/MonsterQueryBlockCompressed
# Variable length key, value uncompressed Text File
export VARINFLTEXT=${GRID_MIX_DATA}/SortUncompressed
# Fixed length key, value compressed Text File
export FIXCOMPTEXT=${GRID_MIX_DATA}/EntropySimulationCompressed
## Job sizing
export NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS=5
export NUM_OF_LARGE_JOBS_PER_CLASS=3
export NUM_OF_MEDIUM_JOBS_PER_CLASS=20
export NUM_OF_SMALL_JOBS_PER_CLASS=40
export NUM_OF_REDUCERS_FOR_LARGE_JOB=370
export NUM_OF_REDUCERS_FOR_MEDIUM_JOB=170
export NUM_OF_REDUCERS_FOR_SMALL_JOB=15
## Throttling
export INTERVAL_BETWEEN_SUBMITION=20
## Hod
#export HOD_OPTIONS=""
export CLUSTER_DIR_BASE=$GRID_MIX_HOME/CLUSTER_DIR_BASE
export HOD_CONFIG=
export ALL_HOD_OPTIONS="-c ${HOD_CONFIG} ${HOD_OPTIONS}"
export SMALL_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 5"
export MEDIUM_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 50"
export LARGE_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 100"
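The values guarded by if [ -z ... ] checks (HADOOP_PREFIX, the jar locations, and the GRID_MIX_* paths) can be supplied from the calling shell instead of editing this file; HADOOP_PREFIX is the only one with no usable default (paths below are illustrative):
export HADOOP_PREFIX=/usr/local/hadoop
export GRID_MIX_DATA=/benchmarks/gridmix/data
./submissionScripts/allToSameCluster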

View File

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
INDIR=${VARINFLTEXT}
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/sort-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${EXAMPLE_JAR} sort -m 1 -r $NUM_OF_REDUCERS_FOR_LARGE_JOB -inFormat org.apache.hadoop.mapred.KeyValueTextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text $INDIR $OUTDIR

View File

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
INDIR="${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/sort-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${EXAMPLE_JAR} sort -m 1 -r $NUM_OF_REDUCERS_FOR_MEDIUM_JOB -inFormat org.apache.hadoop.mapred.KeyValueTextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text $INDIR $OUTDIR

View File

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
INDIR="${VARINFLTEXT}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/sort-out-dir-small_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${EXAMPLE_JAR} sort -m 1 -r $NUM_OF_REDUCERS_FOR_SMALL_JOB -inFormat org.apache.hadoop.mapred.KeyValueTextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text $INDIR $OUTDIR

View File

@@ -1,37 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=100
INDIR=${FIXCOMPTEXT}
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/maxent-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 50 -keepred 100 -inFormatIndirect org.apache.hadoop.mapred.TextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.LongWritable -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR.1 -r $NUM_OF_REDUCERS
ITER=7
for ((i=1; i<$ITER; ++i))
do
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 50 -keepred 100 -inFormatIndirect org.apache.hadoop.mapred.TextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.LongWritable -outValue org.apache.hadoop.io.Text -indir $INDIR -indir $OUTDIR.$i -outdir $OUTDIR.$(($i+1)) -r $NUM_OF_REDUCERS
if [ $? -ne "0" ]
then exit $?
fi
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR.$i
done
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR.$ITER

View File

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
INDIR=${FIXCOMPSEQ}
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/mq-out-dir-large_$Date.1
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 10 -keepred 40 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-large_$Date.2
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 77 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-large_$Date.3
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 116 -keepred 91 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
INDIR="${FIXCOMPSEQ}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/mq-out-dir-medium_$Date.1
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 10 -keepred 40 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-medium_$Date.2
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 77 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-medium_$Date.3
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 116 -keepred 91 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
INDIR="${FIXCOMPSEQ}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/mq-out-dir-small_$Date.1
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 10 -keepred 40 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-small_$Date.2
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 77 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-small_$Date.3
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 116 -keepred 91 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
INDIR=${VARINFLTEXT}
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/pipe-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop pipes -input $INDIR -output $OUTDIR -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat -program ${GRID_MIX_PROG}/pipes-sort -reduces $NUM_OF_REDUCERS -jobconf mapreduce.job.output.key.class=org.apache.hadoop.io.Text,mapreduce.job.output.value.class=org.apache.hadoop.io.Text -writer org.apache.hadoop.mapred.TextOutputFormat

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
INDIR="${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/pipe-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop pipes -input $INDIR -output $OUTDIR -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat -program ${GRID_MIX_PROG}/pipes-sort -reduces $NUM_OF_REDUCERS -jobconf mapreduce.job.output.key.class=org.apache.hadoop.io.Text,mapreduce.job.output.value.class=org.apache.hadoop.io.Text -writer org.apache.hadoop.mapred.TextOutputFormat

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
INDIR="${VARINFLTEXT}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/pipe-out-dir-small_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop pipes -input $INDIR -output $OUTDIR -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat -program ${GRID_MIX_PROG}/pipes-sort -reduces $NUM_OF_REDUCERS -jobconf mapreduce.job.output.key.class=org.apache.hadoop.io.Text,mapreduce.job.output.value.class=org.apache.hadoop.io.Text -writer org.apache.hadoop.mapred.TextOutputFormat

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
export NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
export INDIR=${VARINFLTEXT}
Date=`date +%F-%H-%M-%S-%N`
export OUTDIR=perf-out/stream-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${STREAM_JAR} -input $INDIR -output $OUTDIR -mapper cat -reducer cat -numReduceTasks $NUM_OF_REDUCERS

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
INDIR="${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/stream-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${STREAM_JAR} -input $INDIR -output $OUTDIR -mapper cat -reducer cat -numReduceTasks $NUM_OF_REDUCERS

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
INDIR="${VARINFLTEXT}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/stream-out-dir-small_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${STREAM_JAR} -input $INDIR -output $OUTDIR -mapper cat -reducer cat -numReduceTasks $NUM_OF_REDUCERS

View File

@@ -1,24 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
$GRID_MIX_HOME/submissionScripts/maxentHod 2>&1 > maxentHod.out &
$GRID_MIX_HOME/submissionScripts/textSortHod 2>&1 > textSortHod.out &
$GRID_MIX_HOME/submissionScripts/monsterQueriesHod 2>&1 > monsterQueriesHod.out &
$GRID_MIX_HOME/submissionScripts/webdataScanHod 2>&1 > webdataScanHod.out &
$GRID_MIX_HOME/submissionScripts/webdataSortHod 2>&1 > webdataSortHod.out &

View File

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
PROCESSES=""
$GRID_MIX_HOME/submissionScripts/maxentToSameCluster 2>&1 > maxentToSameCluster.out &
PROCESSES="${PROCESSES} $!"
sleep 20
$GRID_MIX_HOME/submissionScripts/textSortToSameCluster 2>&1 > textSortToSameCluster.out &
PROCESSES="${PROCESSES} $!"
sleep 20
$GRID_MIX_HOME/submissionScripts/monsterQueriesToSameCluster 2>&1 > monsterQueriesToSameCluster.out &
PROCESSES="${PROCESSES} $!"
sleep 20
$GRID_MIX_HOME/submissionScripts/webdataScanToSameCluster 2>&1 > webdataScanToSameCluster.out &
PROCESSES="${PROCESSES} $!"
sleep 20
$GRID_MIX_HOME/submissionScripts/webdataSortToSameCluster 2>&1 > webdataSortToSameCluster.out &
PROCESSES="${PROCESSES} $!"
echo "Waiting for processes: ${PROCESSES}"
for APROC in ${PROCESSES}; do
wait ${APROC}
done
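Since each sub-script is backgrounded with its output redirected, one convenient way to drive the whole mix is to background this driver as well and follow the per-class logs it drops in the working directory (the driver's own log name here is arbitrary):
nohup ./submissionScripts/allToSameCluster > allToSameCluster.out 2>&1 &
tail -f textSortToSameCluster.out monsterQueriesToSameCluster.out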

View File

@@ -1,26 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
for ((i=0; i < $NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/maxent.large.$i
mkdir $CLUSTER_DIR
#hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/maxent/maxent.large 2>&1 > maxent.large.$i.out &
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/maxent/maxent.large 2>&1 > maxent.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done

View File

@ -1,30 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
PROCESSES=""
for ((i=0; i < $NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/maxent/maxent.large 2>&1 > maxent.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for APROC in ${PROCESSES}; do
wait ${APROC}
done

View File

@ -1,44 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.small.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.small 2>&1 > monster_query.small.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.medium.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.medium 2>&1 > monster_query.medium.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.large.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.large 2>&1 > monster_query.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done

View File

@ -1,45 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
PROCESSES=""
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/monsterQuery/monster_query.small 2>&1 > monster_query.small.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/monsterQuery/monster_query.medium 2>&1 > monster_query.medium.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/monsterQuery/monster_query.large 2>&1 > monster_query.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for APROC in ${PROCESSES}; do
wait ${APROC}
done

View File

@ -1,21 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
sleep 1
for ((java_process=$((`ps -ef|grep java|wc -l`-1)); \
java_process > 70; \
java_process=$((`ps -ef|grep java|wc -l`-1))))
do
sleep 10
echo $java_process
done

View File

@ -1,75 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.small.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.small 2>&1 > pipesort.small.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.small.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.small 2>&1 > streamsort.small.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.small.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.small 2>&1 > javasort.small.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.medium.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.medium 2>&1 > pipesort.medium.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.medium.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.medium 2>&1 > streamsort.medium.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.medium.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/javasort/text-sort.medium 2>&1 > javasort.medium.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.large.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.large 2>&1 > pipesort.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.large.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.large 2>&1 > streamsort.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.large.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.large 2>&1 > javasort.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done

View File

@ -1,64 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
PROCESSES=""
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/pipesort/text-sort.small 2>&1 > pipesort.small.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
$GRID_MIX_HOME/streamsort/text-sort.small 2>&1 > streamsort.small.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
$GRID_MIX_HOME/javasort/text-sort.small 2>&1 > javasort.small.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/pipesort/text-sort.medium 2>&1 > pipesort.medium.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
$GRID_MIX_HOME/streamsort/text-sort.medium 2>&1 > streamsort.medium.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
$GRID_MIX_HOME/javasort/text-sort.medium 2>&1 > javasort.medium.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/pipesort/text-sort.large 2>&1 > pipesort.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
$GRID_MIX_HOME/streamsort/text-sort.large 2>&1 > streamsort.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
$GRID_MIX_HOME/javasort/text-sort.large 2>&1 > javasort.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for APROC in ${PROCESSES}; do
wait ${APROC}
done

View File

@ -1,45 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.small.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.small 2>&1 > webdata_scan.small.$i.out&
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.medium.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.medium 2>&1 > webdata_scan.medium.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.large.$i
mkdir $CLUSTER_DIR
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.large 2>&1 > webdata_scan.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done

View File

@ -1,45 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
PROCESSES=""
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/webdatascan/webdata_scan.medium 2>&1 > webdata_scan.medium.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/webdatascan/webdata_scan.small 2>&1 > webdata_scan.small.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/webdatascan/webdata_scan.large 2>&1 > webdata_scan.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for APROC in ${PROCESSES}; do
wait ${APROC}
done

View File

@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_sort.large.$i
mkdir $CLUSTER_DIR
#hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatasort/webdata_sort.large 2>&1 > webdata_sort.large.$i.out &
echo "hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatasort/webdata_sort.large "
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatasort/webdata_sort.large 2>&1 > webdata_sort.large.$i.out &
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done

View File

@ -1,29 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
PROCESSES=""
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
do
echo $i
$GRID_MIX_HOME/webdatasort/webdata_sort.large 2>&1 > webdata_sort.large.$i.out &
PROCESSES="${PROCESSES} $!"
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
done
for APROC in ${PROCESSES}; do
wait ${APROC}
done

View File

@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=1
INDIR=${VARCOMPSEQ}
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/webdata-scan-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 0.2 -keepred 5 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=1
INDIR="${VARCOMPSEQ}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/webdata-scan-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar ${APP_JAR} loadgen -keepmap 1 -keepred 5 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=1
INDIR="${VARCOMPSEQ}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/webdata-scan-out-dir-small_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 1 -keepred 5 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
INDIR=${VARCOMPSEQ}/{part-000*0,part-000*1}
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/webdata-sort-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 100 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
INDIR="${VARCOMPSEQ}/{part-0000,part-0001}"
Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/webdata-sort-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 100 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
INDIR=${VARCOMPSEQ}/part-00000
Date=`date +%F-%H-%M-%S-%N`
export OUTDIR=perf-out/webdata-sort-out-dir-small_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 100 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

View File

@ -1,148 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
### "Gridmix" Benchmark ###
Contents:
0 Overview
1 Getting Started
1.0 Build
1.1 Configure environment variables
1.2 Configure the job mixture
1.3 Generate test data
2 Running
2.0 General
2.1 Non-Hod cluster
2.2 Hod
2.2.0 Static cluster
2.2.1 Hod cluster
* 0 Overview
The scripts in this package model a cluster workload. The workload is
simulated by generating random data and submitting map/reduce jobs that
mimic observed data-access patterns in user jobs. The full benchmark
generates approximately 2.5TB of (often compressed) input data operated on
by the following simulated jobs:
1) Three stage map/reduce job
Input: 500GB compressed (2TB uncompressed) SequenceFile
(k,v) = (5 words, 100 words)
hadoop-env: FIXCOMPSEQ
Compute1: keep 10% map, 40% reduce
Compute2: keep 100% map, 77% reduce
Input from Compute1
Compute3: keep 116% map, 91% reduce
Input from Compute2
Motivation: Many user workloads are implemented as pipelined map/reduce
jobs, including Pig workloads
2) Large sort of variable key/value size
Input: 500GB compressed (2TB uncompressed) SequenceFile
(k,v) = (5-10 words, 100-10000 words)
hadoop-env: VARCOMPSEQ
Compute: keep 100% map, 100% reduce
Motivation: Processing large, compressed datasets is common.
3) Reference select
Input: 500GB compressed (2TB uncompressed) SequenceFile
(k,v) = (5-10 words, 100-10000 words)
hadoop-env: VARCOMPSEQ
Compute: keep 0.2% map, 5% reduce
1 Reducer
Motivation: Sampling from a large, reference dataset is common.
4) API text sort (java, streaming)
Input: 500GB uncompressed Text
(k,v) = (1-10 words, 0-200 words)
hadoop-env: VARINFLTEXT
Compute: keep 100% map, 100% reduce
Motivation: This benchmark should exercise each of the APIs to
map/reduce
5) Jobs with combiner (word count jobs)
A benchmark load is a mix of different numbers of small, medium, and large jobs of the above types.
The exact mix is specified in an XML file (gridmix_config.xml). We have a Java program to
construct those jobs based on the XML file and put them under the control of a JobControl object.
The JobControl object then submits the jobs to the cluster and monitors their progress until all jobs complete.
Notes(1-3): Since input data are compressed, this means that each mapper
outputs a lot more bytes than it reads in, typically causing map output
spills.
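As a rough illustration of the submission mechanism described above, the constructed jobs could be
driven through a JobControl object along the following lines. This is a minimal sketch, not the actual
GridMixRunner code; the class name, method name, group name, and polling interval are made up.
import java.util.List;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
public class JobControlSketch {
  // Wrap each pre-built Job in a ControlledJob, register them in one JobControl
  // group, run the group in a background thread, and poll until all jobs finish.
  public static void runAll(List<Job> jobs) throws Exception {
    JobControl control = new JobControl("gridmix-sketch"); // group name is illustrative
    for (Job job : jobs) {
      control.addJob(new ControlledJob(job, null)); // null: no inter-job dependencies
    }
    Thread driver = new Thread(control); // JobControl is a Runnable
    driver.start();
    while (!control.allFinished()) {
      Thread.sleep(5000); // check job states every five seconds
    }
    control.stop();
  }
}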
* 1 Getting Started
1.0 Build
In the src/benchmarks/gridmix dir, type "ant".
gridmix.jar will be created in the build subdir.
Copy gridmix.jar to the gridmix dir.
1.1 Configure environment variables
One must modify gridmix-env-2 to set the following variables:
HADOOP_PREFIX The hadoop install location
HADOOP_VERSION The exact hadoop version to be used. e.g. hadoop-0.18.2-dev
HADOOP_CONF_DIR The dir containing the hadoop-site.xml for the cluster to be used.
USE_REAL_DATASET A large data-set will be created and used by the benchmark if it is set to true.
1.2 Configure the job mixture
A default gridmix_config.xml file is provided.
One may make appropriate changes as necessary to the number of jobs of various types
and sizes. One can also change the number of reducers for each job, and specify whether
to compress the output data of a map/reduce job.
Note that one can specify multiple numbers in the
numOfJobs and numOfReduces fields, like:
<property>
<name>javaSort.smallJobs.numOfJobs</name>
<value>8,2</value>
<description></description>
</property>
<property>
<name>javaSort.smallJobs.numOfReduces</name>
<value>15,70</value>
<description></description>
</property>
The above spec means that we will have 8 small java sort jobs with 15 reducers and 2 small java sort
jobs with 70 reducers.
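For illustration only, the paired values shown above could be read back with the standard Hadoop
Configuration API roughly as follows; this is a hedged sketch, the property names come from the example
and everything else (class name, printout) is made up.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
public class JobMixConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false); // do not load the default Hadoop resources
    conf.addResource(new Path("gridmix_config.xml")); // the job-mix file described above
    // getStrings() splits the comma-separated values, e.g. {"8","2"} and {"15","70"};
    // entry i of numOfJobs is paired with entry i of numOfReduces.
    String[] numOfJobs = conf.getStrings("javaSort.smallJobs.numOfJobs");
    String[] numOfReduces = conf.getStrings("javaSort.smallJobs.numOfReduces");
    for (int i = 0; i < numOfJobs.length; i++) {
      System.out.println(numOfJobs[i] + " small java sort job(s) with " + numOfReduces[i] + " reducer(s)");
    }
  }
}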
1.3 Generate test data
Test data is generated using the generateGridmix2Data.sh script.
./generateGridmix2Data.sh
One may modify the structure and size of the data generated here.
It is sufficient to run the script without modification, though it may
require up to 4TB of free space in the default filesystem. Changing the size
of the input data (COMPRESSED_DATA_BYTES, UNCOMPRESSED_DATA_BYTES,
INDIRECT_DATA_BYTES) is safe. A 4x compression ratio for generated, block
compressed data is typical.
* 2 Running
You need to set HADOOP_CONF_DIR to the right directory where hadoop-site.xml exists.
Then you just need to type
./rungridmix_2
It will create start.out to record the start time, and at the end, it will create end.out to record the
end time.

View File

@ -1,100 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project default="main" basedir=".">
<property name="Name" value="gridmix"/>
<property name="version" value="0.1"/>
<property name="final.name" value="${name}-${version}"/>
<property name="hadoop.dir" value="${basedir}/../../../"/>
<property name="lib.dir" value="${hadoop.dir}/lib"/>
<property name="src.dir" value="${basedir}/src"/>
<property name="conf.dir" value="${basedir}/conf"/>
<property name="docs.dir" value="${basedir}/docs"/>
<property name="build.dir" value="${basedir}/build"/>
<property name="dist.dir" value="${basedir}/dist"/>
<property name="build.classes" value="${build.dir}/classes"/>
<target name="init">
<mkdir dir="${build.dir}"/>
<mkdir dir="${dist.dir}"/>
</target>
<target name="main" depends="init, compile, compress" description="Main target">
<echo>
Building the .jar files.
</echo>
</target>
<target name="compile" depends="init" description="Compilation target">
<javac srcdir="src/java/" destdir="${build.dir}">
<classpath refid="classpath" />
</javac>
</target>
<target name="dev-build" depends="init, dev-compile, compress" description="Developers build target">
<echo>
Building the .jar files.
</echo>
</target>
<target name="dev-compile" depends="init" description="Compilation target">
<path id="dev-classpath">
<pathelement location="${build.classes}"/>
<fileset dir="${hadoop.dir}/build">
<include name="**.jar" />
<include name="contrib/streaming/**.jar" />
</fileset>
<fileset dir="${lib.dir}">
<include name="*.jar" />
<exclude name="**/excluded/" />
</fileset>
</path>
<javac srcdir="src/java/" destdir="${build.dir}" classpathref="dev-classpath"/>
</target>
<target name="compress" depends="compile" description="Compression target">
<jar jarfile="${build.dir}/gridmix.jar" basedir="${build.dir}" includes="**/*.class" />
<copy todir="." includeEmptyDirs="false">
<fileset dir="${build.dir}">
<exclude name="**" />
<include name="**/*.jar" />
</fileset>
</copy>
</target>
<!-- ================================================================== -->
<!-- Clean. Delete the build files, and their directories -->
<!-- ================================================================== -->
<target name="clean" description="Clean. Delete the build files, and their directories">
<delete dir="${build.dir}"/>
<delete dir="${dist.dir}"/>
</target>
<!-- the normal classpath -->
<path id="classpath">
<pathelement location="${build.classes}"/>
<fileset dir="${lib.dir}">
<include name="*.jar" />
<exclude name="**/excluded/" />
</fileset>
<fileset dir="${hadoop.dir}">
<include name="**.jar" />
<include name="contrib/streaming/*.jar" />
</fileset>
</path>
</project>

View File

@ -1,94 +0,0 @@
#!/usr/bin/env bash
##############################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#####################################################################
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/gridmix-env-2
# Smaller data set is used by default.
COMPRESSED_DATA_BYTES=2147483648
UNCOMPRESSED_DATA_BYTES=536870912
# Number of partitions for output data
NUM_MAPS=100
# If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
if [ ! -z ${USE_REAL_DATASET} ] ; then
echo "Using real dataset"
NUM_MAPS=492
# 2TB data compressing to approx 500GB
COMPRESSED_DATA_BYTES=2147483648000
# 500GB
UNCOMPRESSED_DATA_BYTES=536870912000
fi
## Data sources
export GRID_MIX_DATA=/gridmix/data
# Variable length key, value compressed SequenceFile
export VARCOMPSEQ=${GRID_MIX_DATA}/WebSimulationBlockCompressed
# Fixed length key, value compressed SequenceFile
export FIXCOMPSEQ=${GRID_MIX_DATA}/MonsterQueryBlockCompressed
# Variable length key, value uncompressed Text File
export VARINFLTEXT=${GRID_MIX_DATA}/SortUncompressed
# Fixed length key, value compressed Text File
export FIXCOMPTEXT=${GRID_MIX_DATA}/EntropySimulationCompressed
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
-D mapreduce.randomtextwriter.minwordskey=5 \
-D mapreduce.randomtextwriter.maxwordskey=10 \
-D mapreduce.randomtextwriter.minwordsvalue=100 \
-D mapreduce.randomtextwriter.maxwordsvalue=10000 \
-D mapreduce.output.fileoutputformat.compress=true \
-D mapred.map.output.compression.type=BLOCK \
-outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
${VARCOMPSEQ} &
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
-D mapreduce.randomtextwriter.minwordskey=5 \
-D mapreduce.randomtextwriter.maxwordskey=5 \
-D mapreduce.randomtextwriter.minwordsvalue=100 \
-D mapreduce.randomtextwriter.maxwordsvalue=100 \
-D mapreduce.output.fileoutputformat.compress=true \
-D mapred.map.output.compression.type=BLOCK \
-outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
${FIXCOMPSEQ} &
${HADOOP_PREFIX}/bin/hadoop jar \
${EXAMPLE_JAR} randomtextwriter \
-D mapreduce.randomtextwriter.totalbytes=${UNCOMPRESSED_DATA_BYTES} \
-D mapreduce.randomtextwriter.bytespermap=$((${UNCOMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
-D mapreduce.randomtextwriter.minwordskey=1 \
-D mapreduce.randomtextwriter.maxwordskey=10 \
-D mapreduce.randomtextwriter.minwordsvalue=0 \
-D mapreduce.randomtextwriter.maxwordsvalue=200 \
-D mapreduce.output.fileoutputformat.compress=false \
-outFormat org.apache.hadoop.mapreduce.lib.output.TextOutputFormat \
${VARINFLTEXT} &

View File

@ -1,35 +0,0 @@
#!/usr/bin/env bash
##############################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#####################################################################
## Environment configuration
# Hadoop installation
export HADOOP_VERSION=hadoop-0.18.2-dev
export HADOOP_PREFIX=${HADOOP_INSTALL_HOME}/${HADOOP_VERSION}
export HADOOP_CONF_DIR=
export USE_REAL_DATASET=TRUE
export APP_JAR=${HADOOP_PREFIX}/${HADOOP_VERSION}-test.jar
export EXAMPLE_JAR=${HADOOP_PREFIX}/${HADOOP_VERSION}-examples.jar
export STREAMING_JAR=${HADOOP_PREFIX}/contrib/streaming/${HADOOP_VERSION}-streaming.jar

View File

@ -1,567 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>GRID_MIX_DATA</name>
<value>/gridmix/data</value>
<description></description>
</property>
<property>
<name>FIXCOMPTEXT</name>
<value>${GRID_MIX_DATA}/EntropySimulationCompressed</value>
<description></description>
</property>
<property>
<name>VARINFLTEXT</name>
<value>${GRID_MIX_DATA}/SortUncompressed</value>
<description></description>
</property>
<property>
<name>FIXCOMPSEQ</name>
<value>${GRID_MIX_DATA}/MonsterQueryBlockCompressed</value>
<description></description>
</property>
<property>
<name>VARCOMPSEQ</name>
<value>${GRID_MIX_DATA}/WebSimulationBlockCompressed</value>
<description></description>
</property>
<property>
<name>streamSort.smallJobs.inputFiles</name>
<value>${VARINFLTEXT}/{part-*-00000,part-*-00001,part-*-00002}</value>
<description></description>
</property>
<property>
<name>streamSort.smallJobs.numOfJobs</name>
<value>40</value>
<description></description>
</property>
<property>
<name>streamSort.smallJobs.numOfReduces</name>
<value>15</value>
<description></description>
</property>
<property>
<name>streamSort.smallJobs.numOfMapoutputCompressed</name>
<value>40</value>
<description> </description>
</property>
<property>
<name>streamSort.smallJobs.numOfOutputCompressed</name>
<value>20</value>
<description> </description>
</property>
<property>
<name>streamSort.mediumJobs.numOfJobs</name>
<value>16</value>
<description></description>
</property>
<property>
<name>streamSort.mediumJobs.inputFiles</name>
<value>${VARINFLTEXT}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
<description></description>
</property>
<property>
<name>streamSort.mediumJobs.numOfReduces</name>
<value>170</value>
<description></description>
</property>
<property>
<name>streamSort.mediumJobs.numOfMapoutputCompressed</name>
<value>16</value>
<description> </description>
</property>
<property>
<name>streamSort.mediumJobs.numOfOutputCompressed</name>
<value>12</value>
<description> </description>
</property>
<property>
<name>streamSort.largeJobs.numOfJobs</name>
<value>5</value>
<description></description>
</property>
<property>
<name>streamSort.largeJobs.inputFiles</name>
<value>${VARINFLTEXT}</value>
<description></description>
</property>
<property>
<name>streamSort.largeJobs.numOfReduces</name>
<value>370</value>
<description></description>
</property>
<property>
<name>streamSort.largeJobs.numOfMapoutputCompressed</name>
<value>5</value>
<description> </description>
</property>
<property>
<name>streamSort.largeJobs.numOfOutputCompressed</name>
<value>3</value>
<description> </description>
</property>
<property>
<name>javaSort.smallJobs.numOfJobs</name>
<value>8,2</value>
<description></description>
</property>
<property>
<name>javaSort.smallJobs.inputFiles</name>
<value>${VARINFLTEXT}/{part-*-00000,part-*-00001,part-*-00002}</value>
<description></description>
</property>
<property>
<name>javaSort.smallJobs.numOfReduces</name>
<value>15,70</value>
<description></description>
</property>
<property>
<name>javaSort.smallJobs.numOfMapoutputCompressed</name>
<value>10</value>
<description> </description>
</property>
<property>
<name>javaSort.smallJobs.numOfOutputCompressed</name>
<value>3</value>
<description> </description>
</property>
<property>
<name>javaSort.mediumJobs.numOfJobs</name>
<value>4,2</value>
<description></description>
</property>
<property>
<name>javaSort.mediumJobs.inputFiles</name>
<value>${VARINFLTEXT}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
<description></description>
</property>
<property>
<name>javaSort.mediumJobs.numOfReduces</name>
<value>170,70</value>
<description></description>
</property>
<property>
<name>javaSort.mediumJobs.numOfMapoutputCompressed</name>
<value>6</value>
<description> </description>
</property>
<property>
<name>javaSort.mediumJobs.numOfOutputCompressed</name>
<value>4</value>
<description> </description>
</property>
<property>
<name>javaSort.largeJobs.numOfJobs</name>
<value>3</value>
<description></description>
</property>
<property>
<name>javaSort.largeJobs.inputFiles</name>
<value>${VARINFLTEXT}</value>
<description></description>
</property>
<property>
<name>javaSort.largeJobs.numOfReduces</name>
<value>370</value>
<description></description>
</property>
<property>
<name>javaSort.largeJobs.numOfMapoutputCompressed</name>
<value>3</value>
<description> </description>
</property>
<property>
<name>javaSort.largeJobs.numOfOutputCompressed</name>
<value>2</value>
<description> </description>
</property>
<property>
<name>combiner.smallJobs.numOfJobs</name>
<value>11,4</value>
<description></description>
</property>
<property>
<name>combiner.smallJobs.inputFiles</name>
<value>${VARINFLTEXT}/{part-*-00000,part-*-00001,part-*-00002}</value>
<description></description>
</property>
<property>
<name>combiner.smallJobs.numOfReduces</name>
<value>10,1</value>
<description></description>
</property>
<property>
<name>combiner.smallJobs.numOfMapoutputCompressed</name>
<value>15</value>
<description> </description>
</property>
<property>
<name>combiner.smallJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>combiner.mediumJobs.numOfJobs</name>
<value>8</value>
<description></description>
</property>
<property>
<name>combiner.mediumJobs.inputFiles</name>
<value>${VARINFLTEXT}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
<description></description>
</property>
<property>
<name>combiner.mediumJobs.numOfReduces</name>
<value>100</value>
<description></description>
</property>
<property>
<name>combiner.mediumJobs.numOfMapoutputCompressed</name>
<value>8</value>
<description> </description>
</property>
<property>
<name>combiner.mediumJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>combiner.largeJobs.numOfJobs</name>
<value>4</value>
<description></description>
</property>
<property>
<name>combiner.largeJobs.inputFiles</name>
<value>${VARINFLTEXT}</value>
<description></description>
</property>
<property>
<name>combiner.largeJobs.numOfReduces</name>
<value>360</value>
<description></description>
</property>
<property>
<name>combiner.largeJobs.numOfMapoutputCompressed</name>
<value>4</value>
<description> </description>
</property>
<property>
<name>combiner.largeJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>monsterQuery.smallJobs.numOfJobs</name>
<value>7</value>
<description></description>
</property>
<property>
<name>monsterQuery.smallJobs.inputFiles</name>
<value>${FIXCOMPSEQ}/{part-*-00000,part-*-00001,part-*-00002}</value>
<description></description>
</property>
<property>
<name>monsterQuery.smallJobs.numOfReduces</name>
<value>5</value>
<description></description>
</property>
<property>
<name>monsterQuery.smallJobs.numOfMapoutputCompressed</name>
<value>7</value>
<description> </description>
</property>
<property>
<name>monsterQuery.smallJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>monsterQuery.mediumJobs.numOfJobs</name>
<value>5</value>
<description></description>
</property>
<property>
<name>monsterQuery.mediumJobs.inputFiles</name>
<value>${FIXCOMPSEQ}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
<description></description>
</property>
<property>
<name>monsterQuery.mediumJobs.numOfReduces</name>
<value>100</value>
<description></description>
</property>
<property>
<name>monsterQuery.mediumJobs.numOfMapoutputCompressed</name>
<value>5</value>
<description> </description>
</property>
<property>
<name>monsterQuery.mediumJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>monsterQuery.largeJobs.numOfJobs</name>
<value>3</value>
<description></description>
</property>
<property>
<name>monsterQuery.largeJobs.inputFiles</name>
<value>${FIXCOMPSEQ}</value>
<description></description>
</property>
<property>
<name>monsterQuery.largeJobs.numOfReduces</name>
<value>370</value>
<description></description>
</property>
<property>
<name>monsterQuery.largeJobs.numOfMapoutputCompressed</name>
<value>3</value>
<description> </description>
</property>
<property>
<name>monsterQuery.largeJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>webdataScan.smallJobs.numOfJobs</name>
<value>24</value>
<description></description>
</property>
<property>
<name>webdataScan.smallJobs.inputFiles</name>
<value>${VARCOMPSEQ}/{part-*-00000,part-*-00001,part-*-00002}</value>
<description></description>
</property>
<property>
<name>webdataScan.smallJobs.numOfMapoutputCompressed</name>
<value>24</value>
<description> </description>
</property>
<property>
<name>webdataScan.smallJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>webdataScan.mediumJobs.numOfJobs</name>
<value>12</value>
<description></description>
</property>
<property>
<name>webdataScan.mediumJobs.inputFiles</name>
<value>${VARCOMPSEQ}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
<description></description>
</property>
<property>
<name>webdataScan.mediumJobs.numOfMapoutputCompressed</name>
<value>12</value>
<description> </description>
</property>
<property>
<name>webdataScan.mediumJobs.numOfReduces</name>
<value>7</value>
<description></description>
</property>
<property>
<name>webdataScan.mediumJobs.numOfOutputCompressed</name>
<value>0</value>
<description> </description>
</property>
<property>
<name>webdataScan.largeJobs.numOfJobs</name>
<value>2</value>
<description></description>
</property>
<property>
<name>webdataScan.largeJobs.inputFiles</name>
<value>${VARCOMPSEQ}</value>
<description></description>
</property>
<property>
<name>webdataScan.largeJobs.numOfMapoutputCompressed</name>
<value>3</value>
<description> </description>
</property>
<property>
<name>webdataScan.largeJobs.numOfReduces</name>
<value>70</value>
<description></description>
</property>
<property>
<name>webdataScan.largeJobs.numOfOutputCompressed</name>
<value>3</value>
<description> </description>
</property>
<property>
<name>webdataSort.smallJobs.numOfJobs</name>
<value>7</value>
<description></description>
</property>
<property>
<name>webdataSort.smallJobs.inputFiles</name>
<value>${VARCOMPSEQ}/{part-*-00000,part-*-00001,part-*-00002}</value>
<description></description>
</property>
<property>
<name>webdataSort.smallJobs.numOfReduces</name>
<value>15</value>
<description></description>
</property>
<property>
<name>webdataSort.smallJobs.numOfMapoutputCompressed</name>
<value>7</value>
<description> </description>
</property>
<property>
<name>webdataSort.smallJobs.numOfOutputCompressed</name>
<value>7</value>
<description> </description>
</property>
<property>
<name>webdataSort.mediumJobs.numOfJobs</name>
<value>4</value>
<description></description>
</property>
<property>
<name>webdataSort.mediumJobs.inputFiles</name>
<value>${VARCOMPSEQ}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
<description></description>
</property>
<property>
<name>webdataSort.mediumJobs.numOfReduces</name>
<value>170</value>
<description></description>
</property>
<property>
<name>webdataSort.mediumJobs.numOfMapoutputCompressed</name>
<value>4</value>
<description> </description>
</property>
<property>
<name>webdataSort.mediumJobs.numOfOutputCompressed</name>
<value>4</value>
<description> </description>
</property>
<property>
<name>webdataSort.largeJobs.numOfJobs</name>
<value>1</value>
<description></description>
</property>
<property>
<name>webdataSort.largeJobs.inputFiles</name>
<value>${VARCOMPSEQ}</value>
<description></description>
</property>
<property>
<name>webdataSort.largeJobs.numOfReduces</name>
<value>800</value>
<description></description>
</property>
<property>
<name>webdataSort.largeJobs.numOfMapoutputCompressed</name>
<value>1</value>
<description> </description>
</property>
<property>
<name>webdataSort.largeJobs.numOfOutputCompressed</name>
<value>1</value>
<description> </description>
</property>
</configuration>

View File

@ -1,37 +0,0 @@
#!/usr/bin/env bash
##############################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#####################################################################
## Environment configuration
GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/gridmix-env-2
Date=`date +%F-%H-%M-%S-%N`
echo $Date > $1_start.out
export HADOOP_CLASSPATH=${APP_JAR}:${EXAMPLE_JAR}:${STREAMING_JAR}
export LIBJARS=${APP_JAR},${EXAMPLE_JAR},${STREAMING_JAR}
${HADOOP_PREFIX}/bin/hadoop jar gridmix.jar org.apache.hadoop.mapreduce.GridMixRunner -libjars ${LIBJARS}
Date=`date +%F-%H-%M-%S-%N`
echo $Date > $1_end.out

View File

@ -1,85 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
public class CombinerJobCreator {
public static Job createJob(String[] args) throws Exception {
Configuration conf = new Configuration();
int numReduces = 1;
String indir = null;
String outdir = null;
boolean mapoutputCompressed = false;
boolean outputCompressed = false;
for (int i = 0; i < args.length; ++i) {
try {
if ("-r".equals(args[i])) {
numReduces = Integer.parseInt(args[++i]);
} else if ("-indir".equals(args[i])) {
indir = args[++i];
} else if ("-outdir".equals(args[i])) {
outdir = args[++i];
} else if ("-mapoutputCompressed".equals(args[i])) {
mapoutputCompressed = Boolean.valueOf(args[++i]).booleanValue();
} else if ("-outputCompressed".equals(args[i])) {
outputCompressed = Boolean.valueOf(args[++i]).booleanValue();
}
} catch (NumberFormatException except) {
System.out.println("ERROR: Integer expected instead of " + args[i]);
return null;
} catch (ArrayIndexOutOfBoundsException except) {
System.out.println("ERROR: Required parameter missing from "
+ args[i - 1]);
return null;
}
}
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
Job job = new Job(conf);
job.setJobName("GridmixCombinerJob");
// the keys are words (strings)
job.setOutputKeyClass(Text.class);
// the values are counts (ints)
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(TokenCounterMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setNumReduceTasks(numReduces);
if (indir != null) {
FileInputFormat.setInputPaths(job, indir);
}
if (outdir != null) {
FileOutputFormat.setOutputPath(job, new Path(outdir));
}
return job;
}
}

View File

@ -1,100 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.util.Random;
import java.util.Stack;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapred.JobClient;
public class GenericMRLoadJobCreator extends GenericMRLoadGenerator {
public static Job createJob(String[] argv, boolean mapoutputCompressed,
boolean outputCompressed) throws Exception {
Job job = new Job();
job.setJarByClass(GenericMRLoadGenerator.class);
job.setMapperClass(SampleMapper.class);
job.setReducerClass(SampleReducer.class);
if (!parseArgs(argv, job)) {
return null;
}
if (null == FileOutputFormat.getOutputPath(job)) {
// No output dir? No writes
job.setOutputFormatClass(NullOutputFormat.class);
}
Configuration conf = job.getConfiguration();
if (0 == FileInputFormat.getInputPaths(job).length) {
// No input dir? Generate random data
System.err.println("No input path; ignoring InputFormat");
confRandom(job);
} else if (null != conf.getClass(INDIRECT_INPUT_FORMAT, null)) {
// specified IndirectInputFormat? Build src list
JobClient jClient = new JobClient(conf);
Path sysdir = jClient.getSystemDir();
Random r = new Random();
Path indirInputFile = new Path(sysdir, Integer.toString(r
.nextInt(Integer.MAX_VALUE), 36)
+ "_files");
conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
SequenceFile.Writer writer = SequenceFile.createWriter(sysdir
.getFileSystem(conf), conf, indirInputFile, LongWritable.class,
Text.class, SequenceFile.CompressionType.NONE);
try {
for (Path p : FileInputFormat.getInputPaths(job)) {
FileSystem fs = p.getFileSystem(conf);
Stack<Path> pathstack = new Stack<Path>();
pathstack.push(p);
while (!pathstack.empty()) {
for (FileStatus stat : fs.listStatus(pathstack.pop())) {
if (stat.isDirectory()) {
if (!stat.getPath().getName().startsWith("_")) {
pathstack.push(stat.getPath());
}
} else {
writer.sync();
writer.append(new LongWritable(stat.getLen()), new Text(stat
.getPath().toUri().toString()));
}
}
}
}
} finally {
writer.close();
}
}
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
return job;
}
}

View File

@ -1,680 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.Sort;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.streaming.StreamJob;
public class GridMixRunner {
private static final int NUM_OF_LARGE_JOBS_PER_CLASS = 0;
private static final int NUM_OF_MEDIUM_JOBS_PER_CLASS = 0;
private static final int NUM_OF_SMALL_JOBS_PER_CLASS = 0;
private static final int NUM_OF_REDUCERS_FOR_SMALL_JOB = 15;
private static final int NUM_OF_REDUCERS_FOR_MEDIUM_JOB = 170;
private static final int NUM_OF_REDUCERS_FOR_LARGE_JOB = 370;
private static final String GRID_MIX_DATA = "/gridmix/data";
private static final String VARCOMPSEQ =
GRID_MIX_DATA + "/WebSimulationBlockCompressed";
private static final String FIXCOMPSEQ =
GRID_MIX_DATA + "/MonsterQueryBlockCompressed";
private static final String VARINFLTEXT =
GRID_MIX_DATA + "/SortUncompressed";
private static final String GRIDMIXCONFIG = "gridmix_config.xml";
private static final Configuration config = initConfig();
private static final FileSystem fs = initFs();
private final JobControl gridmix;
private int numOfJobs = 0;
private enum Size {
SMALL("small", // name
"/{part-*-00000,part-*-00001,part-*-00002}", // default input subset
NUM_OF_SMALL_JOBS_PER_CLASS, // default num jobs
NUM_OF_REDUCERS_FOR_SMALL_JOB), // default num reducers
MEDIUM("medium", // name
"/{part-*-000*0, part-*-000*1, part-*-000*2}", // default input subset
NUM_OF_MEDIUM_JOBS_PER_CLASS, // default num jobs
NUM_OF_REDUCERS_FOR_MEDIUM_JOB), // default num reducers
LARGE("large", // name
"", // default input subset
NUM_OF_LARGE_JOBS_PER_CLASS, // default num jobs
NUM_OF_REDUCERS_FOR_LARGE_JOB); // default num reducers
private final String str;
private final String path;
private final int numJobs;
private final int numReducers;
Size(String str, String path, int numJobs, int numReducers) {
this.str = str;
this.path = path;
this.numJobs = numJobs;
this.numReducers = numReducers;
}
public String defaultPath(String base) {
return base + path;
}
public int defaultNumJobs() {
return numJobs;
}
public int defaultNumReducers() {
return numReducers;
}
public String toString() {
return str;
}
}
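// Each GridMixJob constant builds one flavor of synthetic job (streaming sort, java sort,
// webdata scan, combiner, monster query, webdata sort) and registers it with the shared
// JobControl via addJob().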
private enum GridMixJob {
STREAMSORT("streamSort") {
public void addJob(int numReducers, boolean mapoutputCompressed,
boolean outputCompressed, Size size, JobControl gridmix) {
final String prop = String.format("streamSort.%sJobs.inputFiles", size);
final String indir =
getInputDirsFor(prop, size.defaultPath(VARINFLTEXT));
final String outdir = addTSSuffix("perf-out/stream-out-dir-" + size);
StringBuffer sb = new StringBuffer();
sb.append("-input ").append(indir).append(" ");
sb.append("-output ").append(outdir).append(" ");
sb.append("-mapper cat ");
sb.append("-reducer cat ");
sb.append("-numReduceTasks ").append(numReducers);
String[] args = sb.toString().split(" ");
clearDir(outdir);
try {
Configuration conf = StreamJob.createJob(args);
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
Job job = new Job(conf, "GridmixStreamingSorter." + size);
ControlledJob cjob = new ControlledJob(job, null);
gridmix.addJob(cjob);
} catch (Exception ex) {
ex.printStackTrace();
}
}
},
JAVASORT("javaSort") {
public void addJob(int numReducers, boolean mapoutputCompressed,
boolean outputCompressed, Size size, JobControl gridmix) {
final String prop = String.format("javaSort.%sJobs.inputFiles", size);
final String indir = getInputDirsFor(prop,
size.defaultPath(VARINFLTEXT));
final String outdir = addTSSuffix("perf-out/sort-out-dir-" + size);
clearDir(outdir);
try {
Configuration conf = new Configuration();
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
Job job = new Job(conf);
job.setJarByClass(Sort.class);
job.setJobName("GridmixJavaSorter." + size);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setNumReduceTasks(numReducers);
job.setInputFormatClass(KeyValueTextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
job.setOutputValueClass(org.apache.hadoop.io.Text.class);
FileInputFormat.addInputPaths(job, indir);
FileOutputFormat.setOutputPath(job, new Path(outdir));
ControlledJob cjob = new ControlledJob(job, null);
gridmix.addJob(cjob);
} catch (Exception ex) {
ex.printStackTrace();
}
}
},
WEBDATASCAN("webdataScan") {
public void addJob(int numReducers, boolean mapoutputCompressed,
boolean outputCompressed, Size size, JobControl gridmix) {
final String prop = String.format("webdataScan.%sJobs.inputFiles", size);
final String indir = getInputDirsFor(prop, size.defaultPath(VARCOMPSEQ));
final String outdir = addTSSuffix("perf-out/webdata-scan-out-dir-"
+ size);
StringBuffer sb = new StringBuffer();
sb.append("-keepmap 0.2 ");
sb.append("-keepred 5 ");
sb.append("-inFormat");
sb.append(" org.apache.hadoop.mapreduce." +
"lib.input.SequenceFileInputFormat ");
sb.append("-outFormat");
sb.append(" org.apache.hadoop.mapreduce." +
"lib.output.SequenceFileOutputFormat ");
sb.append("-outKey org.apache.hadoop.io.Text ");
sb.append("-outValue org.apache.hadoop.io.Text ");
sb.append("-indir ").append(indir).append(" ");
sb.append("-outdir ").append(outdir).append(" ");
sb.append("-r ").append(numReducers);
String[] args = sb.toString().split(" ");
clearDir(outdir);
try {
Job job = GenericMRLoadJobCreator.createJob(
args, mapoutputCompressed, outputCompressed);
job.setJobName("GridmixWebdatascan." + size);
ControlledJob cjob = new ControlledJob(job, null);
gridmix.addJob(cjob);
} catch (Exception ex) {
ex.printStackTrace();
}
}
},
COMBINER("combiner") {
public void addJob(int numReducers, boolean mapoutputCompressed,
boolean outputCompressed, Size size, JobControl gridmix) {
final String prop = String.format("combiner.%sJobs.inputFiles", size);
final String indir = getInputDirsFor(prop, size.defaultPath(VARCOMPSEQ));
final String outdir = addTSSuffix("perf-out/combiner-out-dir-" + size);
StringBuffer sb = new StringBuffer();
sb.append("-r ").append(numReducers).append(" ");
sb.append("-indir ").append(indir).append(" ");
sb.append("-outdir ").append(outdir);
sb.append("-mapoutputCompressed ");
sb.append(mapoutputCompressed).append(" ");
sb.append("-outputCompressed ").append(outputCompressed);
String[] args = sb.toString().split(" ");
clearDir(outdir);
try {
Job job = CombinerJobCreator.createJob(args);
job.setJobName("GridmixCombinerJob." + size);
ControlledJob cjob = new ControlledJob(job, null);
gridmix.addJob(cjob);
} catch (Exception ex) {
ex.printStackTrace();
}
}
},
MONSTERQUERY("monsterQuery") {
public void addJob(int numReducers, boolean mapoutputCompressed,
boolean outputCompressed, Size size, JobControl gridmix) {
final String prop =
String.format("monsterQuery.%sJobs.inputFiles", size);
final String indir = getInputDirsFor(prop, size.defaultPath(FIXCOMPSEQ));
final String outdir = addTSSuffix("perf-out/mq-out-dir-" + size);
int iter = 3;
try {
ControlledJob pjob = null;
ControlledJob cjob = null;
for (int i = 0; i < iter; i++) {
String outdirfull = outdir + "." + i;
String indirfull = (0 == i) ? indir : outdir + "." + (i - 1);
Path outfile = new Path(outdirfull);
StringBuffer sb = new StringBuffer();
sb.append("-keepmap 10 ");
sb.append("-keepred 40 ");
sb.append("-inFormat");
sb.append(" org.apache.hadoop.mapreduce." +
"lib.input.SequenceFileInputFormat ");
sb.append("-outFormat");
sb.append(" org.apache.hadoop.mapreduce." +
"lib.output.SequenceFileOutputFormat ");
sb.append("-outKey org.apache.hadoop.io.Text ");
sb.append("-outValue org.apache.hadoop.io.Text ");
sb.append("-indir ").append(indirfull).append(" ");
sb.append("-outdir ").append(outdirfull).append(" ");
sb.append("-r ").append(numReducers);
String[] args = sb.toString().split(" ");
try {
fs.delete(outfile, true);
} catch (IOException ex) {
System.out.println(ex.toString());
}
Job job = GenericMRLoadJobCreator.createJob(
args, mapoutputCompressed, outputCompressed);
job.setJobName("GridmixMonsterQuery." + size);
cjob = new ControlledJob(job, null);
if (pjob != null) {
cjob.addDependingJob(pjob);
}
gridmix.addJob(cjob);
pjob = cjob;
}
} catch (Exception e) {
e.printStackTrace();
}
}
},
WEBDATASORT("webdataSort") {
public void addJob(int numReducers, boolean mapoutputCompressed,
boolean outputCompressed, Size size, JobControl gridmix) {
final String prop = String.format("webdataSort.%sJobs.inputFiles", size);
final String indir = getInputDirsFor(prop, size.defaultPath(VARCOMPSEQ));
final String outdir =
addTSSuffix("perf-out/webdata-sort-out-dir-" + size);
StringBuffer sb = new StringBuffer();
sb.append("-keepmap 100 ");
sb.append("-keepred 100 ");
sb.append("-inFormat org.apache.hadoop.mapreduce." +
"lib.input.SequenceFileInputFormat ");
sb.append("-outFormat org.apache.hadoop.mapreduce." +
"lib.output.SequenceFileOutputFormat ");
sb.append("-outKey org.apache.hadoop.io.Text ");
sb.append("-outValue org.apache.hadoop.io.Text ");
sb.append("-indir ").append(indir).append(" ");
sb.append("-outdir ").append(outdir).append(" ");
sb.append("-r ").append(numReducers);
String[] args = sb.toString().split(" ");
clearDir(outdir);
try {
Job job = GenericMRLoadJobCreator.createJob(
args, mapoutputCompressed, outputCompressed);
job.setJobName("GridmixWebdataSort." + size);
ControlledJob cjob = new ControlledJob(job, null);
gridmix.addJob(cjob);
} catch (Exception ex) {
ex.printStackTrace();
}
}
};
private final String name;
GridMixJob(String name) {
this.name = name;
}
public String getName() {
return name;
}
public abstract void addJob(int numReducers, boolean mapComp,
boolean outComp, Size size, JobControl gridmix);
}
public GridMixRunner() throws IOException {
gridmix = new JobControl("GridMix");
if (null == config || null == fs) {
throw new IOException("Bad configuration. Cannot continue.");
}
}
private static FileSystem initFs() {
try {
return FileSystem.get(config);
} catch (Exception e) {
System.out.println("fs initation error: " + e.getMessage());
}
return null;
}
private static Configuration initConfig() {
Configuration conf = new Configuration();
String configFile = System.getenv("GRIDMIXCONFIG");
if (configFile == null) {
String configDir = System.getProperty("user.dir");
if (configDir == null) {
configDir = ".";
}
configFile = configDir + "/" + GRIDMIXCONFIG;
}
try {
Path fileResource = new Path(configFile);
conf.addResource(fileResource);
} catch (Exception e) {
System.err.println("Error reading config file " + configFile + ":" +
e.getMessage());
return null;
}
return conf;
}
private static int[] getInts(Configuration conf, String name, int defaultV) {
String[] vals = conf.getStrings(name, String.valueOf(defaultV));
int[] results = new int[vals.length];
for (int i = 0; i < vals.length; ++i) {
results[i] = Integer.parseInt(vals[i]);
}
return results;
}
private static String getInputDirsFor(String jobType, String defaultIndir) {
String inputFile[] = config.getStrings(jobType, defaultIndir);
StringBuffer indirBuffer = new StringBuffer();
for (int i = 0; i < inputFile.length; i++) {
indirBuffer = indirBuffer.append(inputFile[i]).append(",");
}
return indirBuffer.substring(0, indirBuffer.length() - 1);
}
private static void clearDir(String dir) {
try {
Path outfile = new Path(dir);
fs.delete(outfile, true);
} catch (IOException ex) {
ex.printStackTrace();
System.out.println("delete file error:");
System.out.println(ex.toString());
}
}
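// Evenly picks roughly 'selected' of 'total' indices: only indices below the largest
// multiple of 'selected' that fits in 'total' qualify, at every (total/selected)-th
// position; selected <= 0 picks none and selected >= total picks all.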
private boolean select(int total, int selected, int index) {
if (selected <= 0 || selected >= total) {
return selected > 0;
}
int step = total / selected;
int effectiveTotal = total - total % selected;
return (index <= effectiveTotal - 1 && (index % step == 0));
}
private static String addTSSuffix(String s) {
Date date = Calendar.getInstance().getTime();
String ts = String.valueOf(date.getTime());
return s + "_" + ts;
}
private void addJobs(GridMixJob job, Size size) throws IOException {
final String prefix = String.format("%s.%sJobs", job.getName(), size);
int[] numJobs = getInts(config, prefix + ".numOfJobs",
size.defaultNumJobs());
int[] numReduces = getInts(config, prefix + ".numOfReduces",
size.defaultNumReducers());
if (numJobs.length != numReduces.length) {
throw new IOException("Configuration error: " +
prefix + ".numOfJobs must match " +
prefix + ".numOfReduces");
}
int numMapoutputCompressed = config.getInt(
prefix + ".numOfMapoutputCompressed", 0);
int numOutputCompressed = config.getInt(
prefix + ".numOfOutputCompressed", size.defaultNumJobs());
int totalJobs = 0;
for (int nJob : numJobs) {
totalJobs += nJob;
}
int currentIndex = 0;
for (int i = 0; i < numJobs.length; ++i) {
for (int j = 0; j < numJobs[i]; ++j) {
boolean mapoutputComp =
select(totalJobs, numMapoutputCompressed, currentIndex);
boolean outputComp =
select(totalJobs, numOutputCompressed, currentIndex);
job.addJob(numReduces[i], mapoutputComp, outputComp, size, gridmix);
++numOfJobs;
++currentIndex;
}
}
}
private void addAllJobs(GridMixJob job) throws IOException {
for (Size size : EnumSet.allOf(Size.class)) {
addJobs(job, size);
}
}
public void addjobs() throws IOException {
for (GridMixJob jobtype : EnumSet.allOf(GridMixJob.class)) {
addAllJobs(jobtype);
}
System.out.println("total " +
gridmix.getWaitingJobList().size() + " jobs");
}
class SimpleStats {
long minValue;
long maxValue;
long averageValue;
long mediumValue;
int n;
SimpleStats(long[] data) {
Arrays.sort(data);
n = data.length;
minValue = data[0];
maxValue = data[n - 1];
mediumValue = data[n / 2];
long total = 0;
for (int i = 0; i < n; i++) {
total += data[i];
}
averageValue = total / n;
}
}
class TaskExecutionStats {
TreeMap<String, SimpleStats> theStats;
void computeStats(String name, long[] data) {
SimpleStats v = new SimpleStats(data);
theStats.put(name, v);
}
TaskExecutionStats() {
theStats = new TreeMap<String, SimpleStats>();
}
}
private TreeMap<String, String> getStatForJob(ControlledJob cjob) {
TreeMap<String, String> retv = new TreeMap<String, String>();
JobID mapreduceID = cjob.getMapredJobID();
Job job = cjob.getJob();
String jobName = job.getJobName();
retv.put("JobId", mapreduceID.toString());
retv.put("JobName", jobName);
TaskExecutionStats theTaskExecutionStats = new TaskExecutionStats();
try {
Counters jobCounters = job.getCounters();
Iterator<CounterGroup> groups = jobCounters.iterator();
while (groups.hasNext()) {
CounterGroup g = groups.next();
String gn = g.getName();
Iterator<Counter> cs = g.iterator();
while (cs.hasNext()) {
Counter c = cs.next();
String n = c.getName();
long v = c.getValue();
retv.put(mapreduceID + "." + jobName + "." + gn + "." + n, "" + v);
}
}
JobClient jc = new JobClient(job.getConfiguration());
TaskReport[] maps = jc
.getMapTaskReports((org.apache.hadoop.mapred.JobID)mapreduceID);
TaskReport[] reduces = jc
.getReduceTaskReports((org.apache.hadoop.mapred.JobID)mapreduceID);
retv.put(mapreduceID + "." + jobName + "." + "numOfMapTasks", ""
+ maps.length);
retv.put(mapreduceID + "." + jobName + "." + "numOfReduceTasks", ""
+ reduces.length);
long[] mapExecutionTimes = new long[maps.length];
long[] reduceExecutionTimes = new long[reduces.length];
Date date = Calendar.getInstance().getTime();
long startTime = date.getTime();
long finishTime = 0;
for (int j = 0; j < maps.length; j++) {
TaskReport map = maps[j];
long thisStartTime = map.getStartTime();
long thisFinishTime = map.getFinishTime();
if (thisStartTime > 0 && thisFinishTime > 0) {
mapExecutionTimes[j] = thisFinishTime - thisStartTime;
}
if (startTime > thisStartTime) {
startTime = thisStartTime;
}
if (finishTime < thisFinishTime) {
finishTime = thisFinishTime;
}
}
theTaskExecutionStats.computeStats("mapExecutionTimeStats",
mapExecutionTimes);
retv.put(mapreduceID + "." + jobName + "." + "mapStartTime", ""
+ startTime);
retv.put(mapreduceID + "." + jobName + "." + "mapEndTime", ""
+ finishTime);
for (int j = 0; j < reduces.length; j++) {
TaskReport reduce = reduces[j];
long thisStartTime = reduce.getStartTime();
long thisFinishTime = reduce.getFinishTime();
if (thisStartTime > 0 && thisFinishTime > 0) {
reduceExecutionTimes[j] = thisFinishTime - thisStartTime;
}
if (startTime > thisStartTime) {
startTime = thisStartTime;
}
if (finishTime < thisFinishTime) {
finishTime = thisFinishTime;
}
}
theTaskExecutionStats.computeStats("reduceExecutionTimeStats",
reduceExecutionTimes);
retv.put(mapreduceID + "." + jobName + "." + "reduceStartTime", ""
+ startTime);
retv.put(mapreduceID + "." + jobName + "." + "reduceEndTime", ""
+ finishTime);
if (cjob.getJobState() == ControlledJob.State.SUCCESS) {
retv.put(mapreduceID + "." + "jobStatus", "successful");
} else if (cjob.getJobState() == ControlledJob.State.FAILED) {
retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "failed");
} else {
retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "unknown");
}
Iterator<Entry<String, SimpleStats>> entries =
theTaskExecutionStats.theStats.entrySet().iterator();
while (entries.hasNext()) {
Entry<String, SimpleStats> e = entries.next();
SimpleStats v = e.getValue();
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "min",
"" + v.minValue);
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "max",
"" + v.maxValue);
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "."
+ "medium", "" + v.mediumValue);
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "avg",
"" + v.averageValue);
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "."
+ "numOfItems", "" + v.n);
}
} catch (Exception e) {
e.printStackTrace();
}
return retv;
}
private void printJobStat(TreeMap<String, String> stat) {
Iterator<Entry<String, String>> entries = stat.entrySet().iterator();
while (entries.hasNext()) {
Entry<String, String> e = entries.next();
System.out.println(e.getKey() + "\t" + e.getValue());
}
}
private void printStatsForJobs(List<ControlledJob> jobs) {
for (int i = 0; i < jobs.size(); i++) {
printJobStat(getStatForJob(jobs.get(i)));
}
}
public void run() {
Thread theGridmixRunner = new Thread(gridmix);
theGridmixRunner.start();
long startTime = System.currentTimeMillis();
while (!gridmix.allFinished()) {
System.out.println("Jobs in waiting state: "
+ gridmix.getWaitingJobList().size());
System.out.println("Jobs in ready state: "
+ gridmix.getReadyJobsList().size());
System.out.println("Jobs in running state: "
+ gridmix.getRunningJobList().size());
System.out.println("Jobs in success state: "
+ gridmix.getSuccessfulJobList().size());
System.out.println("Jobs in failed state: "
+ gridmix.getFailedJobList().size());
System.out.println("\n");
try {
Thread.sleep(10 * 1000);
} catch (Exception e) {
}
}
long endTime = System.currentTimeMillis();
List<ControlledJob> fail = gridmix.getFailedJobList();
List<ControlledJob> succeed = gridmix.getSuccessfulJobList();
int numOfSuccessfulJob = succeed.size();
if (numOfSuccessfulJob > 0) {
System.out.println(numOfSuccessfulJob + " jobs succeeded");
printStatsForJobs(succeed);
}
int numOfFailedjob = fail.size();
if (numOfFailedjob > 0) {
System.out.println("------------------------------- ");
System.out.println(numOfFailedjob + " jobs failed");
printStatsForJobs(fail);
}
System.out.println("GridMix results:");
System.out.println("Total num of Jobs: " + numOfJobs);
System.out.println("ExecutionTime: " + ((endTime-startTime) / 1000));
gridmix.stop();
}
public static void main(String argv[]) throws Exception {
GridMixRunner gridmixRunner = new GridMixRunner();
gridmixRunner.addjobs();
gridmixRunner.run();
}
}
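GridMixRunner takes its workload shape entirely from configuration: initConfig() loads gridmix_config.xml from the working directory (or the file named by the GRIDMIXCONFIG environment variable), and addJobs()/getInputDirsFor() look up per-job-type, per-size keys. A hedged sketch of those key names, with made-up example values:
  Configuration c = new Configuration(false);
  // Keys follow <jobName>.<size>Jobs.<setting>; jobName is e.g. javaSort, size is small/medium/large.
  c.set("javaSort.smallJobs.inputFiles", "/gridmix/data/SortUncompressed/part-*-00000");
  c.set("javaSort.smallJobs.numOfJobs", "8");        // may also be a comma-separated list
  c.set("javaSort.smallJobs.numOfReduces", "15");    // must have as many entries as numOfJobs
  c.setInt("javaSort.smallJobs.numOfMapoutputCompressed", 4);
  c.setInt("javaSort.smallJobs.numOfOutputCompressed", 4);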

View File

@ -1,73 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
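# Expects the invoking build to define LIBRECORDIO_BUILD_DIR (where objects and the
# archive are written) and XERCESCROOT (Xerces-C install root, needed for xmlarchive).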
COPTS=-g3 -O0 -Wall
all: ${LIBRECORDIO_BUILD_DIR}/librecordio.a
COBJS = $(addprefix ${LIBRECORDIO_BUILD_DIR}/, recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o \
exception.o typeIDs.o fieldTypeInfo.o recordTypeInfo.o utils.o)
CCMD = $(addprefix ${LIBRECORDIO_BUILD_DIR}/, librecordio.a recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o \
exception.o typeIDs.o fieldTypeInfo.o recordTypeInfo.o utils.o)
${LIBRECORDIO_BUILD_DIR}/librecordio.a: ${COBJS}
ar cru ${CCMD}
${LIBRECORDIO_BUILD_DIR}/recordio.o: recordio.cc recordio.hh archive.hh
g++ ${COPTS} -c -I${XERCESCROOT}/include -o ${LIBRECORDIO_BUILD_DIR}/recordio.o recordio.cc
${LIBRECORDIO_BUILD_DIR}/filestream.o: filestream.cc recordio.hh filestream.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/filestream.o filestream.cc
${LIBRECORDIO_BUILD_DIR}/binarchive.o: binarchive.cc recordio.hh binarchive.hh archive.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/binarchive.o binarchive.cc
${LIBRECORDIO_BUILD_DIR}/csvarchive.o: csvarchive.cc recordio.hh csvarchive.hh archive.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/csvarchive.o csvarchive.cc
${LIBRECORDIO_BUILD_DIR}/xmlarchive.o: xmlarchive.cc recordio.hh xmlarchive.hh archive.hh
g++ ${COPTS} -c -I${XERCESCROOT}/include -o ${LIBRECORDIO_BUILD_DIR}/xmlarchive.o xmlarchive.cc
${LIBRECORDIO_BUILD_DIR}/exception.o: exception.cc exception.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/exception.o exception.cc
${LIBRECORDIO_BUILD_DIR}/typeIDs.o: typeIDs.cc typeIDs.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/typeIDs.o typeIDs.cc
${LIBRECORDIO_BUILD_DIR}/fieldTypeInfo.o: fieldTypeInfo.cc fieldTypeInfo.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/fieldTypeInfo.o fieldTypeInfo.cc
${LIBRECORDIO_BUILD_DIR}/recordTypeInfo.o: recordTypeInfo.cc recordTypeInfo.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/recordTypeInfo.o recordTypeInfo.cc
${LIBRECORDIO_BUILD_DIR}/utils.o: utils.cc utils.hh
g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/utils.o utils.cc
recordio.cc: recordio.hh archive.hh exception.hh
filestream.cc: recordio.hh filestream.hh
binarchive.cc: recordio.hh binarchive.hh
csvarchive.cc: recordio.hh csvarchive.hh
xmlarchive.cc: recordio.hh xmlarchive.hh
exception.cc: exception.hh
typeIDs.cc: typeIDs.hh
fieldTypeInfo.cc: fieldTypeInfo.hh
recordTypeInfo.cc: recordTypeInfo.hh
utils.cc: utils.hh
test: ${LIBRECORDIO_BUILD_DIR}/librecordio.a
make -C test all
clean:
rm -f ${LIBRECORDIO_BUILD_DIR}/*~ ${LIBRECORDIO_BUILD_DIR}/*.o ${LIBRECORDIO_BUILD_DIR}/*.a
make -C test clean

View File

@ -1,122 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ARCHIVE_HH_
#define ARCHIVE_HH_
#include "recordio.hh"
namespace hadoop {
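// Index is the cursor returned by startVector()/startMap(): done() reports whether any
// elements remain and incr() advances past the element just read.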
class Index {
public:
virtual bool done() = 0;
virtual void incr() = 0;
virtual ~Index() {}
};
class IArchive {
public:
virtual void deserialize(int8_t& t, const char* tag) = 0;
virtual void deserialize(bool& t, const char* tag) = 0;
virtual void deserialize(int32_t& t, const char* tag) = 0;
virtual void deserialize(int64_t& t, const char* tag) = 0;
virtual void deserialize(float& t, const char* tag) = 0;
virtual void deserialize(double& t, const char* tag) = 0;
virtual void deserialize(std::string& t, const char* tag) = 0;
virtual void deserialize(std::string& t, size_t& len, const char* tag) = 0;
virtual void startRecord(hadoop::Record& s, const char* tag) = 0;
virtual void endRecord(hadoop::Record& s, const char* tag) = 0;
virtual Index* startVector(const char* tag) = 0;
virtual void endVector(Index* idx, const char* tag) = 0;
virtual Index* startMap(const char* tag) = 0;
virtual void endMap(Index* idx, const char* tag) = 0;
virtual void deserialize(hadoop::Record& s, const char* tag) {
s.deserialize(*this, tag);
}
template <typename T>
void deserialize(std::vector<T>& v, const char* tag) {
Index* idx = startVector(tag);
while (!idx->done()) {
T t;
deserialize(t, tag);
v.push_back(t);
idx->incr();
}
endVector(idx, tag);
}
template <typename K, typename V>
void deserialize(std::map<K,V>& v, const char* tag) {
Index* idx = startMap(tag);
while (!idx->done()) {
K key;
deserialize(key, tag);
V value;
deserialize(value, tag);
v[key] = value;
idx->incr();
}
endMap(idx, tag);
}
virtual ~IArchive() {}
};
class OArchive {
public:
virtual void serialize(int8_t t, const char* tag) = 0;
virtual void serialize(bool t, const char* tag) = 0;
virtual void serialize(int32_t t, const char* tag) = 0;
virtual void serialize(int64_t t, const char* tag) = 0;
virtual void serialize(float t, const char* tag) = 0;
virtual void serialize(double t, const char* tag) = 0;
virtual void serialize(const std::string& t, const char* tag) = 0;
virtual void serialize(const std::string& t, size_t len, const char* tag) = 0;
virtual void startRecord(const hadoop::Record& s, const char* tag) = 0;
virtual void endRecord(const hadoop::Record& s, const char* tag) = 0;
virtual void startVector(size_t len, const char* tag) = 0;
virtual void endVector(size_t len, const char* tag) = 0;
virtual void startMap(size_t len, const char* tag) = 0;
virtual void endMap(size_t len, const char* tag) = 0;
virtual void serialize(const hadoop::Record& s, const char* tag) {
s.serialize(*this, tag);
}
template <typename T>
void serialize(const std::vector<T>& v, const char* tag) {
startVector(v.size(), tag);
if (v.size()>0) {
for (size_t cur = 0; cur<v.size(); cur++) {
serialize(v[cur], tag);
}
}
endVector(v.size(), tag);
}
template <typename K, typename V>
void serialize(const std::map<K,V>& v, const char* tag) {
startMap(v.size(), tag);
if (v.size()>0) {
typedef typename std::map<K,V>::const_iterator CI;
for (CI cur = v.begin(); cur!=v.end(); cur++) {
serialize(cur->first, tag);
serialize(cur->second, tag);
}
}
endMap(v.size(), tag);
}
virtual ~OArchive() {}
};
}; // end namespace hadoop
#endif /*ARCHIVE_HH_*/

View File

@ -1,330 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "binarchive.hh"
#include <rpc/types.h>
#include <rpc/xdr.h>
using namespace hadoop;
template <typename T>
static void serialize(T t, OutStream& stream)
{
if (sizeof(T) != stream.write((const void *) &t, sizeof(T))) {
throw new IOException("Error serializing data.");
}
}
template <typename T>
static void deserialize(T& t, InStream& stream)
{
if (sizeof(T) != stream.read((void *) &t, sizeof(T))) {
throw new IOException("Error deserializing data.");
}
}
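// Variable-length long encoding, matching Hadoop's Java-side vlong format: values in
// [-112, 127] take a single byte; larger magnitudes are written as a marker byte
// (-113..-120 for positive, -121..-128 for negative, encoding the byte count) followed
// by the value in big-endian order, with negative values stored one's-complemented.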
static void serializeLong(int64_t t, OutStream& stream)
{
if (t >= -112 && t <= 127) {
int8_t b = t;
stream.write(&b, 1);
return;
}
int8_t len = -112;
if (t < 0) {
t ^= 0xFFFFFFFFFFFFFFFFLL; // take one's complement
len = -120;
}
uint64_t tmp = t;
while (tmp != 0) {
tmp = tmp >> 8;
len--;
}
stream.write(&len, 1);
len = (len < -120) ? -(len + 120) : -(len + 112);
for (uint32_t idx = len; idx != 0; idx--) {
uint32_t shiftbits = (idx - 1) * 8;
uint64_t mask = 0xFFLL << shiftbits;
uint8_t b = (t & mask) >> shiftbits;
stream.write(&b, 1);
}
}
static void deserializeLong(int64_t& t, InStream& stream)
{
int8_t b;
if (1 != stream.read(&b, 1)) {
throw new IOException("Error deserializing long.");
}
if (b >= -112) {
t = b;
return;
}
bool isNegative = (b < -120);
b = isNegative ? -(b + 120) : -(b + 112);
uint8_t barr[b];
if (b != stream.read(barr, b)) {
throw new IOException("Error deserializing long.");
}
t = 0;
for (int idx = 0; idx < b; idx++) {
t = t << 8;
t |= (barr[idx] & 0xFF);
}
if (isNegative) {
t ^= 0xFFFFFFFFFFFFFFFFLL;
}
}
static void serializeInt(int32_t t, OutStream& stream)
{
int64_t longVal = t;
::serializeLong(longVal, stream);
}
static void deserializeInt(int32_t& t, InStream& stream)
{
int64_t longVal;
::deserializeLong(longVal, stream);
t = longVal;
}
static void serializeFloat(float t, OutStream& stream)
{
char buf[sizeof(float)];
XDR xdrs;
xdrmem_create(&xdrs, buf, sizeof(float), XDR_ENCODE);
xdr_float(&xdrs, &t);
stream.write(buf, sizeof(float));
}
static void deserializeFloat(float& t, InStream& stream)
{
char buf[sizeof(float)];
if (sizeof(float) != stream.read(buf, sizeof(float))) {
throw new IOException("Error deserializing float.");
}
XDR xdrs;
xdrmem_create(&xdrs, buf, sizeof(float), XDR_DECODE);
xdr_float(&xdrs, &t);
}
static void serializeDouble(double t, OutStream& stream)
{
char buf[sizeof(double)];
XDR xdrs;
xdrmem_create(&xdrs, buf, sizeof(double), XDR_ENCODE);
xdr_double(&xdrs, &t);
stream.write(buf, sizeof(double));
}
static void deserializeDouble(double& t, InStream& stream)
{
char buf[sizeof(double)];
stream.read(buf, sizeof(double));
XDR xdrs;
xdrmem_create(&xdrs, buf, sizeof(double), XDR_DECODE);
xdr_double(&xdrs, &t);
}
static void serializeString(const std::string& t, OutStream& stream)
{
::serializeInt(t.length(), stream);
if (t.length() > 0) {
stream.write(t.data(), t.length());
}
}
static void deserializeString(std::string& t, InStream& stream)
{
int32_t len = 0;
::deserializeInt(len, stream);
if (len > 0) {
// resize the string to the right length
t.resize(len);
// read into the string in 64k chunks
const int bufSize = 65536;
int offset = 0;
char buf[bufSize];
while (len > 0) {
int chunkLength = len > bufSize ? bufSize : len;
stream.read((void *)buf, chunkLength);
t.replace(offset, chunkLength, buf, chunkLength);
offset += chunkLength;
len -= chunkLength;
}
}
}
void hadoop::IBinArchive::deserialize(int8_t& t, const char* tag)
{
::deserialize(t, stream);
}
void hadoop::IBinArchive::deserialize(bool& t, const char* tag)
{
::deserialize(t, stream);
}
void hadoop::IBinArchive::deserialize(int32_t& t, const char* tag)
{
int64_t longVal = 0LL;
::deserializeLong(longVal, stream);
t = longVal;
}
void hadoop::IBinArchive::deserialize(int64_t& t, const char* tag)
{
::deserializeLong(t, stream);
}
void hadoop::IBinArchive::deserialize(float& t, const char* tag)
{
::deserializeFloat(t, stream);
}
void hadoop::IBinArchive::deserialize(double& t, const char* tag)
{
::deserializeDouble(t, stream);
}
void hadoop::IBinArchive::deserialize(std::string& t, const char* tag)
{
::deserializeString(t, stream);
}
void hadoop::IBinArchive::deserialize(std::string& t, size_t& len, const char* tag)
{
::deserializeString(t, stream);
len = t.length();
}
void hadoop::IBinArchive::startRecord(Record& s, const char* tag)
{
}
void hadoop::IBinArchive::endRecord(Record& s, const char* tag)
{
}
Index* hadoop::IBinArchive::startVector(const char* tag)
{
int32_t len;
::deserializeInt(len, stream);
BinIndex *idx = new BinIndex((size_t) len);
return idx;
}
void hadoop::IBinArchive::endVector(Index* idx, const char* tag)
{
delete idx;
}
Index* hadoop::IBinArchive::startMap(const char* tag)
{
int32_t len;
::deserializeInt(len, stream);
BinIndex *idx = new BinIndex((size_t) len);
return idx;
}
void hadoop::IBinArchive::endMap(Index* idx, const char* tag)
{
delete idx;
}
hadoop::IBinArchive::~IBinArchive()
{
}
void hadoop::OBinArchive::serialize(int8_t t, const char* tag)
{
::serialize(t, stream);
}
void hadoop::OBinArchive::serialize(bool t, const char* tag)
{
::serialize(t, stream);
}
void hadoop::OBinArchive::serialize(int32_t t, const char* tag)
{
int64_t longVal = t;
::serializeLong(longVal, stream);
}
void hadoop::OBinArchive::serialize(int64_t t, const char* tag)
{
::serializeLong(t, stream);
}
void hadoop::OBinArchive::serialize(float t, const char* tag)
{
::serializeFloat(t, stream);
}
void hadoop::OBinArchive::serialize(double t, const char* tag)
{
::serializeDouble(t, stream);
}
void hadoop::OBinArchive::serialize(const std::string& t, const char* tag)
{
::serializeString(t, stream);
}
void hadoop::OBinArchive::serialize(const std::string& t, size_t len, const char* tag)
{
::serializeString(t, stream);
}
void hadoop::OBinArchive::startRecord(const Record& s, const char* tag)
{
}
void hadoop::OBinArchive::endRecord(const Record& s, const char* tag)
{
}
void hadoop::OBinArchive::startVector(size_t len, const char* tag)
{
::serializeInt(len, stream);
}
void hadoop::OBinArchive::endVector(size_t len, const char* tag)
{
}
void hadoop::OBinArchive::startMap(size_t len, const char* tag)
{
::serializeInt(len, stream);
}
void hadoop::OBinArchive::endMap(size_t len, const char* tag)
{
}
hadoop::OBinArchive::~OBinArchive()
{
}

View File

@ -1,81 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BINARCHIVE_HH_
#define BINARCHIVE_HH_
#include "recordio.hh"
namespace hadoop {
class BinIndex : public Index {
private:
size_t size;
public:
BinIndex(size_t size_) { size = size_; }
bool done() { return (size==0); }
void incr() { size--; }
~BinIndex() {}
};
class IBinArchive : public IArchive {
private:
InStream& stream;
public:
IBinArchive(InStream& _stream) : stream(_stream) {}
virtual void deserialize(int8_t& t, const char* tag);
virtual void deserialize(bool& t, const char* tag);
virtual void deserialize(int32_t& t, const char* tag);
virtual void deserialize(int64_t& t, const char* tag);
virtual void deserialize(float& t, const char* tag);
virtual void deserialize(double& t, const char* tag);
virtual void deserialize(std::string& t, const char* tag);
virtual void deserialize(std::string& t, size_t& len, const char* tag);
virtual void startRecord(Record& s, const char* tag);
virtual void endRecord(Record& s, const char* tag);
virtual Index* startVector(const char* tag);
virtual void endVector(Index* idx, const char* tag);
virtual Index* startMap(const char* tag);
virtual void endMap(Index* idx, const char* tag);
virtual ~IBinArchive();
};
class OBinArchive : public OArchive {
private:
OutStream& stream;
public:
OBinArchive(OutStream& _stream) : stream(_stream) {}
virtual void serialize(int8_t t, const char* tag);
virtual void serialize(bool t, const char* tag);
virtual void serialize(int32_t t, const char* tag);
virtual void serialize(int64_t t, const char* tag);
virtual void serialize(float t, const char* tag);
virtual void serialize(double t, const char* tag);
virtual void serialize(const std::string& t, const char* tag);
virtual void serialize(const std::string& t, size_t len, const char* tag);
virtual void startRecord(const Record& s, const char* tag);
virtual void endRecord(const Record& s, const char* tag);
virtual void startVector(size_t len, const char* tag);
virtual void endVector(size_t len, const char* tag);
virtual void startMap(size_t len, const char* tag);
virtual void endMap(size_t len, const char* tag);
virtual ~OBinArchive();
};
}
#endif /*BINARCHIVE_HH_*/

View File

@ -1,368 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "csvarchive.hh"
#include <stdlib.h>
using namespace hadoop;
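// Reads one CSV field: consumes characters up to a ',' (which is swallowed) or a '\n'
// or '}' terminator (which is pushed back so the caller can inspect it).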
static std::string readUptoTerminator(PushBackInStream& stream)
{
std::string s;
while (1) {
char c;
if (1 != stream.read(&c, 1)) {
throw new IOException("Error in deserialization.");
}
if (c == ',' || c == '\n' || c == '}') {
if (c != ',') {
stream.pushBack(c);
}
break;
}
s.push_back(c);
}
return s;
}
void hadoop::ICsvArchive::deserialize(int8_t& t, const char* tag)
{
std::string s = readUptoTerminator(stream);
t = (int8_t) strtol(s.c_str(), NULL, 10);
}
void hadoop::ICsvArchive::deserialize(bool& t, const char* tag)
{
std::string s = readUptoTerminator(stream);
t = (s == "T") ? true : false;
}
void hadoop::ICsvArchive::deserialize(int32_t& t, const char* tag)
{
std::string s = readUptoTerminator(stream);
t = strtol(s.c_str(), NULL, 10);
}
void hadoop::ICsvArchive::deserialize(int64_t& t, const char* tag)
{
std::string s = readUptoTerminator(stream);
t = strtoll(s.c_str(), NULL, 10);
}
void hadoop::ICsvArchive::deserialize(float& t, const char* tag)
{
std::string s = readUptoTerminator(stream);
t = strtof(s.c_str(), NULL);
}
void hadoop::ICsvArchive::deserialize(double& t, const char* tag)
{
std::string s = readUptoTerminator(stream);
t = strtod(s.c_str(), NULL);
}
void hadoop::ICsvArchive::deserialize(std::string& t, const char* tag)
{
std::string temp = readUptoTerminator(stream);
if (temp[0] != '\'') {
throw new IOException("Errror deserializing string.");
}
t.clear();
// skip first character, replace escaped characters
int len = temp.length();
for (int i = 1; i < len; i++) {
char c = temp.at(i);
if (c == '%') {
// since we escape '%', there have to be at least two chars following a '%'
char ch1 = temp.at(i+1);
char ch2 = temp.at(i+2);
i += 2;
if (ch1 == '0' && ch2 == '0') {
t.append(1, '\0');
} else if (ch1 == '0' && ch2 == 'A') {
t.append(1, '\n');
} else if (ch1 == '0' && ch2 == 'D') {
t.append(1, '\r');
} else if (ch1 == '2' && ch2 == 'C') {
t.append(1, ',');
} else if (ch1 == '7' && ch2 == 'D') {
t.append(1, '}');
} else if (ch1 == '2' && ch2 == '5') {
t.append(1, '%');
} else {
throw new IOException("Error deserializing string.");
}
}
else {
t.append(1, c);
}
}
}
void hadoop::ICsvArchive::deserialize(std::string& t, size_t& len, const char* tag)
{
std::string s = readUptoTerminator(stream);
if (s[0] != '#') {
throw new IOException("Errror deserializing buffer.");
}
s.erase(0, 1); // erase the leading '#' marker
len = s.length();
if (len%2 == 1) { // a serialized buffer is always an even number of hex digits
throw new IOException("Error deserializing buffer.");
}
len = len >> 1;
for (size_t idx = 0; idx < len; idx++) {
char buf[3];
buf[0] = s[2*idx];
buf[1] = s[2*idx+1];
buf[2] = '\0';
int i;
if (1 != sscanf(buf, "%2x", &i)) {
throw new IOException("Errror deserializing buffer.");
}
t.push_back((char) i);
}
len = t.length();
}
void hadoop::ICsvArchive::startRecord(Record& s, const char* tag)
{
if (tag != NULL) {
char mark[2];
if (2 != stream.read(mark, 2)) {
throw new IOException("Error deserializing record.");
}
if (mark[0] != 's' || mark[1] != '{') {
throw new IOException("Error deserializing record.");
}
}
}
void hadoop::ICsvArchive::endRecord(Record& s, const char* tag)
{
char mark;
if (1 != stream.read(&mark, 1)) {
throw new IOException("Error deserializing record.");
}
if (tag == NULL) {
if (mark != '\n') {
throw new IOException("Error deserializing record.");
}
} else if (mark != '}') {
throw new IOException("Error deserializing record.");
} else {
readUptoTerminator(stream);
}
}
Index* hadoop::ICsvArchive::startVector(const char* tag)
{
char mark[2];
if (2 != stream.read(mark, 2)) {
throw new IOException("Error deserializing vector.");
}
if (mark[0] != 'v' || mark[1] != '{') {
throw new IOException("Error deserializing vector.");
}
return new CsvIndex(stream);
}
void hadoop::ICsvArchive::endVector(Index* idx, const char* tag)
{
delete idx;
char mark;
if (1 != stream.read(&mark, 1)) {
throw new IOException("Error deserializing vector.");
}
if (mark != '}') {
throw new IOException("Error deserializing vector.");
}
readUptoTerminator(stream);
}
Index* hadoop::ICsvArchive::startMap(const char* tag)
{
char mark[2];
if (2 != stream.read(mark, 2)) {
throw new IOException("Error deserializing map.");
}
if (mark[0] != 'm' || mark[1] != '{') {
throw new IOException("Error deserializing map.");
}
return new CsvIndex(stream);
}
void hadoop::ICsvArchive::endMap(Index* idx, const char* tag)
{
delete idx;
char mark;
if (1 != stream.read(&mark, 1)) {
throw new IOException("Error deserializing map.");
}
if (mark != '}') {
throw new IOException("Error deserializing map.");
}
readUptoTerminator(stream);
}
hadoop::ICsvArchive::~ICsvArchive()
{
}
void hadoop::OCsvArchive::serialize(int8_t t, const char* tag)
{
printCommaUnlessFirst();
char sval[5];
sprintf(sval, "%d", t);
stream.write(sval, strlen(sval));
}
void hadoop::OCsvArchive::serialize(bool t, const char* tag)
{
printCommaUnlessFirst();
const char *sval = t ? "T" : "F";
stream.write(sval,1);
}
void hadoop::OCsvArchive::serialize(int32_t t, const char* tag)
{
printCommaUnlessFirst();
char sval[128];
sprintf(sval, "%d", t);
stream.write(sval, strlen(sval));
}
void hadoop::OCsvArchive::serialize(int64_t t, const char* tag)
{
printCommaUnlessFirst();
char sval[128];
sprintf(sval, "%lld", t);
stream.write(sval, strlen(sval));
}
void hadoop::OCsvArchive::serialize(float t, const char* tag)
{
printCommaUnlessFirst();
char sval[128];
sprintf(sval, "%f", t);
stream.write(sval, strlen(sval));
}
void hadoop::OCsvArchive::serialize(double t, const char* tag)
{
printCommaUnlessFirst();
char sval[128];
sprintf(sval, "%lf", t);
stream.write(sval, strlen(sval));
}
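// Strings are prefixed with '\'' and the characters NUL, LF, CR, '%', ',' and '}' are
// percent-escaped as two hex digits (e.g. ',' becomes "%2C"), mirroring the decoder in
// ICsvArchive::deserialize above.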
void hadoop::OCsvArchive::serialize(const std::string& t, const char* tag)
{
printCommaUnlessFirst();
stream.write("'",1);
int len = t.length();
for (int idx = 0; idx < len; idx++) {
char c = t[idx];
switch(c) {
case '\0':
stream.write("%00",3);
break;
case 0x0A:
stream.write("%0A",3);
break;
case 0x0D:
stream.write("%0D",3);
break;
case 0x25:
stream.write("%25",3);
break;
case 0x2C:
stream.write("%2C",3);
break;
case 0x7D:
stream.write("%7D",3);
break;
default:
stream.write(&c,1);
break;
}
}
}
void hadoop::OCsvArchive::serialize(const std::string& t, size_t len, const char* tag)
{
printCommaUnlessFirst();
stream.write("#",1);
for(size_t idx = 0; idx < len; idx++) {
uint8_t b = t[idx];
char sval[3];
sprintf(sval,"%2x",b);
stream.write(sval, 2);
}
}
void hadoop::OCsvArchive::startRecord(const Record& s, const char* tag)
{
printCommaUnlessFirst();
if (tag != NULL && strlen(tag) != 0) {
stream.write("s{",2);
}
isFirst = true;
}
void hadoop::OCsvArchive::endRecord(const Record& s, const char* tag)
{
if (tag == NULL || strlen(tag) == 0) {
stream.write("\n",1);
isFirst = true;
} else {
stream.write("}",1);
isFirst = false;
}
}
void hadoop::OCsvArchive::startVector(size_t len, const char* tag)
{
printCommaUnlessFirst();
stream.write("v{",2);
isFirst = true;
}
void hadoop::OCsvArchive::endVector(size_t len, const char* tag)
{
stream.write("}",1);
isFirst = false;
}
void hadoop::OCsvArchive::startMap(size_t len, const char* tag)
{
printCommaUnlessFirst();
stream.write("m{",2);
isFirst = true;
}
void hadoop::OCsvArchive::endMap(size_t len, const char* tag)
{
stream.write("}",1);
isFirst = false;
}
hadoop::OCsvArchive::~OCsvArchive()
{
}

View File

@ -1,128 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CSVARCHIVE_HH_
#define CSVARCHIVE_HH_
#include "recordio.hh"
namespace hadoop {
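// One-character push-back wrapper around InStream, so the CSV parser can peek at a
// field terminator and return it to the stream.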
class PushBackInStream {
private:
InStream* stream;
bool isAvail;
char pbchar;
public:
void setStream(InStream* stream_) {
stream = stream_;
isAvail = false;
pbchar = 0;
}
ssize_t read(void* buf, size_t len) {
if (len > 0 && isAvail) {
char* p = (char*) buf;
*p = pbchar;
isAvail = false;
if (len > 1) {
ssize_t ret = stream->read((char*)buf + 1, len - 1);
return ret + 1;
} else {
return 1;
}
} else {
return stream->read(buf, len);
}
}
void pushBack(char c) {
pbchar = c;
isAvail = true;
}
};
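// A vector/map has more elements as long as the next character is not '}'; a ','
// separator is consumed here, anything else is pushed back for the element decoder.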
class CsvIndex : public Index {
private:
PushBackInStream& stream;
public:
CsvIndex(PushBackInStream& _stream) : stream(_stream) {}
bool done() {
char c;
stream.read(&c, 1);
if (c != ',') {
stream.pushBack(c);
}
return (c == '}') ? true : false;
}
void incr() {}
~CsvIndex() {}
};
class ICsvArchive : public IArchive {
private:
PushBackInStream stream;
public:
ICsvArchive(InStream& _stream) { stream.setStream(&_stream); }
virtual void deserialize(int8_t& t, const char* tag);
virtual void deserialize(bool& t, const char* tag);
virtual void deserialize(int32_t& t, const char* tag);
virtual void deserialize(int64_t& t, const char* tag);
virtual void deserialize(float& t, const char* tag);
virtual void deserialize(double& t, const char* tag);
virtual void deserialize(std::string& t, const char* tag);
virtual void deserialize(std::string& t, size_t& len, const char* tag);
virtual void startRecord(Record& s, const char* tag);
virtual void endRecord(Record& s, const char* tag);
virtual Index* startVector(const char* tag);
virtual void endVector(Index* idx, const char* tag);
virtual Index* startMap(const char* tag);
virtual void endMap(Index* idx, const char* tag);
virtual ~ICsvArchive();
};
class OCsvArchive : public OArchive {
private:
OutStream& stream;
bool isFirst;
void printCommaUnlessFirst() {
if (!isFirst) {
stream.write(",",1);
}
isFirst = false;
}
public:
OCsvArchive(OutStream& _stream) : stream(_stream) {isFirst = true;}
virtual void serialize(int8_t t, const char* tag);
virtual void serialize(bool t, const char* tag);
virtual void serialize(int32_t t, const char* tag);
virtual void serialize(int64_t t, const char* tag);
virtual void serialize(float t, const char* tag);
virtual void serialize(double t, const char* tag);
virtual void serialize(const std::string& t, const char* tag);
virtual void serialize(const std::string& t, size_t len, const char* tag);
virtual void startRecord(const Record& s, const char* tag);
virtual void endRecord(const Record& s, const char* tag);
virtual void startVector(size_t len, const char* tag);
virtual void endVector(size_t len, const char* tag);
virtual void startMap(size_t len, const char* tag);
virtual void endMap(size_t len, const char* tag);
virtual ~OCsvArchive();
};
}
#endif /*CSVARCHIVE_HH_*/

View File

@ -1,152 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "exception.hh"
#ifdef USE_EXECINFO
#include <execinfo.h>
#endif
#include <errno.h>
#include <sstream>
#include <typeinfo>
using std::string;
namespace hadoop {
/**
* Create an exception.
* @param message The message to give to the user.
* @param reason The exception that caused the new exception.
*/
Exception::Exception(const string& message,
const string& component,
const string& location,
const Exception* reason
): mMessage(message),
mComponent(component),
mLocation(location),
mReason(reason)
{
#ifdef USE_EXECINFO
mCalls = backtrace(mCallStack, sMaxCallStackDepth);
#else
mCalls = 0;
#endif
}
/**
* Copy the exception.
* Clones the reason, if there is one.
*/
Exception::Exception(const Exception& other
): mMessage(other.mMessage),
mComponent(other.mComponent),
mLocation(other.mLocation),
mCalls(other.mCalls)
{
for(int i=0; i < mCalls; ++i) {
mCallStack[i] = other.mCallStack[i];
}
if (other.mReason) {
mReason = other.mReason->clone();
} else {
mReason = NULL;
}
}
Exception::~Exception() throw () {
delete mReason;
}
/**
* Print all of the information about the exception.
*/
void Exception::print(std::ostream& stream) const {
stream << "Exception " << getTypename();
if (mComponent.size() != 0) {
stream << " (" << mComponent << ")";
}
stream << ": " << mMessage << "\n";
if (mLocation.size() != 0) {
stream << " thrown at " << mLocation << "\n";
}
#ifdef USE_EXECINFO
printCallStack(stream);
#endif
if (mReason) {
stream << "caused by: ";
mReason->print(stream);
}
stream.flush();
}
/**
* Result of print() as a string.
*/
string Exception::toString() const {
std::ostringstream stream;
print(stream);
return stream.str();
}
#ifdef USE_EXECINFO
/**
* Print the call stack where the exception was created.
*/
void Exception::printCallStack(std::ostream& stream) const {
char ** symbols = backtrace_symbols(mCallStack, mCalls);
for(int i=0; i < mCalls; ++i) {
stream << " ";
if (i == 0) {
stream << "at ";
} else {
stream << "from ";
}
stream << symbols[i] << "\n";
}
free(symbols);
}
#endif
const char* Exception::getTypename() const {
return "Exception";
}
Exception* Exception::clone() const {
return new Exception(*this);
}
IOException::IOException(const string& message,
const string& component,
const string& location,
const Exception* reason
): Exception(message, component, location, reason)
{
}
const char* IOException::getTypename() const {
return "IOException";
}
IOException* IOException::clone() const {
return new IOException(*this);
}
}

View File

@ -1,129 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef EXCEPTION_HH
#define EXCEPTION_HH
#include <exception>
#include <iostream>
#include <string>
namespace hadoop {
/**
* Parent-type for all exceptions in hadoop.
* Provides an application specified message to the user, a call stack from
* where the exception was created, and optionally an exception that caused
* this one.
*/
class Exception: public std::exception {
public:
/**
* Create an exception.
* @param message The message to give to the user.
* @param reason The exception that caused the new exception.
*/
explicit Exception(const std::string& message,
const std::string& component="",
const std::string& location="",
const Exception* reason=NULL);
/**
* Copy the exception.
* Clones the reason, if there is one.
*/
Exception(const Exception&);
virtual ~Exception() throw ();
/**
* Make a new copy of the given exception by dynamically allocating
* memory.
*/
virtual Exception* clone() const;
/**
* Print all of the information about the exception.
*/
virtual void print(std::ostream& stream=std::cerr) const;
/**
* Result of print() as a string.
*/
virtual std::string toString() const;
#ifdef USE_EXECINFO
/**
* Print the call stack where the exception was created.
*/
virtual void printCallStack(std::ostream& stream=std::cerr) const;
#endif
const std::string& getMessage() const {
return mMessage;
}
const std::string& getComponent() const {
return mComponent;
}
const std::string& getLocation() const {
return mLocation;
}
const Exception* getReason() const {
return mReason;
}
/**
* Provide a body for the virtual from std::exception.
*/
virtual const char* what() const throw () {
return mMessage.c_str();
}
virtual const char* getTypename() const;
private:
const static int sMaxCallStackDepth = 10;
const std::string mMessage;
const std::string mComponent;
const std::string mLocation;
int mCalls;
void* mCallStack[sMaxCallStackDepth];
const Exception* mReason;
// NOT IMPLEMENTED
std::exception& operator=(const std::exception& right) throw ();
};
class IOException: public Exception {
public:
IOException(const std::string& message,
const std::string& component="",
const std::string& location="",
const Exception* reason = NULL);
virtual IOException* clone() const;
virtual const char* getTypename() const;
};
}
#endif

View File

@ -1,64 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fieldTypeInfo.hh"
using namespace hadoop;
FieldTypeInfo::~FieldTypeInfo()
{
delete pFieldID;
delete pTypeID;
}
FieldTypeInfo::FieldTypeInfo(const FieldTypeInfo& ti)
{
pFieldID = new std::string(*ti.pFieldID);
pTypeID = ti.pTypeID->clone();
}
void FieldTypeInfo::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize(*pFieldID, tag);
pTypeID->serialize(a_, tag);
}
bool FieldTypeInfo::operator==(const FieldTypeInfo& peer_) const
{
// first check if fieldID matches
if (0 != pFieldID->compare(*(peer_.pFieldID))) {
return false;
}
// now see if typeID matches
return (*pTypeID == *(peer_.pTypeID));
}
// print out the structure. Helps in debugging.
void FieldTypeInfo::print(int space) const
{
for (int i=0; i<space; i++) {
printf(" ");
}
printf("FieldTypeInfo(%lx):\n", (long)this);
for (int i=0; i<space+2; i++) {
printf(" ");
}
printf("field = \"%s\"\n", pFieldID->c_str());
pTypeID->print(space+2);
}

View File

@@ -1,59 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FIELDTYPEINFO_HH_
#define FIELDTYPEINFO_HH_
#include "recordio.hh"
#include "typeIDs.hh"
namespace hadoop {
class TypeID;
/**
* Represents a type information for a field, which is made up of its
* ID (name) and its type (a TypeID object).
*/
class FieldTypeInfo {
private:
// we own memory mgmt of these vars
const std::string* pFieldID;
const TypeID* pTypeID;
public:
FieldTypeInfo(const std::string* pFieldID, const TypeID* pTypeID) :
pFieldID(pFieldID), pTypeID(pTypeID) {}
FieldTypeInfo(const FieldTypeInfo& ti);
virtual ~FieldTypeInfo();
const TypeID* getTypeID() const {return pTypeID;}
const std::string* getFieldID() const {return pFieldID;}
void serialize(::hadoop::OArchive& a_, const char* tag) const;
bool operator==(const FieldTypeInfo& peer_) const;
FieldTypeInfo* clone() const {return new FieldTypeInfo(*this);}
void print(int space=0) const;
};
}
#endif // FIELDTYPEINFO_HH_

View File

@@ -1,98 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "filestream.hh"
using namespace hadoop;
hadoop::FileInStream::FileInStream()
{
mFile = NULL;
}
bool hadoop::FileInStream::open(const std::string& name)
{
mFile = fopen(name.c_str(), "rb");
return (mFile != NULL);
}
ssize_t hadoop::FileInStream::read(void *buf, size_t len)
{
return fread(buf, 1, len, mFile);
}
bool hadoop::FileInStream::skip(size_t nbytes)
{
return (0==fseek(mFile, nbytes, SEEK_CUR));
}
bool hadoop::FileInStream::close()
{
int ret = fclose(mFile);
mFile = NULL;
return (ret==0);
}
hadoop::FileInStream::~FileInStream()
{
if (mFile != NULL) {
close();
}
}
hadoop::FileOutStream::FileOutStream()
{
mFile = NULL;
}
bool hadoop::FileOutStream::open(const std::string& name, bool overwrite)
{
if (!overwrite) {
mFile = fopen(name.c_str(), "rb");
if (mFile != NULL) {
fclose(mFile);
return false;
}
}
mFile = fopen(name.c_str(), "wb");
return (mFile != NULL);
}
ssize_t hadoop::FileOutStream::write(const void* buf, size_t len)
{
return fwrite(buf, 1, len, mFile);
}
bool hadoop::FileOutStream::advance(size_t nbytes)
{
return (0==fseek(mFile, nbytes, SEEK_CUR));
}
bool hadoop::FileOutStream::close()
{
int ret = fclose(mFile);
mFile = NULL;
return (ret == 0);
}
hadoop::FileOutStream::~FileOutStream()
{
if (mFile != NULL) {
close();
}
}

View File

@@ -1,55 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FILESTREAM_HH_
#define FILESTREAM_HH_
#include <stdio.h>
#include <stdint.h>
#include <string>
#include "recordio.hh"
namespace hadoop {
class FileInStream : public InStream {
public:
FileInStream();
bool open(const std::string& name);
ssize_t read(void *buf, size_t buflen);
bool skip(size_t nbytes);
bool close();
virtual ~FileInStream();
private:
FILE *mFile;
};
class FileOutStream: public OutStream {
public:
FileOutStream();
bool open(const std::string& name, bool overwrite);
ssize_t write(const void* buf, size_t len);
bool advance(size_t nbytes);
bool close();
virtual ~FileOutStream();
private:
FILE *mFile;
};
}; // end namespace
#endif /*FILESTREAM_HH_*/
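
A quick illustration of the stream API above; the file name is arbitrary and error handling is reduced to early returns:

#include "filestream.hh"
#include <cstring>
#include <cstdio>

int main() {
  const char msg[] = "hello recordio";

  hadoop::FileOutStream out;
  if (!out.open("/tmp/fsdemo.bin", true)) return 1;   // true = overwrite
  out.write(msg, sizeof(msg));
  out.close();

  char buf[sizeof(msg)] = {0};
  hadoop::FileInStream in;
  if (!in.open("/tmp/fsdemo.bin")) return 1;
  in.read(buf, sizeof(buf));
  in.close();

  std::printf("read back: %s\n", buf);
  return std::strcmp(msg, buf) == 0 ? 0 : 1;
}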

View File

@@ -1,143 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "recordTypeInfo.hh"
using namespace hadoop;
RecordTypeInfo::RecordTypeInfo()
{
pStid = new StructTypeID();
}
RecordTypeInfo::RecordTypeInfo(const char *pName): name(pName)
{
pStid = new StructTypeID();
}
/*RecordTypeInfo::RecordTypeInfo(const RecordTypeInfo& rti): name(rti.name)
{
// clone the typeinfos from rti and add them
for (unsigned int i=0; i<rti.typeInfos.size(); i++) {
typeInfos.push_back(rti.typeInfos[i]->clone());
}
// clone the map
for (std::map<std::string, RecordTypeInfo*>::const_iterator iter=rti.structRTIs.begin();
iter!=rti.structRTIs.end(); ++iter) {
structRTIs[iter->first] = iter->second->clone();
}
}*/
RecordTypeInfo::~RecordTypeInfo()
{
if (NULL != pStid)
delete pStid;
/*for (unsigned int i=0; i<typeInfos.size(); i++) {
delete typeInfos[i];
}
typeInfos.clear();
for (std::map<std::string, RecordTypeInfo*>::const_iterator iter=structRTIs.begin();
iter!=structRTIs.end(); ++iter) {
// delete the RTI objects
delete iter->second;
}
structRTIs.clear();*/
}
void RecordTypeInfo::addField(const std::string* pFieldID, const TypeID* pTypeID)
{
pStid->getFieldTypeInfos().push_back(new FieldTypeInfo(pFieldID, pTypeID));
}
void RecordTypeInfo::addAll(std::vector<FieldTypeInfo*>& vec)
{
// we need to copy object clones into our own vector
for (unsigned int i=0; i<vec.size(); i++) {
pStid->getFieldTypeInfos().push_back(vec[i]->clone());
}
}
// make a copy of typeInfos and return it
/*std::vector<TypeInfo*>& RecordTypeInfo::getClonedTypeInfos()
{
std::vector<TypeInfo*>* pNewVec = new std::vector<TypeInfo*>();
for (unsigned int i=0; i<typeInfos.size(); i++) {
pNewVec->push_back(typeInfos[i]->clone());
}
return *pNewVec;
} */
const std::vector<FieldTypeInfo*>& RecordTypeInfo::getFieldTypeInfos() const
{
return pStid->getFieldTypeInfos();
}
RecordTypeInfo* RecordTypeInfo::getNestedStructTypeInfo(const char *structName) const
{
StructTypeID* p = pStid->findStruct(structName);
if (NULL == p) return NULL;
return new RecordTypeInfo(structName, p);
/*std::string s(structName);
std::map<std::string, RecordTypeInfo*>::const_iterator iter = structRTIs.find(s);
if (iter == structRTIs.end()) {
return NULL;
}
return iter->second;*/
}
void RecordTypeInfo::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.startRecord(*this, tag);
// name
a_.serialize(name, tag);
/*// number of elements
a_.serialize((int32_t)typeInfos.size(), tag);
// write out each element
for (std::vector<FieldTypeInfo*>::const_iterator iter=typeInfos.begin();
iter!=typeInfos.end(); ++iter) {
(*iter)->serialize(a_, tag);
}*/
pStid->serializeRest(a_, tag);
a_.endRecord(*this, tag);
}
void RecordTypeInfo::print(int space) const
{
for (int i=0; i<space; i++) {
printf(" ");
}
printf("RecordTypeInfo::%s\n", name.c_str());
pStid->print(space);
/*for (unsigned i=0; i<typeInfos.size(); i++) {
typeInfos[i]->print(space+2);
}*/
}
void RecordTypeInfo::deserialize(::hadoop::IArchive& a_, const char* tag)
{
a_.startRecord(*this, tag);
// name
a_.deserialize(name, tag);
pStid->deserialize(a_, tag);
a_.endRecord(*this, tag);
}

View File

@@ -1,68 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef RECORDTYPEINFO_HH_
#define RECORDTYPEINFO_HH_
#include "recordio.hh"
#include <vector>
#include <map>
#include "fieldTypeInfo.hh"
namespace hadoop {
class RecordTypeInfo : public ::hadoop::Record {
private:
//std::vector<FieldTypeInfo* > typeInfos;
std::string name;
//std::map<std::string, RecordTypeInfo*> structRTIs;
StructTypeID *pStid;
RecordTypeInfo(const char * pName, StructTypeID* pStid): name(pName),pStid(pStid) {}
public:
RecordTypeInfo();
RecordTypeInfo(const char *pName);
//RecordTypeInfo(const RecordTypeInfo& rti);
virtual ~RecordTypeInfo();
void addField(const std::string* pFieldID, const TypeID* pTypeID);
void addAll(std::vector<FieldTypeInfo*>& vec);
const std::vector<FieldTypeInfo*>& getFieldTypeInfos() const;
void serialize(::hadoop::OArchive& a_, const char* tag) const;
void deserialize(::hadoop::IArchive& a_, const char* tag);
RecordTypeInfo* clone() const {return new RecordTypeInfo(*this);}
RecordTypeInfo* getNestedStructTypeInfo(const char *structName) const;
const ::std::string& getName() const {return name;}
void setName(const ::std::string& name) {this->name = name;}
const ::std::string& type() const {return name;}
const ::std::string& signature() const {return name;}
void print(int space=0) const;
};
}
#endif // RECORDTYPEINFO_HH_

View File

@@ -1,75 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "recordio.hh"
#include "binarchive.hh"
#include "csvarchive.hh"
#include "xmlarchive.hh"
using namespace hadoop;
hadoop::RecordReader::RecordReader(InStream& stream, RecFormat f)
{
switch (f) {
case kBinary:
mpArchive = new IBinArchive(stream);
break;
case kCSV:
mpArchive = new ICsvArchive(stream);
break;
case kXML:
mpArchive = new IXmlArchive(stream);
break;
}
}
hadoop::RecordReader::~RecordReader()
{
delete mpArchive;
}
void hadoop::RecordReader::read(Record& record)
{
record.deserialize(*mpArchive, (const char*) NULL);
}
hadoop::RecordWriter::RecordWriter(OutStream& stream, RecFormat f)
{
switch (f) {
case kBinary:
mpArchive = new OBinArchive(stream);
break;
case kCSV:
mpArchive = new OCsvArchive(stream);
break;
case kXML:
mpArchive = new OXmlArchive(stream);
break;
}
}
hadoop::RecordWriter::~RecordWriter()
{
delete mpArchive;
}
void hadoop::RecordWriter::write(const Record& record)
{
record.serialize(*mpArchive, (const char*) NULL);
}

View File

@@ -1,82 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef RECORDIO_HH_
#define RECORDIO_HH_
#include <stdio.h>
#include <stdint.h>
#include <iostream>
#include <cstring>
#include <string>
#include <vector>
#include <map>
#include <bitset>
namespace hadoop {
class InStream {
public:
virtual ssize_t read(void *buf, size_t buflen) = 0;
virtual ~InStream() {}
};
class OutStream {
public:
virtual ssize_t write(const void *buf, size_t len) = 0;
virtual ~OutStream() {}
};
class IArchive;
class OArchive;
class Record {
public:
virtual void serialize(OArchive& archive, const char* tag) const = 0;
virtual void deserialize(IArchive& archive, const char* tag) = 0;
virtual const std::string& type() const = 0;
virtual const std::string& signature() const = 0;
virtual ~Record() {}
};
enum RecFormat { kBinary, kXML, kCSV };
class RecordReader {
private:
IArchive* mpArchive;
public:
RecordReader(InStream& stream, RecFormat f);
virtual void read(hadoop::Record& record);
virtual ~RecordReader();
};
class RecordWriter {
private:
OArchive* mpArchive;
public:
RecordWriter(OutStream& stream, RecFormat f);
virtual void write(const hadoop::Record& record);
virtual ~RecordWriter();
};
}; // end namespace hadoop
#include "archive.hh"
#include "exception.hh"
#endif /*RECORDIO_HH_*/
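
Putting the interfaces above together: a record is written through a RecordWriter and read back through a RecordReader, with the on-disk format chosen by RecFormat. The sketch below hand-writes a tiny Record instead of using rcc-generated code, and the archive method names mirror the overrides visible in the XML archive implementation later in this diff; PointRecord, its fields and the /tmp path are invented. The deleted test program further down does the same thing with the generated RecRecord1 class.

#include "recordio.hh"
#include "filestream.hh"

// Hand-written stand-in for an rcc-generated record class.
class PointRecord : public hadoop::Record {
public:
  int32_t x, y;
  PointRecord() : x(0), y(0) {}
  void serialize(hadoop::OArchive& a, const char* tag) const {
    a.startRecord(*this, tag);
    a.serialize(x, "x");
    a.serialize(y, "y");
    a.endRecord(*this, tag);
  }
  void deserialize(hadoop::IArchive& a, const char* tag) {
    a.startRecord(*this, tag);
    a.deserialize(x, "x");
    a.deserialize(y, "y");
    a.endRecord(*this, tag);
  }
  const std::string& type() const {
    static const std::string t("PointRecord");
    return t;
  }
  const std::string& signature() const { return type(); }
};

int main() {
  PointRecord p1;
  p1.x = 3; p1.y = 4;

  hadoop::FileOutStream out;
  out.open("/tmp/point.dat", true);
  hadoop::RecordWriter writer(out, hadoop::kBinary);
  writer.write(p1);
  out.close();

  PointRecord p2;
  hadoop::FileInStream in;
  in.open("/tmp/point.dat");
  hadoop::RecordReader reader(in, hadoop::kBinary);
  reader.read(p2);
  in.close();

  return (p1.x == p2.x && p1.y == p2.y) ? 0 : 1;
}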

View File

@@ -1,51 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
COPTS=-g3 -O0 -Wall
all: test testFromJava
test: ${LIBRECORDIO_TEST_DIR}/test.o ${LIBRECORDIO_TEST_DIR}/test.jr.o
g++ -g3 -O0 -o ${LIBRECORDIO_TEST_DIR}/test ${LIBRECORDIO_TEST_DIR}/test.o \
${LIBRECORDIO_TEST_DIR}/test.jr.o -L${LIBRECORDIO_BUILD_DIR} -L${XERCESCROOT}/lib -lrecordio -lxerces-c
${LIBRECORDIO_TEST_DIR}/test.o: test.cc
g++ ${COPTS} -c -I .. -o ${LIBRECORDIO_TEST_DIR}/test.o test.cc
testFromJava: ${LIBRECORDIO_TEST_DIR}/testFromJava.o ${LIBRECORDIO_TEST_DIR}/test.jr.o
g++ -g3 -O0 -o ${LIBRECORDIO_TEST_DIR}/testFromJava ${LIBRECORDIO_TEST_DIR}/testFromJava.o ${LIBRECORDIO_TEST_DIR}/test.jr.o \
-L${LIBRECORDIO_BUILD_DIR} -L${XERCESCROOT}/lib -lrecordio -lxerces-c
${LIBRECORDIO_TEST_DIR}/testFromJava.o: testFromJava.cc
g++ ${COPTS} -c -I.. -o ${LIBRECORDIO_TEST_DIR}/testFromJava.o testFromJava.cc
${LIBRECORDIO_TEST_DIR}/test.jr.o: test.jr.cc
g++ ${COPTS} -c -I.. -o ${LIBRECORDIO_TEST_DIR}/test.jr.o test.jr.cc
%.jr.cc %.jr.hh: %.jr
${HADOOP_PREFIX}/bin/rcc --language c++ $<
%: %.o
%: %.cc
test.cc: test.hh
test.hh: test.jr.hh ../recordio.hh ../filestream.hh
clean:
rm -f ${LIBRECORDIO_TEST_DIR}/*~ ${LIBRECORDIO_TEST_DIR}/*.o ${LIBRECORDIO_TEST_DIR}/test \
${LIBRECORDIO_TEST_DIR}/testFromJava ${LIBRECORDIO_TEST_DIR}/*.jr.*

View File

@@ -1,309 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "test.hh"
#include <vector>
int main()
{
org::apache::hadoop::record::test::RecRecord1 r1;
org::apache::hadoop::record::test::RecRecord1 r2;
{
hadoop::FileOutStream ostream;
ostream.open("/tmp/hadooptmp.dat", true);
hadoop::RecordWriter writer(ostream, hadoop::kBinary);
r1.setBoolVal(true);
r1.setByteVal((int8_t)0x66);
r1.setFloatVal(3.145);
r1.setDoubleVal(1.5234);
r1.setIntVal(4567);
r1.setLongVal(0x5a5a5a5a5a5aLL);
std::string& s = r1.getStringVal();
s = "random text";
writer.write(r1);
ostream.close();
hadoop::FileInStream istream;
istream.open("/tmp/hadooptmp.dat");
hadoop::RecordReader reader(istream, hadoop::kBinary);
reader.read(r2);
if (r1 == r2) {
printf("Binary archive test passed.\n");
} else {
printf("Binary archive test failed.\n");
}
istream.close();
}
{
hadoop::FileOutStream ostream;
ostream.open("/tmp/hadooptmp.txt", true);
hadoop::RecordWriter writer(ostream, hadoop::kCSV);
r1.setBoolVal(true);
r1.setByteVal((int8_t)0x66);
r1.setFloatVal(3.145);
r1.setDoubleVal(1.5234);
r1.setIntVal(4567);
r1.setLongVal(0x5a5a5a5a5a5aLL);
std::string& s = r1.getStringVal();
s = "random text";
writer.write(r1);
ostream.close();
hadoop::FileInStream istream;
istream.open("/tmp/hadooptmp.txt");
hadoop::RecordReader reader(istream, hadoop::kCSV);
reader.read(r2);
if (r1 == r2) {
printf("CSV archive test passed.\n");
} else {
printf("CSV archive test failed.\n");
}
istream.close();
}
{
hadoop::FileOutStream ostream;
ostream.open("/tmp/hadooptmp.xml", true);
hadoop::RecordWriter writer(ostream, hadoop::kXML);
r1.setBoolVal(true);
r1.setByteVal((int8_t)0x66);
r1.setFloatVal(3.145);
r1.setDoubleVal(1.5234);
r1.setIntVal(4567);
r1.setLongVal(0x5a5a5a5a5a5aLL);
std::string& s = r1.getStringVal();
s = "random text";
writer.write(r1);
ostream.close();
hadoop::FileInStream istream;
istream.open("/tmp/hadooptmp.xml");
hadoop::RecordReader reader(istream, hadoop::kXML);
reader.read(r2);
if (r1 == r2) {
printf("XML archive test passed.\n");
} else {
printf("XML archive test failed.\n");
}
istream.close();
}
/*
* Tests to check for versioning functionality
*/
// basic test
// write out a record and its type info, read it back using its typeinfo
{
hadoop::FileOutStream ostream, ortistream;
ostream.open("/tmp/hadooptmp.dat", true);
ortistream.open("/tmp/hadooprti.dat", true);
hadoop::RecordWriter writer(ostream, hadoop::kBinary);
hadoop::RecordWriter writerRti(ortistream, hadoop::kBinary);
r1.setBoolVal(true);
r1.setByteVal((int8_t)0x66);
r1.setFloatVal(3.145);
r1.setDoubleVal(1.5234);
r1.setIntVal(4567);
r1.setLongVal(0x5a5a5a5a5a5aLL);
std::string& s = r1.getStringVal();
s = "random text";
writer.write(r1);
ostream.close();
// write out rti info
writerRti.write(org::apache::hadoop::record::test::RecRecord1::getTypeInfo());
ortistream.close();
// read
hadoop::FileInStream istream;
istream.open("/tmp/hadooptmp.dat");
hadoop::RecordReader reader(istream, hadoop::kBinary);
hadoop::FileInStream irtistream;
irtistream.open("/tmp/hadooprti.dat");
hadoop::RecordReader readerRti(irtistream, hadoop::kBinary);
hadoop::RecordTypeInfo rti;
readerRti.read(rti);
irtistream.close();
org::apache::hadoop::record::test::RecRecord1::setTypeFilter(rti);
reader.read(r2);
if (r1 == r2) {
printf("Basic versioning test passed.\n");
} else {
printf("Basic versioning test failed.\n");
}
istream.close();
}
// versioning:write out a record and its type info, read back a similar record using the written record's typeinfo
{
hadoop::FileOutStream ostream, ortistream;
ostream.open("/tmp/hadooptmp.dat", true);
ortistream.open("/tmp/hadooprti.dat", true);
hadoop::RecordWriter writer(ostream, hadoop::kBinary);
hadoop::RecordWriter writerRti(ortistream, hadoop::kBinary);
// we create an array of records to write
std::vector<org::apache::hadoop::record::test::RecRecordOld*> recsWrite;
int i, j, k, l;
char buf[1000];
for (i=0; i<5; i++) {
org::apache::hadoop::record::test::RecRecordOld* ps1Rec =
new org::apache::hadoop::record::test::RecRecordOld();
sprintf(buf, "This is record s1: %d", i);
ps1Rec->getName().assign(buf);
for (j=0; j<3; j++) {
ps1Rec->getIvec().push_back((int64_t)(i+j));
}
for (j=0; j<2; j++) {
std::vector<org::apache::hadoop::record::test::RecRecord0>* pVec =
new std::vector<org::apache::hadoop::record::test::RecRecord0>();
for (k=0; k<3; k++) {
org::apache::hadoop::record::test::RecRecord0 *psRec =
new org::apache::hadoop::record::test::RecRecord0();
sprintf(buf, "This is record s: (%d: %d)", j, k);
psRec->getStringVal().assign(buf);
}
ps1Rec->getSvec().push_back(*pVec);
}
sprintf(buf, "This is record s: %d", i);
ps1Rec->getInner().getStringVal().assign(buf);
for (l=0; l<2; l++) {
std::vector<std::vector<std::string> >* ppVec =
new std::vector<std::vector<std::string> >();
for (j=0; j<2; j++) {
std::vector< std::string >* pVec =
new std::vector< std::string >();
for (k=0; k<3; k++) {
sprintf(buf, "THis is a nested string: (%d: %d: %d)", l, j, k);
std::string* s = new std::string((const char*)buf);
pVec->push_back(*s);
}
}
ps1Rec->getStrvec().push_back(*ppVec);
}
ps1Rec->setI1(100+i);
ps1Rec->getMap1()[23] = "23";
ps1Rec->getMap1()[11] = "11";
std::map<int32_t, int64_t>* m1 = new std::map<int32_t, int64_t>();
std::map<int32_t, int64_t>* m2 = new std::map<int32_t, int64_t>();
(*m1)[5] = 5;
(*m1)[10] = 10;
(*m2)[15] = 15;
(*m2)[20] = 20;
ps1Rec->getMvec1().push_back(*m1);
ps1Rec->getMvec1().push_back(*m2);
ps1Rec->getMvec2().push_back(*m1);
recsWrite.push_back(ps1Rec);
}
// write out to file
for (unsigned int i=0; i<recsWrite.size(); i++) {
writer.write(*(recsWrite[i]));
}
ostream.close();
// write out rti info
writerRti.write(org::apache::hadoop::record::test::RecRecordOld::getTypeInfo());
ortistream.close();
// read
hadoop::FileInStream istream;
istream.open("/tmp/hadooptmp.dat");
hadoop::RecordReader reader(istream, hadoop::kBinary);
hadoop::FileInStream irtistream;
irtistream.open("/tmp/hadooprti.dat");
hadoop::RecordReader readerRti(irtistream, hadoop::kBinary);
hadoop::RecordTypeInfo rti;
readerRti.read(rti);
irtistream.close();
org::apache::hadoop::record::test::RecRecordNew::setTypeFilter(rti);
// read records
std::vector<org::apache::hadoop::record::test::RecRecordNew*> recsRead;
for (unsigned int i=0; i<recsWrite.size(); i++) {
org::apache::hadoop::record::test::RecRecordNew* ps2Rec =
new org::apache::hadoop::record::test::RecRecordNew();
reader.read(*ps2Rec);
recsRead.push_back(ps2Rec);
}
istream.close();
// compare
bool pass = true;
for (unsigned int i=0; i<recsRead.size(); i++) {
org::apache::hadoop::record::test::RecRecordNew* ps2In = recsRead[i];
org::apache::hadoop::record::test::RecRecordOld* ps1Out = recsWrite[i];
if (!ps2In->getName2().empty()) {
printf("Error in s2: name2\n");
pass = false;
}
if (!(ps2In->getInner() == ps1Out->getInner())) {
printf("error in s2: s1 struct\n");
pass = false;
}
if (0 != ps2In->getIvec().size()) {
printf("error in s2: ivec\n");
pass = false;
}
if (0 != ps2In->getSvec().size()) {
printf("error in s2: svec\n");
pass = false;
}
for (unsigned int j=0; j<ps2In->getStrvec().size(); j++) {
::std::vector< ::std::vector< ::std::string > >& ss2Vec = ps2In->getStrvec()[j];
::std::vector< ::std::vector< ::std::string > >& ss1Vec = ps1Out->getStrvec()[j];
for (unsigned int k=0; k<ss2Vec.size(); k++) {
::std::vector< ::std::string >& s2Vec = ss2Vec[k];
::std::vector< ::std::string >& s1Vec = ss1Vec[k];
for (unsigned int l=0; l<s2Vec.size(); l++) {
if (s2Vec[l] != s1Vec[l]) {
printf("Error in s2: s2Vec\n");
pass = false;
}
}
}
}
if (0 != ps2In->getMap1().size()) {
printf("Error in s2: map1\n");
pass = false;
}
for (unsigned int j=0; j<ps2In->getMvec2().size(); j++) {
if (ps2In->getMvec2()[j] != ps1Out->getMvec2()[j]) {
printf("Error in s2: mvec2\n");
pass = false;
}
}
}
if (pass)
printf("Versioning test passed.\n");
}
return 0;
}

View File

@@ -1,26 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TEST_HH_
#define TEST_HH_
#include "recordio.hh"
#include "filestream.hh"
#include "test.jr.hh"
#endif /*TEST_HH_*/

View File

@@ -1,63 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
module org.apache.hadoop.record.test {
class RecRecord0 {
ustring StringVal;
}
class RecRecord1 {
boolean BoolVal;
byte ByteVal;
int IntVal;
long LongVal;
float FloatVal;
double DoubleVal;
ustring StringVal;
buffer BufferVal;
vector<ustring> VectorVal;
map<ustring, ustring> MapVal;
}
class RecRecordOld {
ustring name;
vector<long> ivec;
vector<vector<RecRecord0>> svec;
RecRecord0 inner;
vector<vector<vector<ustring>>> strvec;
float i1;
map<byte, ustring> map1;
vector<map<int, long>> mvec1;
vector<map<int, long>> mvec2;
}
/* RecRecordNew is a lot like RecRecordOld. Helps test for versioning. */
class RecRecordNew {
ustring name2;
RecRecord0 inner;
vector<int> ivec;
vector<vector<int>> svec;
vector<vector<vector<ustring>>> strvec;
int i1;
map<long, ustring> map1;
vector<map<int, long>> mvec2;
}
}

View File

@@ -1,71 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "test.hh"
int main()
{
org::apache::hadoop::record::test::RecRecord1 r1;
org::apache::hadoop::record::test::RecRecord1 r2;
r1.setBoolVal(true);
r1.setByteVal((int8_t)0x66);
r1.setFloatVal(3.145);
r1.setDoubleVal(1.5234);
r1.setIntVal(4567);
r1.setLongVal(0x5a5a5a5a5a5aLL);
std::string& s = r1.getStringVal();
s = "random text";
{
hadoop::FileInStream istream;
istream.open("/tmp/hadooptemp.dat");
hadoop::RecordReader reader(istream, hadoop::kBinary);
reader.read(r2);
if (r1 == r2) {
printf("Binary archive test passed.\n");
} else {
printf("Binary archive test failed.\n");
}
istream.close();
}
{
hadoop::FileInStream istream;
istream.open("/tmp/hadooptemp.txt");
hadoop::RecordReader reader(istream, hadoop::kCSV);
reader.read(r2);
if (r1 == r2) {
printf("CSV archive test passed.\n");
} else {
printf("CSV archive test failed.\n");
}
istream.close();
}
{
hadoop::FileInStream istream;
istream.open("/tmp/hadooptemp.xml");
hadoop::RecordReader reader(istream, hadoop::kXML);
reader.read(r2);
if (r1 == r2) {
printf("XML archive test passed.\n");
} else {
printf("XML archive test failed.\n");
}
istream.close();
}
return 0;
}

View File

@@ -1,26 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TEST_HH_
#define TEST_HH_
#include "recordio.hh"
#include "filestream.hh"
#include "test.jr.hh"
#endif /*TEST_HH_*/

View File

@@ -1,274 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "typeIDs.hh"
using namespace hadoop;
void TypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize(typeVal, tag);
}
bool TypeID::operator==(const TypeID& peer_) const
{
return (this->typeVal == peer_.typeVal);
}
void TypeID::print(int space) const
{
for (int i=0; i<space; i++) {
printf(" ");
}
printf("typeID(%lx) = %d\n", (long)this, typeVal);
}
/*StructTypeID::StructTypeID(const char *p): TypeID(RIOTYPE_STRUCT)
{
pName = new std::string(p);
}
StructTypeID::StructTypeID(std::string* p): TypeID(RIOTYPE_STRUCT)
{
this->pName = p;
}*/
StructTypeID::StructTypeID(const std::vector<FieldTypeInfo*>& vec) :
TypeID(RIOTYPE_STRUCT)
{
// we need to copy object clones into our own vector
for (unsigned int i=0; i<vec.size(); i++) {
typeInfos.push_back(vec[i]->clone());
}
}
/*StructTypeID::StructTypeID(const StructTypeID& ti) :
TypeID(RIOTYPE_STRUCT)
{
// we need to copy object clones into our own vector
for (unsigned int i=0; i<ti.typeInfos.size(); i++) {
typeInfos.push_back(ti.typeInfos[i]->clone());
}
} */
StructTypeID::~StructTypeID()
{
for (unsigned int i=0; i<typeInfos.size(); i++) {
delete typeInfos[i];
}
}
void StructTypeID::add(FieldTypeInfo *pti)
{
typeInfos.push_back(pti);
}
// return the StructTypeID, if any, of the given field
StructTypeID* StructTypeID::findStruct(const char *pStructName)
{
// walk through the list, searching. Not the most efficient way, but this
// is intended to be used rarely, so we keep it simple.
// As an optimization, we can keep a hashmap of record name to its RTI, for later.
for (unsigned int i=0; i<typeInfos.size(); i++) {
if ((0 == typeInfos[i]->getFieldID()->compare(pStructName)) &&
(typeInfos[i]->getTypeID()->getTypeVal()==RIOTYPE_STRUCT)) {
return (StructTypeID*)(typeInfos[i]->getTypeID()->clone());
}
}
return NULL;
}
void StructTypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize(typeVal, tag);
serializeRest(a_, tag);
}
/*
* Writes rest of the struct (excluding type value).
* As an optimization, this method is directly called by RTI
* for the top level record so that we don't write out the byte
* indicating that this is a struct (since top level records are
* always structs).
*/
void StructTypeID::serializeRest(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize((int32_t)typeInfos.size(), tag);
for (unsigned int i=0; i<typeInfos.size(); i++) {
typeInfos[i]->serialize(a_, tag);
}
}
/*
* deserialize ourselves. Called by RTI.
*/
void StructTypeID::deserialize(::hadoop::IArchive& a_, const char* tag)
{
// number of elements
int numElems;
a_.deserialize(numElems, tag);
for (int i=0; i<numElems; i++) {
typeInfos.push_back(genericReadTypeInfo(a_, tag));
}
}
// generic reader: reads the next TypeInfo object from stream and returns it
FieldTypeInfo* StructTypeID::genericReadTypeInfo(::hadoop::IArchive& a_, const char* tag)
{
// read name of field
std::string* pName = new std::string();
a_.deserialize(*pName, tag);
TypeID* pti = genericReadTypeID(a_, tag);
return new FieldTypeInfo(pName, pti);
}
// generic reader: reads the next TypeID object from stream and returns it
TypeID* StructTypeID::genericReadTypeID(::hadoop::IArchive& a_, const char* tag)
{
int8_t typeVal;
a_.deserialize(typeVal, tag);
switch(typeVal) {
case RIOTYPE_BOOL:
case RIOTYPE_BUFFER:
case RIOTYPE_BYTE:
case RIOTYPE_DOUBLE:
case RIOTYPE_FLOAT:
case RIOTYPE_INT:
case RIOTYPE_LONG:
case RIOTYPE_STRING:
return new TypeID(typeVal);
case RIOTYPE_STRUCT:
{
StructTypeID* pstID = new StructTypeID();
int numElems;
a_.deserialize(numElems, tag);
for (int i=0; i<numElems; i++) {
pstID->add(genericReadTypeInfo(a_, tag));
}
return pstID;
}
case RIOTYPE_VECTOR:
{
TypeID* pti = genericReadTypeID(a_, tag);
return new VectorTypeID(pti);
}
case RIOTYPE_MAP:
{
TypeID* ptiKey = genericReadTypeID(a_, tag);
TypeID* ptiValue = genericReadTypeID(a_, tag);
return new MapTypeID(ptiKey, ptiValue);
}
default:
// shouldn't be here
return NULL;
}
}
void StructTypeID::print(int space) const
{
TypeID::print(space);
for (int i=0; i<space; i++) {
printf(" ");
}
printf("StructTypeInfo(%lx): \n", (long)&typeInfos);
for (unsigned int i=0; i<typeInfos.size(); i++) {
typeInfos[i]->print(space+2);
}
}
VectorTypeID::~VectorTypeID()
{
delete ptiElement;
}
VectorTypeID::VectorTypeID(const VectorTypeID& ti): TypeID(RIOTYPE_VECTOR)
{
ptiElement = ti.ptiElement->clone();
}
void VectorTypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize(typeVal, tag);
ptiElement->serialize(a_, tag);
}
bool VectorTypeID::operator==(const TypeID& peer_) const
{
if (typeVal != peer_.getTypeVal()) {
return false;
}
// this must be a vector type id
return (*ptiElement) == (*((VectorTypeID&)peer_).ptiElement);
}
void VectorTypeID::print(int space) const
{
TypeID::print(space);
for (int i=0; i<space; i++) {
printf(" ");
}
printf("VectorTypeInfo(%lx): \n", (long)this);
ptiElement->print(space+2);
}
MapTypeID::~MapTypeID()
{
delete ptiKey;
delete ptiValue;
}
MapTypeID::MapTypeID(const MapTypeID& ti): TypeID(RIOTYPE_MAP)
{
ptiKey = ti.ptiKey->clone();
ptiValue = ti.ptiValue->clone();
}
void MapTypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize(typeVal, tag);
ptiKey->serialize(a_, tag);
ptiValue->serialize(a_, tag);
}
bool MapTypeID::operator==(const TypeID& peer_) const
{
if (typeVal != peer_.getTypeVal()) {
return false;
}
// this must be a map type id
MapTypeID& mti = (MapTypeID&) peer_;
if (!(*ptiKey == *(mti.ptiKey))) {
return false;
}
return ((*ptiValue == *(mti.ptiValue)));
}
void MapTypeID::print(int space) const
{
TypeID::print(space);
for (int i=0; i<space; i++) {
printf(" ");
}
printf("MapTypeInfo(%lx): \n", (long)this);
ptiKey->print(space+2);
ptiValue->print(space+2);
}

View File

@@ -1,169 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TYPEIDS_HH_
#define TYPEIDS_HH_
#include "recordio.hh"
#include "fieldTypeInfo.hh"
namespace hadoop {
class FieldTypeInfo;
/*
* enum of types. We assign values to individual bytes rather than use an
* enum because we want the values to stay consistent with the Java code,
* so we need to control them.
*/
const int8_t RIOTYPE_BOOL = 1;
const int8_t RIOTYPE_BUFFER = 2;
const int8_t RIOTYPE_BYTE = 3;
const int8_t RIOTYPE_DOUBLE = 4;
const int8_t RIOTYPE_FLOAT = 5;
const int8_t RIOTYPE_INT = 6;
const int8_t RIOTYPE_LONG = 7;
const int8_t RIOTYPE_MAP = 8;
const int8_t RIOTYPE_STRING = 9;
const int8_t RIOTYPE_STRUCT = 10;
const int8_t RIOTYPE_VECTOR = 11;
/*
* Represents typeID for basic types.
* Serializes just the single int8_t.
*/
class TypeID {
public:
TypeID(int8_t typeVal) {this->typeVal = typeVal;}
TypeID(const TypeID& t) {this->typeVal = t.typeVal;}
virtual ~TypeID() {}
int8_t getTypeVal() const {return typeVal;}
virtual void serialize(::hadoop::OArchive& a_, const char* tag) const;
virtual bool operator==(const TypeID& peer_) const;
virtual TypeID* clone() const {return new TypeID(*this);}
virtual void print(int space=0) const;
protected:
int8_t typeVal;
};
/*
* no predefined TypeID objects, since memory management becomes difficult.
* If some TypeID objects are consts and others are new-ed, it becomes hard to
* destroy const objects without reference counting.
*/
/*const TypeID TID_BoolTypeID(RIOTYPE_BOOL);
const TypeID TID_BufferTypeID(RIOTYPE_BUFFER);
const TypeID TID_ByteTypeID(RIOTYPE_BYTE);
const TypeID TID_DoubleTypeID(RIOTYPE_DOUBLE);
const TypeID TID_FloatTypeID(RIOTYPE_FLOAT);
const TypeID TID_IntTypeID(RIOTYPE_INT);
const TypeID TID_LongTypeID(RIOTYPE_LONG);
const TypeID TID_StringTypeID(RIOTYPE_STRING);*/
/*
* TypeID for structures
*/
class StructTypeID : public TypeID {
private:
// note: we own the memory mgmt of TypeInfo objects stored in the vector
std::vector<FieldTypeInfo*> typeInfos;
FieldTypeInfo* genericReadTypeInfo(::hadoop::IArchive& a_, const char* tag);
TypeID* genericReadTypeID(::hadoop::IArchive& a_, const char* tag);
public:
/*StructTypeID(const char* p);
StructTypeID(std::string* p);
StructTypeID(const StructTypeID& ti);*/
StructTypeID(): TypeID(RIOTYPE_STRUCT) {};
StructTypeID(const std::vector<FieldTypeInfo*>& vec);
virtual ~StructTypeID();
void add(FieldTypeInfo *pti);
std::vector<FieldTypeInfo*>& getFieldTypeInfos() {return typeInfos;}
StructTypeID* findStruct(const char *pStructName);
void serialize(::hadoop::OArchive& a_, const char* tag) const;
void serializeRest(::hadoop::OArchive& a_, const char* tag) const;
void deserialize(::hadoop::IArchive& a_, const char* tag);
virtual TypeID* clone() const {return new StructTypeID(*this);}
virtual void print(int space=0) const;
};
/*
* TypeID for vectors
*/
class VectorTypeID : public TypeID {
private:
// ptiElement's memory mgmt is owned by class
TypeID* ptiElement;
public:
VectorTypeID(TypeID* ptiElement): TypeID(RIOTYPE_VECTOR), ptiElement(ptiElement) {}
VectorTypeID(const VectorTypeID& ti);
virtual ~VectorTypeID();
const TypeID* getElementTypeID() {return ptiElement;}
virtual TypeID* clone() const {return new VectorTypeID(*this);}
void serialize(::hadoop::OArchive& a_, const char* tag) const;
virtual bool operator==(const TypeID& peer_) const;
virtual void print(int space=0) const;
};
/*
* TypeID for maps
*/
class MapTypeID : public TypeID {
private:
// ptiKey and ptiValue's memory mgmt is owned by the class
TypeID* ptiKey;
TypeID* ptiValue;
public:
MapTypeID(TypeID* ptiKey, TypeID* ptiValue):
TypeID(RIOTYPE_MAP), ptiKey(ptiKey), ptiValue(ptiValue) {}
MapTypeID(const MapTypeID& ti);
virtual ~MapTypeID();
const TypeID* getKeyTypeID() {return ptiKey;}
const TypeID* getValueTypeID() {return ptiValue;}
virtual TypeID* clone() const {return new MapTypeID(*this);}
void serialize(::hadoop::OArchive& a_, const char* tag) const;
virtual bool operator==(const TypeID& peer_) const;
virtual void print(int space=0) const;
};
}
#endif // TYPEIDS_HH_
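
Tying the pieces together: a record's type information is a StructTypeID populated with FieldTypeInfo entries, normally built through RecordTypeInfo::addField (declared earlier in this diff) and written out like any other Record. A small hand-rolled sketch; the record and field names are invented:

#include "recordTypeInfo.hh"
#include "filestream.hh"

int main() {
  using namespace hadoop;

  // Describe a record with one int field and one string field.  addField
  // takes ownership of the heap-allocated name and TypeID, matching how
  // StructTypeID::genericReadTypeInfo allocates them on the read path.
  RecordTypeInfo rti("PointRecord");
  rti.addField(new std::string("x"), new TypeID(RIOTYPE_INT));
  rti.addField(new std::string("label"), new TypeID(RIOTYPE_STRING));

  // RecordTypeInfo is itself a Record, so it can be serialized with the
  // ordinary writer machinery (the deleted C++ tests do this for versioning).
  FileOutStream out;
  out.open("/tmp/point.rti", true);
  RecordWriter writer(out, kBinary);
  writer.write(rti);
  out.close();
  return 0;
}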

View File

@@ -1,69 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "typeInfo.hh"
using namespace hadoop;
TypeInfo::~TypeInfo()
{
delete pFieldID;
delete pTypeID;
}
/*TypeInfo& TypeInfo::operator =(const TypeInfo& ti) {
pFieldID = ti.pFieldID;
pTypeID = ti.pTypeID;
return *this;
}*/
TypeInfo::TypeInfo(const TypeInfo& ti)
{
pFieldID = new std::string(*ti.pFieldID);
pTypeID = ti.pTypeID->clone();
}
void TypeInfo::serialize(::hadoop::OArchive& a_, const char* tag) const
{
a_.serialize(*pFieldID, tag);
pTypeID->serialize(a_, tag);
}
bool TypeInfo::operator==(const TypeInfo& peer_) const
{
// first check if fieldID matches
if (0 != pFieldID->compare(*(peer_.pFieldID))) {
return false;
}
// now see if typeID matches
return (*pTypeID == *(peer_.pTypeID));
}
void TypeInfo::print(int space) const
{
for (int i=0; i<space; i++) {
printf(" ");
}
printf("TypeInfo(%lx):\n", (long)this);
for (int i=0; i<space+2; i++) {
printf(" ");
}
printf("field = \"%s\"\n", pFieldID->c_str());
pTypeID->print(space+2);
}

View File

@@ -1,56 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TYPEINFO_HH_
#define TYPEINFO_HH_
#include "recordio.hh"
#include "typeIDs.hh"
namespace hadoop {
class TypeID;
class TypeInfo {
private:
// we own memory mgmt of these vars
const std::string* pFieldID;
const TypeID* pTypeID;
public:
TypeInfo(const std::string* pFieldID, const TypeID* pTypeID) :
pFieldID(pFieldID), pTypeID(pTypeID) {}
TypeInfo(const TypeInfo& ti);
virtual ~TypeInfo();
const TypeID* getTypeID() const {return pTypeID;}
const std::string* getFieldID() const {return pFieldID;}
void serialize(::hadoop::OArchive& a_, const char* tag) const;
bool operator==(const TypeInfo& peer_) const;
TypeInfo* clone() const {return new TypeInfo(*this);}
//TypeInfo& operator =(const TypeInfo& ti);
void print(int space=0) const;
};
}
#endif // TYPEINFO_HH_

View File

@@ -1,109 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils.hh"
#include "recordTypeInfo.hh"
using namespace hadoop;
void Utils::skip(IArchive& a, const char* tag, const TypeID& typeID)
{
bool b;
size_t len=0;
::std::string str;
int8_t bt;
double d;
float f;
int32_t i;
int64_t l;
switch(typeID.getTypeVal()) {
case RIOTYPE_BOOL:
a.deserialize(b, tag);
break;
case RIOTYPE_BUFFER:
a.deserialize(str, len, tag);
break;
case RIOTYPE_BYTE:
a.deserialize(bt, tag);
break;
case RIOTYPE_DOUBLE:
a.deserialize(d, tag);
break;
case RIOTYPE_FLOAT:
a.deserialize(f, tag);
break;
case RIOTYPE_INT:
a.deserialize(i, tag);
break;
case RIOTYPE_LONG:
a.deserialize(l, tag);
break;
case RIOTYPE_MAP:
{
// since we don't know the key, value types,
// we need to deserialize in a generic manner
Index* idx = a.startMap(tag);
MapTypeID& mtID = (MapTypeID&) typeID;
while (!idx->done()) {
skip(a, tag, *(mtID.getKeyTypeID()));
skip(a, tag, *(mtID.getValueTypeID()));
idx->incr();
}
a.endMap(idx, tag);
}
break;
case RIOTYPE_STRING:
a.deserialize(str, tag);
break;
case RIOTYPE_STRUCT:
{
// since we don't know the field types,
// we need to deserialize in a generic manner
// we need to pass a record in, though it's never used
RecordTypeInfo rec;
a.startRecord(rec, tag);
StructTypeID& stID = (StructTypeID&) typeID;
std::vector<FieldTypeInfo*>& typeInfos = stID.getFieldTypeInfos();
for (unsigned int i=0; i<typeInfos.size(); i++) {
skip(a, tag, *(typeInfos[i]->getTypeID()));
}
a.endRecord(rec, tag);
}
break;
case RIOTYPE_VECTOR:
{
// since we don't know the element type,
// we need to deserialize in a generic manner
Index* idx = a.startVector(tag);
VectorTypeID& vtID = (VectorTypeID&) typeID;
while (!idx->done()) {
skip(a, tag, *(vtID.getElementTypeID()));
idx->incr();
}
a.endVector(idx, tag);
}
break;
default:
// shouldn't be here
throw new IOException("Unknown typeID when skipping bytes");
break;
};
}

View File

@@ -1,50 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef UTILS_HH_
#define UTILS_HH_
#include "recordio.hh"
#include "typeIDs.hh"
namespace hadoop {
/**
* Various utility functions for the Hadoop record I/O platform.
*/
class Utils {
private:
/** Cannot create a new instance of Utils */
Utils() {};
public:
/**
* read/skip bytes from stream based on a type
*/
static void skip(IArchive& a, const char* tag, const TypeID& typeID);
};
}
#endif // UTILS_HH_
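
Utils::skip is what a versioned reader falls back on when the type filter reports a field in the stream that the reading class no longer has: instead of deserializing the value, it is consumed and discarded based on its TypeID. A schematic round trip, assuming the binary archive classes used by RecordReader/RecordWriter (see recordio.cc earlier in this diff) expose the usual serialize overloads; the field name and path are invented:

#include "utils.hh"
#include "binarchive.hh"
#include "filestream.hh"

int main() {
  using namespace hadoop;

  // Write a single int the way a generated serializer would.
  FileOutStream out;
  out.open("/tmp/skipdemo.dat", true);
  OBinArchive oa(out);
  oa.serialize((int32_t)42, "x");
  out.close();

  // On the reading side, pretend field "x" is unknown: skip it by TypeID
  // instead of deserializing it.
  FileInStream in;
  in.open("/tmp/skipdemo.dat");
  IBinArchive ia(in);
  TypeID intType(RIOTYPE_INT);
  Utils::skip(ia, "x", intType);
  in.close();
  return 0;
}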

View File

@@ -1,431 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "xmlarchive.hh"
#include <stdlib.h>
using namespace hadoop;
void hadoop::MySAXHandler::startElement(const XMLCh* const name, AttributeList& attr)
{
charsValid = false;
char* qname = XMLString::transcode(name);
if(std::string("boolean") == qname ||
std::string("ex:i1") == qname ||
std::string("i4") == qname ||
std::string("int") == qname ||
std::string("ex:i8") == qname ||
std::string("ex:float") == qname ||
std::string("double") == qname ||
std::string("string") == qname) {
std::string s(qname);
Value v(s);
vlist.push_back(v);
charsValid = true;
} else if(std::string("struct") == qname ||
std::string("array") == qname) {
std::string s(qname);
Value v(s);
vlist.push_back(v);
}
XMLString::release(&qname);
}
void hadoop::MySAXHandler::endElement(const XMLCh* const name)
{
charsValid = false;
char* qname = XMLString::transcode(name);
if(std::string("struct") == qname ||
std::string("array") == qname) {
std::string s = "/";
Value v(s + qname);
vlist.push_back(v);
}
XMLString::release(&qname);
}
void hadoop::MySAXHandler::characters(const XMLCh* const buf, const unsigned int len)
{
if (charsValid) {
char *cstr = XMLString::transcode(buf);
Value& v = vlist.back();
v.addChars(cstr, strlen(cstr));
XMLString::release(&cstr);
}
}
static char hexchars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F' };
static std::string toXMLString(std::string s)
{
std::string r;
size_t len = s.length();
size_t i;
const char* data = s.data();
for (i=0; i<len; i++, data++) {
char ch = *data;
if (ch == '<') {
r.append("&lt;");
} else if (ch == '&') {
r.append("&amp;");
} else if (ch == '%') {
r.append("%0025");
} else if (ch < 0x20) {
uint8_t* pb = (uint8_t*) &ch;
char ch1 = hexchars[*pb/16];
char ch2 = hexchars[*pb%16];
r.push_back('%');
r.push_back('0');
r.push_back('0');
r.push_back(ch1);
r.push_back(ch2);
} else {
r.push_back(ch);
}
}
return r;
}
static uint8_t h2b(char ch) {
if ((ch >= '0') && (ch <= '9')) {
return ch - '0';
}
if ((ch >= 'a') && (ch <= 'f')) {
return ch - 'a' + 10;
}
if ((ch >= 'A') && (ch <= 'F')) {
return ch - 'A' + 10;
}
return 0;
}
static std::string fromXMLString(std::string s)
{
std::string r;
size_t len = s.length();
size_t i;
uint8_t* pb = (uint8_t*) s.data();
for (i = 0; i < len; i++) {
uint8_t b = *pb;
if (b == '%') {
char *pc = (char*) (pb+1);
// skip the first two characters, which are always '0'
pc += 2;
char ch1 = *pc++;
char ch2 = *pc++;
pb += 4;
i += 4; // the escape sequence consumed four extra input characters
uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
pc = (char*) &cnv;
r.push_back(*pc);
} else {
char *pc = (char*) pb;
r.push_back(*pc);
}
pb++;
}
return r;
}
static std::string toXMLBuffer(std::string s, size_t len)
{
std::string r;
size_t i;
uint8_t* data = (uint8_t*) s.data();
for (i=0; i<len; i++, data++) {
uint8_t b = *data;
char ch1 = hexchars[b/16];
char ch2 = hexchars[b%16];
r.push_back(ch1);
r.push_back(ch2);
}
return r;
}
static std::string fromXMLBuffer(std::string s, size_t& len)
{
len = s.length();
if (len%2 == 1) { // a hex-encoded buffer must have an even number of characters
throw new IOException("Error deserializing buffer.");
}
len = len >> 1;
std::string t;
for (size_t idx = 0; idx < len; idx++) {
char buf[3];
buf[0] = s[2*idx];
buf[1] = s[2*idx+1];
buf[2] = '\0';
int i;
if (1 != sscanf(buf, "%2x", &i)) {
throw new IOException("Errror deserializing buffer.");
}
t.push_back((char) i);
}
len = t.length();
return t;
}
void hadoop::IXmlArchive::deserialize(int8_t& t, const char* tag)
{
Value v = next();
if (v.getType() != "ex:i1") {
throw new IOException("Error deserializing byte");
}
t = (int8_t) strtol(v.getValue().c_str(), NULL, 10);
}
void hadoop::IXmlArchive::deserialize(bool& t, const char* tag)
{
Value v = next();
if (v.getType() != "boolean") {
throw new IOException("Error deserializing boolean");
}
t = (v.getValue() == "1");
}
void hadoop::IXmlArchive::deserialize(int32_t& t, const char* tag)
{
Value v = next();
if (v.getType() != "i4" && v.getType() != "int") {
throw new IOException("Error deserializing int");
}
t = (int32_t) strtol(v.getValue().c_str(), NULL, 10);
}
void hadoop::IXmlArchive::deserialize(int64_t& t, const char* tag)
{
Value v = next();
if (v.getType() != "ex:i8") {
throw new IOException("Error deserializing long");
}
t = strtoll(v.getValue().c_str(), NULL, 10);
}
void hadoop::IXmlArchive::deserialize(float& t, const char* tag)
{
Value v = next();
if (v.getType() != "ex:float") {
throw new IOException("Error deserializing float");
}
t = strtof(v.getValue().c_str(), NULL);
}
void hadoop::IXmlArchive::deserialize(double& t, const char* tag)
{
Value v = next();
if (v.getType() != "double") {
throw new IOException("Error deserializing double");
}
t = strtod(v.getValue().c_str(), NULL);
}
void hadoop::IXmlArchive::deserialize(std::string& t, const char* tag)
{
Value v = next();
if (v.getType() != "string") {
throw new IOException("Error deserializing string");
}
t = fromXMLString(v.getValue());
}
void hadoop::IXmlArchive::deserialize(std::string& t, size_t& len, const char* tag)
{
Value v = next();
if (v.getType() != "string") {
throw new IOException("Error deserializing buffer");
}
t = fromXMLBuffer(v.getValue(), len);
}
void hadoop::IXmlArchive::startRecord(Record& s, const char* tag)
{
Value v = next();
if (v.getType() != "struct") {
throw new IOException("Error deserializing record");
}
}
void hadoop::IXmlArchive::endRecord(Record& s, const char* tag)
{
Value v = next();
if (v.getType() != "/struct") {
throw new IOException("Error deserializing record");
}
}
Index* hadoop::IXmlArchive::startVector(const char* tag)
{
Value v = next();
if (v.getType() != "array") {
throw new IOException("Error deserializing vector");
}
return new XmlIndex(vlist, vidx);
}
void hadoop::IXmlArchive::endVector(Index* idx, const char* tag)
{
Value v = next();
if (v.getType() != "/array") {
throw new IOException("Error deserializing vector");
}
delete idx;
}
Index* hadoop::IXmlArchive::startMap(const char* tag)
{
Value v = next();
if (v.getType() != "array") {
throw new IOException("Error deserializing map");
}
return new XmlIndex(vlist, vidx);
}
void hadoop::IXmlArchive::endMap(Index* idx, const char* tag)
{
Value v = next();
if (v.getType() != "/array") {
throw new IOException("Error deserializing map");
}
delete idx;
}
void hadoop::OXmlArchive::serialize(int8_t t, const char* tag)
{
printBeginEnvelope(tag);
p("<ex:i1>");
char sval[5];
sprintf(sval, "%d", t);
p(sval);
p("</ex:i1>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(bool t, const char* tag)
{
printBeginEnvelope(tag);
p("<boolean>");
p(t ? "1" : "0");
p("</boolean>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(int32_t t, const char* tag)
{
printBeginEnvelope(tag);
p("<i4>");
char sval[128];
sprintf(sval, "%d", t);
p(sval);
p("</i4>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(int64_t t, const char* tag)
{
printBeginEnvelope(tag);
p("<ex:i8>");
char sval[128];
sprintf(sval, "%lld", t);
p(sval);
p("</ex:i8>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(float t, const char* tag)
{
printBeginEnvelope(tag);
p("<ex:float>");
char sval[128];
sprintf(sval, "%f", t);
p(sval);
p("</ex:float>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(double t, const char* tag)
{
printBeginEnvelope(tag);
p("<double>");
char sval[128];
sprintf(sval, "%lf", t);
p(sval);
p("</double>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(const std::string& t, const char* tag)
{
printBeginEnvelope(tag);
p("<string>");
std::string s = toXMLString(t);
stream.write(s.data(), s.length());
p("</string>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::serialize(const std::string& t, size_t len, const char* tag)
{
printBeginEnvelope(tag);
p("<string>");
std::string s = toXMLBuffer(t, len);
stream.write(s.data(), s.length());
p("</string>");
printEndEnvelope(tag);
}
void hadoop::OXmlArchive::startRecord(const Record& s, const char* tag)
{
insideRecord(tag);
p("<struct>\n");
}
void hadoop::OXmlArchive::endRecord(const Record& s, const char* tag)
{
p("</struct>\n");
outsideRecord(tag);
}
void hadoop::OXmlArchive::startVector(size_t len, const char* tag)
{
insideVector(tag);
p("<array>\n");
}
void hadoop::OXmlArchive::endVector(size_t len, const char* tag)
{
p("</array>\n");
outsideVector(tag);
}
void hadoop::OXmlArchive::startMap(size_t len, const char* tag)
{
insideMap(tag);
p("<array>\n");
}
void hadoop::OXmlArchive::endMap(size_t len, const char* tag)
{
p("</array>\n");
outsideMap(tag);
}
hadoop::OXmlArchive::~OXmlArchive()
{
}

View File

@ -1,265 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef XMLARCHIVE_HH_
#define XMLARCHIVE_HH_
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/BinInputStream.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/sax/InputSource.hpp>
#include "recordio.hh"
XERCES_CPP_NAMESPACE_USE
namespace hadoop {
class Value {
private:
std::string type;
std::string value;
public:
Value(const std::string& t) { type = t; }
void addChars(const char* buf, unsigned int len) {
value += std::string(buf, len);
}
const std::string& getType() const { return type; }
const std::string& getValue() const { return value; }
};
class MySAXHandler : public HandlerBase {
private:
std::vector<Value>& vlist;
bool charsValid;
public:
MySAXHandler(std::vector<Value>& list) : vlist(list) {charsValid = false;}
void startElement(const XMLCh* const name, AttributeList& attr);
void endElement(const XMLCh* const name);
void characters(const XMLCh* const buf, unsigned int len);
};
class XmlIndex : public Index {
private:
std::vector<Value>& vlist;
unsigned int& vidx;
public:
XmlIndex(std::vector<Value>& list, unsigned int& idx) : vlist(list), vidx(idx) {}
bool done() {
    Value v = vlist[vidx];
    return v.getType() == "/array";
}
void incr() {}
~XmlIndex() {}
};
class MyBinInputStream : public BinInputStream {
private:
InStream& stream;
unsigned int pos;
public:
MyBinInputStream(InStream& s) : stream(s) { pos = 0; }
virtual unsigned int curPos() const { return pos; }
virtual unsigned int readBytes(XMLByte* const toFill,
const unsigned int maxToRead) {
ssize_t nread = stream.read(toFill, maxToRead);
if (nread < 0) {
return 0;
} else {
pos += nread;
return nread;
}
}
};
class MyInputSource : public InputSource {
private:
InStream& stream;
public:
MyInputSource(InStream& s) : stream(s) { }
virtual BinInputStream* makeStream() const {
return new MyBinInputStream(stream);
}
virtual const XMLCh* getEncoding() const {
return XMLString::transcode("UTF-8");
}
virtual ~MyInputSource() {}
};
class IXmlArchive : public IArchive {
private:
std::vector<Value> vlist;
unsigned int vidx;
MySAXHandler *docHandler;
SAXParser *parser;
MyInputSource* src;
Value next() {
Value v = vlist[vidx];
vidx++;
return v;
}
public:
IXmlArchive(InStream& _stream) {
vidx = 0;
try {
XMLPlatformUtils::Initialize();
} catch (const XMLException& e) {
throw new IOException("Unable to initialize XML Parser.");
}
parser = new SAXParser();
docHandler = new MySAXHandler(vlist);
parser->setDocumentHandler(docHandler);
src = new MyInputSource(_stream);
try {
parser->parse(*src);
} catch (const XMLException& e) {
throw new IOException("Unable to parse XML stream.");
} catch (const SAXParseException& e) {
throw new IOException("Unable to parse XML stream.");
}
    delete parser;
    delete docHandler;
    delete src;
}
virtual void deserialize(int8_t& t, const char* tag);
virtual void deserialize(bool& t, const char* tag);
virtual void deserialize(int32_t& t, const char* tag);
virtual void deserialize(int64_t& t, const char* tag);
virtual void deserialize(float& t, const char* tag);
virtual void deserialize(double& t, const char* tag);
virtual void deserialize(std::string& t, const char* tag);
virtual void deserialize(std::string& t, size_t& len, const char* tag);
virtual void startRecord(Record& s, const char* tag);
virtual void endRecord(Record& s, const char* tag);
virtual Index* startVector(const char* tag);
virtual void endVector(Index* idx, const char* tag);
virtual Index* startMap(const char* tag);
virtual void endMap(Index* idx, const char* tag);
virtual ~IXmlArchive() {
XMLPlatformUtils::Terminate();
}
};
class OXmlArchive : public OArchive {
private:
OutStream& stream;
std::vector<std::string> cstack;
void insideRecord(const char* tag) {
printBeginEnvelope(tag);
cstack.push_back("record");
}
void outsideRecord(const char* tag) {
std::string s = cstack.back();
cstack.pop_back();
if (s != "record") {
      throw new IOException("Error serializing record.");
}
printEndEnvelope(tag);
}
void insideVector(const char* tag) {
printBeginEnvelope(tag);
cstack.push_back("vector");
}
void outsideVector(const char* tag) {
std::string s = cstack.back();
cstack.pop_back();
if (s != "vector") {
      throw new IOException("Error serializing vector.");
}
printEndEnvelope(tag);
}
void insideMap(const char* tag) {
printBeginEnvelope(tag);
cstack.push_back("map");
}
void outsideMap(const char* tag) {
std::string s = cstack.back();
cstack.pop_back();
if (s != "map") {
      throw new IOException("Error serializing map.");
}
printEndEnvelope(tag);
}
void p(const char* cstr) {
stream.write(cstr, strlen(cstr));
}
void printBeginEnvelope(const char* tag) {
if (cstack.size() != 0) {
std::string s = cstack.back();
if ("record" == s) {
p("<member>\n");
p("<name>");
p(tag);
p("</name>\n");
p("<value>");
} else if ("vector" == s) {
p("<value>");
} else if ("map" == s) {
p("<value>");
}
} else {
p("<value>");
}
}
void printEndEnvelope(const char* tag) {
if (cstack.size() != 0) {
std::string s = cstack.back();
if ("record" == s) {
p("</value>\n");
p("</member>\n");
} else if ("vector" == s) {
p("</value>\n");
} else if ("map" == s) {
p("</value>\n");
}
} else {
p("</value>\n");
}
}
public:
OXmlArchive(OutStream& _stream) : stream(_stream) {}
virtual void serialize(int8_t t, const char* tag);
virtual void serialize(bool t, const char* tag);
virtual void serialize(int32_t t, const char* tag);
virtual void serialize(int64_t t, const char* tag);
virtual void serialize(float t, const char* tag);
virtual void serialize(double t, const char* tag);
virtual void serialize(const std::string& t, const char* tag);
virtual void serialize(const std::string& t, size_t len, const char* tag);
virtual void startRecord(const Record& s, const char* tag);
virtual void endRecord(const Record& s, const char* tag);
virtual void startVector(size_t len, const char* tag);
virtual void endVector(size_t len, const char* tag);
virtual void startMap(size_t len, const char* tag);
virtual void endMap(size_t len, const char* tag);
virtual ~OXmlArchive();
};
}
#endif /*XMLARCHIVE_HH_*/

View File

@ -1,25 +0,0 @@
This contribution consists of two components designed to make it easier to find information about lost or corrupt blocks.
The first is a MapReduce job designed to search for one or more block ids in a set of log files. It is implemented in org.apache.hadoop.blockforensics.BlockSearch. Building this contribution generates a jar file that can be executed using:
bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir] [comma delimited list of block ids]
For example, the command:
bin/hadoop jar /foo/bar/hadoop-0.1-block_forensics.jar /input/* /output 2343,45245,75823
... searches for any of blocks 2343, 45245, or 75823 in any of the files
contained in the /input/ directory.
The output will be any line containing one of the provided block ids. While this tool is designed to be used with block ids, it can also be used for general text searching.
The second component is a standalone Java program that repeatedly queries the namenode at a given interval, looking for corrupt replicas. If it finds any, it launches the above MapReduce job. The syntax is:
java BlockForensics http://[namenode]:[port]/corrupt_replicas_xml.jsp [sleep time between namenode query for corrupt blocks (in milliseconds)] [mapred jar location] [hdfs input path]
For example, the command:
java BlockForensics http://localhost:50070/corrupt_replicas_xml.jsp 30000
/foo/bar/hadoop-0.1-block_forensics.jar /input/*
... queries the namenode at localhost:50070 for corrupt replicas every 30
seconds and runs /foo/bar/hadoop-0.1-block_forensics.jar if any are found.
The MapReduce job jar and the BlockForensics class can be found in your build/contrib/block_forensics and build/contrib/block_forensics/classes directories, respectively.
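For illustration only (this driver class is not part of the contribution), the same search job can also be submitted programmatically through ToolRunner, since BlockSearch implements Tool; the class name, input path, output directory, and block ids below are placeholders mirroring the command line above:

import org.apache.hadoop.blockforensics.BlockSearch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical driver: passes the same three arguments the command line takes.
public class BlockSearchDriver {
  public static void main(String[] args) throws Exception {
    String[] jobArgs = {
        "/input/*",           // HDFS input path (placeholder)
        "/output",            // HDFS output directory (placeholder)
        "2343,45245,75823"    // comma delimited list of block ids (placeholder)
    };
    int rc = ToolRunner.run(new Configuration(), new BlockSearch(), jobArgs);
    System.exit(rc);
  }
}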

View File

@ -1,66 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Before you can run these subtargets directly, you need
to call at top-level: ant deploy-contrib compile-core-test
-->
<project name="block_forensics" default="jar">
<property name="version" value="0.1"/>
<import file="../build-contrib.xml"/>
<!-- create the list of files to add to the classpath -->
<fileset dir="${hadoop.root}/lib" id="class.path">
<include name="**/*.jar" />
<exclude name="**/excluded/" />
</fileset>
<!-- Override jar target to specify main class -->
<target name="jar" depends="compile">
<jar
jarfile="${build.dir}/hadoop-${version}-${name}.jar"
basedir="${build.classes}"
>
<manifest>
<attribute name="Main-Class" value="org.apache.hadoop.blockforensics.BlockSearch"/>
</manifest>
</jar>
<javac srcdir="client" destdir="${build.classes}"/>
</target>
<!-- Run only pure-Java unit tests. superdottest -->
<target name="test">
<antcall target="hadoopbuildcontrib.test">
</antcall>
</target>
<!-- Run all unit tests
This is not called as part of the nightly build
because it will only run on platforms that have standard
Unix utilities available.
-->
<target name="test-unix">
<antcall target="hadoopbuildcontrib.test">
</antcall>
</target>
</project>

View File

@ -1,186 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.Runtime;
import java.net.URL;
import java.net.URLConnection;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
* This class repeatedly queries a namenode looking for corrupt replicas. If
 * any are found, a provided Hadoop job is launched and the output printed
* to stdout.
*
* The syntax is:
*
* java BlockForensics http://[namenode]:[port]/corrupt_replicas_xml.jsp
 * [sleep time between namenode queries for corrupt blocks
 * (in milliseconds)] [mapred jar location] [hdfs input path]
*
* All arguments are required.
*/
public class BlockForensics {
public static String join(List<?> l, String sep) {
StringBuilder sb = new StringBuilder();
    Iterator<?> it = l.iterator();
while(it.hasNext()){
sb.append(it.next());
if (it.hasNext()) {
sb.append(sep);
}
}
return sb.toString();
}
// runs hadoop command and prints output to stdout
public static void runHadoopCmd(String ... args)
throws IOException {
String hadoop_home = System.getenv("HADOOP_PREFIX");
List<String> l = new LinkedList<String>();
l.add("bin/hadoop");
l.addAll(Arrays.asList(args));
ProcessBuilder pb = new ProcessBuilder(l);
if (hadoop_home != null) {
pb.directory(new File(hadoop_home));
}
pb.redirectErrorStream(true);
Process p = pb.start();
BufferedReader br = new BufferedReader(
new InputStreamReader(p.getInputStream()));
String line;
while ((line = br.readLine()) != null) {
System.out.println(line);
}
}
public static void main(String[] args)
throws SAXException, ParserConfigurationException,
InterruptedException, IOException {
if (System.getenv("HADOOP_PREFIX") == null) {
      System.err.println("The environment variable HADOOP_PREFIX is undefined");
System.exit(1);
}
if (args.length < 4) {
System.out.println("Usage: java BlockForensics [http://namenode:port/"
+ "corrupt_replicas_xml.jsp] [sleep time between "
+ "requests (in milliseconds)] [mapred jar location] "
+ "[hdfs input path]");
return;
}
int sleepTime = 30000;
try {
sleepTime = Integer.parseInt(args[1]);
} catch (NumberFormatException e) {
System.out.println("The sleep time entered is invalid, "
+ "using default value: "+sleepTime+"ms");
}
Set<Long> blockIds = new TreeSet<Long>();
while (true) {
InputStream xml = new URL(args[0]).openConnection().getInputStream();
DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = fact.newDocumentBuilder();
Document doc = builder.parse(xml);
NodeList corruptReplicaNodes = doc.getElementsByTagName("block_id");
List<Long> searchBlockIds = new LinkedList<Long>();
for(int i=0; i<corruptReplicaNodes.getLength(); i++) {
Long blockId = new Long(corruptReplicaNodes.item(i)
.getFirstChild()
.getNodeValue());
if (!blockIds.contains(blockId)) {
blockIds.add(blockId);
searchBlockIds.add(blockId);
}
}
if (searchBlockIds.size() > 0) {
String blockIdsStr = BlockForensics.join(searchBlockIds, ",");
System.out.println("\nSearching for: " + blockIdsStr);
        String tmpDir = "/tmp-block-forensics-" +
            Integer.toString(new Random().nextInt(Integer.MAX_VALUE));
System.out.println("Using temporary dir: "+tmpDir);
// delete tmp dir
BlockForensics.runHadoopCmd("fs", "-rmr", tmpDir);
// launch mapred job
BlockForensics.runHadoopCmd("jar",
args[2], // jar location
args[3], // input dir
tmpDir, // output dir
blockIdsStr// comma delimited list of blocks
);
// cat output
BlockForensics.runHadoopCmd("fs", "-cat", tmpDir+"/part*");
// delete temp dir
BlockForensics.runHadoopCmd("fs", "-rmr", tmpDir);
int sleepSecs = (int)(sleepTime/1000.);
System.out.print("Sleeping for "+sleepSecs
+ " second"+(sleepSecs == 1?"":"s")+".");
}
System.out.print(".");
Thread.sleep(sleepTime);
}
}
}

View File

@ -1,52 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<ivy-module version="1.0">
<info organisation="org.apache.hadoop" module="${ant.project.name}">
<license name="Apache 2.0"/>
<ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
<description>
Apache Hadoop
</description>
</info>
<configurations defaultconfmapping="default">
<!--these match the Maven configurations-->
<conf name="default" extends="master,runtime"/>
<conf name="master" description="contains the artifact but no dependencies"/>
<conf name="runtime" description="runtime but not the artifact" />
<conf name="common" visibility="private"
extends="runtime"
description="artifacts needed to compile/test the application"/>
<conf name="test" visibility="private" extends="runtime"/>
</configurations>
<publications>
<!--get the artifact from our module name-->
<artifact conf="master"/>
</publications>
<dependencies>
<dependency org="org.apache.hadoop" name="hadoop-common"
rev="${hadoop-common.version}" conf="common->default"/>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
rev="${yarn.version}" conf="common->default"/>
<dependency org="log4j" name="log4j" rev="${log4j.version}"
conf="common->master"/>
</dependencies>
</ivy-module>

View File

@ -1,21 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#This properties file lists the versions of the various artifacts used by this contrib module.
#It drives ivy and the generation of a maven POM
#Please list the dependencies name with version if they are different from the ones
#listed in the global libraries.properties file (in alphabetical order)

View File

@ -1,136 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.blockforensics;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* BlockSearch is a mapred job that's designed to search input for appearances
* of strings.
*
* The syntax is:
*
* bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir]
 *   [comma delimited list of block ids]
*
* All arguments are required.
*
* This tool is designed to be used to search for one or more block ids in log
 * files but can be used for general text search, provided the search strings
 * don't contain commas (the list delimiter). It assumes only one search string will appear per line.
*/
public class BlockSearch extends Configured implements Tool {
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
private Text blockIdText = new Text();
private Text valText = new Text();
private List<String> blockIds = null;
protected void setup(Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
StringTokenizer st = new StringTokenizer(conf.get("blockIds"), ",");
blockIds = new LinkedList<String>();
while (st.hasMoreTokens()) {
String blockId = st.nextToken();
blockIds.add(blockId);
}
}
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
if (blockIds == null) {
System.err.println("Error: No block ids specified");
} else {
String valStr = value.toString();
for(String blockId: blockIds) {
if (valStr.indexOf(blockId) != -1) {
blockIdText.set(blockId);
valText.set(valStr);
context.write(blockIdText, valText);
break; // assume only one block id appears per line
}
}
}
}
}
public static class Reduce extends Reducer<Text, Text, Text, Text> {
private Text val = new Text();
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      for (Text value : values) {
        context.write(key, value);
      }
}
}
public int run(String[] args) throws Exception {
if (args.length < 3) {
System.out.println("BlockSearch <inLogs> <outDir> <comma delimited list of blocks>");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
Configuration conf = getConf();
conf.set("blockIds", args[2]);
Job job = new Job(conf);
job.setCombinerClass(Reduce.class);
job.setJarByClass(BlockSearch.class);
job.setJobName("BlockSearch");
job.setMapperClass(Map.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setReducerClass(Reduce.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new BlockSearch(), args);
System.exit(res);
}
}

View File

@ -1,531 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Imported by contrib/*/build.xml files to share generic targets. -->
<project name="hadoopbuildcontrib" xmlns:ivy="antlib:org.apache.ivy.ant">
<import file="../../build-utils.xml" />
<property name="name" value="${ant.project.name}"/>
<property name="root" value="${basedir}"/>
<property name="hadoop.root" location="${root}/../../../"/>
<!-- Load all the default properties, and any the user wants -->
<!-- to contribute (without having to type -D or edit this file -->
<property file="${user.home}/${name}.build.properties" />
<property file="${root}/build.properties" />
<property file="${hadoop.root}/build.properties" />
<property name="src.dir" location="${root}/src/java"/>
<property name="src.test" location="${root}/src/test"/>
<property name="src.test.data" location="${root}/src/test/data"/>
<property name="src.examples" location="${root}/src/examples"/>
<property name="build-fi.dir" location="${hadoop.root}/build-fi"/>
<property name="system-test-build-dir" location="${build-fi.dir}/system"/>
<!-- Property added for contrib system tests -->
<property name="src.test.system" location="${root}/src/test/system"/>
<available file="${src.examples}" type="dir" property="examples.available"/>
<available file="${src.test}" type="dir" property="test.available"/>
<!-- Property added for contrib system tests -->
<available file="${src.test.system}" type="dir"
property="test.system.available"/>
<property name="conf.dir" location="${hadoop.root}/conf"/>
<property name="test.junit.output.format" value="plain"/>
<property name="test.output" value="no"/>
<property name="test.timeout" value="900000"/>
<property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
<property name="build.dir" location="${hadoop.root}/build/contrib/${name}"/>
<property name="build.classes" location="${build.dir}/classes"/>
<property name="build.test" location="${build.dir}/test"/>
<property name="test.build.extraconf" value="${build.test}/extraconf"/>
<property name="build.examples" location="${build.dir}/examples"/>
<property name="hadoop.log.dir" location="${build.dir}/test/logs"/>
<!-- all jars together -->
<property name="javac.deprecation" value="off"/>
<property name="javac.debug" value="on"/>
<property name="build.ivy.lib.dir" value="${hadoop.root}/build/ivy/lib"/>
<property name="javadoc.link"
value="http://java.sun.com/j2se/1.4/docs/api/"/>
<property name="build.encoding" value="ISO-8859-1"/>
<property name="dest.jar" value="hadoop-${version}-${name}.jar"/>
<fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
<!-- Property added for contrib system tests -->
<property name="build.test.system" location="${build.dir}/system"/>
<property name="build.system.classes"
location="${build.test.system}/classes"/>
<!-- IVY properties set here -->
<property name="ivy.dir" location="ivy" />
<property name="ivysettings.xml" location="${hadoop.root}/ivy/ivysettings.xml"/>
<loadproperties srcfile="${ivy.dir}/libraries.properties"/>
<loadproperties srcfile="${hadoop.root}/ivy/libraries.properties"/>
<property name="ivy.jar" location="${hadoop.root}/ivy/ivy-${ivy.version}.jar"/>
<property name="ivy_repo_url"
value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
<property name="build.ivy.dir" location="${hadoop.root}/build/ivy" />
<property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
<property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
<property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
<!--this is the naming policy for artifacts we want pulled down-->
<property name="ivy.artifact.retrieve.pattern"
value="${ant.project.name}/[conf]/[artifact]-[revision](-[classifier]).[ext]"/>
<!-- the normal classpath -->
<path id="contrib-classpath">
<pathelement location="${build.classes}"/>
<fileset refid="lib.jars"/>
<pathelement location="${hadoop.root}/build/classes"/>
<pathelement location="${system-test-build-dir}/classes"/>
<pathelement location="${system-test-build-dir}/tools"/>
<pathelement location="${hadoop.root}/build/tools"/>
<fileset dir="${hadoop.root}/lib">
<include name="**/*.jar" />
</fileset>
<path refid="${ant.project.name}.common-classpath"/>
<pathelement path="${clover.jar}"/>
</path>
<!-- the unit test classpath -->
<path id="test.classpath">
<pathelement location="${build.test}" />
<pathelement location="${test.build.extraconf}" />
<pathelement location="${hadoop.root}/build/test/classes"/>
<pathelement location="${hadoop.root}/build/test/core/classes"/>
<pathelement location="${hadoop.root}/build/test/hdfs/classes"/>
<pathelement location="${hadoop.root}/build/test/mapred/classes"/>
<pathelement location="${hadoop.root}/src/contrib/test"/>
<pathelement location="${conf.dir}"/>
<pathelement location="${hadoop.root}/build"/>
<pathelement location="${build.examples}"/>
<pathelement location="${hadoop.root}/build/examples"/>
<path refid="${ant.project.name}.test-classpath"/>
<path refid="contrib-classpath"/>
</path>
<!-- The system test classpath -->
<path id="test.system.classpath">
<pathelement location="${hadoop.root}/src/contrib/${name}/src/test/system" />
<pathelement location="${build.test.system}" />
<pathelement location="${build.test.system}/classes"/>
<pathelement location="${build.examples}"/>
<pathelement location="${system-test-build-dir}/classes" />
<pathelement location="${system-test-build-dir}/test/mapred/classes" />
<pathelement location="${system-test-build-dir}" />
<pathelement location="${system-test-build-dir}/tools" />
<pathelement location="${hadoop.home}"/>
<pathelement location="${hadoop.conf.dir}"/>
<pathelement location="${hadoop.conf.dir.deployed}"/>
<pathelement location="${hadoop.root}/build"/>
<pathelement location="${hadoop.root}/build/examples"/>
<pathelement location="${hadoop.root}/build/test/classes" />
<path refid="contrib-classpath"/>
<fileset dir="${system-test-build-dir}">
<include name="**/*.jar" />
<exclude name="**/excluded/" />
</fileset>
<fileset dir="${system-test-build-dir}/test/mapred/testjar">
<include name="**/*.jar" />
<exclude name="**/excluded/" />
</fileset>
<fileset dir="${hadoop.root}/build/contrib/${name}">
<include name="**/*.jar" />
<exclude name="**/excluded/" />
</fileset>
</path>
<!-- to be overridden by sub-projects -->
<target name="check-contrib"/>
<target name="init-contrib"/>
<!-- ====================================================== -->
<!-- Stuff needed by all targets -->
<!-- ====================================================== -->
<target name="init" depends="check-contrib" unless="skip.contrib">
<echo message="contrib: ${name}"/>
<mkdir dir="${build.dir}"/>
<mkdir dir="${build.classes}"/>
<mkdir dir="${build.test}"/>
<mkdir dir="${build.test}/extraconf"/>
<mkdir dir="${build.examples}"/>
<mkdir dir="${hadoop.log.dir}"/>
<!-- The below two tags added for contrib system tests -->
<mkdir dir="${build.test.system}"/>
<mkdir dir="${build.system.classes}"/>
<antcall target="init-contrib"/>
</target>
<!-- ====================================================== -->
<!-- Compile a Hadoop contrib's files -->
<!-- ====================================================== -->
<target name="compile" depends="init, ivy-retrieve-common" unless="skip.contrib">
<echo message="contrib: ${name}"/>
<javac
encoding="${build.encoding}"
srcdir="${src.dir}"
includes="**/*.java"
excludes="system/**/*.java"
destdir="${build.classes}"
debug="${javac.debug}"
deprecation="${javac.deprecation}">
<classpath refid="contrib-classpath"/>
</javac>
</target>
<!-- ======================================================= -->
<!-- Compile a Hadoop contrib's example files (if available) -->
<!-- ======================================================= -->
<target name="compile-examples" depends="compile, ivy-retrieve-common" if="examples.available">
<echo message="contrib: ${name}"/>
<javac
encoding="${build.encoding}"
srcdir="${src.examples}"
includes="**/*.java"
destdir="${build.examples}"
debug="${javac.debug}">
<classpath refid="contrib-classpath"/>
</javac>
</target>
<!-- ================================================================== -->
<!-- Compile test code -->
<!-- ================================================================== -->
<target name="compile-test" depends="compile-examples, ivy-retrieve-test" if="test.available">
<echo message="contrib: ${name}"/>
<javac
encoding="${build.encoding}"
srcdir="${src.test}"
includes="**/*.java"
excludes="system/**/*.java"
destdir="${build.test}"
debug="${javac.debug}">
<classpath refid="test.classpath"/>
</javac>
</target>
<!-- ================================================================== -->
<!-- Compile system test code -->
<!-- ================================================================== -->
<target name="compile-test-system" depends="compile-examples, ivy-retrieve-test"
if="test.system.available">
<echo message="contrib: ${name}"/>
<javac
encoding="${build.encoding}"
srcdir="${src.test.system}"
includes="**/*.java"
destdir="${build.system.classes}"
debug="${javac.debug}">
<classpath refid="test.system.classpath"/>
</javac>
</target>
<!-- ====================================================== -->
<!-- Make a Hadoop contrib's jar -->
<!-- ====================================================== -->
<target name="jar" depends="compile" unless="skip.contrib">
<echo message="contrib: ${name}"/>
<jar
jarfile="${build.dir}/${dest.jar}"
basedir="${build.classes}"
/>
</target>
<!-- ====================================================== -->
<!-- Make a Hadoop contrib's examples jar -->
<!-- ====================================================== -->
<target name="jar-examples" depends="compile-examples"
if="examples.available" unless="skip.contrib">
<echo message="contrib: ${name}"/>
<jar jarfile="${build.dir}/hadoop-${version}-${name}-examples.jar">
<fileset dir="${build.classes}">
</fileset>
<fileset dir="${build.examples}">
</fileset>
</jar>
</target>
<!-- ====================================================== -->
<!-- Package a Hadoop contrib -->
<!-- ====================================================== -->
<target name="package" depends="jar, jar-examples" unless="skip.contrib">
<mkdir dir="${dist.dir}/contrib/${name}"/>
<copy todir="${dist.dir}/contrib/${name}" includeEmptyDirs="false" flatten="true">
<fileset dir="${build.dir}">
<include name="${dest.jar}" />
</fileset>
</copy>
<!-- copy the dependency libraries into the contrib/lib dir -->
<mkdir dir="${dist.dir}/contrib/${name}/lib"/>
<copy todir="${dist.dir}/contrib/${name}/lib" includeEmptyDirs="false" flatten="true">
<fileset dir="${common.ivy.lib.dir}">
<!-- except for those already present due to Hadoop -->
<present present="srconly" targetdir="${dist.dir}/lib" />
</fileset>
</copy>
<!-- if the lib dir is empty, remove it. -->
<delete dir="${dist.dir}/contrib/${name}/lib" includeEmptyDirs="true" excludes="*.jar" />
</target>
<!-- ================================================================== -->
<!-- Run unit tests -->
<!-- ================================================================== -->
<target name="test" depends="compile-test, compile" if="test.available">
<echo message="contrib: ${name}"/>
<delete dir="${hadoop.log.dir}"/>
<mkdir dir="${hadoop.log.dir}"/>
<junit
printsummary="yes" showoutput="${test.output}"
haltonfailure="no" fork="yes" maxmemory="512m"
errorProperty="tests.failed" failureProperty="tests.failed"
timeout="${test.timeout}">
<assertions><enable/></assertions>
<sysproperty key="test.build.data" value="${build.test}/data"/>
<sysproperty key="build.test" value="${build.test}"/>
<sysproperty key="test.build.extraconf" value="${test.build.extraconf}" />
<sysproperty key="src.test.data" value="${src.test.data}"/>
<sysproperty key="contrib.name" value="${name}"/>
<!-- requires fork=yes for:
relative File paths to use the specified user.dir
classpath to use build/contrib/*.jar
-->
<sysproperty key="user.dir" value="${build.test}/data"/>
<sysproperty key="fs.default.name" value="${fs.default.name}"/>
<sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
<sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
<sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
<sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}"/>
<classpath refid="test.classpath"/>
<formatter type="${test.junit.output.format}" />
<batchtest todir="${build.test}" unless="testcase">
<fileset dir="${src.test}"
includes="**/Test*.java" excludes="**/${test.exclude}.java, system/**/*.java" />
</batchtest>
<batchtest todir="${build.test}" if="testcase">
<fileset dir="${src.test}" includes="**/${testcase}.java" excludes="system/**/*.java" />
</batchtest>
</junit>
<antcall target="checkfailure"/>
</target>
<!-- ================================================================== -->
<!-- Run system tests -->
<!-- ================================================================== -->
<target name="test-system" depends="compile-test-system, jar"
if="test.system.available">
<delete dir="${build.test.system}/extraconf"/>
<mkdir dir="${build.test.system}/extraconf"/>
<property name="test.src.dir" location="${hadoop.root}/src/test"/>
<property name="test.junit.printsummary" value="yes" />
<property name="test.junit.haltonfailure" value="no" />
<property name="test.junit.maxmemory" value="512m" />
<property name="test.junit.fork.mode" value="perTest" />
<property name="test.all.tests.file" value="${test.src.dir}/all-tests" />
<property name="test.build.dir" value="${hadoop.root}/build/test"/>
<property name="basedir" value="${hadoop.root}"/>
<property name="test.timeout" value="900000"/>
<property name="test.junit.output.format" value="plain"/>
<property name="test.tools.input.dir" value="${basedir}/src/test/tools/data"/>
<property name="c++.src" value="${basedir}/src/c++"/>
<property name="test.include" value="Test*"/>
<property name="c++.libhdfs.src" value="${c++.src}/libhdfs"/>
<property name="test.build.data" value="${build.test.system}/data"/>
<property name="test.cache.data" value="${build.test.system}/cache"/>
<property name="test.debug.data" value="${build.test.system}/debug"/>
<property name="test.log.dir" value="${build.test.system}/logs"/>
<exec executable="sed" inputstring="${os.name}"
outputproperty="nonspace.os">
<arg value="s/ /_/g"/>
</exec>
<property name="build.platform"
value="${nonspace.os}-${os.arch}-${sun.arch.data.model}"/>
<property name="build.native"
value="${hadoop.root}/build/native/${build.platform}"/>
<property name="lib.dir" value="${hadoop.root}/lib"/>
<property name="install.c++.examples"
value="${hadoop.root}/build/c++-examples/${build.platform}"/>
<condition property="tests.testcase">
<and>
<isset property="testcase" />
</and>
</condition>
<property name="test.junit.jvmargs" value="-ea" />
<macro-system-test-runner test.file="${test.all.tests.file}"
classpath="test.system.classpath"
test.dir="${build.test.system}"
fileset.dir="${hadoop.root}/src/contrib/${name}/src/test/system"
hadoop.conf.dir.deployed="${hadoop.conf.dir.deployed}">
</macro-system-test-runner>
</target>
<macrodef name="macro-system-test-runner">
<attribute name="test.file" />
<attribute name="classpath" />
<attribute name="test.dir" />
<attribute name="fileset.dir" />
<attribute name="hadoop.conf.dir.deployed" default="" />
<sequential>
<delete dir="@{test.dir}/data"/>
<mkdir dir="@{test.dir}/data"/>
<delete dir="@{test.dir}/logs"/>
<mkdir dir="@{test.dir}/logs"/>
<copy file="${test.src.dir}/hadoop-policy.xml"
todir="@{test.dir}/extraconf" />
<copy file="${test.src.dir}/fi-site.xml"
todir="@{test.dir}/extraconf" />
<junit showoutput="${test.output}"
printsummary="${test.junit.printsummary}"
haltonfailure="${test.junit.haltonfailure}"
fork="yes"
forkmode="${test.junit.fork.mode}"
maxmemory="${test.junit.maxmemory}"
dir="${basedir}" timeout="${test.timeout}"
errorProperty="tests.failed" failureProperty="tests.failed">
<jvmarg value="${test.junit.jvmargs}" />
<sysproperty key="java.net.preferIPv4Stack" value="true"/>
<sysproperty key="test.build.data" value="@{test.dir}/data"/>
<sysproperty key="test.tools.input.dir" value = "${test.tools.input.dir}"/>
<sysproperty key="test.cache.data" value="${test.cache.data}"/>
<sysproperty key="test.debug.data" value="${test.debug.data}"/>
<sysproperty key="hadoop.log.dir" value="@{test.dir}/logs"/>
<sysproperty key="test.src.dir" value="@{fileset.dir}"/>
<sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
<sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}"/>
<sysproperty key="test.build.extraconf" value="@{test.dir}/extraconf" />
<sysproperty key="hadoop.policy.file" value="hadoop-policy.xml"/>
<sysproperty key="java.library.path"
value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
<sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
<syspropertyset dynamic="no">
<propertyref name="hadoop.tmp.dir"/>
</syspropertyset>
<!-- set compile.c++ in the child jvm only if it is set -->
<syspropertyset dynamic="no">
<propertyref name="compile.c++"/>
</syspropertyset>
<!-- Pass probability specifications to the spawn JVM -->
<syspropertyset id="FaultProbabilityProperties">
<propertyref regex="fi.*"/>
</syspropertyset>
<sysproperty key="test.system.hdrc.deployed.hadoopconfdir"
value="@{hadoop.conf.dir.deployed}" />
<classpath refid="@{classpath}"/>
<formatter type="${test.junit.output.format}" />
<batchtest todir="@{test.dir}" unless="testcase">
<fileset dir="@{fileset.dir}"
excludes="**/${test.exclude}.java aop/** system/**">
<patternset>
<includesfile name="@{test.file}"/>
</patternset>
</fileset>
</batchtest>
<batchtest todir="@{test.dir}" if="testcase">
<fileset dir="@{fileset.dir}" includes="**/${testcase}.java"/>
</batchtest>
</junit>
<antcall target="checkfailure"/>
</sequential>
</macrodef>
<target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
<!-- Nothing by default -->
</target>
<target name="checkfailure" if="tests.failed">
<touch file="${build.contrib.dir}/testsfailed"/>
<fail unless="continueOnFailure">Contrib Tests failed!</fail>
</target>
<!-- ================================================================== -->
<!-- Clean. Delete the build files, and their directories -->
<!-- ================================================================== -->
<target name="clean">
<echo message="contrib: ${name}"/>
<delete dir="${build.dir}"/>
</target>
<target name="ivy-probe-antlib" >
<condition property="ivy.found">
<typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
</condition>
</target>
<target name="ivy-download" description="To download ivy " unless="offline">
<get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
</target>
<target name="ivy-init-antlib" depends="ivy-download,ivy-probe-antlib" unless="ivy.found">
<typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
loaderRef="ivyLoader">
<classpath>
<pathelement location="${ivy.jar}"/>
</classpath>
</typedef>
<fail >
<condition >
<not>
<typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
</not>
</condition>
You need Apache Ivy 2.0 or later from http://ant.apache.org/
It could not be loaded from ${ivy_repo_url}
</fail>
</target>
<target name="ivy-init" depends="ivy-init-antlib">
<ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}"/>
</target>
<target name="ivy-resolve-common" depends="ivy-init">
<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common" />
</target>
<target name="ivy-retrieve-common" depends="ivy-resolve-common"
description="Retrieve Ivy-managed artifacts for the compile/test configurations">
<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
<ivy:cachepath pathid="${ant.project.name}.common-classpath" conf="common" />
</target>
<target name="ivy-resolve-test" depends="ivy-init">
<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="test" />
</target>
<target name="ivy-retrieve-test" depends="ivy-resolve-test"
description="Retrieve Ivy-managed artifacts for the test configuration">
<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
<ivy:cachepath pathid="${ant.project.name}.test-classpath" conf="test" />
</target>
</project>

View File

@ -1,101 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="hadoopcontrib" default="compile" basedir=".">
  <!-- In case one of the contrib subdirectories -->
  <!-- fails the build or test targets and you cannot fix it, -->
  <!-- then add to the fileset: excludes="badcontrib/build.xml" -->
<!-- ====================================================== -->
<!-- Compile contribs. -->
<!-- ====================================================== -->
<target name="compile">
<subant target="compile">
<fileset dir="." includes="*/build.xml"/>
</subant>
</target>
<!-- ====================================================== -->
<!-- Compile contrib test code. -->
<!-- ====================================================== -->
<target name="compile-test">
<subant target="compile-test">
<fileset dir="." includes="*/build.xml"/>
</subant>
</target>
<!-- ====================================================== -->
<!-- Package contrib jars. -->
<!-- ====================================================== -->
<target name="package">
<subant target="package">
<fileset dir="." includes="*/build.xml"/>
</subant>
</target>
<!-- ====================================================== -->
<!-- Test all the contribs. -->
<!-- ====================================================== -->
<target name="test">
<property name="hadoop.root" location="${root}/../../../"/>
<property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
<delete file="${build.contrib.dir}/testsfailed"/>
<subant target="test">
<property name="continueOnFailure" value="true"/>
<fileset dir="." includes="streaming/build.xml"/>
<fileset dir="." includes="gridmix/build.xml"/>
<fileset dir="." includes="vertica/build.xml"/>
<fileset dir="." includes="raid/build.xml"/>
</subant>
<available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
<fail if="testsfailed">Tests failed!</fail>
</target>
<!-- ====================================================== -->
<!-- Test all the contrib system tests -->
<!-- ====================================================== -->
<target name="test-system-contrib">
<property name="hadoop.root" location="${root}/../../../"/>
<property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
<delete file="${build.contrib.dir}/testsfailed"/>
<subant target="test-system">
<property name="continueOnFailure" value="true"/>
<property name="hadoop.home" value="${hadoop.home}"/>
<property name="hadoop.conf.dir" value="${hadoop.conf.dir}"/>
<property name="hadoop.conf.dir.deployed"
value="${hadoop.conf.dir.deployed}"/>
<fileset dir="." includes="hdfsproxy/build.xml"/>
<fileset dir="." includes="streaming/build.xml"/>
<fileset dir="." includes="gridmix/build.xml"/>
</subant>
<available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
<fail if="testsfailed">Tests failed!</fail>
</target>
<!-- ====================================================== -->
<!-- Clean all the contribs. -->
<!-- ====================================================== -->
<target name="clean">
<subant target="clean">
<fileset dir="." includes="*/build.xml"/>
</subant>
</target>
</project>

View File

@ -1,45 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Before you can run these subtargets directly, you need
to call at top-level: ant deploy-contrib compile-core-test
-->
<project name="datajoin" default="jar">
<import file="../build-contrib.xml"/>
<!-- Override jar target to specify main class -->
<target name="jar" depends="compile">
<jar
jarfile="${build.dir}/hadoop-${version}-${name}.jar"
basedir="${build.classes}"
>
<manifest>
<attribute name="Main-Class" value="org.apache.hadoop.contrib.utils.join.DataJoinJob"/>
</manifest>
</jar>
</target>
<target name="jar-examples" depends="jar">
<antcall target="hadoopbuildcontrib.jar-examples">
</antcall>
</target>
</project>

View File

@ -1,68 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
<info organisation="org.apache.hadoop" module="${ant.project.name}">
<license name="Apache 2.0"/>
<ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
<description>
Apache Hadoop
</description>
</info>
<configurations defaultconfmapping="default">
<!--these match the Maven configurations-->
<conf name="default" extends="master,runtime"/>
<conf name="master" description="contains the artifact but no dependencies"/>
<conf name="runtime" description="runtime but not the artifact" />
<conf name="common" visibility="private"
extends="runtime"
description="artifacts needed to compile the application"/>
<conf name="test" visibility="private" extends="runtime"/>
</configurations>
<publications>
<!--get the artifact from our module name-->
<artifact conf="master"/>
</publications>
<dependencies>
<dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="common->default"/>
<dependency org="org.apache.hadoop" name="hadoop-common" rev="${hadoop-common.version}" conf="common->default">
<artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests"/>
<artifact name="hadoop-common" ext="jar"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="${hadoop-hdfs.version}" conf="common->default">
<artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
<artifact name="hadoop-hdfs" ext="jar"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
rev="${yarn.version}" conf="common->default"/>
<dependency org="org.apache.hadoop" name="hadoop-yarn-common"
rev="${yarn.version}" conf="common->default"/>
<dependency org="commons-logging" name="commons-logging" rev="${commons-logging.version}" conf="common->default"/>
<dependency org="log4j" name="log4j" rev="${log4j.version}" conf="common->master"/>
<dependency org="junit" name="junit" rev="${junit.version}" conf="common->default"/>
<!-- Exclusions for transitive dependencies pulled in by log4j -->
<exclude org="com.sun.jdmk"/>
<exclude org="com.sun.jmx"/>
<exclude org="javax.jms"/>
<exclude org="javax.mail"/>
</dependencies>
</ivy-module>

View File

@ -1,17 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#This properties file lists the versions of the various artifacts used by this contrib module.
#It drives ivy and the generation of a maven POM
#Please list the dependencies name with version if they are different from the ones
#listed in the global libraries.properties file (in alphabetical order)

View File

@ -1,26 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<classpath>
<classpathentry excluding="org/apache/hadoop/eclipse/server/CopyOfHadoopServer.java" kind="src" path="src/java"/>
<classpathentry exported="true" kind="lib" path="classes" sourcepath="classes"/>
<classpathentry kind="lib" path="lib/hadoop-core.jar" sourcepath="/hadoop-socks/src/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
<classpathentry kind="output" path="classes"/>
</classpath>


@@ -1,45 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<projectDescription>
<name>MapReduceTools</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.pde.ManifestBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.pde.SchemaBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.pde.PluginNature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>


@@ -1,274 +0,0 @@
#Sat Oct 13 13:37:43 CEST 2007
# Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
#
eclipse.preferences.version=1
instance/org.eclipse.core.net/org.eclipse.core.net.hasMigrated=true
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_assignment=16
org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
org.eclipse.jdt.core.formatter.blank_lines_after_package=1
org.eclipse.jdt.core.formatter.blank_lines_before_field=1
org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
org.eclipse.jdt.core.formatter.blank_lines_before_method=1
org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
org.eclipse.jdt.core.formatter.blank_lines_before_package=0
org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
org.eclipse.jdt.core.formatter.comment.format_block_comments=true
org.eclipse.jdt.core.formatter.comment.format_header=false
org.eclipse.jdt.core.formatter.comment.format_html=true
org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
org.eclipse.jdt.core.formatter.comment.format_line_comments=true
org.eclipse.jdt.core.formatter.comment.format_source_code=true
org.eclipse.jdt.core.formatter.comment.indent_parameter_description=false
org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
org.eclipse.jdt.core.formatter.comment.line_length=77
org.eclipse.jdt.core.formatter.compact_else_if=true
org.eclipse.jdt.core.formatter.continuation_indentation=2
org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
org.eclipse.jdt.core.formatter.indent_empty_lines=false
org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
org.eclipse.jdt.core.formatter.indentation.size=4
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=insert
org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
org.eclipse.jdt.core.formatter.lineSplit=77
org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
org.eclipse.jdt.core.formatter.tabulation.char=space
org.eclipse.jdt.core.formatter.tabulation.size=2
org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true

Some files were not shown because too many files have changed in this diff.