MAPREDUCE-4266. remove Ant remnants from MR (tgraves via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1407551 13f79535-47bb-0310-9956-ffa450edef68
parent 1f40b8b4e8
commit ca1c683b47
@@ -587,6 +587,8 @@ Release 0.23.5 - UNRELEASED
    MAPREDUCE-4752. Reduce MR AM memory usage through String Interning (Robert
    Evans via tgraves)

    MAPREDUCE-4266. remove Ant remnants from MR (tgraves via bobby)

  OPTIMIZATIONS

  BUG FIXES

@@ -1,33 +0,0 @@
<?xml version="1.0"?>

<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<!--
Contains utilities that are common for the main and contrib builds.
-->
<project name="build-utils">

  <!-- Load properties from build properties file, if available -->
  <dirname property="build-utils.basedir" file="${ant.file.build-utils}"/>
  <property file="${build-utils.basedir}/build.properties"/>

  <target name="forrest.check" unless="forrest.home">
    <fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line, or set forest.home in build properties file." />
  </target>

</project>

(File diff suppressed because it is too large.)

@@ -1,167 +0,0 @@
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
  <info organisation="org.apache.hadoop" module="${ant.project.name}" revision="${version}">
    <license name="Apache 2.0"/>
    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
    <description>
        Hadoop Core
    </description>
  </info>
  <configurations defaultconfmapping="default">
    <!--these match the Maven configurations-->
    <conf name="default" extends="master,runtime"/>
    <conf name="master" description="contains the artifact but no dependencies"/>
    <conf name="compile" description="contains the artifact but no dependencies"/>
    <conf name="runtime" description="runtime but not the artifact"/>

    <!--
    These public configurations contain the core dependencies for running hadoop client or server.
    The server is effectively a superset of the client.
    -->
    <!--Private configurations. -->

    <conf name="common" visibility="private" extends="compile" description="common artifacts"/>
    <conf name="mapred" visibility="private" extends="compile,runtime" description="Mapred dependent artifacts"/>
    <conf name="javadoc" visibility="private" description="artiracts required while performing doc generation" extends="common"/>
    <conf name="test" extends="master" visibility="private" description="the classpath needed to run tests"/>
    <conf name="package" extends="master" description="the classpath needed for packaging"/>
    <conf name="system" extends="test" visibility="private" description="the classpath needed to run system tests"/>

    <conf name="test-hdfswithmr" extends="test" visibility="private" description="the classpath needed to run tests"/>

    <conf name="releaseaudit" visibility="private" description="Artifacts required for releaseaudit target"/>

    <conf name="jdiff" visibility="private" extends="common"/>
    <conf name="checkstyle" visibility="private"/>

  </configurations>

  <publications>
    <!--get the artifact from our module name-->
    <artifact conf="master"/>
  </publications>
  <dependencies>
    <dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-common"
                rev="${hadoop-common.version}" conf="compile->default">
      <artifact name="hadoop-common" ext="jar" />
      <artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests" />
    </dependency>
    <dependency org="org.apache.hadoop" name="hadoop-hdfs"
                rev="${hadoop-hdfs.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-common-instrumented"
                rev="${hadoop-common.version}" conf="system->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-hdfs-instrumented"
                rev="${hadoop-hdfs.version}" conf="system->default"/>
    <dependency org="commons-logging" name="commons-logging"
                rev="${commons-logging.version}" conf="compile->master"/>
    <dependency org="org.slf4j" name="slf4j-api" rev="${slf4j-api.version}"
                conf="compile->master"/>
    <dependency org="org.slf4j" name="slf4j-log4j12"
                rev="${slf4j-log4j12.version}" conf="mapred->master"/>
    <dependency org="org.apache.hadoop" name="hadoop-hdfs"
                rev="${hadoop-hdfs.version}" conf="test->default">
      <artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
    </dependency>
    <dependency org="org.apache.hadoop" name="hadoop-common"
                rev="${hadoop-common.version}" conf="test->default">
      <artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests" />
    </dependency>
    <dependency org="org.apache.hadoop" name="hadoop-yarn-server-common"
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common"
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-yarn-common"
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-examples"
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="log4j" name="log4j" rev="${log4j.version}"
                conf="compile->master"/>
    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient"
                rev="${yarn.version}" conf="compile->default">
      <artifact name="hadoop-mapreduce-client-jobclient" type="tests" ext="jar" m:classifier="tests"/>
    </dependency>
    <dependency org="org.apache.hadoop" name="hadoop-rumen"
                rev="${hadoop-common.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-archives"
                rev="${hadoop-common.version}" conf="compile->default"/>

    <dependency org="checkstyle" name="checkstyle" rev="${checkstyle.version}"
                conf="checkstyle->default"/>

    <dependency org="jdiff" name="jdiff" rev="${jdiff.version}"
                conf="jdiff->default"/>
    <dependency org="xerces" name="xerces" rev="${xerces.version}"
                conf="jdiff->default"/>

    <dependency org="org.apache.rat" name="apache-rat-tasks"
                rev="${rats-lib.version}" conf="releaseaudit->default"/>
    <dependency org="commons-lang" name="commons-lang"
                rev="${commons-lang.version}" conf="releaseaudit->default"/>
    <dependency org="commons-collections" name="commons-collections"
                rev="${commons-collections.version}"
                conf="releaseaudit->default"/>

    <dependency org="org.apache.lucene" name="lucene-core"
                rev="${lucene-core.version}" conf="javadoc->default"/>
    <dependency org="org.apache.avro" name="avro-compiler" rev="${avro.version}"
                conf="compile->master">
      <exclude module="ant"/>
      <exclude module="jetty"/>
      <exclude module="slf4j-simple"/>
    </dependency>
    <dependency org="org.apache.avro" name="avro" rev="${avro.version}"
                conf="compile->default">
      <exclude module="ant"/>
      <exclude module="jetty"/>
      <exclude module="slf4j-simple"/>
    </dependency>
    <dependency org="junit" name="junit" rev="${junit.version}"
                conf="test->default"/>
    <dependency org="org.mockito" name="mockito-all" rev="${mockito-all.version}"
                conf="test->default"/>
    <dependency org="org.vafer" name="jdeb" rev="${jdeb.version}" conf="package->master"/>
    <dependency org="org.mortbay.jetty" name="jetty-servlet-tester" rev="${jetty.version}"
                conf="test->default"/>

    <!-- dependency for rumen anonymization -->
    <dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${jackson.version}"
                conf="compile->default"/>
    <dependency org="org.codehaus.jackson" name="jackson-mapper-asl" rev="${jackson.version}"
                conf="compile->default"/>

    <!-- dependency addition for the fault injection -->
    <dependency org="org.aspectj" name="aspectjrt" rev="${aspectj.version}"
                conf="compile->default"/>
    <dependency org="org.aspectj" name="aspectjtools" rev="${aspectj.version}"
                conf="compile->default"/>

    <!-- Exclusions for transitive dependencies pulled in by log4j -->
    <exclude org="com.sun.jdmk"/>
    <exclude org="com.sun.jmx"/>
    <exclude org="javax.jms"/>
    <exclude org="javax.mail"/>
    <exclude org="org.apache.hadoop" module="avro"/>
    <exclude org="org.apache.commons" module="commons-daemon"/>

  </dependencies>

</ivy-module>

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapred-examples</artifactId>
  <packaging>jar</packaging>
  <version>@version</version>
  <dependencies/>
</project>

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapred-instrumented</artifactId>
  <packaging>jar</packaging>
  <version>@version</version>
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.0.0-SNAPSHOT</version>
    </dependency>
  </dependencies>
</project>

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapred-test-instrumented</artifactId>
  <packaging>jar</packaging>
  <version>@version</version>
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapred</artifactId>
      <version>@version</version>
    </dependency>
  </dependencies>
</project>

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapred</artifactId>
  <packaging>jar</packaging>
  <version>@version</version>
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.0.0-SNAPSHOT</version>
    </dependency>
  </dependencies>
</project>

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapred-test</artifactId>
  <packaging>jar</packaging>
  <version>@version</version>
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapred</artifactId>
      <version>@version</version>
    </dependency>
  </dependencies>
</project>

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapred-tools</artifactId>
  <packaging>jar</packaging>
  <version>@version</version>
  <dependencies/>
</project>

@@ -1,70 +0,0 @@
<ivysettings>

<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<!--
  see http://www.jayasoft.org/ivy/doc/configuration
-->
<!-- you can override this property to use mirrors
          http://repo1.maven.org/maven2/
          http://mirrors.dotsrc.org/maven2
          http://ftp.ggi-project.org/pub/packages/maven2
          http://mirrors.sunsite.dk/maven2
          http://public.planetmirror.com/pub/maven2
          http://ibiblio.lsu.edu/main/pub/packages/maven2
          http://www.ibiblio.net/pub/packages/maven2
-->
<property name="repo.maven.org" value="http://repo1.maven.org/maven2/" override="false"/>
<property name="snapshot.apache.org" value="https://repository.apache.org/content/repositories/snapshots/" override="false"/>
<property name="maven2.pattern" value="[organisation]/[module]/[revision]/[module]-[revision](-[classifier])"/>
<property name="repo.dir" value="${user.home}/.m2/repository"/>
<property name="maven2.pattern.ext" value="${maven2.pattern}.[ext]"/>
<property name="resolvers" value="default" override="false"/>
<property name="force-resolve" value="false" override="false"/>
<settings defaultResolver="${resolvers}"/>

<resolvers>
  <ibiblio name="maven2" root="${repo.maven.org}" pattern="${maven2.pattern.ext}" m2compatible="true" checkconsistency="false"/>
  <ibiblio name="apache-snapshot" root="${snapshot.apache.org}" m2compatible="true"
           checkmodified="true" changingPattern=".*SNAPSHOT" checkconsistency="false"/>

  <filesystem name="fs" m2compatible="true" checkconsistency="false" force="${force-resolve}">
    <artifact pattern="${repo.dir}/${maven2.pattern.ext}"/>
    <ivy pattern="${repo.dir}/[organisation]/[module]/[revision]/[module]-[revision].pom"/>
  </filesystem>

  <chain name="default" dual="true" checkmodified="true" changingPattern=".*SNAPSHOT">
    <resolver ref="apache-snapshot"/>
    <resolver ref="maven2"/>
  </chain>

  <chain name="internal" dual="true">
    <resolver ref="fs"/>
    <resolver ref="apache-snapshot"/>
    <resolver ref="maven2"/>
  </chain>

  <chain name="external">
    <resolver ref="maven2"/>
  </chain>

</resolvers>
<modules>
  <module organisation="org.apache.hadoop" name="hadoop-*" resolver="${resolvers}"/>
</modules>
</ivysettings>

@@ -1,86 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#This properties file lists the versions of the various artifacts used by hadoop and components.
#It drives ivy and the generation of a maven POM

#These are the versions of our dependencies (in alphabetical order)
ant-task.version=2.0.10

#Aspectj depedency for Fault injection
#This property has to be updated synchronously with aop.xml
aspectj.version=1.6.5

avro.version=1.5.2
paranamer.version=2.2
checkstyle.version=4.2

commons-cli.version=1.2
commons-collections.version=3.1
commons-httpclient.version=3.1
commons-lang.version=2.5
commons-logging.version=1.1.1
commons-logging-api.version=1.1
commons-el.version=1.0
commons-fileupload.version=1.2
commons-io.version=1.4
commons-net.version=1.4.1
core.version=3.1.1
coreplugin.version=1.3.2

ftplet-api.version=1.0.0
ftpserver-core.version=1.0.0
ftpserver-deprecated.version=1.0.0-M2

hadoop-common.version=3.0.0-SNAPSHOT
hadoop-hdfs.version=3.0.0-SNAPSHOT

hsqldb.version=1.8.0.10

ivy.version=2.2.0

jasper.version=5.5.12
jdeb.version=0.8
jsp.version=2.1
jsp-api.version=5.5.12
jets3t.version=0.7.1
jetty.version=6.1.14
jetty-util.version=6.1.14
junit.version=4.8.1
jdiff.version=1.0.9

kfs.version=0.3

log4j.version=1.2.16
lucene-core.version=2.3.1

mina-core.version=2.0.0-M5

mockito-all.version=1.8.2

oro.version=2.0.8

rats-lib.version=0.6

servlet.version=4.0.6
servlet-api-2.5.version=6.1.14
servlet-api.version=2.5
slf4j-api.version=1.5.11
slf4j-log4j12.version=1.5.11

wagon-http.version=1.0-beta-2
xmlenc.version=0.52
xerces.version=1.4.4

jackson.version=1.8.8
yarn.version=3.0.0-SNAPSHOT
hadoop-mapreduce.version=3.0.0-SNAPSHOT

@@ -1,168 +0,0 @@
### "Gridmix" Benchmark ###

Contents:

0 Overview
1 Getting Started
  1.0 Build
  1.1 Configure
  1.2 Generate test data
2 Running
  2.0 General
  2.1 Non-Hod cluster
  2.2 Hod
    2.2.0 Static cluster
    2.2.1 Hod cluster


* 0 Overview

The scripts in this package model a cluster workload. The workload is
simulated by generating random data and submitting map/reduce jobs that
mimic observed data-access patterns in user jobs. The full benchmark
generates approximately 2.5TB of (often compressed) input data operated on
by the following simulated jobs:

1) Three stage map/reduce job
   Input:      500GB compressed (2TB uncompressed) SequenceFile
               (k,v) = (5 words, 100 words)
               hadoop-env: FIXCOMPSEQ
   Compute1:   keep 10% map, 40% reduce
   Compute2:   keep 100% map, 77% reduce
               Input from Compute1
   Compute3:   keep 116% map, 91% reduce
               Input from Compute2
   Motivation: Many user workloads are implemented as pipelined map/reduce
               jobs, including Pig workloads

2) Large sort of variable key/value size
   Input:      500GB compressed (2TB uncompressed) SequenceFile
               (k,v) = (5-10 words, 100-10000 words)
               hadoop-env: VARCOMPSEQ
   Compute:    keep 100% map, 100% reduce
   Motivation: Processing large, compressed datsets is common.

3) Reference select
   Input:      500GB compressed (2TB uncompressed) SequenceFile
               (k,v) = (5-10 words, 100-10000 words)
               hadoop-env: VARCOMPSEQ
   Compute:    keep 0.2% map, 5% reduce
               1 Reducer
   Motivation: Sampling from a large, reference dataset is common.

4) Indirect Read
   Input:      500GB compressed (2TB uncompressed) Text
               (k,v) = (5 words, 20 words)
               hadoop-env: FIXCOMPTEXT
   Compute:    keep 50% map, 100% reduce Each map reads 1 input file,
               adding additional input files from the output of the
               previous iteration for 10 iterations
   Motivation: User jobs in the wild will often take input data without
               consulting the framework. This simulates an iterative job
               whose input data is all "indirect," i.e. given to the
               framework sans locality metadata.

5) API text sort (java, pipes, streaming)
   Input:      500GB uncompressed Text
               (k,v) = (1-10 words, 0-200 words)
               hadoop-env: VARINFLTEXT
   Compute:    keep 100% map, 100% reduce
   Motivation: This benchmark should exercise each of the APIs to
               map/reduce

Each of these jobs may be run individually or- using the scripts provided-
as a simulation of user activity sized to run in approximately 4 hours on a
480-500 node cluster using Hadoop 0.15.0. The benchmark runs a mix of small,
medium, and large jobs simultaneously, submitting each at fixed intervals.

Notes(1-4): Since input data are compressed, this means that each mapper
outputs a lot more bytes than it reads in, typically causing map output
spills.


* 1 Getting Started

1.0 Build

1) Compile the examples, including the C++ sources:
   > ant -Dcompile.c++=yes examples
2) Copy the pipe sort example to a location in the default filesystem
   (usually HDFS, default /gridmix/programs)
   > $HADOOP_PREFIX/hadoop dfs -mkdir $GRID_MIX_PROG
   > $HADOOP_PREFIX/hadoop dfs -put build/c++-examples/$PLATFORM_STR/bin/pipes-sort $GRID_MIX_PROG

1.1 Configure

One must modify hadoop-env to supply the following information:

HADOOP_PREFIX  The hadoop install location
GRID_MIX_HOME  The location of these scripts
APP_JAR        The location of the hadoop example
GRID_MIX_DATA  The location of the datsets for these benchmarks
GRID_MIX_PROG  The location of the pipe-sort example

Reasonable defaults are provided for all but HADOOP_PREFIX. The datasets used
by each of the respective benchmarks are recorded in the Input::hadoop-env
comment in section 0 and their location may be changed in hadoop-env. Note
that each job expects particular input data and the parameters given to it
must be changed in each script if a different InputFormat, keytype, or
valuetype is desired.

Note that NUM_OF_REDUCERS_FOR_*_JOB properties should be sized to the
cluster on which the benchmarks will be run. The default assumes a large
(450-500 node) cluster.

1.2 Generate test data

Test data is generated using the generateData.sh script. While one may
modify the structure and size of the data generated here, note that many of
the scripts- particularly for medium and small sized jobs- rely not only on
specific InputFormats and key/value types, but also on a particular
structure to the input data. Changing these values will likely be necessary
to run on small and medium-sized clusters, but any modifications must be
informed by an explicit familiarity with the underlying scripts.

It is sufficient to run the script without modification, though it may
require up to 4TB of free space in the default filesystem. Changing the size
of the input data (COMPRESSED_DATA_BYTES, UNCOMPRESSED_DATA_BYTES,
INDIRECT_DATA_BYTES) is safe. A 4x compression ratio for generated, block
compressed data is typical.

* 2 Running

2.0 General

The submissionScripts directory contains the high-level scripts submitting
sized jobs for the gridmix benchmark. Each submits $NUM_OF_*_JOBS_PER_CLASS
instances as specified in the gridmix-env script, where an instance is an
invocation of a script as in $JOBTYPE/$JOBTYPE.$CLASS (e.g.
javasort/text-sort.large). Each instance may submit one or more map/reduce
jobs.

There is a backoff script, submissionScripts/sleep_if_too_busy that can be
modified to define throttling criteria. By default, it simply counts running
java processes.

2.1 Non-Hod cluster

The submissionScripts/allToSameCluster script will invoke each of the other
submission scripts for the gridmix benchmark. Depending on how your cluster
manages job submission, these scripts may require modification. The details
are very context-dependent.

2.2 Hod

Note that there are options in hadoop-env that control jobs sumitted thruogh
Hod. One may specify the location of a config (HOD_CONFIG), the number of
nodes to allocate for classes of jobs, and any additional options one wants
to apply. The default includes an example for supplying a Hadoop tarball for
testing platform changes (see Hod documentation).

2.2.0 Static Cluster

> hod --hod.script=submissionScripts/allToSameCluster -m 500

2.2.1 Hod-allocated cluster

> ./submissionScripts/allThroughHod

@@ -1,90 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/gridmix-env

# Smaller data set is used by default.
COMPRESSED_DATA_BYTES=2147483648
UNCOMPRESSED_DATA_BYTES=536870912
INDIRECT_DATA_BYTES=58720256

# Number of partitions for output data
if [ -z ${NUM_MAPS} ] ; then
  NUM_MAPS=100
fi

INDIRECT_DATA_FILES=200

# If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
if [ ! -z ${USE_REAL_DATASET} ] ; then
  echo "Using real dataset"
  # 2TB data compressing to approx 500GB
  COMPRESSED_DATA_BYTES=2147483648000
  # 500GB
  UNCOMPRESSED_DATA_BYTES=536870912000
  # Default approx 70MB per data file, compressed
  INDIRECT_DATA_BYTES=58720256000
fi

${HADOOP_PREFIX}/bin/hadoop jar \
  ${EXAMPLE_JAR} randomtextwriter \
  -D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
  -D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
  -D mapreduce.randomtextwriter.minwordskey=5 \
  -D mapreduce.randomtextwriter.maxwordskey=10 \
  -D mapreduce.randomtextwriter.minwordsvalue=100 \
  -D mapreduce.randomtextwriter.maxwordsvalue=10000 \
  -D mapreduce.output.fileoutputformat.compress=true \
  -D mapred.map.output.compression.type=BLOCK \
  -outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
  ${VARCOMPSEQ} &

${HADOOP_PREFIX}/bin/hadoop jar \
  ${EXAMPLE_JAR} randomtextwriter \
  -D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
  -D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
  -D mapreduce.randomtextwriter.minwordskey=5 \
  -D mapreduce.randomtextwriter.maxwordskey=5 \
  -D mapreduce.randomtextwriter.minwordsvalue=100 \
  -D mapreduce.randomtextwriter.maxwordsvalue=100 \
  -D mapreduce.output.fileoutputformat.compress=true \
  -D mapred.map.output.compression.type=BLOCK \
  -outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
  ${FIXCOMPSEQ} &

${HADOOP_PREFIX}/bin/hadoop jar \
  ${EXAMPLE_JAR} randomtextwriter \
  -D mapreduce.randomtextwriter.totalbytes=${UNCOMPRESSED_DATA_BYTES} \
  -D mapreduce.randomtextwriter.bytespermap=$((${UNCOMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
  -D mapreduce.randomtextwriter.minwordskey=1 \
  -D mapreduce.randomtextwriter.maxwordskey=10 \
  -D mapreduce.randomtextwriter.minwordsvalue=0 \
  -D mapreduce.randomtextwriter.maxwordsvalue=200 \
  -D mapreduce.output.fileoutputformat.compress=false \
  -outFormat org.apache.hadoop.mapreduce.lib.output.TextOutputFormat \
  ${VARINFLTEXT} &

${HADOOP_PREFIX}/bin/hadoop jar \
  ${EXAMPLE_JAR} randomtextwriter \
  -D mapreduce.randomtextwriter.totalbytes=${INDIRECT_DATA_BYTES} \
  -D mapreduce.randomtextwriter.bytespermap=$((${INDIRECT_DATA_BYTES} / ${INDIRECT_DATA_FILES})) \
  -D mapreduce.randomtextwriter.minwordskey=5 \
  -D mapreduce.randomtextwriter.maxwordskey=5 \
  -D mapreduce.randomtextwriter.minwordsvalue=20 \
  -D mapreduce.randomtextwriter.maxwordsvalue=20 \
  -D mapreduce.output.fileoutputformat.compress=true \
  -D mapred.map.output.compression.type=BLOCK \
  -outFormat org.apache.hadoop.mapreduce.lib.output.TextOutputFormat \
  ${FIXCOMPTEXT} &

@@ -1,86 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


## Environment configuration
# Hadoop installation
# set var only if it has not already been set externally
if [ -z "${HADOOP_PREFIX}" ] ; then
  export HADOOP_PREFIX=
fi
# Base directory for gridmix install
# set var only if it has not already been set externally
if [ -z "${GRID_MIX_HOME}" ] ; then
  export GRID_MIX_HOME=${GRID_DIR}
fi
# Hadoop example jar
# set var only if it has not already been set externally
if [ -z "${EXAMPLE_JAR}" ] ; then
  export EXAMPLE_JAR="${HADOOP_PREFIX}/hadoop-*examples.jar"
fi
# Hadoop test jar
# set var only if it has not already been set externally
if [ -z "${APP_JAR}" ] ; then
  export APP_JAR="${HADOOP_PREFIX}/hadoop-*test.jar"
fi
# Hadoop streaming jar
# set var only if it has not already been set externally
if [ -z "${STREAM_JAR}" ] ; then
  export STREAM_JAR="${HADOOP_PREFIX}/contrib/streaming/hadoop-*streaming.jar"
fi
# Location on default filesystem for writing gridmix data (usually HDFS)
# Default: /gridmix/data
# set var only if it has not already been set externally
if [ -z "${GRID_MIX_DATA}" ] ; then
  export GRID_MIX_DATA=/gridmix/data
fi
# Location of executables in default filesystem (usually HDFS)
# Default: /gridmix/programs
# set var only if it has not already been set externally
if [ -z "${GRID_MIX_PROG}" ] ; then
  export GRID_MIX_PROG=/gridmix/programs
fi

## Data sources
# Variable length key, value compressed SequenceFile
export VARCOMPSEQ=${GRID_MIX_DATA}/WebSimulationBlockCompressed
# Fixed length key, value compressed SequenceFile
export FIXCOMPSEQ=${GRID_MIX_DATA}/MonsterQueryBlockCompressed
# Variable length key, value uncompressed Text File
export VARINFLTEXT=${GRID_MIX_DATA}/SortUncompressed
# Fixed length key, value compressed Text File
export FIXCOMPTEXT=${GRID_MIX_DATA}/EntropySimulationCompressed

## Job sizing
export NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS=5
export NUM_OF_LARGE_JOBS_PER_CLASS=3
export NUM_OF_MEDIUM_JOBS_PER_CLASS=20
export NUM_OF_SMALL_JOBS_PER_CLASS=40

export NUM_OF_REDUCERS_FOR_LARGE_JOB=370
export NUM_OF_REDUCERS_FOR_MEDIUM_JOB=170
export NUM_OF_REDUCERS_FOR_SMALL_JOB=15

## Throttling
export INTERVAL_BETWEEN_SUBMITION=20

## Hod
#export HOD_OPTIONS=""

export CLUSTER_DIR_BASE=$GRID_MIX_HOME/CLUSTER_DIR_BASE
export HOD_CONFIG=
export ALL_HOD_OPTIONS="-c ${HOD_CONFIG} ${HOD_OPTIONS}"
export SMALL_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 5"
export MEDIUM_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 50"
export LARGE_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 100"

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

INDIR=${VARINFLTEXT}

Date=`date +%F-%H-%M-%S-%N`
OUTDIR=perf-out/sort-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar ${EXAMPLE_JAR} sort -m 1 -r $NUM_OF_REDUCERS_FOR_LARGE_JOB -inFormat org.apache.hadoop.mapred.KeyValueTextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text $INDIR $OUTDIR

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

INDIR="${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/sort-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar ${EXAMPLE_JAR} sort -m 1 -r $NUM_OF_REDUCERS_FOR_MEDIUM_JOB -inFormat org.apache.hadoop.mapred.KeyValueTextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text $INDIR $OUTDIR

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

INDIR="${VARINFLTEXT}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/sort-out-dir-small_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar ${EXAMPLE_JAR} sort -m 1 -r $NUM_OF_REDUCERS_FOR_SMALL_JOB -inFormat org.apache.hadoop.mapred.KeyValueTextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text $INDIR $OUTDIR

@@ -1,37 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

NUM_OF_REDUCERS=100
INDIR=${FIXCOMPTEXT}
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/maxent-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 50 -keepred 100 -inFormatIndirect org.apache.hadoop.mapred.TextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.LongWritable -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR.1 -r $NUM_OF_REDUCERS

ITER=7
for ((i=1; i<$ITER; ++i))
do
  ${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 50 -keepred 100 -inFormatIndirect org.apache.hadoop.mapred.TextInputFormat -outFormat org.apache.hadoop.mapred.TextOutputFormat -outKey org.apache.hadoop.io.LongWritable -outValue org.apache.hadoop.io.Text -indir $INDIR -indir $OUTDIR.$i -outdir $OUTDIR.$(($i+1)) -r $NUM_OF_REDUCERS
  if [ $? -ne "0" ]
  then exit $?
  fi
  ${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR.$i
done

${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR.$ITER

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
INDIR=${FIXCOMPSEQ}
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/mq-out-dir-large_$Date.1
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 10 -keepred 40 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-large_$Date.2
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 77 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-large_$Date.3
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 116 -keepred 91 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
INDIR="${FIXCOMPSEQ}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/mq-out-dir-medium_$Date.1
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 10 -keepred 40 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-medium_$Date.2
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 77 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-medium_$Date.3
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 116 -keepred 91 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

@@ -1,38 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
INDIR="${FIXCOMPSEQ}/{part-00000,part-00001,part-00002}"
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/mq-out-dir-small_$Date.1
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 10 -keepred 40 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-small_$Date.2
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 77 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

INDIR=$OUTDIR
OUTDIR=perf-out/mq-out-dir-small_$Date.3
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 116 -keepred 91 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
INDIR=${VARINFLTEXT}
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/pipe-out-dir-large_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop pipes -input $INDIR -output $OUTDIR -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat -program ${GRID_MIX_PROG}/pipes-sort -reduces $NUM_OF_REDUCERS -jobconf mapreduce.job.output.key.class=org.apache.hadoop.io.Text,mapreduce.job.output.value.class=org.apache.hadoop.io.Text -writer org.apache.hadoop.mapred.TextOutputFormat

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/../gridmix-env

NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
INDIR="${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}"
Date=`date +%F-%H-%M-%S-%N`

OUTDIR=perf-out/pipe-out-dir-medium_$Date
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR

${HADOOP_PREFIX}/bin/hadoop pipes -input $INDIR -output $OUTDIR -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat -program ${GRID_MIX_PROG}/pipes-sort -reduces $NUM_OF_REDUCERS -jobconf mapreduce.job.output.key.class=org.apache.hadoop.io.Text,mapreduce.job.output.value.class=org.apache.hadoop.io.Text -writer org.apache.hadoop.mapred.TextOutputFormat

@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
|
||||
INDIR="${VARINFLTEXT}/{part-00000,part-00001,part-00002}"
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/pipe-out-dir-small_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop pipes -input $INDIR -output $OUTDIR -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat -program ${GRID_MIX_PROG}/pipes-sort -reduces $NUM_OF_REDUCERS -jobconf mapreduce.job.output.key.class=org.apache.hadoop.io.Text,mapreduce.job.output.value.class=org.apache.hadoop.io.Text -writer org.apache.hadoop.mapred.TextOutputFormat
|
||||
|
|
@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
export NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
|
||||
export INDIR=${VARINFLTEXT}
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
export OUTDIR=perf-out/stream-out-dir-large_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar ${STREAM_JAR} -input $INDIR -output $OUTDIR -mapper cat -reducer cat -numReduceTasks $NUM_OF_REDUCERS
|
||||
|
|
@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
|
||||
INDIR="${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}"
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/stream-out-dir-medium_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar ${STREAM_JAR} -input $INDIR -output $OUTDIR -mapper cat -reducer cat -numReduceTasks $NUM_OF_REDUCERS
|
||||
|
|
@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
|
||||
INDIR="${VARINFLTEXT}/{part-00000,part-00001,part-00002}"
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/stream-out-dir-small_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar ${STREAM_JAR} -input $INDIR -output $OUTDIR -mapper cat -reducer cat -numReduceTasks $NUM_OF_REDUCERS
|
||||
|
|
@@ -1,24 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
$GRID_MIX_HOME/submissionScripts/maxentHod 2>&1 > maxentHod.out &
|
||||
$GRID_MIX_HOME/submissionScripts/textSortHod 2>&1 > textSortHod.out &
|
||||
$GRID_MIX_HOME/submissionScripts/monsterQueriesHod 2>&1 > monsterQueriesHod.out &
|
||||
$GRID_MIX_HOME/submissionScripts/webdataScanHod 2>&1 > webdataScanHod.out &
|
||||
$GRID_MIX_HOME/submissionScripts/webdataSortHod 2>&1 > webdataSortHod.out &
|
||||
|
||||
|
|
@@ -1,38 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
PROCESSES=""
|
||||
|
||||
$GRID_MIX_HOME/submissionScripts/maxentToSameCluster 2>&1 > maxentToSameCluster.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
sleep 20
|
||||
$GRID_MIX_HOME/submissionScripts/textSortToSameCluster 2>&1 > textSortToSameCluster.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
sleep 20
|
||||
$GRID_MIX_HOME/submissionScripts/monsterQueriesToSameCluster 2>&1 > monsterQueriesToSameCluster.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
sleep 20
|
||||
$GRID_MIX_HOME/submissionScripts/webdataScanToSameCluster 2>&1 > webdataScanToSameCluster.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
sleep 20
|
||||
$GRID_MIX_HOME/submissionScripts/webdataSortToSameCluster 2>&1 > webdataSortToSameCluster.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
|
||||
echo "Waiting for processes: ${PROCESSES}"
|
||||
for APROC in ${PROCESSES}; do
|
||||
wait ${APROC}
|
||||
done
|
||||
|
|
@@ -1,26 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/maxent.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
#hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/maxent/maxent.large 2>&1 > maxent.large.$i.out &
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/maxent/maxent.large 2>&1 > maxent.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
|
@@ -1,30 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
PROCESSES=""
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/maxent/maxent.large 2>&1 > maxent.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for APROC in ${PROCESSES}; do
|
||||
wait ${APROC}
|
||||
done
|
||||
|
|
@@ -1,44 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
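# For each job size class below, allocate a HOD cluster directory and submit the corresponding monster_query script through 'hod script'.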
|
||||
|
||||
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.small.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
|
||||
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.small 2>&1 > monster_query.small.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.medium.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.medium 2>&1 > monster_query.medium.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.large 2>&1 > monster_query.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
|
@@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
PROCESSES=""
|
||||
|
||||
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/monsterQuery/monster_query.small 2>&1 > monster_query.small.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/monsterQuery/monster_query.medium 2>&1 > monster_query.medium.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/monsterQuery/monster_query.large 2>&1 > monster_query.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for APROC in ${PROCESSES}; do
|
||||
wait ${APROC}
|
||||
done
|
|
@@ -1,21 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
sleep 1
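# Throttle job submission: while more than 70 java processes are running on this host, keep sleeping.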
|
||||
for ((java_process=$((`ps -ef|grep java|wc -l`-1)); \
|
||||
java_process > 70; \
|
||||
java_process=$((`ps -ef|grep java|wc -l`-1))))
|
||||
do
|
||||
sleep 10
|
||||
echo $java_process
|
||||
done
|
|
@@ -1,75 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.small.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
|
||||
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.small 2>&1 > pipesort.small.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.small.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.small 2>&1 > streamsort.small.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.small.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.small 2>&1 > javasort.small.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.medium.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.medium 2>&1 > pipesort.medium.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.medium.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.medium 2>&1 > streamsort.medium.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.medium.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/javasort/text-sort.medium 2>&1 > javasort.medium.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.large 2>&1 > pipesort.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.large 2>&1 > streamsort.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.large 2>&1 > javasort.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
|
@@ -1,64 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
PROCESSES=""
|
||||
|
||||
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/pipesort/text-sort.small 2>&1 > pipesort.small.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
$GRID_MIX_HOME/streamsort/text-sort.small 2>&1 > streamsort.small.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
$GRID_MIX_HOME/javasort/text-sort.small 2>&1 > javasort.small.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/pipesort/text-sort.medium 2>&1 > pipesort.medium.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
$GRID_MIX_HOME/streamsort/text-sort.medium 2>&1 > streamsort.medium.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
$GRID_MIX_HOME/javasort/text-sort.medium 2>&1 > javasort.medium.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/pipesort/text-sort.large 2>&1 > pipesort.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
$GRID_MIX_HOME/streamsort/text-sort.large 2>&1 > streamsort.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
$GRID_MIX_HOME/javasort/text-sort.large 2>&1 > javasort.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for APROC in ${PROCESSES}; do
|
||||
wait ${APROC}
|
||||
done
|
||||
|
|
@@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.small.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.small 2>&1 > webdata_scan.small.$i.out&
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
|
||||
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.medium.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.medium 2>&1 > webdata_scan.medium.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.large 2>&1 > webdata_scan.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
|
@@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
PROCESSES=""
|
||||
|
||||
for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/webdatascan/webdata_scan.medium 2>&1 > webdata_scan.medium.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/webdatascan/webdata_scan.small 2>&1 > webdata_scan.small.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/webdatascan/webdata_scan.large 2>&1 > webdata_scan.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for APROC in ${PROCESSES}; do
|
||||
wait ${APROC}
|
||||
done
|
|
@@ -1,28 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_sort.large.$i
|
||||
mkdir $CLUSTER_DIR
|
||||
#hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatasort/webdata_sort.large 2>&1 > webdata_sort.large.$i.out &
|
||||
echo "hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatasort/webdata_sort.large "
|
||||
hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatasort/webdata_sort.large 2>&1 > webdata_sort.large.$i.out &
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
|
@@ -1,29 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
PROCESSES=""
|
||||
|
||||
for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
|
||||
do
|
||||
echo $i
|
||||
$GRID_MIX_HOME/webdatasort/webdata_sort.large 2>&1 > webdata_sort.large.$i.out &
|
||||
PROCESSES="${PROCESSES} $!"
|
||||
$GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
|
||||
done
|
||||
|
||||
for APROC in ${PROCESSES}; do
|
||||
wait ${APROC}
|
||||
done
|
|
@@ -1,25 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=1
|
||||
INDIR=${VARCOMPSEQ}
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/webdata-scan-out-dir-large_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 0.2 -keepred 5 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
|
|
@@ -1,25 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=1
|
||||
INDIR="${VARCOMPSEQ}/{part-000*0,part-000*1,part-000*2}"
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/webdata-scan-out-dir-medium_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar ${APP_JAR} loadgen -keepmap 1 -keepred 5 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
|
|
@@ -1,25 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=1
|
||||
INDIR="${VARCOMPSEQ}/{part-00000,part-00001,part-00002}"
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/webdata-scan-out-dir-small_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 1 -keepred 5 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
|
|
@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_LARGE_JOB
|
||||
INDIR=${VARCOMPSEQ}/{part-000*0,part-000*1}
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/webdata-sort-out-dir-large_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 100 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
|
||||
|
||||
|
|
@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_MEDIUM_JOB
|
||||
INDIR="${VARCOMPSEQ}/{part-0000,part-0001}"
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
OUTDIR=perf-out/webdata-sort-out-dir-medium_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 100 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
|
||||
|
||||
|
|
@@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/../gridmix-env
|
||||
|
||||
NUM_OF_REDUCERS=$NUM_OF_REDUCERS_FOR_SMALL_JOB
|
||||
INDIR=${VARCOMPSEQ}/part-00000
|
||||
Date=`date +%F-%H-%M-%S-%N`
|
||||
|
||||
export OUTDIR=perf-out/webdata-sort-out-dir-small_$Date
|
||||
${HADOOP_PREFIX}/bin/hadoop dfs -rmr $OUTDIR
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen -keepmap 100 -keepred 100 -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text -indir $INDIR -outdir $OUTDIR -r $NUM_OF_REDUCERS
|
||||
|
||||
|
|
@@ -1,148 +0,0 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
### "Gridmix" Benchmark ###
|
||||
|
||||
Contents:
|
||||
|
||||
0 Overview
|
||||
1 Getting Started
|
||||
1.0 Build
|
||||
1.1 Configure environment variables
|
||||
1.2 Configure the job mixture
|
||||
1.3 Generate test data
|
||||
2 Running
|
||||
2.0 General
|
||||
2.1 Non-Hod cluster
|
||||
2.2 Hod
|
||||
2.2.0 Static cluster
|
||||
2.2.1 Hod cluster
|
||||
|
||||
|
||||
* 0 Overview
|
||||
|
||||
The scripts in this package model a cluster workload. The workload is
|
||||
simulated by generating random data and submitting map/reduce jobs that
|
||||
mimic observed data-access patterns in user jobs. The full benchmark
|
||||
generates approximately 2.5TB of (often compressed) input data operated on
|
||||
by the following simulated jobs:
|
||||
|
||||
1) Three stage map/reduce job
|
||||
Input: 500GB compressed (2TB uncompressed) SequenceFile
|
||||
(k,v) = (5 words, 100 words)
|
||||
hadoop-env: FIXCOMPSEQ
|
||||
Compute1: keep 10% map, 40% reduce
|
||||
Compute2: keep 100% map, 77% reduce
|
||||
Input from Compute1
|
||||
Compute3: keep 116% map, 91% reduce
|
||||
Input from Compute2
|
||||
Motivation: Many user workloads are implemented as pipelined map/reduce
|
||||
jobs, including Pig workloads
|
||||
|
||||
2) Large sort of variable key/value size
|
||||
Input: 500GB compressed (2TB uncompressed) SequenceFile
|
||||
(k,v) = (5-10 words, 100-10000 words)
|
||||
hadoop-env: VARCOMPSEQ
|
||||
Compute: keep 100% map, 100% reduce
|
||||
Motivation: Processing large, compressed datasets is common.
|
||||
|
||||
3) Reference select
|
||||
Input: 500GB compressed (2TB uncompressed) SequenceFile
|
||||
(k,v) = (5-10 words, 100-10000 words)
|
||||
hadoop-env: VARCOMPSEQ
|
||||
Compute: keep 0.2% map, 5% reduce
|
||||
1 Reducer
|
||||
Motivation: Sampling from a large, reference dataset is common.
|
||||
|
||||
4) API text sort (java, streaming)
|
||||
Input: 500GB uncompressed Text
|
||||
(k,v) = (1-10 words, 0-200 words)
|
||||
hadoop-env: VARINFLTEXT
|
||||
Compute: keep 100% map, 100% reduce
|
||||
Motivation: This benchmark should exercise each of the APIs to
|
||||
map/reduce
|
||||
|
||||
5) Jobs with combiner (word count jobs)
|
||||
|
||||
A benchmark load is a mix of different numbers of small, medium, and large jobs of the above types.
|
||||
The exact mix is specified in an xml file (gridmix_config.xml). We have a Java program to
|
||||
construct those jobs based on the xml file and put them under the control of a JobControl object.
|
||||
The JobControl object then submits the jobs to the cluster and monitors their progress until all jobs complete.
|
||||
|
||||
|
||||
Notes(1-3): Since input data are compressed, this means that each mapper
|
||||
outputs a lot more bytes than it reads in, typically causing map output
|
||||
spills.
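
As a concrete illustration, the "reference select" load above corresponds to a loadgen invocation of roughly the following shape (a sketch assembled from the webdatascan scripts in this package; the jar and path variables come from gridmix-env):

  ${HADOOP_PREFIX}/bin/hadoop jar $APP_JAR loadgen \
      -keepmap 0.2 -keepred 5 \
      -inFormat org.apache.hadoop.mapred.SequenceFileInputFormat \
      -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
      -outKey org.apache.hadoop.io.Text -outValue org.apache.hadoop.io.Text \
      -indir ${VARCOMPSEQ} -outdir perf-out/webdata-scan-out-dir -r 1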
|
||||
|
||||
|
||||
|
||||
* 1 Getting Started
|
||||
|
||||
1.0 Build
|
||||
|
||||
In the src/benchmarks/gridmix dir, type "ant".
|
||||
gridmix.jar will be created in the build subdir.
|
||||
Copy gridmix.jar to the gridmix dir.
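
Put together, the build steps are:

  cd src/benchmarks/gridmix
  ant
  cp build/gridmix.jar .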
|
||||
|
||||
1.1 Configure environment variables
|
||||
|
||||
One must modify gridmix-env-2 to set the following variables:
|
||||
|
||||
HADOOP_PREFIX The hadoop install location
|
||||
HADOOP_VERSION The exact hadoop version to be used. e.g. hadoop-0.18.2-dev
|
||||
HADOOP_CONF_DIR The dir containing the hadoop-site.xml for the cluster to be used.
|
||||
USE_REAL_DATASET A large data set will be created and used by the benchmark if it is set to true.
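
For example, a gridmix-env-2 might contain something like the following (the concrete values shown here are purely illustrative):

  export HADOOP_VERSION=hadoop-0.18.2-dev
  export HADOOP_PREFIX=${HADOOP_INSTALL_HOME}/${HADOOP_VERSION}
  export HADOOP_CONF_DIR=${HADOOP_PREFIX}/conf
  export USE_REAL_DATASET=TRUE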
|
||||
|
||||
|
||||
1.2 Configure the job mixture
|
||||
|
||||
A default gridmix_config.xml file is provided.
|
||||
One may adjust the number of jobs of the various types
|
||||
and sizes. One can also change the number of reducers for each job, and specify whether
|
||||
to compress the output data of a map/reduce job.
|
||||
Note that one can specify multiple values in the
|
||||
numOfJobs field and numOfReduces field, like:
|
||||
<property>
|
||||
<name>javaSort.smallJobs.numOfJobs</name>
|
||||
<value>8,2</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>javaSort.smallJobs.numOfReduces</name>
|
||||
<value>15,70</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
The above spec means that we will have 8 small java sort jobs with 15 reducers and 2 small java sort
|
||||
jobs with 70 reducers.
|
||||
|
||||
1.3 Generate test data
|
||||
|
||||
Test data is generated using the generateGridmix2Data.sh script.
|
||||
./generateGridmix2Data.sh
|
||||
One may modify the structure and size of the data generated here.
|
||||
|
||||
It is sufficient to run the script without modification, though it may
|
||||
require up to 4TB of free space in the default filesystem. Changing the size
|
||||
of the input data (COMPRESSED_DATA_BYTES, UNCOMPRESSED_DATA_BYTES,
|
||||
INDIRECT_DATA_BYTES) is safe. A 4x compression ratio for generated, block
|
||||
compressed data is typical.
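
One way to shrink (or grow) the generated data, a sketch rather than part of the original scripts, is to edit the size variables near the top of generateGridmix2Data.sh before running it, e.g.:

  COMPRESSED_DATA_BYTES=1073741824     # roughly 1 GB of compressed SequenceFile input
  UNCOMPRESSED_DATA_BYTES=268435456    # roughly 256 MB of uncompressed Text input
  NUM_MAPS=50                          # number of output partitions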
|
||||
|
||||
* 2 Running
|
||||
|
||||
You need to set HADOOP_CONF_DIR to the right directory where hadoop-site.xml exists.
|
||||
Then you just need to type
|
||||
./rungridmix_2
|
||||
It will create start.out to record the start time, and at the end, it will create end.out to record the
|
||||
end time.
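
A complete run therefore looks something like the following (paths are illustrative):

  export HADOOP_CONF_DIR=/path/to/cluster/conf
  ./rungridmix_2
  cat start.out end.out    # recorded start and end times of the run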
|
||||
|
|
@@ -1,100 +0,0 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project default="main" basedir=".">
|
||||
<property name="Name" value="gridmix"/>
|
||||
<property name="version" value="0.1"/>
|
||||
<property name="final.name" value="${name}-${version}"/>
|
||||
<property name="hadoop.dir" value="${basedir}/../../../"/>
|
||||
<property name="lib.dir" value="${hadoop.dir}/lib"/>
|
||||
<property name="src.dir" value="${basedir}/src"/>
|
||||
<property name="conf.dir" value="${basedir}/conf"/>
|
||||
<property name="docs.dir" value="${basedir}/docs"/>
|
||||
<property name="build.dir" value="${basedir}/build"/>
|
||||
<property name="dist.dir" value="${basedir}/dist"/>
|
||||
<property name="build.classes" value="${build.dir}/classes"/>
|
||||
|
||||
<target name="init">
|
||||
<mkdir dir="${build.dir}"/>
|
||||
<mkdir dir="${dist.dir}"/>
|
||||
</target>
|
||||
|
||||
<target name="main" depends="init, compile, compress" description="Main target">
|
||||
<echo>
|
||||
Building the .jar files.
|
||||
</echo>
|
||||
</target>
|
||||
|
||||
<target name="compile" depends="init" description="Compilation target">
|
||||
<javac srcdir="src/java/" destdir="${build.dir}">
|
||||
<classpath refid="classpath" />
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="dev-build" depends="init, dev-compile, compress" description="Developers build target">
|
||||
<echo>
|
||||
Building the .jar files.
|
||||
</echo>
|
||||
</target>
|
||||
|
||||
<target name="dev-compile" depends="init" description="Compilation target">
|
||||
<path id="dev-classpath">
|
||||
<pathelement location="${build.classes}"/>
|
||||
<fileset dir="${hadoop.dir}/build">
|
||||
<include name="**.jar" />
|
||||
<include name="contrib/streaming/**.jar" />
|
||||
</fileset>
|
||||
<fileset dir="${lib.dir}">
|
||||
<include name="*.jar" />
|
||||
<exclude name="**/excluded/" />
|
||||
</fileset>
|
||||
</path>
|
||||
<javac srcdir="src/java/" destdir="${build.dir}" classpathref="dev-classpath"/>
|
||||
</target>
|
||||
|
||||
<target name="compress" depends="compile" description="Compression target">
|
||||
<jar jarfile="${build.dir}/gridmix.jar" basedir="${build.dir}" includes="**/*.class" />
|
||||
<copy todir="." includeEmptyDirs="false">
|
||||
<fileset dir="${build.dir}">
|
||||
<exclude name="**" />
|
||||
<include name="**/*.jar" />
|
||||
</fileset>
|
||||
</copy>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- Clean. Delete the build files, and their directories -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="clean" description="Clean. Delete the build files, and their directories">
|
||||
<delete dir="${build.dir}"/>
|
||||
<delete dir="${dist.dir}"/>
|
||||
</target>
|
||||
|
||||
<!-- the normal classpath -->
|
||||
<path id="classpath">
|
||||
<pathelement location="${build.classes}"/>
|
||||
<fileset dir="${lib.dir}">
|
||||
<include name="*.jar" />
|
||||
<exclude name="**/excluded/" />
|
||||
</fileset>
|
||||
<fileset dir="${hadoop.dir}">
|
||||
<include name="**.jar" />
|
||||
<include name="contrib/streaming/*.jar" />
|
||||
</fileset>
|
||||
</path>
|
||||
</project>
|
|
@@ -1,94 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
##############################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
#####################################################################
|
||||
|
||||
GRID_DIR=`dirname "$0"`
|
||||
GRID_DIR=`cd "$GRID_DIR"; pwd`
|
||||
source $GRID_DIR/gridmix-env-2
|
||||
|
||||
# Smaller data set is used by default (about 2 GB compressed, 512 MB uncompressed).
|
||||
COMPRESSED_DATA_BYTES=2147483648
|
||||
UNCOMPRESSED_DATA_BYTES=536870912
|
||||
|
||||
# Number of partitions for output data
|
||||
NUM_MAPS=100
|
||||
|
||||
# If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
|
||||
if [ ! -z ${USE_REAL_DATASET} ] ; then
|
||||
echo "Using real dataset"
|
||||
NUM_MAPS=492
|
||||
# 2TB data compressing to approx 500GB
|
||||
COMPRESSED_DATA_BYTES=2147483648000
|
||||
# 500GB
|
||||
UNCOMPRESSED_DATA_BYTES=536870912000
|
||||
fi
|
||||
|
||||
## Data sources
|
||||
export GRID_MIX_DATA=/gridmix/data
|
||||
# Variable length key, value compressed SequenceFile
|
||||
export VARCOMPSEQ=${GRID_MIX_DATA}/WebSimulationBlockCompressed
|
||||
# Fixed length key, value compressed SequenceFile
|
||||
export FIXCOMPSEQ=${GRID_MIX_DATA}/MonsterQueryBlockCompressed
|
||||
# Variable length key, value uncompressed Text File
|
||||
export VARINFLTEXT=${GRID_MIX_DATA}/SortUncompressed
|
||||
# Fixed length key, value compressed Text File
|
||||
export FIXCOMPTEXT=${GRID_MIX_DATA}/EntropySimulationCompressed
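
# The three randomtextwriter jobs below generate, in the background: variable-length compressed SequenceFile data (VARCOMPSEQ), fixed-length compressed SequenceFile data (FIXCOMPSEQ), and uncompressed Text data (VARINFLTEXT).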
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar \
|
||||
${EXAMPLE_JAR} randomtextwriter \
|
||||
-D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
|
||||
-D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
|
||||
-D mapreduce.randomtextwriter.minwordskey=5 \
|
||||
-D mapreduce.randomtextwriter.maxwordskey=10 \
|
||||
-D mapreduce.randomtextwriter.minwordsvalue=100 \
|
||||
-D mapreduce.randomtextwriter.maxwordsvalue=10000 \
|
||||
-D mapreduce.output.fileoutputformat.compress=true \
|
||||
-D mapred.map.output.compression.type=BLOCK \
|
||||
-outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
|
||||
${VARCOMPSEQ} &
|
||||
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar \
|
||||
${EXAMPLE_JAR} randomtextwriter \
|
||||
-D mapreduce.randomtextwriter.totalbytes=${COMPRESSED_DATA_BYTES} \
|
||||
-D mapreduce.randomtextwriter.bytespermap=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
|
||||
-D mapreduce.randomtextwriter.minwordskey=5 \
|
||||
-D mapreduce.randomtextwriter.maxwordskey=5 \
|
||||
-D mapreduce.randomtextwriter.minwordsvalue=100 \
|
||||
-D mapreduce.randomtextwriter.maxwordsvalue=100 \
|
||||
-D mapreduce.output.fileoutputformat.compress=true \
|
||||
-D mapred.map.output.compression.type=BLOCK \
|
||||
-outFormat org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \
|
||||
${FIXCOMPSEQ} &
|
||||
|
||||
|
||||
${HADOOP_PREFIX}/bin/hadoop jar \
|
||||
${EXAMPLE_JAR} randomtextwriter \
|
||||
-D mapreduce.randomtextwriter.totalbytes=${UNCOMPRESSED_DATA_BYTES} \
|
||||
-D mapreduce.randomtextwriter.bytespermap=$((${UNCOMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
|
||||
-D mapreduce.randomtextwriter.minwordskey=1 \
|
||||
-D mapreduce.randomtextwriter.maxwordskey=10 \
|
||||
-D mapreduce.randomtextwriter.minwordsvalue=0 \
|
||||
-D mapreduce.randomtextwriter.maxwordsvalue=200 \
|
||||
-D mapreduce.output.fileoutputformat.compress=false \
|
||||
-outFormat org.apache.hadoop.mapreduce.lib.output.TextOutputFormat \
|
||||
${VARINFLTEXT} &
|
||||
|
||||
|
|
@@ -1,35 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
##############################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
#####################################################################
|
||||
|
||||
|
||||
## Environment configuration
|
||||
# Hadoop installation
|
||||
export HADOOP_VERSION=hadoop-0.18.2-dev
|
||||
export HADOOP_PREFIX=${HADOOP_INSTALL_HOME}/${HADOOP_VERSION}
|
||||
export HADOOP_CONF_DIR=
|
||||
export USE_REAL_DATASET=TRUE
|
||||
|
||||
export APP_JAR=${HADOOP_PREFIX}/${HADOOP_VERSION}-test.jar
|
||||
export EXAMPLE_JAR=${HADOOP_PREFIX}/${HADOOP_VERSION}-examples.jar
|
||||
export STREAMING_JAR=${HADOOP_PREFIX}/contrib/streaming/${HADOOP_VERSION}-streaming.jar
|
||||
|
||||
|
||||
|
|
@@ -1,567 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>
|
||||
|
||||
|
||||
<property>
|
||||
<name>GRID_MIX_DATA</name>
|
||||
<value>/gridmix/data</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>FIXCOMPTEXT</name>
|
||||
<value>${GRID_MIX_DATA}/EntropySimulationCompressed</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>VARINFLTEXT</name>
|
||||
<value>${GRID_MIX_DATA}/SortUncompressed</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>FIXCOMPSEQ</name>
|
||||
<value>${GRID_MIX_DATA}/MonsterQueryBlockCompressed</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>VARCOMPSEQ</name>
|
||||
<value>${GRID_MIX_DATA}/WebSimulationBlockCompressed</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>streamSort.smallJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}/{part-*-00000,part-*-00001,part-*-00002}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.smallJobs.numOfJobs</name>
|
||||
<value>40</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.smallJobs.numOfReduces</name>
|
||||
<value>15</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.smallJobs.numOfMapoutputCompressed</name>
|
||||
<value>40</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.smallJobs.numOfOutputCompressed</name>
|
||||
<value>20</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.mediumJobs.numOfJobs</name>
|
||||
<value>16</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>streamSort.mediumJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>streamSort.mediumJobs.numOfReduces</name>
|
||||
<value>170</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.mediumJobs.numOfMapoutputCompressed</name>
|
||||
<value>16</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.mediumJobs.numOfOutputCompressed</name>
|
||||
<value>12</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.largeJobs.numOfJobs</name>
|
||||
<value>5</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>streamSort.largeJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>streamSort.largeJobs.numOfReduces</name>
|
||||
<value>370</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.largeJobs.numOfMapoutputCompressed</name>
|
||||
<value>5</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>streamSort.largeJobs.numOfOutputCompressed</name>
|
||||
<value>3</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>javaSort.smallJobs.numOfJobs</name>
|
||||
<value>8,2</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>javaSort.smallJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}/{part-*-00000,part-*-00001,part-*-00002}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>javaSort.smallJobs.numOfReduces</name>
|
||||
<value>15,70</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.smallJobs.numOfMapoutputCompressed</name>
|
||||
<value>10</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.smallJobs.numOfOutputCompressed</name>
|
||||
<value>3</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>javaSort.mediumJobs.numOfJobs</name>
|
||||
<value>4,2</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>javaSort.mediumJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>javaSort.mediumJobs.numOfReduces</name>
|
||||
<value>170,70</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.mediumJobs.numOfMapoutputCompressed</name>
|
||||
<value>6</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.mediumJobs.numOfOutputCompressed</name>
|
||||
<value>4</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.largeJobs.numOfJobs</name>
|
||||
<value>3</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>javaSort.largeJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>javaSort.largeJobs.numOfReduces</name>
|
||||
<value>370</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.largeJobs.numOfMapoutputCompressed</name>
|
||||
<value>3</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>javaSort.largeJobs.numOfOutputCompressed</name>
|
||||
<value>2</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>combiner.smallJobs.numOfJobs</name>
|
||||
<value>11,4</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>combiner.smallJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}/{part-*-00000,part-*-00001,part-*-00002}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>combiner.smallJobs.numOfReduces</name>
|
||||
<value>10,1</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.smallJobs.numOfMapoutputCompressed</name>
|
||||
<value>15</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.smallJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>combiner.mediumJobs.numOfJobs</name>
|
||||
<value>8</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>combiner.mediumJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>combiner.mediumJobs.numOfReduces</name>
|
||||
<value>100</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.mediumJobs.numOfMapoutputCompressed</name>
|
||||
<value>8</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.mediumJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.largeJobs.numOfJobs</name>
|
||||
<value>4</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>combiner.largeJobs.inputFiles</name>
|
||||
<value>${VARINFLTEXT}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>combiner.largeJobs.numOfReduces</name>
|
||||
<value>360</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.largeJobs.numOfMapoutputCompressed</name>
|
||||
<value>4</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>combiner.largeJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.smallJobs.numOfJobs</name>
|
||||
<value>7</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>monsterQuery.smallJobs.inputFiles</name>
|
||||
<value>${FIXCOMPSEQ}/{part-*-00000,part-*-00001,part-*-00002}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>monsterQuery.smallJobs.numOfReduces</name>
|
||||
<value>5</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.smallJobs.numOfMapoutputCompressed</name>
|
||||
<value>7</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.smallJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.mediumJobs.numOfJobs</name>
|
||||
<value>5</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>monsterQuery.mediumJobs.inputFiles</name>
|
||||
<value>${FIXCOMPSEQ}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>monsterQuery.mediumJobs.numOfReduces</name>
|
||||
<value>100</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.mediumJobs.numOfMapoutputCompressed</name>
|
||||
<value>5</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.mediumJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.largeJobs.numOfJobs</name>
|
||||
<value>3</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>monsterQuery.largeJobs.inputFiles</name>
|
||||
<value>${FIXCOMPSEQ}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>monsterQuery.largeJobs.numOfReduces</name>
|
||||
<value>370</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.largeJobs.numOfMapoutputCompressed</name>
|
||||
<value>3</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>monsterQuery.largeJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>webdataScan.smallJobs.numOfJobs</name>
|
||||
<value>24</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.smallJobs.inputFiles</name>
|
||||
<value>${VARCOMPSEQ}/{part-*-00000,part-*-00001,part-*-00002}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataScan.smallJobs.numOfMapoutputCompressed</name>
|
||||
<value>24</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.smallJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.mediumJobs.numOfJobs</name>
|
||||
<value>12</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.mediumJobs.inputFiles</name>
|
||||
<value>${VARCOMPSEQ}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataScan.mediumJobs.numOfMapoutputCompressed</name>
|
||||
<value>12</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataScan.mediumJobs.numOfReduces</name>
|
||||
<value>7</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.mediumJobs.numOfOutputCompressed</name>
|
||||
<value>0</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.largeJobs.numOfJobs</name>
|
||||
<value>2</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.largeJobs.inputFiles</name>
|
||||
<value>${VARCOMPSEQ}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataScan.largeJobs.numOfMapoutputCompressed</name>
|
||||
<value>3</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataScan.largeJobs.numOfReduces</name>
|
||||
<value>70</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataScan.largeJobs.numOfOutputCompressed</name>
|
||||
<value>3</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>webdataSort.smallJobs.numOfJobs</name>
|
||||
<value>7</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataSort.smallJobs.inputFiles</name>
|
||||
<value>${VARCOMPSEQ}/{part-*-00000,part-*-00001,part-*-00002}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataSort.smallJobs.numOfReduces</name>
|
||||
<value>15</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.smallJobs.numOfMapoutputCompressed</name>
|
||||
<value>7</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.smallJobs.numOfOutputCompressed</name>
|
||||
<value>7</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>webdataSort.mediumJobs.numOfJobs</name>
|
||||
<value>4</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataSort.mediumJobs.inputFiles</name>
|
||||
<value>${VARCOMPSEQ}/{part-*-000*0,part-*-000*1,part-*-000*2}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataSort.mediumJobs.numOfReduces</name>
|
||||
<value>170</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.mediumJobs.numOfMapoutputCompressed</name>
|
||||
<value>4</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.mediumJobs.numOfOutputCompressed</name>
|
||||
<value>4</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.largeJobs.numOfJobs</name>
|
||||
<value>1</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataSort.largeJobs.inputFiles</name>
|
||||
<value>${VARCOMPSEQ}</value>
|
||||
<description></description>
|
||||
</property>
|
||||
<property>
|
||||
<name>webdataSort.largeJobs.numOfReduces</name>
|
||||
<value>800</value>
|
||||
<description></description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.largeJobs.numOfMapoutputCompressed</name>
|
||||
<value>1</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>webdataSort.largeJobs.numOfOutputCompressed</name>
|
||||
<value>1</value>
|
||||
<description> </description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
|
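The per-size settings above are read positionally: a comma-separated numOfJobs list (for example javaSort.smallJobs.numOfJobs = 8,2) pairs index-by-index with the matching numOfReduces list (15,70), so 8 jobs run with 15 reducers each and 2 jobs with 70. A minimal sketch of that pairing against the stock Configuration API; only the property names come from the file above, the class name and defaults are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class GridmixConfigPairing {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // For this sketch, gridmix_config.xml is assumed to sit in the working directory.
    conf.addResource(new Path("gridmix_config.xml"));
    String[] jobs = conf.getStrings("javaSort.smallJobs.numOfJobs", "8", "2");
    String[] reduces = conf.getStrings("javaSort.smallJobs.numOfReduces", "15", "70");
    // jobs[i] jobs are launched with reduces[i] reduce tasks each.
    for (int i = 0; i < jobs.length; i++) {
      System.out.println(jobs[i] + " javaSort small jobs x " + reduces[i] + " reducers");
    }
  }
}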
@ -1,37 +0,0 @@
#!/usr/bin/env bash

##############################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#####################################################################

## Environment configuration

GRID_DIR=`dirname "$0"`
GRID_DIR=`cd "$GRID_DIR"; pwd`
source $GRID_DIR/gridmix-env-2

Date=`date +%F-%H-%M-%S-%N`
echo $Date > $1_start.out

export HADOOP_CLASSPATH=${APP_JAR}:${EXAMPLE_JAR}:${STREAMING_JAR}
export LIBJARS=${APP_JAR},${EXAMPLE_JAR},${STREAMING_JAR}
${HADOOP_PREFIX}/bin/hadoop jar gridmix.jar org.apache.hadoop.mapreduce.GridMixRunner -libjars ${LIBJARS}

Date=`date +%F-%H-%M-%S-%N`
echo $Date > $1_end.out
@ -1,85 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class CombinerJobCreator {

  public static Job createJob(String[] args) throws Exception {
    Configuration conf = new Configuration();
    int numReduces = 1;
    String indir = null;
    String outdir = null;
    boolean mapoutputCompressed = false;
    boolean outputCompressed = false;
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i])) {
          numReduces = Integer.parseInt(args[++i]);
        } else if ("-indir".equals(args[i])) {
          indir = args[++i];
        } else if ("-outdir".equals(args[i])) {
          outdir = args[++i];
        } else if ("-mapoutputCompressed".equals(args[i])) {
          mapoutputCompressed = Boolean.valueOf(args[++i]).booleanValue();
        } else if ("-outputCompressed".equals(args[i])) {
          outputCompressed = Boolean.valueOf(args[++i]).booleanValue();
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return null;
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from "
            + args[i - 1]);
        return null;
      }
    }
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
    conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);

    Job job = new Job(conf);
    job.setJobName("GridmixCombinerJob");

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TokenCounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setNumReduceTasks(numReduces);
    if (indir != null) {
      FileInputFormat.setInputPaths(job, indir);
    }
    if (outdir != null) {
      FileOutputFormat.setOutputPath(job, new Path(outdir));
    }
    return job;
  }
}
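A hypothetical driver for the createJob helper above; the paths and flag values are made up, but the flags themselves are the ones CombinerJobCreator's argument parser recognizes.

package org.apache.hadoop.mapreduce;

public class CombinerJobDriver {
  public static void main(String[] args) throws Exception {
    // Illustrative arguments only; any HDFS input/output paths would do.
    String[] jobArgs = {
        "-r", "10",
        "-indir", "/gridmix/data/SortUncompressed",
        "-outdir", "/tmp/gridmix-combiner-out",
        "-mapoutputCompressed", "true",
        "-outputCompressed", "false"
    };
    Job job = CombinerJobCreator.createJob(jobArgs);
    if (job == null) {
      System.exit(1);            // argument parsing failed
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}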
@ -1,100 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.mapreduce;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.Stack;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
|
||||
public class GenericMRLoadJobCreator extends GenericMRLoadGenerator {
|
||||
|
||||
public static Job createJob(String[] argv, boolean mapoutputCompressed,
|
||||
boolean outputCompressed) throws Exception {
|
||||
|
||||
Job job = new Job();
|
||||
job.setJarByClass(GenericMRLoadGenerator.class);
|
||||
job.setMapperClass(SampleMapper.class);
|
||||
job.setReducerClass(SampleReducer.class);
|
||||
if (!parseArgs(argv, job)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (null == FileOutputFormat.getOutputPath(job)) {
|
||||
// No output dir? No writes
|
||||
job.setOutputFormatClass(NullOutputFormat.class);
|
||||
}
|
||||
|
||||
Configuration conf = job.getConfiguration();
|
||||
if (0 == FileInputFormat.getInputPaths(job).length) {
|
||||
// No input dir? Generate random data
|
||||
System.err.println("No input path; ignoring InputFormat");
|
||||
confRandom(job);
|
||||
} else if (null != conf.getClass(INDIRECT_INPUT_FORMAT, null)) {
|
||||
// specified IndirectInputFormat? Build src list
|
||||
JobClient jClient = new JobClient(conf);
|
||||
Path sysdir = jClient.getSystemDir();
|
||||
Random r = new Random();
|
||||
Path indirInputFile = new Path(sysdir, Integer.toString(r
|
||||
.nextInt(Integer.MAX_VALUE), 36)
|
||||
+ "_files");
|
||||
conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
|
||||
SequenceFile.Writer writer = SequenceFile.createWriter(sysdir
|
||||
.getFileSystem(conf), conf, indirInputFile, LongWritable.class,
|
||||
Text.class, SequenceFile.CompressionType.NONE);
|
||||
try {
|
||||
for (Path p : FileInputFormat.getInputPaths(job)) {
|
||||
FileSystem fs = p.getFileSystem(conf);
|
||||
Stack<Path> pathstack = new Stack<Path>();
|
||||
pathstack.push(p);
|
||||
while (!pathstack.empty()) {
|
||||
for (FileStatus stat : fs.listStatus(pathstack.pop())) {
|
||||
if (stat.isDirectory()) {
|
||||
if (!stat.getPath().getName().startsWith("_")) {
|
||||
pathstack.push(stat.getPath());
|
||||
}
|
||||
} else {
|
||||
writer.sync();
|
||||
writer.append(new LongWritable(stat.getLen()), new Text(stat
|
||||
.getPath().toUri().toString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
|
||||
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
|
||||
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
|
||||
return job;
|
||||
}
|
||||
|
||||
}
|
|
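When an indirect input format is configured, the code above writes a SequenceFile of (file length, file URI) records under the JobTracker system directory. A small, hypothetical inspection tool for such a listing; the path argument is assumed to be whatever was stored under INDIRECT_INPUT_FILE.

package org.apache.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class IndirectInputDump {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path listing = new Path(args[0]);          // e.g. <sysdir>/<random>_files
    FileSystem fs = listing.getFileSystem(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, listing, conf);
    LongWritable len = new LongWritable();
    Text uri = new Text();
    try {
      // Each record is (source file length, source file URI), as written above.
      while (reader.next(len, uri)) {
        System.out.println(len.get() + "\t" + uri);
      }
    } finally {
      reader.close();
    }
  }
}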
@ -1,680 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.mapreduce;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.examples.Sort;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.TaskReport;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
|
||||
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
|
||||
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
|
||||
|
||||
import org.apache.hadoop.streaming.StreamJob;
|
||||
|
||||
public class GridMixRunner {
|
||||
|
||||
private static final int NUM_OF_LARGE_JOBS_PER_CLASS = 0;
|
||||
private static final int NUM_OF_MEDIUM_JOBS_PER_CLASS = 0;
|
||||
private static final int NUM_OF_SMALL_JOBS_PER_CLASS = 0;
|
||||
|
||||
private static final int NUM_OF_REDUCERS_FOR_SMALL_JOB = 15;
|
||||
private static final int NUM_OF_REDUCERS_FOR_MEDIUM_JOB = 170;
|
||||
private static final int NUM_OF_REDUCERS_FOR_LARGE_JOB = 370;
|
||||
|
||||
private static final String GRID_MIX_DATA = "/gridmix/data";
|
||||
private static final String VARCOMPSEQ =
|
||||
GRID_MIX_DATA + "/WebSimulationBlockCompressed";
|
||||
private static final String FIXCOMPSEQ =
|
||||
GRID_MIX_DATA + "/MonsterQueryBlockCompressed";
|
||||
private static final String VARINFLTEXT =
|
||||
GRID_MIX_DATA + "/SortUncompressed";
|
||||
|
||||
private static final String GRIDMIXCONFIG = "gridmix_config.xml";
|
||||
|
||||
private static final Configuration config = initConfig();
|
||||
private static final FileSystem fs = initFs();
|
||||
private final JobControl gridmix;
|
||||
private int numOfJobs = 0;
|
||||
|
||||
private enum Size {
|
||||
SMALL("small", // name
|
||||
"/{part-*-00000,part-*-00001,part-*-00002}", // default input subset
|
||||
NUM_OF_SMALL_JOBS_PER_CLASS, // default num jobs
|
||||
NUM_OF_REDUCERS_FOR_SMALL_JOB), // default num reducers
|
||||
MEDIUM("medium", // name
|
||||
"/{part-*-000*0, part-*-000*1, part-*-000*2}", // default input subset
|
||||
NUM_OF_MEDIUM_JOBS_PER_CLASS, // default num jobs
|
||||
NUM_OF_REDUCERS_FOR_MEDIUM_JOB), // default num reducers
|
||||
LARGE("large", // name
|
||||
"", // default input subset
|
||||
NUM_OF_LARGE_JOBS_PER_CLASS, // default num jobs
|
||||
NUM_OF_REDUCERS_FOR_LARGE_JOB); // default num reducers
|
||||
|
||||
private final String str;
|
||||
private final String path;
|
||||
private final int numJobs;
|
||||
private final int numReducers;
|
||||
Size(String str, String path, int numJobs, int numReducers) {
|
||||
this.str = str;
|
||||
this.path = path;
|
||||
this.numJobs = numJobs;
|
||||
this.numReducers = numReducers;
|
||||
}
|
||||
public String defaultPath(String base) {
|
||||
return base + path;
|
||||
}
|
||||
public int defaultNumJobs() {
|
||||
return numJobs;
|
||||
}
|
||||
public int defaultNumReducers() {
|
||||
return numReducers;
|
||||
}
|
||||
public String toString() {
|
||||
return str;
|
||||
}
|
||||
}
|
||||
|
||||
private enum GridMixJob {
|
||||
STREAMSORT("streamSort") {
|
||||
public void addJob(int numReducers, boolean mapoutputCompressed,
|
||||
boolean outputCompressed, Size size, JobControl gridmix) {
|
||||
final String prop = String.format("streamSort.%sJobs.inputFiles", size);
|
||||
final String indir =
|
||||
getInputDirsFor(prop, size.defaultPath(VARINFLTEXT));
|
||||
final String outdir = addTSSuffix("perf-out/stream-out-dir-" + size);
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("-input ").append(indir).append(" ");
|
||||
sb.append("-output ").append(outdir).append(" ");
|
||||
sb.append("-mapper cat ");
|
||||
sb.append("-reducer cat ");
|
||||
sb.append("-numReduceTasks ").append(numReducers);
|
||||
String[] args = sb.toString().split(" ");
|
||||
|
||||
clearDir(outdir);
|
||||
try {
|
||||
Configuration conf = StreamJob.createJob(args);
|
||||
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
|
||||
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
|
||||
Job job = new Job(conf, "GridmixStreamingSorter." + size);
|
||||
ControlledJob cjob = new ControlledJob(job, null);
|
||||
gridmix.addJob(cjob);
|
||||
} catch (Exception ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
JAVASORT("javaSort") {
|
||||
public void addJob(int numReducers, boolean mapoutputCompressed,
|
||||
boolean outputCompressed, Size size, JobControl gridmix) {
|
||||
final String prop = String.format("javaSort.%sJobs.inputFiles", size);
|
||||
final String indir = getInputDirsFor(prop,
|
||||
size.defaultPath(VARINFLTEXT));
|
||||
final String outdir = addTSSuffix("perf-out/sort-out-dir-" + size);
|
||||
|
||||
clearDir(outdir);
|
||||
|
||||
try {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setBoolean(FileOutputFormat.COMPRESS, outputCompressed);
|
||||
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, mapoutputCompressed);
|
||||
Job job = new Job(conf);
|
||||
job.setJarByClass(Sort.class);
|
||||
job.setJobName("GridmixJavaSorter." + size);
|
||||
job.setMapperClass(Mapper.class);
|
||||
job.setReducerClass(Reducer.class);
|
||||
|
||||
job.setNumReduceTasks(numReducers);
|
||||
job.setInputFormatClass(KeyValueTextInputFormat.class);
|
||||
job.setOutputFormatClass(TextOutputFormat.class);
|
||||
|
||||
job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
|
||||
job.setOutputValueClass(org.apache.hadoop.io.Text.class);
|
||||
|
||||
FileInputFormat.addInputPaths(job, indir);
|
||||
FileOutputFormat.setOutputPath(job, new Path(outdir));
|
||||
|
||||
ControlledJob cjob = new ControlledJob(job, null);
|
||||
gridmix.addJob(cjob);
|
||||
} catch (Exception ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
WEBDATASCAN("webdataScan") {
|
||||
public void addJob(int numReducers, boolean mapoutputCompressed,
|
||||
boolean outputCompressed, Size size, JobControl gridmix) {
|
||||
final String prop = String.format("webdataScan.%sJobs.inputFiles", size);
|
||||
final String indir = getInputDirsFor(prop, size.defaultPath(VARCOMPSEQ));
|
||||
final String outdir = addTSSuffix("perf-out/webdata-scan-out-dir-"
|
||||
+ size);
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("-keepmap 0.2 ");
|
||||
sb.append("-keepred 5 ");
|
||||
sb.append("-inFormat");
|
||||
sb.append(" org.apache.hadoop.mapreduce." +
|
||||
"lib.input.SequenceFileInputFormat ");
|
||||
sb.append("-outFormat");
|
||||
sb.append(" org.apache.hadoop.mapreduce." +
|
||||
"lib.output.SequenceFileOutputFormat ");
|
||||
sb.append("-outKey org.apache.hadoop.io.Text ");
|
||||
sb.append("-outValue org.apache.hadoop.io.Text ");
|
||||
sb.append("-indir ").append(indir).append(" ");
|
||||
sb.append("-outdir ").append(outdir).append(" ");
|
||||
sb.append("-r ").append(numReducers);
|
||||
|
||||
String[] args = sb.toString().split(" ");
|
||||
clearDir(outdir);
|
||||
try {
|
||||
Job job = GenericMRLoadJobCreator.createJob(
|
||||
args, mapoutputCompressed, outputCompressed);
|
||||
job.setJobName("GridmixWebdatascan." + size);
|
||||
ControlledJob cjob = new ControlledJob(job, null);
|
||||
gridmix.addJob(cjob);
|
||||
} catch (Exception ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
COMBINER("combiner") {
|
||||
public void addJob(int numReducers, boolean mapoutputCompressed,
|
||||
boolean outputCompressed, Size size, JobControl gridmix) {
|
||||
final String prop = String.format("combiner.%sJobs.inputFiles", size);
|
||||
final String indir = getInputDirsFor(prop, size.defaultPath(VARCOMPSEQ));
|
||||
final String outdir = addTSSuffix("perf-out/combiner-out-dir-" + size);
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("-r ").append(numReducers).append(" ");
|
||||
sb.append("-indir ").append(indir).append(" ");
|
||||
sb.append("-outdir ").append(outdir).append(" ");
|
||||
sb.append("-mapoutputCompressed ");
|
||||
sb.append(mapoutputCompressed).append(" ");
|
||||
sb.append("-outputCompressed ").append(outputCompressed);
|
||||
|
||||
String[] args = sb.toString().split(" ");
|
||||
clearDir(outdir);
|
||||
try {
|
||||
Job job = CombinerJobCreator.createJob(args);
|
||||
job.setJobName("GridmixCombinerJob." + size);
|
||||
ControlledJob cjob = new ControlledJob(job, null);
|
||||
gridmix.addJob(cjob);
|
||||
} catch (Exception ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
MONSTERQUERY("monsterQuery") {
|
||||
public void addJob(int numReducers, boolean mapoutputCompressed,
|
||||
boolean outputCompressed, Size size, JobControl gridmix) {
|
||||
final String prop =
|
||||
String.format("monsterQuery.%sJobs.inputFiles", size);
|
||||
final String indir = getInputDirsFor(prop, size.defaultPath(FIXCOMPSEQ));
|
||||
final String outdir = addTSSuffix("perf-out/mq-out-dir-" + size);
|
||||
int iter = 3;
|
||||
try {
|
||||
ControlledJob pjob = null;
|
||||
ControlledJob cjob = null;
|
||||
for (int i = 0; i < iter; i++) {
|
||||
String outdirfull = outdir + "." + i;
|
||||
String indirfull = (0 == i) ? indir : outdir + "." + (i - 1);
|
||||
Path outfile = new Path(outdirfull);
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("-keepmap 10 ");
|
||||
sb.append("-keepred 40 ");
|
||||
sb.append("-inFormat");
|
||||
sb.append(" org.apache.hadoop.mapreduce." +
|
||||
"lib.input.SequenceFileInputFormat ");
|
||||
sb.append("-outFormat");
|
||||
sb.append(" org.apache.hadoop.mapreduce." +
|
||||
"lib.output.SequenceFileOutputFormat ");
|
||||
sb.append("-outKey org.apache.hadoop.io.Text ");
|
||||
sb.append("-outValue org.apache.hadoop.io.Text ");
|
||||
sb.append("-indir ").append(indirfull).append(" ");
|
||||
sb.append("-outdir ").append(outdirfull).append(" ");
|
||||
sb.append("-r ").append(numReducers);
|
||||
String[] args = sb.toString().split(" ");
|
||||
|
||||
try {
|
||||
fs.delete(outfile, true);
|
||||
} catch (IOException ex) {
|
||||
System.out.println(ex.toString());
|
||||
}
|
||||
|
||||
Job job = GenericMRLoadJobCreator.createJob(
|
||||
args, mapoutputCompressed, outputCompressed);
|
||||
job.setJobName("GridmixMonsterQuery." + size);
|
||||
cjob = new ControlledJob(job, null);
|
||||
if (pjob != null) {
|
||||
cjob.addDependingJob(pjob);
|
||||
}
|
||||
gridmix.addJob(cjob);
|
||||
pjob = cjob;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
WEBDATASORT("webdataSort") {
|
||||
public void addJob(int numReducers, boolean mapoutputCompressed,
|
||||
boolean outputCompressed, Size size, JobControl gridmix) {
|
||||
final String prop = String.format("webdataSort.%sJobs.inputFiles", size);
|
||||
final String indir = getInputDirsFor(prop, size.defaultPath(VARCOMPSEQ));
|
||||
final String outdir =
|
||||
addTSSuffix("perf-out/webdata-sort-out-dir-" + size);
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("-keepmap 100 ");
|
||||
sb.append("-keepred 100 ");
|
||||
sb.append("-inFormat org.apache.hadoop.mapreduce." +
|
||||
"lib.input.SequenceFileInputFormat ");
|
||||
sb.append("-outFormat org.apache.hadoop.mapreduce." +
|
||||
"lib.output.SequenceFileOutputFormat ");
|
||||
sb.append("-outKey org.apache.hadoop.io.Text ");
|
||||
sb.append("-outValue org.apache.hadoop.io.Text ");
|
||||
sb.append("-indir ").append(indir).append(" ");
|
||||
sb.append("-outdir ").append(outdir).append(" ");
|
||||
sb.append("-r ").append(numReducers);
|
||||
|
||||
String[] args = sb.toString().split(" ");
|
||||
clearDir(outdir);
|
||||
try {
|
||||
Job job = GenericMRLoadJobCreator.createJob(
|
||||
args, mapoutputCompressed, outputCompressed);
|
||||
job.setJobName("GridmixWebdataSort." + size);
|
||||
ControlledJob cjob = new ControlledJob(job, null);
|
||||
gridmix.addJob(cjob);
|
||||
} catch (Exception ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
private final String name;
|
||||
GridMixJob(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
public abstract void addJob(int numReducers, boolean mapComp,
|
||||
boolean outComp, Size size, JobControl gridmix);
|
||||
}
|
||||
|
||||
public GridMixRunner() throws IOException {
|
||||
gridmix = new JobControl("GridMix");
|
||||
if (null == config || null == fs) {
|
||||
throw new IOException("Bad configuration. Cannot continue.");
|
||||
}
|
||||
}
|
||||
|
||||
private static FileSystem initFs() {
|
||||
try {
|
||||
return FileSystem.get(config);
|
||||
} catch (Exception e) {
|
||||
System.out.println("fs initialization error: " + e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static Configuration initConfig() {
|
||||
Configuration conf = new Configuration();
|
||||
String configFile = System.getenv("GRIDMIXCONFIG");
|
||||
if (configFile == null) {
|
||||
String configDir = System.getProperty("user.dir");
|
||||
if (configDir == null) {
|
||||
configDir = ".";
|
||||
}
|
||||
configFile = configDir + "/" + GRIDMIXCONFIG;
|
||||
}
|
||||
try {
|
||||
Path fileResource = new Path(configFile);
|
||||
conf.addResource(fileResource);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error reading config file " + configFile + ":" +
|
||||
e.getMessage());
|
||||
return null;
|
||||
}
|
||||
return conf;
|
||||
}
|
||||
|
||||
private static int[] getInts(Configuration conf, String name, int defaultV) {
|
||||
String[] vals = conf.getStrings(name, String.valueOf(defaultV));
|
||||
int[] results = new int[vals.length];
|
||||
for (int i = 0; i < vals.length; ++i) {
|
||||
results[i] = Integer.parseInt(vals[i]);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private static String getInputDirsFor(String jobType, String defaultIndir) {
|
||||
String inputFile[] = config.getStrings(jobType, defaultIndir);
|
||||
StringBuffer indirBuffer = new StringBuffer();
|
||||
for (int i = 0; i < inputFile.length; i++) {
|
||||
indirBuffer = indirBuffer.append(inputFile[i]).append(",");
|
||||
}
|
||||
return indirBuffer.substring(0, indirBuffer.length() - 1);
|
||||
}
|
||||
|
||||
private static void clearDir(String dir) {
|
||||
try {
|
||||
Path outfile = new Path(dir);
|
||||
fs.delete(outfile, true);
|
||||
} catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
System.out.println("delete file error:");
|
||||
System.out.println(ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
private boolean select(int total, int selected, int index) {
|
||||
if (selected <= 0 || selected >= total) {
|
||||
return selected > 0;
|
||||
}
|
||||
int step = total / selected;
|
||||
int effectiveTotal = total - total % selected;
|
||||
return (index <= effectiveTotal - 1 && (index % step == 0));
|
||||
}
|
||||
|
||||
private static String addTSSuffix(String s) {
|
||||
Date date = Calendar.getInstance().getTime();
|
||||
String ts = String.valueOf(date.getTime());
|
||||
return s + "_" + ts;
|
||||
}
|
||||
|
||||
private void addJobs(GridMixJob job, Size size) throws IOException {
|
||||
final String prefix = String.format("%s.%sJobs", job.getName(), size);
|
||||
int[] numJobs = getInts(config, prefix + ".numOfJobs",
|
||||
size.defaultNumJobs());
|
||||
int[] numReduces = getInts(config, prefix + ".numOfReduces",
|
||||
size.defaultNumReducers());
|
||||
if (numJobs.length != numReduces.length) {
|
||||
throw new IOException("Configuration error: " +
|
||||
prefix + ".numOfJobs must match " +
|
||||
prefix + ".numOfReduces");
|
||||
}
|
||||
int numMapoutputCompressed = config.getInt(
|
||||
prefix + ".numOfMapoutputCompressed", 0);
|
||||
int numOutputCompressed = config.getInt(
|
||||
prefix + ".numOfOutputCompressed", size.defaultNumJobs());
|
||||
int totalJobs = 0;
|
||||
for (int nJob : numJobs) {
|
||||
totalJobs += nJob;
|
||||
}
|
||||
int currentIndex = 0;
|
||||
for (int i = 0; i < numJobs.length; ++i) {
|
||||
for (int j = 0; j < numJobs[i]; ++j) {
|
||||
boolean mapoutputComp =
|
||||
select(totalJobs, numMapoutputCompressed, currentIndex);
|
||||
boolean outputComp =
|
||||
select(totalJobs, numOutputCompressed, currentIndex);
|
||||
job.addJob(numReduces[i], mapoutputComp, outputComp, size, gridmix);
|
||||
++numOfJobs;
|
||||
++currentIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addAllJobs(GridMixJob job) throws IOException {
|
||||
for (Size size : EnumSet.allOf(Size.class)) {
|
||||
addJobs(job, size);
|
||||
}
|
||||
}
|
||||
|
||||
public void addjobs() throws IOException {
|
||||
for (GridMixJob jobtype : EnumSet.allOf(GridMixJob.class)) {
|
||||
addAllJobs(jobtype);
|
||||
}
|
||||
System.out.println("total " +
|
||||
gridmix.getWaitingJobList().size() + " jobs");
|
||||
}
|
||||
|
||||
class SimpleStats {
|
||||
long minValue;
|
||||
long maxValue;
|
||||
long averageValue;
|
||||
long mediumValue;
|
||||
int n;
|
||||
|
||||
SimpleStats(long[] data) {
|
||||
Arrays.sort(data);
|
||||
n = data.length;
|
||||
minValue = data[0];
|
||||
maxValue = data[n - 1];
|
||||
mediumValue = data[n / 2];
|
||||
long total = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
total += data[i];
|
||||
}
|
||||
averageValue = total / n;
|
||||
}
|
||||
}
|
||||
|
||||
class TaskExecutionStats {
|
||||
TreeMap<String, SimpleStats> theStats;
|
||||
|
||||
void computeStats(String name, long[] data) {
|
||||
SimpleStats v = new SimpleStats(data);
|
||||
theStats.put(name, v);
|
||||
}
|
||||
|
||||
TaskExecutionStats() {
|
||||
theStats = new TreeMap<String, SimpleStats>();
|
||||
}
|
||||
}
|
||||
|
||||
private TreeMap<String, String> getStatForJob(ControlledJob cjob) {
|
||||
TreeMap<String, String> retv = new TreeMap<String, String>();
|
||||
JobID mapreduceID = cjob.getMapredJobID();
|
||||
Job job = cjob.getJob();
|
||||
String jobName = job.getJobName();
|
||||
retv.put("JobId", mapreduceID.toString());
|
||||
retv.put("JobName", jobName);
|
||||
|
||||
TaskExecutionStats theTaskExecutionStats = new TaskExecutionStats();
|
||||
|
||||
try {
|
||||
Counters jobCounters = job.getCounters();
|
||||
Iterator<CounterGroup> groups = jobCounters.iterator();
|
||||
while (groups.hasNext()) {
|
||||
CounterGroup g = groups.next();
|
||||
String gn = g.getName();
|
||||
Iterator<Counter> cs = g.iterator();
|
||||
while (cs.hasNext()) {
|
||||
Counter c = cs.next();
|
||||
String n = c.getName();
|
||||
long v = c.getValue();
|
||||
retv.put(mapreduceID + "." + jobName + "." + gn + "." + n, "" + v);
|
||||
}
|
||||
}
|
||||
JobClient jc = new JobClient(job.getConfiguration());
|
||||
TaskReport[] maps = jc
|
||||
.getMapTaskReports((org.apache.hadoop.mapred.JobID)mapreduceID);
|
||||
TaskReport[] reduces = jc
|
||||
.getReduceTaskReports((org.apache.hadoop.mapred.JobID)mapreduceID);
|
||||
retv.put(mapreduceID + "." + jobName + "." + "numOfMapTasks", ""
|
||||
+ maps.length);
|
||||
retv.put(mapreduceID + "." + jobName + "." + "numOfReduceTasks", ""
|
||||
+ reduces.length);
|
||||
long[] mapExecutionTimes = new long[maps.length];
|
||||
long[] reduceExecutionTimes = new long[reduces.length];
|
||||
Date date = Calendar.getInstance().getTime();
|
||||
long startTime = date.getTime();
|
||||
long finishTime = 0;
|
||||
for (int j = 0; j < maps.length; j++) {
|
||||
TaskReport map = maps[j];
|
||||
long thisStartTime = map.getStartTime();
|
||||
long thisFinishTime = map.getFinishTime();
|
||||
if (thisStartTime > 0 && thisFinishTime > 0) {
|
||||
mapExecutionTimes[j] = thisFinishTime - thisStartTime;
|
||||
}
|
||||
if (startTime > thisStartTime) {
|
||||
startTime = thisStartTime;
|
||||
}
|
||||
if (finishTime < thisFinishTime) {
|
||||
finishTime = thisFinishTime;
|
||||
}
|
||||
}
|
||||
|
||||
theTaskExecutionStats.computeStats("mapExecutionTimeStats",
|
||||
mapExecutionTimes);
|
||||
|
||||
retv.put(mapreduceID + "." + jobName + "." + "mapStartTime", ""
|
||||
+ startTime);
|
||||
retv.put(mapreduceID + "." + jobName + "." + "mapEndTime", ""
|
||||
+ finishTime);
|
||||
for (int j = 0; j < reduces.length; j++) {
|
||||
TaskReport reduce = reduces[j];
|
||||
long thisStartTime = reduce.getStartTime();
|
||||
long thisFinishTime = reduce.getFinishTime();
|
||||
if (thisStartTime > 0 && thisFinishTime > 0) {
|
||||
reduceExecutionTimes[j] = thisFinishTime - thisStartTime;
|
||||
}
|
||||
if (startTime > thisStartTime) {
|
||||
startTime = thisStartTime;
|
||||
}
|
||||
if (finishTime < thisFinishTime) {
|
||||
finishTime = thisFinishTime;
|
||||
}
|
||||
}
|
||||
|
||||
theTaskExecutionStats.computeStats("reduceExecutionTimeStats",
|
||||
reduceExecutionTimes);
|
||||
|
||||
retv.put(mapreduceID + "." + jobName + "." + "reduceStartTime", ""
|
||||
+ startTime);
|
||||
retv.put(mapreduceID + "." + jobName + "." + "reduceEndTime", ""
|
||||
+ finishTime);
|
||||
if (cjob.getJobState() == ControlledJob.State.SUCCESS) {
|
||||
retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "successful");
|
||||
} else if (cjob.getJobState() == ControlledJob.State.FAILED) {
|
||||
retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "failed");
|
||||
} else {
|
||||
retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "unknown");
|
||||
}
|
||||
Iterator<Entry<String, SimpleStats>> entries =
|
||||
theTaskExecutionStats.theStats.entrySet().iterator();
|
||||
while (entries.hasNext()) {
|
||||
Entry<String, SimpleStats> e = entries.next();
|
||||
SimpleStats v = e.getValue();
|
||||
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "min",
|
||||
"" + v.minValue);
|
||||
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "max",
|
||||
"" + v.maxValue);
|
||||
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "."
|
||||
+ "medium", "" + v.mediumValue);
|
||||
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "avg",
|
||||
"" + v.averageValue);
|
||||
retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "."
|
||||
+ "numOfItems", "" + v.n);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return retv;
|
||||
}
|
||||
|
||||
private void printJobStat(TreeMap<String, String> stat) {
|
||||
Iterator<Entry<String, String>> entries = stat.entrySet().iterator();
|
||||
while (entries.hasNext()) {
|
||||
Entry<String, String> e = entries.next();
|
||||
System.out.println(e.getKey() + "\t" + e.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
private void printStatsForJobs(List<ControlledJob> jobs) {
|
||||
for (int i = 0; i < jobs.size(); i++) {
|
||||
printJobStat(getStatForJob(jobs.get(i)));
|
||||
}
|
||||
}
|
||||
|
||||
public void run() {
|
||||
|
||||
Thread theGridmixRunner = new Thread(gridmix);
|
||||
theGridmixRunner.start();
|
||||
long startTime = System.currentTimeMillis();
|
||||
while (!gridmix.allFinished()) {
|
||||
System.out.println("Jobs in waiting state: "
|
||||
+ gridmix.getWaitingJobList().size());
|
||||
System.out.println("Jobs in ready state: "
|
||||
+ gridmix.getReadyJobsList().size());
|
||||
System.out.println("Jobs in running state: "
|
||||
+ gridmix.getRunningJobList().size());
|
||||
System.out.println("Jobs in success state: "
|
||||
+ gridmix.getSuccessfulJobList().size());
|
||||
System.out.println("Jobs in failed state: "
|
||||
+ gridmix.getFailedJobList().size());
|
||||
System.out.println("\n");
|
||||
|
||||
try {
|
||||
Thread.sleep(10 * 1000);
|
||||
} catch (Exception e) {
|
||||
|
||||
}
|
||||
}
|
||||
long endTime = System.currentTimeMillis();
|
||||
List<ControlledJob> fail = gridmix.getFailedJobList();
|
||||
List<ControlledJob> succeed = gridmix.getSuccessfulJobList();
|
||||
int numOfSuccessfulJob = succeed.size();
|
||||
if (numOfSuccessfulJob > 0) {
|
||||
System.out.println(numOfSuccessfulJob + " jobs succeeded");
|
||||
printStatsForJobs(succeed);
|
||||
|
||||
}
|
||||
int numOfFailedjob = fail.size();
|
||||
if (numOfFailedjob > 0) {
|
||||
System.out.println("------------------------------- ");
|
||||
System.out.println(numOfFailedjob + " jobs failed");
|
||||
printStatsForJobs(fail);
|
||||
}
|
||||
System.out.println("GridMix results:");
|
||||
System.out.println("Total num of Jobs: " + numOfJobs);
|
||||
System.out.println("ExecutionTime: " + ((endTime-startTime) / 1000));
|
||||
gridmix.stop();
|
||||
}
|
||||
|
||||
public static void main(String argv[]) throws Exception {
|
||||
GridMixRunner gridmixRunner = new GridMixRunner();
|
||||
gridmixRunner.addjobs();
|
||||
gridmixRunner.run();
|
||||
}
|
||||
|
||||
}
|
|
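The select() helper above spreads the configured numOfMapoutputCompressed / numOfOutputCompressed counts evenly across a job class. Restated standalone for illustration (the demo class below is not part of the original code); with 10 jobs and 3 to be compressed, indices 0, 3 and 6 are chosen.

public class SelectDemo {
  // Same logic as GridMixRunner.select(), restated here for illustration.
  static boolean select(int total, int selected, int index) {
    if (selected <= 0 || selected >= total) {
      return selected > 0;
    }
    int step = total / selected;
    int effectiveTotal = total - total % selected;
    return index <= effectiveTotal - 1 && index % step == 0;
  }

  public static void main(String[] args) {
    // 10 jobs, 3 of them compressed -> jobs 0, 3 and 6 are selected.
    for (int i = 0; i < 10; i++) {
      System.out.println("job " + i + " compressed: " + select(10, 3, i));
    }
  }
}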
@ -1,73 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

COPTS=-g3 -O0 -Wall

all: ${LIBRECORDIO_BUILD_DIR}/librecordio.a

COBJS = $(addprefix ${LIBRECORDIO_BUILD_DIR}/, recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o \
	exception.o typeIDs.o fieldTypeInfo.o recordTypeInfo.o utils.o)

CCMD = $(addprefix ${LIBRECORDIO_BUILD_DIR}/, librecordio.a recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o \
	exception.o typeIDs.o fieldTypeInfo.o recordTypeInfo.o utils.o)

${LIBRECORDIO_BUILD_DIR}/librecordio.a: ${COBJS}
	ar cru ${CCMD}

${LIBRECORDIO_BUILD_DIR}/recordio.o: recordio.cc recordio.hh archive.hh
	g++ ${COPTS} -c -I${XERCESCROOT}/include -o ${LIBRECORDIO_BUILD_DIR}/recordio.o recordio.cc

${LIBRECORDIO_BUILD_DIR}/filestream.o: filestream.cc recordio.hh filestream.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/filestream.o filestream.cc

${LIBRECORDIO_BUILD_DIR}/binarchive.o: binarchive.cc recordio.hh binarchive.hh archive.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/binarchive.o binarchive.cc

${LIBRECORDIO_BUILD_DIR}/csvarchive.o: csvarchive.cc recordio.hh csvarchive.hh archive.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/csvarchive.o csvarchive.cc

${LIBRECORDIO_BUILD_DIR}/xmlarchive.o: xmlarchive.cc recordio.hh xmlarchive.hh archive.hh
	g++ ${COPTS} -c -I${XERCESCROOT}/include -o ${LIBRECORDIO_BUILD_DIR}/xmlarchive.o xmlarchive.cc

${LIBRECORDIO_BUILD_DIR}/exception.o: exception.cc exception.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/exception.o exception.cc

${LIBRECORDIO_BUILD_DIR}/typeIDs.o: typeIDs.cc typeIDs.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/typeIDs.o typeIDs.cc
${LIBRECORDIO_BUILD_DIR}/fieldTypeInfo.o: fieldTypeInfo.cc fieldTypeInfo.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/fieldTypeInfo.o fieldTypeInfo.cc
${LIBRECORDIO_BUILD_DIR}/recordTypeInfo.o: recordTypeInfo.cc recordTypeInfo.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/recordTypeInfo.o recordTypeInfo.cc
${LIBRECORDIO_BUILD_DIR}/utils.o: utils.cc utils.hh
	g++ ${COPTS} -c -o ${LIBRECORDIO_BUILD_DIR}/utils.o utils.cc
recordio.cc: recordio.hh archive.hh exception.hh
filestream.cc: recordio.hh filestream.hh
binarchive.cc: recordio.hh binarchive.hh
csvarchive.cc: recordio.hh csvarchive.hh
xmlarchive.cc: recordio.hh xmlarchive.hh
exception.cc: exception.hh
typeIDs.cc: typeIDs.hh
fieldTypeInfo.cc: fieldTypeInfo.hh
recordTypeInfo.cc: recordTypeInfo.hh
utils.cc: utils.hh

test: librecordio.a
	make -C test all

clean:
	rm -f ${LIBRECORDIO_BUILD_DIR}/*~ ${LIBRECORDIO_BUILD_DIR}/*.o ${LIBRECORDIO_BUILD_DIR}/*.a
	make -C test clean
@ -1,122 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef ARCHIVE_HH_
|
||||
#define ARCHIVE_HH_
|
||||
#include "recordio.hh"
|
||||
|
||||
namespace hadoop {
|
||||
|
||||
class Index {
|
||||
public:
|
||||
virtual bool done() = 0;
|
||||
virtual void incr() = 0;
|
||||
virtual ~Index() {}
|
||||
};
|
||||
|
||||
class IArchive {
|
||||
public:
|
||||
virtual void deserialize(int8_t& t, const char* tag) = 0;
|
||||
virtual void deserialize(bool& t, const char* tag) = 0;
|
||||
virtual void deserialize(int32_t& t, const char* tag) = 0;
|
||||
virtual void deserialize(int64_t& t, const char* tag) = 0;
|
||||
virtual void deserialize(float& t, const char* tag) = 0;
|
||||
virtual void deserialize(double& t, const char* tag) = 0;
|
||||
virtual void deserialize(std::string& t, const char* tag) = 0;
|
||||
virtual void deserialize(std::string& t, size_t& len, const char* tag) = 0;
|
||||
virtual void startRecord(hadoop::Record& s, const char* tag) = 0;
|
||||
virtual void endRecord(hadoop::Record& s, const char* tag) = 0;
|
||||
virtual Index* startVector(const char* tag) = 0;
|
||||
virtual void endVector(Index* idx, const char* tag) = 0;
|
||||
virtual Index* startMap(const char* tag) = 0;
|
||||
virtual void endMap(Index* idx, const char* tag) = 0;
|
||||
virtual void deserialize(hadoop::Record& s, const char* tag) {
|
||||
s.deserialize(*this, tag);
|
||||
}
|
||||
template <typename T>
|
||||
void deserialize(std::vector<T>& v, const char* tag) {
|
||||
Index* idx = startVector(tag);
|
||||
while (!idx->done()) {
|
||||
T t;
|
||||
deserialize(t, tag);
|
||||
v.push_back(t);
|
||||
idx->incr();
|
||||
}
|
||||
endVector(idx, tag);
|
||||
}
|
||||
template <typename K, typename V>
|
||||
void deserialize(std::map<K,V>& v, const char* tag) {
|
||||
Index* idx = startMap(tag);
|
||||
while (!idx->done()) {
|
||||
K key;
|
||||
deserialize(key, tag);
|
||||
V value;
|
||||
deserialize(value, tag);
|
||||
v[key] = value;
|
||||
idx->incr();
|
||||
}
|
||||
endMap(idx, tag);
|
||||
}
|
||||
virtual ~IArchive() {}
|
||||
};
|
||||
|
||||
class OArchive {
|
||||
public:
|
||||
virtual void serialize(int8_t t, const char* tag) = 0;
|
||||
virtual void serialize(bool t, const char* tag) = 0;
|
||||
virtual void serialize(int32_t t, const char* tag) = 0;
|
||||
virtual void serialize(int64_t t, const char* tag) = 0;
|
||||
virtual void serialize(float t, const char* tag) = 0;
|
||||
virtual void serialize(double t, const char* tag) = 0;
|
||||
virtual void serialize(const std::string& t, const char* tag) = 0;
|
||||
virtual void serialize(const std::string& t, size_t len, const char* tag) = 0;
|
||||
virtual void startRecord(const hadoop::Record& s, const char* tag) = 0;
|
||||
virtual void endRecord(const hadoop::Record& s, const char* tag) = 0;
|
||||
virtual void startVector(size_t len, const char* tag) = 0;
|
||||
virtual void endVector(size_t len, const char* tag) = 0;
|
||||
virtual void startMap(size_t len, const char* tag) = 0;
|
||||
virtual void endMap(size_t len, const char* tag) = 0;
|
||||
virtual void serialize(const hadoop::Record& s, const char* tag) {
|
||||
s.serialize(*this, tag);
|
||||
}
|
||||
template <typename T>
|
||||
void serialize(const std::vector<T>& v, const char* tag) {
|
||||
startVector(v.size(), tag);
|
||||
if (v.size()>0) {
|
||||
for (size_t cur = 0; cur<v.size(); cur++) {
|
||||
serialize(v[cur], tag);
|
||||
}
|
||||
}
|
||||
endVector(v.size(), tag);
|
||||
}
|
||||
template <typename K, typename V>
|
||||
void serialize(const std::map<K,V>& v, const char* tag) {
|
||||
startMap(v.size(), tag);
|
||||
if (v.size()>0) {
|
||||
typedef typename std::map<K,V>::const_iterator CI;
|
||||
for (CI cur = v.begin(); cur!=v.end(); cur++) {
|
||||
serialize(cur->first, tag);
|
||||
serialize(cur->second, tag);
|
||||
}
|
||||
}
|
||||
endMap(v.size(), tag);
|
||||
}
|
||||
virtual ~OArchive() {}
|
||||
};
|
||||
}; // end namespace hadoop
|
||||
#endif /*ARCHIVE_HH_*/
|
|
@ -1,330 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "binarchive.hh"
|
||||
#include <rpc/types.h>
|
||||
#include <rpc/xdr.h>
|
||||
|
||||
|
||||
using namespace hadoop;
|
||||
|
||||
template <typename T>
|
||||
static void serialize(T t, OutStream& stream)
|
||||
{
|
||||
if (sizeof(T) != stream.write((const void *) &t, sizeof(T))) {
|
||||
throw new IOException("Error serializing data.");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void deserialize(T& t, InStream& stream)
|
||||
{
|
||||
if (sizeof(T) != stream.read((void *) &t, sizeof(T))) {
|
||||
throw new IOException("Error deserializing data.");
|
||||
}
|
||||
}
|
||||
|
||||
static void serializeLong(int64_t t, OutStream& stream)
|
||||
{
|
||||
if (t >= -112 && t <= 127) {
|
||||
int8_t b = t;
|
||||
stream.write(&b, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
int8_t len = -112;
|
||||
if (t < 0) {
|
||||
t ^= 0xFFFFFFFFFFFFFFFFLL; // take one's complement
|
||||
len = -120;
|
||||
}
|
||||
|
||||
uint64_t tmp = t;
|
||||
while (tmp != 0) {
|
||||
tmp = tmp >> 8;
|
||||
len--;
|
||||
}
|
||||
|
||||
stream.write(&len, 1);
|
||||
|
||||
len = (len < -120) ? -(len + 120) : -(len + 112);
|
||||
|
||||
for (uint32_t idx = len; idx != 0; idx--) {
|
||||
uint32_t shiftbits = (idx - 1) * 8;
|
||||
uint64_t mask = 0xFFLL << shiftbits;
|
||||
uint8_t b = (t & mask) >> shiftbits;
|
||||
stream.write(&b, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void deserializeLong(int64_t& t, InStream& stream)
|
||||
{
|
||||
int8_t b;
|
||||
if (1 != stream.read(&b, 1)) {
|
||||
throw new IOException("Error deserializing long.");
|
||||
}
|
||||
if (b >= -112) {
|
||||
t = b;
|
||||
return;
|
||||
}
|
||||
bool isNegative = (b < -120);
|
||||
b = isNegative ? -(b + 120) : -(b + 112);
|
||||
uint8_t barr[b];
|
||||
if (b != stream.read(barr, b)) {
|
||||
throw new IOException("Error deserializing long.");
|
||||
}
|
||||
t = 0;
|
||||
for (int idx = 0; idx < b; idx++) {
|
||||
t = t << 8;
|
||||
t |= (barr[idx] & 0xFF);
|
||||
}
|
||||
if (isNegative) {
|
||||
t ^= 0xFFFFFFFFFFFFFFFFLL;
|
||||
}
|
||||
}
|
||||
|
||||
static void serializeInt(int32_t t, OutStream& stream)
|
||||
{
|
||||
int64_t longVal = t;
|
||||
::serializeLong(longVal, stream);
|
||||
}
|
||||
|
||||
static void deserializeInt(int32_t& t, InStream& stream)
|
||||
{
|
||||
int64_t longVal;
|
||||
::deserializeLong(longVal, stream);
|
||||
t = longVal;
|
||||
}
|
||||
|
||||
static void serializeFloat(float t, OutStream& stream)
|
||||
{
|
||||
char buf[sizeof(float)];
|
||||
XDR xdrs;
|
||||
xdrmem_create(&xdrs, buf, sizeof(float), XDR_ENCODE);
|
||||
xdr_float(&xdrs, &t);
|
||||
stream.write(buf, sizeof(float));
|
||||
}
|
||||
|
||||
static void deserializeFloat(float& t, InStream& stream)
|
||||
{
|
||||
char buf[sizeof(float)];
|
||||
if (sizeof(float) != stream.read(buf, sizeof(float))) {
|
||||
throw new IOException("Error deserializing float.");
|
||||
}
|
||||
XDR xdrs;
|
||||
xdrmem_create(&xdrs, buf, sizeof(float), XDR_DECODE);
|
||||
xdr_float(&xdrs, &t);
|
||||
}
|
||||
|
||||
static void serializeDouble(double t, OutStream& stream)
|
||||
{
|
||||
char buf[sizeof(double)];
|
||||
XDR xdrs;
|
||||
xdrmem_create(&xdrs, buf, sizeof(double), XDR_ENCODE);
|
||||
xdr_double(&xdrs, &t);
|
||||
stream.write(buf, sizeof(double));
|
||||
}
|
||||
|
||||
static void deserializeDouble(double& t, InStream& stream)
|
||||
{
|
||||
char buf[sizeof(double)];
|
||||
stream.read(buf, sizeof(double));
|
||||
XDR xdrs;
|
||||
xdrmem_create(&xdrs, buf, sizeof(double), XDR_DECODE);
|
||||
xdr_double(&xdrs, &t);
|
||||
}
|
||||
|
||||
static void serializeString(const std::string& t, OutStream& stream)
|
||||
{
|
||||
::serializeInt(t.length(), stream);
|
||||
if (t.length() > 0) {
|
||||
stream.write(t.data(), t.length());
|
||||
}
|
||||
}
|
||||
|
||||
static void deserializeString(std::string& t, InStream& stream)
|
||||
{
|
||||
int32_t len = 0;
|
||||
::deserializeInt(len, stream);
|
||||
if (len > 0) {
|
||||
// resize the string to the right length
|
||||
t.resize(len);
|
||||
// read into the string in 64k chunks
|
||||
const int bufSize = 65536;
|
||||
int offset = 0;
|
||||
char buf[bufSize];
|
||||
while (len > 0) {
|
||||
int chunkLength = len > bufSize ? bufSize : len;
|
||||
stream.read((void *)buf, chunkLength);
|
||||
t.replace(offset, chunkLength, buf, chunkLength);
|
||||
offset += chunkLength;
|
||||
len -= chunkLength;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(int8_t& t, const char* tag)
|
||||
{
|
||||
::deserialize(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(bool& t, const char* tag)
|
||||
{
|
||||
::deserialize(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(int32_t& t, const char* tag)
|
||||
{
|
||||
int64_t longVal = 0LL;
|
||||
::deserializeLong(longVal, stream);
|
||||
t = longVal;
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(int64_t& t, const char* tag)
|
||||
{
|
||||
::deserializeLong(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(float& t, const char* tag)
|
||||
{
|
||||
::deserializeFloat(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(double& t, const char* tag)
|
||||
{
|
||||
::deserializeDouble(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(std::string& t, const char* tag)
|
||||
{
|
||||
::deserializeString(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::deserialize(std::string& t, size_t& len, const char* tag)
|
||||
{
|
||||
::deserializeString(t, stream);
|
||||
len = t.length();
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::startRecord(Record& s, const char* tag)
|
||||
{
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::endRecord(Record& s, const char* tag)
|
||||
{
|
||||
}
|
||||
|
||||
Index* hadoop::IBinArchive::startVector(const char* tag)
|
||||
{
|
||||
int32_t len;
|
||||
::deserializeInt(len, stream);
|
||||
BinIndex *idx = new BinIndex((size_t) len);
|
||||
return idx;
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::endVector(Index* idx, const char* tag)
|
||||
{
|
||||
delete idx;
|
||||
}
|
||||
|
||||
Index* hadoop::IBinArchive::startMap(const char* tag)
|
||||
{
|
||||
int32_t len;
|
||||
::deserializeInt(len, stream);
|
||||
BinIndex *idx = new BinIndex((size_t) len);
|
||||
return idx;
|
||||
}
|
||||
|
||||
void hadoop::IBinArchive::endMap(Index* idx, const char* tag)
|
||||
{
|
||||
delete idx;
|
||||
}
|
||||
|
||||
hadoop::IBinArchive::~IBinArchive()
|
||||
{
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(int8_t t, const char* tag)
|
||||
{
|
||||
::serialize(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(bool t, const char* tag)
|
||||
{
|
||||
::serialize(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(int32_t t, const char* tag)
|
||||
{
|
||||
int64_t longVal = t;
|
||||
::serializeLong(longVal, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(int64_t t, const char* tag)
|
||||
{
|
||||
::serializeLong(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(float t, const char* tag)
|
||||
{
|
||||
::serializeFloat(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(double t, const char* tag)
|
||||
{
|
||||
::serializeDouble(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(const std::string& t, const char* tag)
|
||||
{
|
||||
::serializeString(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::serialize(const std::string& t, size_t len, const char* tag)
|
||||
{
|
||||
::serializeString(t, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::startRecord(const Record& s, const char* tag)
|
||||
{
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::endRecord(const Record& s, const char* tag)
|
||||
{
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::startVector(size_t len, const char* tag)
|
||||
{
|
||||
::serializeInt(len, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::endVector(size_t len, const char* tag)
|
||||
{
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::startMap(size_t len, const char* tag)
|
||||
{
|
||||
::serializeInt(len, stream);
|
||||
}
|
||||
|
||||
void hadoop::OBinArchive::endMap(size_t len, const char* tag)
|
||||
{
|
||||
}
|
||||
|
||||
hadoop::OBinArchive::~OBinArchive()
|
||||
{
|
||||
}
|
|
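The serializeLong/deserializeLong pair above implements the variable-length integer layout also used by Hadoop's Java WritableUtils: values in [-112, 127] take one byte, larger values store a length marker followed by big-endian payload bytes, and negative values are one's-complemented first. A self-contained round-trip sketch of that layout (helper names here are hypothetical, not part of librecordio):

// Standalone sketch of the variable-length int64 format used by
// serializeLong()/deserializeLong() above. Helper names are hypothetical.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

static void writeVLong(int64_t t, std::vector<uint8_t>& out) {
  if (t >= -112 && t <= 127) {            // small values fit in one byte
    out.push_back(static_cast<uint8_t>(static_cast<int8_t>(t)));
    return;
  }
  int8_t len = -112;
  if (t < 0) {
    t ^= -1LL;                            // one's complement, as above
    len = -120;
  }
  uint64_t tmp = static_cast<uint64_t>(t);
  while (tmp != 0) {                      // fold the byte count into the marker
    tmp >>= 8;
    len--;
  }
  out.push_back(static_cast<uint8_t>(len));
  int n = (len < -120) ? -(len + 120) : -(len + 112);
  for (int idx = n; idx != 0; idx--) {    // payload, most significant byte first
    int shift = (idx - 1) * 8;
    out.push_back(static_cast<uint8_t>((static_cast<uint64_t>(t) >> shift) & 0xFF));
  }
}

static int64_t readVLong(const std::vector<uint8_t>& in, std::size_t& pos) {
  int8_t b = static_cast<int8_t>(in[pos++]);
  if (b >= -112) return b;                // single-byte case
  bool neg = (b < -120);
  int n = neg ? -(b + 120) : -(b + 112);
  int64_t t = 0;
  for (int i = 0; i < n; i++) {
    t = (t << 8) | (in[pos++] & 0xFF);
  }
  return neg ? (t ^ -1LL) : t;
}

int main() {
  std::vector<uint8_t> buf;
  const int64_t samples[] = {0, 127, -112, 4567, -4567, 0x5a5a5a5a5a5aLL};
  for (int64_t v : samples) writeVLong(v, buf);
  std::size_t pos = 0;
  for (int64_t v : samples) assert(readVLong(buf, pos) == v);
  return 0;
}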
@@ -1,81 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BINARCHIVE_HH_
#define BINARCHIVE_HH_

#include "recordio.hh"

namespace hadoop {

class BinIndex : public Index {
private:
  size_t size;
public:
  BinIndex(size_t size_) { size = size_; }
  bool done() { return (size==0); }
  void incr() { size--; }
  ~BinIndex() {}
};

class IBinArchive : public IArchive {
private:
  InStream& stream;
public:
  IBinArchive(InStream& _stream) : stream(_stream) {}
  virtual void deserialize(int8_t& t, const char* tag);
  virtual void deserialize(bool& t, const char* tag);
  virtual void deserialize(int32_t& t, const char* tag);
  virtual void deserialize(int64_t& t, const char* tag);
  virtual void deserialize(float& t, const char* tag);
  virtual void deserialize(double& t, const char* tag);
  virtual void deserialize(std::string& t, const char* tag);
  virtual void deserialize(std::string& t, size_t& len, const char* tag);
  virtual void startRecord(Record& s, const char* tag);
  virtual void endRecord(Record& s, const char* tag);
  virtual Index* startVector(const char* tag);
  virtual void endVector(Index* idx, const char* tag);
  virtual Index* startMap(const char* tag);
  virtual void endMap(Index* idx, const char* tag);
  virtual ~IBinArchive();
};

class OBinArchive : public OArchive {
private:
  OutStream& stream;
public:
  OBinArchive(OutStream& _stream) : stream(_stream) {}
  virtual void serialize(int8_t t, const char* tag);
  virtual void serialize(bool t, const char* tag);
  virtual void serialize(int32_t t, const char* tag);
  virtual void serialize(int64_t t, const char* tag);
  virtual void serialize(float t, const char* tag);
  virtual void serialize(double t, const char* tag);
  virtual void serialize(const std::string& t, const char* tag);
  virtual void serialize(const std::string& t, size_t len, const char* tag);
  virtual void startRecord(const Record& s, const char* tag);
  virtual void endRecord(const Record& s, const char* tag);
  virtual void startVector(size_t len, const char* tag);
  virtual void endVector(size_t len, const char* tag);
  virtual void startMap(size_t len, const char* tag);
  virtual void endMap(size_t len, const char* tag);
  virtual ~OBinArchive();
};

}
#endif /*BINARCHIVE_HH_*/
@@ -1,368 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "csvarchive.hh"
|
||||
#include <stdlib.h>
|
||||
|
||||
using namespace hadoop;
|
||||
|
||||
static std::string readUptoTerminator(PushBackInStream& stream)
|
||||
{
|
||||
std::string s;
|
||||
while (1) {
|
||||
char c;
|
||||
if (1 != stream.read(&c, 1)) {
|
||||
throw new IOException("Error in deserialization.");
|
||||
}
|
||||
if (c == ',' || c == '\n' || c == '}') {
|
||||
if (c != ',') {
|
||||
stream.pushBack(c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
s.push_back(c);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(int8_t& t, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
t = (int8_t) strtol(s.c_str(), NULL, 10);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(bool& t, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
t = (s == "T") ? true : false;
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(int32_t& t, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
t = strtol(s.c_str(), NULL, 10);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(int64_t& t, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
t = strtoll(s.c_str(), NULL, 10);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(float& t, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
t = strtof(s.c_str(), NULL);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(double& t, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
t = strtod(s.c_str(), NULL);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(std::string& t, const char* tag)
|
||||
{
|
||||
std::string temp = readUptoTerminator(stream);
|
||||
if (temp[0] != '\'') {
|
||||
throw new IOException("Errror deserializing string.");
|
||||
}
|
||||
t.clear();
|
||||
// skip first character, replace escaped characters
|
||||
int len = temp.length();
|
||||
for (int i = 1; i < len; i++) {
|
||||
char c = temp.at(i);
|
||||
if (c == '%') {
|
||||
// since we escape '%', there have to be at least two chars following a '%'
|
||||
char ch1 = temp.at(i+1);
|
||||
char ch2 = temp.at(i+2);
|
||||
i += 2;
|
||||
if (ch1 == '0' && ch2 == '0') {
|
||||
t.append(1, '\0');
|
||||
} else if (ch1 == '0' && ch2 == 'A') {
|
||||
t.append(1, '\n');
|
||||
} else if (ch1 == '0' && ch2 == 'D') {
|
||||
t.append(1, '\r');
|
||||
} else if (ch1 == '2' && ch2 == 'C') {
|
||||
t.append(1, ',');
|
||||
} else if (ch1 == '7' && ch2 == 'D') {
|
||||
t.append(1, '}');
|
||||
} else if (ch1 == '2' && ch2 == '5') {
|
||||
t.append(1, '%');
|
||||
} else {
|
||||
throw new IOException("Error deserializing string.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
t.append(1, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::deserialize(std::string& t, size_t& len, const char* tag)
|
||||
{
|
||||
std::string s = readUptoTerminator(stream);
|
||||
if (s[0] != '#') {
|
||||
throw new IOException("Errror deserializing buffer.");
|
||||
}
|
||||
s.erase(0, 1); /// erase first character
|
||||
len = s.length();
|
||||
if (len%2 == 1) { // len is guaranteed to be even
|
||||
throw new IOException("Errror deserializing buffer.");
|
||||
}
|
||||
len = len >> 1;
|
||||
for (size_t idx = 0; idx < len; idx++) {
|
||||
char buf[3];
|
||||
buf[0] = s[2*idx];
|
||||
buf[1] = s[2*idx+1];
|
||||
buf[2] = '\0';
|
||||
int i;
|
||||
if (1 != sscanf(buf, "%2x", &i)) {
|
||||
throw new IOException("Errror deserializing buffer.");
|
||||
}
|
||||
t.push_back((char) i);
|
||||
}
|
||||
len = t.length();
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::startRecord(Record& s, const char* tag)
|
||||
{
|
||||
if (tag != NULL) {
|
||||
char mark[2];
|
||||
if (2 != stream.read(mark, 2)) {
|
||||
throw new IOException("Error deserializing record.");
|
||||
}
|
||||
if (mark[0] != 's' || mark[1] != '{') {
|
||||
throw new IOException("Error deserializing record.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::endRecord(Record& s, const char* tag)
|
||||
{
|
||||
char mark;
|
||||
if (1 != stream.read(&mark, 1)) {
|
||||
throw new IOException("Error deserializing record.");
|
||||
}
|
||||
if (tag == NULL) {
|
||||
if (mark != '\n') {
|
||||
throw new IOException("Error deserializing record.");
|
||||
}
|
||||
} else if (mark != '}') {
|
||||
throw new IOException("Error deserializing record.");
|
||||
} else {
|
||||
readUptoTerminator(stream);
|
||||
}
|
||||
}
|
||||
|
||||
Index* hadoop::ICsvArchive::startVector(const char* tag)
|
||||
{
|
||||
char mark[2];
|
||||
if (2 != stream.read(mark, 2)) {
|
||||
throw new IOException("Error deserializing vector.");
|
||||
}
|
||||
if (mark[0] != 'v' || mark[1] != '{') {
|
||||
throw new IOException("Error deserializing vector.");
|
||||
}
|
||||
return new CsvIndex(stream);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::endVector(Index* idx, const char* tag)
|
||||
{
|
||||
delete idx;
|
||||
char mark;
|
||||
if (1 != stream.read(&mark, 1)) {
|
||||
throw new IOException("Error deserializing vector.");
|
||||
}
|
||||
if (mark != '}') {
|
||||
throw new IOException("Error deserializing vector.");
|
||||
}
|
||||
readUptoTerminator(stream);
|
||||
}
|
||||
|
||||
Index* hadoop::ICsvArchive::startMap(const char* tag)
|
||||
{
|
||||
char mark[2];
|
||||
if (2 != stream.read(mark, 2)) {
|
||||
throw new IOException("Error deserializing map.");
|
||||
}
|
||||
if (mark[0] != 'm' || mark[1] != '{') {
|
||||
throw new IOException("Error deserializing map.");
|
||||
}
|
||||
|
||||
return new CsvIndex(stream);
|
||||
}
|
||||
|
||||
void hadoop::ICsvArchive::endMap(Index* idx, const char* tag)
|
||||
{
|
||||
delete idx;
|
||||
char mark;
|
||||
if (1 != stream.read(&mark, 1)) {
|
||||
throw new IOException("Error deserializing map.");
|
||||
}
|
||||
if (mark != '}') {
|
||||
throw new IOException("Error deserializing map.");
|
||||
}
|
||||
readUptoTerminator(stream);
|
||||
}
|
||||
|
||||
hadoop::ICsvArchive::~ICsvArchive()
|
||||
{
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(int8_t t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
char sval[5];
|
||||
sprintf(sval, "%d", t);
|
||||
stream.write(sval, strlen(sval));
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(bool t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
const char *sval = t ? "T" : "F";
|
||||
stream.write(sval,1);
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(int32_t t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
char sval[128];
|
||||
sprintf(sval, "%d", t);
|
||||
stream.write(sval, strlen(sval));
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(int64_t t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
char sval[128];
|
||||
sprintf(sval, "%lld", t);
|
||||
stream.write(sval, strlen(sval));
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(float t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
char sval[128];
|
||||
sprintf(sval, "%f", t);
|
||||
stream.write(sval, strlen(sval));
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(double t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
char sval[128];
|
||||
sprintf(sval, "%lf", t);
|
||||
stream.write(sval, strlen(sval));
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(const std::string& t, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
stream.write("'",1);
|
||||
int len = t.length();
|
||||
for (int idx = 0; idx < len; idx++) {
|
||||
char c = t[idx];
|
||||
switch(c) {
|
||||
case '\0':
|
||||
stream.write("%00",3);
|
||||
break;
|
||||
case 0x0A:
|
||||
stream.write("%0A",3);
|
||||
break;
|
||||
case 0x0D:
|
||||
stream.write("%0D",3);
|
||||
break;
|
||||
case 0x25:
|
||||
stream.write("%25",3);
|
||||
break;
|
||||
case 0x2C:
|
||||
stream.write("%2C",3);
|
||||
break;
|
||||
case 0x7D:
|
||||
stream.write("%7D",3);
|
||||
break;
|
||||
default:
|
||||
stream.write(&c,1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::serialize(const std::string& t, size_t len, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
stream.write("#",1);
|
||||
for(size_t idx = 0; idx < len; idx++) {
|
||||
uint8_t b = t[idx];
|
||||
char sval[3];
|
||||
sprintf(sval,"%2x",b);
|
||||
stream.write(sval, 2);
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::startRecord(const Record& s, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
if (tag != NULL && strlen(tag) != 0) {
|
||||
stream.write("s{",2);
|
||||
}
|
||||
isFirst = true;
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::endRecord(const Record& s, const char* tag)
|
||||
{
|
||||
if (tag == NULL || strlen(tag) == 0) {
|
||||
stream.write("\n",1);
|
||||
isFirst = true;
|
||||
} else {
|
||||
stream.write("}",1);
|
||||
isFirst = false;
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::startVector(size_t len, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
stream.write("v{",2);
|
||||
isFirst = true;
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::endVector(size_t len, const char* tag)
|
||||
{
|
||||
stream.write("}",1);
|
||||
isFirst = false;
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::startMap(size_t len, const char* tag)
|
||||
{
|
||||
printCommaUnlessFirst();
|
||||
stream.write("m{",2);
|
||||
isFirst = true;
|
||||
}
|
||||
|
||||
void hadoop::OCsvArchive::endMap(size_t len, const char* tag)
|
||||
{
|
||||
stream.write("}",1);
|
||||
isFirst = false;
|
||||
}
|
||||
|
||||
hadoop::OCsvArchive::~OCsvArchive()
|
||||
{
|
||||
}
|
|
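The CSV archive above writes strings with a leading single quote and percent-escapes the characters that would break the format (NUL, LF, CR, '%', ',' and '}'). A self-contained sketch of just that escaping rule (helper names are hypothetical, not part of librecordio):

// Standalone sketch of the '%'-escaping used by the CSV archive above.
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <string>

static std::string csvEscape(const std::string& s) {
  std::string out = "'";                  // string fields start with a quote
  for (char c : s) {
    switch (c) {
      case '\0': out += "%00"; break;
      case '\n': out += "%0A"; break;
      case '\r': out += "%0D"; break;
      case '%':  out += "%25"; break;
      case ',':  out += "%2C"; break;
      case '}':  out += "%7D"; break;
      default:   out += c;     break;
    }
  }
  return out;
}

static std::string csvUnescape(const std::string& field) {
  if (field.empty() || field[0] != '\'')
    throw std::runtime_error("not a CSV-archive string field");
  std::string out;
  for (std::size_t i = 1; i < field.size(); i++) {
    if (field[i] != '%') { out += field[i]; continue; }
    const std::string code = field.substr(i + 1, 2);  // two hex chars after '%'
    i += 2;
    if      (code == "00") out += '\0';
    else if (code == "0A") out += '\n';
    else if (code == "0D") out += '\r';
    else if (code == "25") out += '%';
    else if (code == "2C") out += ',';
    else if (code == "7D") out += '}';
    else throw std::runtime_error("bad escape");
  }
  return out;
}

int main() {
  const std::string original("a,b}c%d\r\n");
  assert(csvUnescape(csvEscape(original)) == original);
  return 0;
}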
@@ -1,128 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef CSVARCHIVE_HH_
|
||||
#define CSVARCHIVE_HH_
|
||||
|
||||
#include "recordio.hh"
|
||||
|
||||
namespace hadoop {
|
||||
|
||||
class PushBackInStream {
|
||||
private:
|
||||
InStream* stream;
|
||||
bool isAvail;
|
||||
char pbchar;
|
||||
public:
|
||||
void setStream(InStream* stream_) {
|
||||
stream = stream_;
|
||||
isAvail = false;
|
||||
pbchar = 0;
|
||||
}
|
||||
ssize_t read(void* buf, size_t len) {
|
||||
if (len > 0 && isAvail) {
|
||||
char* p = (char*) buf;
|
||||
*p = pbchar;
|
||||
isAvail = false;
|
||||
if (len > 1) {
|
||||
ssize_t ret = stream->read((char*)buf + 1, len - 1);
|
||||
return ret + 1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
return stream->read(buf, len);
|
||||
}
|
||||
}
|
||||
void pushBack(char c) {
|
||||
pbchar = c;
|
||||
isAvail = true;
|
||||
}
|
||||
};
|
||||
|
||||
class CsvIndex : public Index {
|
||||
private:
|
||||
PushBackInStream& stream;
|
||||
public:
|
||||
CsvIndex(PushBackInStream& _stream) : stream(_stream) {}
|
||||
bool done() {
|
||||
char c;
|
||||
stream.read(&c, 1);
|
||||
if (c != ',') {
|
||||
stream.pushBack(c);
|
||||
}
|
||||
return (c == '}') ? true : false;
|
||||
}
|
||||
void incr() {}
|
||||
~CsvIndex() {}
|
||||
};
|
||||
|
||||
class ICsvArchive : public IArchive {
|
||||
private:
|
||||
PushBackInStream stream;
|
||||
public:
|
||||
ICsvArchive(InStream& _stream) { stream.setStream(&_stream); }
|
||||
virtual void deserialize(int8_t& t, const char* tag);
|
||||
virtual void deserialize(bool& t, const char* tag);
|
||||
virtual void deserialize(int32_t& t, const char* tag);
|
||||
virtual void deserialize(int64_t& t, const char* tag);
|
||||
virtual void deserialize(float& t, const char* tag);
|
||||
virtual void deserialize(double& t, const char* tag);
|
||||
virtual void deserialize(std::string& t, const char* tag);
|
||||
virtual void deserialize(std::string& t, size_t& len, const char* tag);
|
||||
virtual void startRecord(Record& s, const char* tag);
|
||||
virtual void endRecord(Record& s, const char* tag);
|
||||
virtual Index* startVector(const char* tag);
|
||||
virtual void endVector(Index* idx, const char* tag);
|
||||
virtual Index* startMap(const char* tag);
|
||||
virtual void endMap(Index* idx, const char* tag);
|
||||
virtual ~ICsvArchive();
|
||||
};
|
||||
|
||||
class OCsvArchive : public OArchive {
|
||||
private:
|
||||
OutStream& stream;
|
||||
bool isFirst;
|
||||
|
||||
void printCommaUnlessFirst() {
|
||||
if (!isFirst) {
|
||||
stream.write(",",1);
|
||||
}
|
||||
isFirst = false;
|
||||
}
|
||||
public:
|
||||
OCsvArchive(OutStream& _stream) : stream(_stream) {isFirst = true;}
|
||||
virtual void serialize(int8_t t, const char* tag);
|
||||
virtual void serialize(bool t, const char* tag);
|
||||
virtual void serialize(int32_t t, const char* tag);
|
||||
virtual void serialize(int64_t t, const char* tag);
|
||||
virtual void serialize(float t, const char* tag);
|
||||
virtual void serialize(double t, const char* tag);
|
||||
virtual void serialize(const std::string& t, const char* tag);
|
||||
virtual void serialize(const std::string& t, size_t len, const char* tag);
|
||||
virtual void startRecord(const Record& s, const char* tag);
|
||||
virtual void endRecord(const Record& s, const char* tag);
|
||||
virtual void startVector(size_t len, const char* tag);
|
||||
virtual void endVector(size_t len, const char* tag);
|
||||
virtual void startMap(size_t len, const char* tag);
|
||||
virtual void endMap(size_t len, const char* tag);
|
||||
virtual ~OCsvArchive();
|
||||
};
|
||||
|
||||
}
|
||||
#endif /*CSVARCHIVE_HH_*/
|
|
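PushBackInStream above gives the CSV parser one character of lookahead: readUptoTerminator and CsvIndex read a delimiter and push it back if it still matters to the caller. A self-contained sketch of the same single-character push-back idea, wrapped around an std::istringstream so it runs on its own (names are hypothetical):

// Standalone sketch of the single-character push-back used by
// PushBackInStream above.
#include <cassert>
#include <sstream>
#include <string>

class PushBackReader {
  std::istringstream src;
  bool isAvail;
  char pbchar;
public:
  explicit PushBackReader(const std::string& data)
      : src(data), isAvail(false), pbchar(0) {}
  // Read one character; returns false at end of input.
  bool read(char& c) {
    if (isAvail) { c = pbchar; isAvail = false; return true; }
    return static_cast<bool>(src.get(c));
  }
  // Make the last character readable again (one character of lookahead).
  void pushBack(char c) { pbchar = c; isAvail = true; }
};

int main() {
  PushBackReader r("v{1,2}");
  char c;
  assert(r.read(c) && c == 'v');
  assert(r.read(c) && c == '{');
  r.pushBack(c);                 // peek-style: put '{' back
  assert(r.read(c) && c == '{'); // and read it again
  return 0;
}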
@@ -1,152 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "exception.hh"
|
||||
#ifdef USE_EXECINFO
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <sstream>
|
||||
#include <typeinfo>
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace hadoop {
|
||||
|
||||
/**
|
||||
* Create an exception.
|
||||
* @param message The message to give to the user.
|
||||
* @param reason The exception that caused the new exception.
|
||||
*/
|
||||
Exception::Exception(const string& message,
|
||||
const string& component,
|
||||
const string& location,
|
||||
const Exception* reason
|
||||
): mMessage(message),
|
||||
mComponent(component),
|
||||
mLocation(location),
|
||||
mReason(reason)
|
||||
|
||||
{
|
||||
#ifdef USE_EXECINFO
|
||||
mCalls = backtrace(mCallStack, sMaxCallStackDepth);
|
||||
#else
|
||||
mCalls = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the exception.
|
||||
* Clones the reason, if there is one.
|
||||
*/
|
||||
Exception::Exception(const Exception& other
|
||||
): mMessage(other.mMessage),
|
||||
mComponent(other.mComponent),
|
||||
mLocation(other.mLocation),
|
||||
mCalls(other.mCalls)
|
||||
{
|
||||
for(int i=0; i < mCalls; ++i) {
|
||||
mCallStack[i] = other.mCallStack[i];
|
||||
}
|
||||
if (other.mReason) {
|
||||
mReason = other.mReason->clone();
|
||||
} else {
|
||||
mReason = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
Exception::~Exception() throw () {
|
||||
delete mReason;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print all of the information about the exception.
|
||||
*/
|
||||
void Exception::print(std::ostream& stream) const {
|
||||
stream << "Exception " << getTypename();
|
||||
if (mComponent.size() != 0) {
|
||||
stream << " (" << mComponent << ")";
|
||||
}
|
||||
stream << ": " << mMessage << "\n";
|
||||
if (mLocation.size() != 0) {
|
||||
stream << " thrown at " << mLocation << "\n";
|
||||
}
|
||||
#ifdef USE_EXECINFO
|
||||
printCallStack(stream);
|
||||
#endif
|
||||
if (mReason) {
|
||||
stream << "caused by: ";
|
||||
mReason->print(stream);
|
||||
}
|
||||
stream.flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of print() as a string.
|
||||
*/
|
||||
string Exception::toString() const {
|
||||
std::ostringstream stream;
|
||||
print(stream);
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
#ifdef USE_EXECINFO
|
||||
/**
|
||||
* Print the call stack where the exception was created.
|
||||
*/
|
||||
void Exception::printCallStack(std::ostream& stream) const {
|
||||
char ** symbols = backtrace_symbols(mCallStack, mCalls);
|
||||
for(int i=0; i < mCalls; ++i) {
|
||||
stream << " ";
|
||||
if (i == 0) {
|
||||
stream << "at ";
|
||||
} else {
|
||||
stream << "from ";
|
||||
}
|
||||
stream << symbols[i] << "\n";
|
||||
}
|
||||
free(symbols);
|
||||
}
|
||||
#endif
|
||||
|
||||
const char* Exception::getTypename() const {
|
||||
return "Exception";
|
||||
}
|
||||
|
||||
Exception* Exception::clone() const {
|
||||
return new Exception(*this);
|
||||
}
|
||||
|
||||
IOException::IOException(const string& message,
|
||||
const string& component,
|
||||
const string& location,
|
||||
const Exception* reason
|
||||
): Exception(message, component, location, reason)
|
||||
{
|
||||
}
|
||||
|
||||
const char* IOException::getTypename() const {
|
||||
return "IOException";
|
||||
}
|
||||
|
||||
IOException* IOException::clone() const {
|
||||
return new IOException(*this);
|
||||
}
|
||||
|
||||
}
|
|
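Note that the archives above throw heap-allocated exceptions (throw new IOException(...)), so callers catch a pointer and are responsible for deleting it. A hedged usage sketch, assuming the librecordio headers are available:

// Hedged usage sketch for the exception classes above. The function name
// and error text are made up for illustration.
#include <iostream>
#include "exception.hh"

static void mightFail(bool fail) {
  if (fail) {
    throw new hadoop::IOException("error deserializing data",
                                  "librecordio", "mightFail()");
  }
}

int main() {
  try {
    mightFail(true);
  } catch (const hadoop::Exception* e) {
    e->print(std::cerr);   // message, component, location, optional backtrace
    delete e;              // caller owns the thrown pointer
  }
  return 0;
}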
@@ -1,129 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef EXCEPTION_HH
|
||||
#define EXCEPTION_HH
|
||||
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace hadoop {
|
||||
|
||||
/**
|
||||
* Parent-type for all exceptions in hadoop.
|
||||
* Provides an application specified message to the user, a call stack from
|
||||
* where the exception was created, and optionally an exception that caused
|
||||
* this one.
|
||||
*/
|
||||
class Exception: public std::exception {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Create an exception.
|
||||
* @param message The message to give to the user.
|
||||
* @param reason The exception that caused the new exception.
|
||||
*/
|
||||
explicit Exception(const std::string& message,
|
||||
const std::string& component="",
|
||||
const std::string& location="",
|
||||
const Exception* reason=NULL);
|
||||
|
||||
/**
|
||||
* Copy the exception.
|
||||
* Clones the reason, if there is one.
|
||||
*/
|
||||
Exception(const Exception&);
|
||||
|
||||
virtual ~Exception() throw ();
|
||||
|
||||
/**
|
||||
* Make a new copy of the given exception by dynamically allocating
|
||||
* memory.
|
||||
*/
|
||||
virtual Exception* clone() const;
|
||||
|
||||
/**
|
||||
* Print all of the information about the exception.
|
||||
*/
|
||||
virtual void print(std::ostream& stream=std::cerr) const;
|
||||
|
||||
/**
|
||||
* Result of print() as a string.
|
||||
*/
|
||||
virtual std::string toString() const;
|
||||
|
||||
#ifdef USE_EXECINFO
|
||||
/**
|
||||
* Print the call stack where the exception was created.
|
||||
*/
|
||||
virtual void printCallStack(std::ostream& stream=std::cerr) const;
|
||||
#endif
|
||||
|
||||
const std::string& getMessage() const {
|
||||
return mMessage;
|
||||
}
|
||||
|
||||
const std::string& getComponent() const {
|
||||
return mComponent;
|
||||
}
|
||||
|
||||
const std::string& getLocation() const {
|
||||
return mLocation;
|
||||
}
|
||||
|
||||
const Exception* getReason() const {
|
||||
return mReason;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide a body for the virtual from std::exception.
|
||||
*/
|
||||
virtual const char* what() const throw () {
|
||||
return mMessage.c_str();
|
||||
}
|
||||
|
||||
virtual const char* getTypename() const;
|
||||
|
||||
private:
|
||||
const static int sMaxCallStackDepth = 10;
|
||||
const std::string mMessage;
|
||||
const std::string mComponent;
|
||||
const std::string mLocation;
|
||||
int mCalls;
|
||||
void* mCallStack[sMaxCallStackDepth];
|
||||
const Exception* mReason;
|
||||
|
||||
// NOT IMPLEMENTED
|
||||
std::exception& operator=(const std::exception& right) throw ();
|
||||
};
|
||||
|
||||
class IOException: public Exception {
|
||||
public:
|
||||
IOException(const std::string& message,
|
||||
const std::string& component="",
|
||||
const std::string& location="",
|
||||
const Exception* reason = NULL);
|
||||
|
||||
virtual IOException* clone() const;
|
||||
virtual const char* getTypename() const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
|
@@ -1,64 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fieldTypeInfo.hh"
|
||||
|
||||
using namespace hadoop;
|
||||
|
||||
FieldTypeInfo::~FieldTypeInfo()
|
||||
{
|
||||
delete pFieldID;
|
||||
delete pTypeID;
|
||||
}
|
||||
|
||||
FieldTypeInfo::FieldTypeInfo(const FieldTypeInfo& ti)
|
||||
{
|
||||
pFieldID = new std::string(*ti.pFieldID);
|
||||
pTypeID = ti.pTypeID->clone();
|
||||
}
|
||||
|
||||
|
||||
void FieldTypeInfo::serialize(::hadoop::OArchive& a_, const char* tag) const
|
||||
{
|
||||
a_.serialize(*pFieldID, tag);
|
||||
pTypeID->serialize(a_, tag);
|
||||
}
|
||||
|
||||
bool FieldTypeInfo::operator==(const FieldTypeInfo& peer_) const
|
||||
{
|
||||
// first check if fieldID matches
|
||||
if (0 != pFieldID->compare(*(peer_.pFieldID))) {
|
||||
return false;
|
||||
}
|
||||
// now see if typeID matches
|
||||
return (*pTypeID == *(peer_.pTypeID));
|
||||
}
|
||||
|
||||
// print out the structure. Helps in debugging.
|
||||
void FieldTypeInfo::print(int space) const
|
||||
{
|
||||
for (int i=0; i<space; i++) {
|
||||
printf(" ");
|
||||
}
|
||||
printf("FieldTypeInfo(%lx):\n", (long)this);
|
||||
for (int i=0; i<space+2; i++) {
|
||||
printf(" ");
|
||||
}
|
||||
printf("field = \"%s\"\n", pFieldID->c_str());
|
||||
pTypeID->print(space+2);
|
||||
}
|
|
@@ -1,59 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FIELDTYPEINFO_HH_
#define FIELDTYPEINFO_HH_

#include "recordio.hh"
#include "typeIDs.hh"

namespace hadoop {

class TypeID;

/**
 * Represents a type information for a field, which is made up of its
 * ID (name) and its type (a TypeID object).
 */
class FieldTypeInfo {

private:
  // we own memory mgmt of these vars
  const std::string* pFieldID;
  const TypeID* pTypeID;

public:
  FieldTypeInfo(const std::string* pFieldID, const TypeID* pTypeID) :
    pFieldID(pFieldID), pTypeID(pTypeID) {}
  FieldTypeInfo(const FieldTypeInfo& ti);
  virtual ~FieldTypeInfo();

  const TypeID* getTypeID() const {return pTypeID;}
  const std::string* getFieldID() const {return pFieldID;}
  void serialize(::hadoop::OArchive& a_, const char* tag) const;
  bool operator==(const FieldTypeInfo& peer_) const;
  FieldTypeInfo* clone() const {return new FieldTypeInfo(*this);}

  void print(int space=0) const;

};

}

#endif // FIELDTYPEINFO_HH_

@@ -1,98 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "filestream.hh"
|
||||
|
||||
using namespace hadoop;
|
||||
|
||||
hadoop::FileInStream::FileInStream()
|
||||
{
|
||||
mFile = NULL;
|
||||
}
|
||||
|
||||
bool hadoop::FileInStream::open(const std::string& name)
|
||||
{
|
||||
mFile = fopen(name.c_str(), "rb");
|
||||
return (mFile != NULL);
|
||||
}
|
||||
|
||||
ssize_t hadoop::FileInStream::read(void *buf, size_t len)
|
||||
{
|
||||
return fread(buf, 1, len, mFile);
|
||||
}
|
||||
|
||||
bool hadoop::FileInStream::skip(size_t nbytes)
|
||||
{
|
||||
return (0==fseek(mFile, nbytes, SEEK_CUR));
|
||||
}
|
||||
|
||||
bool hadoop::FileInStream::close()
|
||||
{
|
||||
int ret = fclose(mFile);
|
||||
mFile = NULL;
|
||||
return (ret==0);
|
||||
}
|
||||
|
||||
hadoop::FileInStream::~FileInStream()
|
||||
{
|
||||
if (mFile != NULL) {
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
hadoop::FileOutStream::FileOutStream()
|
||||
{
|
||||
mFile = NULL;
|
||||
}
|
||||
|
||||
bool hadoop::FileOutStream::open(const std::string& name, bool overwrite)
|
||||
{
|
||||
if (!overwrite) {
|
||||
mFile = fopen(name.c_str(), "rb");
|
||||
if (mFile != NULL) {
|
||||
fclose(mFile);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
mFile = fopen(name.c_str(), "wb");
|
||||
return (mFile != NULL);
|
||||
}
|
||||
|
||||
ssize_t hadoop::FileOutStream::write(const void* buf, size_t len)
|
||||
{
|
||||
return fwrite(buf, 1, len, mFile);
|
||||
}
|
||||
|
||||
bool hadoop::FileOutStream::advance(size_t nbytes)
|
||||
{
|
||||
return (0==fseek(mFile, nbytes, SEEK_CUR));
|
||||
}
|
||||
|
||||
bool hadoop::FileOutStream::close()
|
||||
{
|
||||
int ret = fclose(mFile);
|
||||
mFile = NULL;
|
||||
return (ret == 0);
|
||||
}
|
||||
|
||||
hadoop::FileOutStream::~FileOutStream()
|
||||
{
|
||||
if (mFile != NULL) {
|
||||
close();
|
||||
}
|
||||
}
|
|
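FileOutStream::open above guards against accidental truncation: when overwrite is false it first probes the file for reading and refuses to create it if the probe succeeds. A self-contained sketch of that pattern with plain stdio (the file name is arbitrary):

// Standalone sketch of the overwrite guard used by FileOutStream::open().
#include <cstdio>
#include <string>

static FILE* openForWrite(const std::string& name, bool overwrite) {
  if (!overwrite) {
    if (FILE* existing = std::fopen(name.c_str(), "rb")) {
      std::fclose(existing);
      return NULL;                 // file already there; caller sees failure
    }
  }
  return std::fopen(name.c_str(), "wb");
}

int main() {
  FILE* f = openForWrite("/tmp/librecordio-demo.dat", false);
  if (f != NULL) {
    std::fputs("created\n", f);
    std::fclose(f);
  }
  return 0;
}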
@@ -1,55 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FILESTREAM_HH_
#define FILESTREAM_HH_

#include <stdio.h>
#include <stdint.h>
#include <string>
#include "recordio.hh"

namespace hadoop {

class FileInStream : public InStream {
public:
  FileInStream();
  bool open(const std::string& name);
  ssize_t read(void *buf, size_t buflen);
  bool skip(size_t nbytes);
  bool close();
  virtual ~FileInStream();
private:
  FILE *mFile;
};


class FileOutStream: public OutStream {
public:
  FileOutStream();
  bool open(const std::string& name, bool overwrite);
  ssize_t write(const void* buf, size_t len);
  bool advance(size_t nbytes);
  bool close();
  virtual ~FileOutStream();
private:
  FILE *mFile;
};

}; // end namespace
#endif /*FILESTREAM_HH_*/
@@ -1,143 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "recordTypeInfo.hh"
|
||||
|
||||
using namespace hadoop;
|
||||
|
||||
RecordTypeInfo::RecordTypeInfo()
|
||||
{
|
||||
pStid = new StructTypeID();
|
||||
}
|
||||
|
||||
RecordTypeInfo::RecordTypeInfo(const char *pName): name(pName)
|
||||
{
|
||||
pStid = new StructTypeID();
|
||||
}
|
||||
|
||||
|
||||
/*RecordTypeInfo::RecordTypeInfo(const RecordTypeInfo& rti): name(rti.name)
|
||||
{
|
||||
// clone the typeinfos from rti and add them
|
||||
for (unsigned int i=0; i<rti.typeInfos.size(); i++) {
|
||||
typeInfos.push_back(rti.typeInfos[i]->clone());
|
||||
}
|
||||
// clone the map
|
||||
for (std::map<std::string, RecordTypeInfo*>::const_iterator iter=rti.structRTIs.begin();
|
||||
iter!=rti.structRTIs.end(); ++iter) {
|
||||
structRTIs[iter->first] = iter->second->clone();
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
RecordTypeInfo::~RecordTypeInfo()
|
||||
{
|
||||
if (NULL != pStid)
|
||||
delete pStid;
|
||||
|
||||
/*for (unsigned int i=0; i<typeInfos.size(); i++) {
|
||||
delete typeInfos[i];
|
||||
}
|
||||
typeInfos.clear();
|
||||
for (std::map<std::string, RecordTypeInfo*>::const_iterator iter=structRTIs.begin();
|
||||
iter!=structRTIs.end(); ++iter) {
|
||||
// delete the RTI objects
|
||||
delete iter->second;
|
||||
}
|
||||
structRTIs.clear();*/
|
||||
}
|
||||
|
||||
void RecordTypeInfo::addField(const std::string* pFieldID, const TypeID* pTypeID)
|
||||
{
|
||||
pStid->getFieldTypeInfos().push_back(new FieldTypeInfo(pFieldID, pTypeID));
|
||||
}
|
||||
|
||||
void RecordTypeInfo::addAll(std::vector<FieldTypeInfo*>& vec)
|
||||
{
|
||||
// we need to copy object clones into our own vector
|
||||
for (unsigned int i=0; i<vec.size(); i++) {
|
||||
pStid->getFieldTypeInfos().push_back(vec[i]->clone());
|
||||
}
|
||||
}
|
||||
|
||||
// make a copy of typeInfos and return it
|
||||
/*std::vector<TypeInfo*>& RecordTypeInfo::getClonedTypeInfos()
|
||||
{
|
||||
std::vector<TypeInfo*>* pNewVec = new std::vector<TypeInfo*>();
|
||||
for (unsigned int i=0; i<typeInfos.size(); i++) {
|
||||
pNewVec->push_back(typeInfos[i]->clone());
|
||||
}
|
||||
return *pNewVec;
|
||||
} */
|
||||
|
||||
const std::vector<FieldTypeInfo*>& RecordTypeInfo::getFieldTypeInfos() const
|
||||
{
|
||||
return pStid->getFieldTypeInfos();
|
||||
}
|
||||
|
||||
|
||||
RecordTypeInfo* RecordTypeInfo::getNestedStructTypeInfo(const char *structName) const
|
||||
{
|
||||
StructTypeID* p = pStid->findStruct(structName);
|
||||
if (NULL == p) return NULL;
|
||||
return new RecordTypeInfo(structName, p);
|
||||
/*std::string s(structName);
|
||||
std::map<std::string, RecordTypeInfo*>::const_iterator iter = structRTIs.find(s);
|
||||
if (iter == structRTIs.end()) {
|
||||
return NULL;
|
||||
}
|
||||
return iter->second;*/
|
||||
}
|
||||
|
||||
void RecordTypeInfo::serialize(::hadoop::OArchive& a_, const char* tag) const
|
||||
{
|
||||
a_.startRecord(*this, tag);
|
||||
// name
|
||||
a_.serialize(name, tag);
|
||||
/*// number of elements
|
||||
a_.serialize((int32_t)typeInfos.size(), tag);
|
||||
// write out each element
|
||||
for (std::vector<FieldTypeInfo*>::const_iterator iter=typeInfos.begin();
|
||||
iter!=typeInfos.end(); ++iter) {
|
||||
(*iter)->serialize(a_, tag);
|
||||
}*/
|
||||
pStid->serializeRest(a_, tag);
|
||||
a_.endRecord(*this, tag);
|
||||
}
|
||||
|
||||
void RecordTypeInfo::print(int space) const
|
||||
{
|
||||
for (int i=0; i<space; i++) {
|
||||
printf(" ");
|
||||
}
|
||||
printf("RecordTypeInfo::%s\n", name.c_str());
|
||||
pStid->print(space);
|
||||
/*for (unsigned i=0; i<typeInfos.size(); i++) {
|
||||
typeInfos[i]->print(space+2);
|
||||
}*/
|
||||
}
|
||||
|
||||
void RecordTypeInfo::deserialize(::hadoop::IArchive& a_, const char* tag)
|
||||
{
|
||||
a_.startRecord(*this, tag);
|
||||
// name
|
||||
a_.deserialize(name, tag);
|
||||
pStid->deserialize(a_, tag);
|
||||
a_.endRecord(*this, tag);
|
||||
}
|
||||
|
|
@@ -1,68 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef RECORDTYPEINFO_HH_
|
||||
#define RECORDTYPEINFO_HH_
|
||||
|
||||
#include "recordio.hh"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "fieldTypeInfo.hh"
|
||||
|
||||
namespace hadoop {
|
||||
|
||||
class RecordTypeInfo : public ::hadoop::Record {
|
||||
|
||||
private:
|
||||
//std::vector<FieldTypeInfo* > typeInfos;
|
||||
std::string name;
|
||||
//std::map<std::string, RecordTypeInfo*> structRTIs;
|
||||
StructTypeID *pStid;
|
||||
|
||||
RecordTypeInfo(const char * pName, StructTypeID* pStid): name(pName),pStid(pStid) {}
|
||||
|
||||
public:
|
||||
RecordTypeInfo();
|
||||
RecordTypeInfo(const char *pName);
|
||||
//RecordTypeInfo(const RecordTypeInfo& rti);
|
||||
virtual ~RecordTypeInfo();
|
||||
|
||||
void addField(const std::string* pFieldID, const TypeID* pTypeID);
|
||||
void addAll(std::vector<FieldTypeInfo*>& vec);
|
||||
const std::vector<FieldTypeInfo*>& getFieldTypeInfos() const;
|
||||
void serialize(::hadoop::OArchive& a_, const char* tag) const;
|
||||
void deserialize(::hadoop::IArchive& a_, const char* tag);
|
||||
RecordTypeInfo* clone() const {return new RecordTypeInfo(*this);}
|
||||
RecordTypeInfo* getNestedStructTypeInfo(const char *structName) const;
|
||||
|
||||
const ::std::string& getName() const {return name;}
|
||||
void setName(const ::std::string& name) {this->name = name;}
|
||||
|
||||
const ::std::string& type() const {return name;}
|
||||
const ::std::string& signature() const {return name;}
|
||||
|
||||
void print(int space=0) const;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif // RECORDTYPEINFO_HH_
|
||||
|
|
@@ -1,75 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "recordio.hh"
#include "binarchive.hh"
#include "csvarchive.hh"
#include "xmlarchive.hh"

using namespace hadoop;

hadoop::RecordReader::RecordReader(InStream& stream, RecFormat f)
{
  switch (f) {
    case kBinary:
      mpArchive = new IBinArchive(stream);
      break;
    case kCSV:
      mpArchive = new ICsvArchive(stream);
      break;
    case kXML:
      mpArchive = new IXmlArchive(stream);
      break;
  }
}

hadoop::RecordReader::~RecordReader()
{
  delete mpArchive;
}

void hadoop::RecordReader::read(Record& record)
{
  record.deserialize(*mpArchive, (const char*) NULL);
}

hadoop::RecordWriter::RecordWriter(OutStream& stream, RecFormat f)
{
  switch (f) {
    case kBinary:
      mpArchive = new OBinArchive(stream);
      break;
    case kCSV:
      mpArchive = new OCsvArchive(stream);
      break;
    case kXML:
      mpArchive = new OXmlArchive(stream);
      break;
  }
}

hadoop::RecordWriter::~RecordWriter()
{
  delete mpArchive;
}

void hadoop::RecordWriter::write(const Record& record)
{
  record.serialize(*mpArchive, (const char*) NULL);
}

@@ -1,82 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef RECORDIO_HH_
#define RECORDIO_HH_

#include <stdio.h>
#include <stdint.h>
#include <iostream>
#include <cstring>
#include <string>
#include <vector>
#include <map>
#include <bitset>

namespace hadoop {

class InStream {
public:
  virtual ssize_t read(void *buf, size_t buflen) = 0;
  virtual ~InStream() {}
};

class OutStream {
public:
  virtual ssize_t write(const void *buf, size_t len) = 0;
  virtual ~OutStream() {}
};

class IArchive;
class OArchive;

class Record {
public:
  virtual void serialize(OArchive& archive, const char* tag) const = 0;
  virtual void deserialize(IArchive& archive, const char* tag) = 0;
  virtual const std::string& type() const = 0;
  virtual const std::string& signature() const = 0;
  virtual ~Record() {}
};

enum RecFormat { kBinary, kXML, kCSV };

class RecordReader {
private:
  IArchive* mpArchive;
public:
  RecordReader(InStream& stream, RecFormat f);
  virtual void read(hadoop::Record& record);
  virtual ~RecordReader();
};

class RecordWriter {
private:
  OArchive* mpArchive;
public:
  RecordWriter(OutStream& stream, RecFormat f);
  virtual void write(const hadoop::Record& record);
  virtual ~RecordWriter();
};
}; // end namspace hadoop

#include "archive.hh"
#include "exception.hh"

#endif /*RECORDIO_HH_*/

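Putting the pieces together, a typical round trip mirrors the test program later in this diff: serialize a record to a file with RecordWriter and read it back with RecordReader. This sketch assumes the librecordio headers plus an rcc-generated record class; the include name test.jr.hh is an assumption based on the test Makefile below.

// Hedged round-trip sketch modeled on test.cc later in this diff.
#include "recordio.hh"
#include "filestream.hh"
#include "test.jr.hh"   // assumed name of the header generated from test.jr

int main() {
  org::apache::hadoop::record::test::RecRecord1 r1, r2;
  r1.setIntVal(4567);

  hadoop::FileOutStream out;
  out.open("/tmp/hadooptmp.dat", true);            // overwrite if present
  hadoop::RecordWriter writer(out, hadoop::kBinary);
  writer.write(r1);
  out.close();

  hadoop::FileInStream in;
  in.open("/tmp/hadooptmp.dat");
  hadoop::RecordReader reader(in, hadoop::kBinary);
  reader.read(r2);
  in.close();

  return (r1 == r2) ? 0 : 1;
}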
@@ -1,51 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

COPTS=-g3 -O0 -Wall

all: test testFromJava

test: ${LIBRECORDIO_TEST_DIR}/test.o ${LIBRECORDIO_TEST_DIR}/test.jr.o
	g++ -g3 -O0 -o ${LIBRECORDIO_TEST_DIR}/test ${LIBRECORDIO_TEST_DIR}/test.o \
	${LIBRECORDIO_TEST_DIR}/test.jr.o -L${LIBRECORDIO_BUILD_DIR} -L${XERCESCROOT}/lib -lrecordio -lxerces-c

${LIBRECORDIO_TEST_DIR}/test.o: test.cc
	g++ ${COPTS} -c -I .. -o ${LIBRECORDIO_TEST_DIR}/test.o test.cc

testFromJava: ${LIBRECORDIO_TEST_DIR}/testFromJava.o ${LIBRECORDIO_TEST_DIR}/test.jr.o
	g++ -g3 -O0 -o ${LIBRECORDIO_TEST_DIR}/testFromJava ${LIBRECORDIO_TEST_DIR}/testFromJava.o ${LIBRECORDIO_TEST_DIR}/test.jr.o \
	-L${LIBRECORDIO_BUILD_DIR} -L${XERCESCROOT}/lib -lrecordio -lxerces-c

${LIBRECORDIO_TEST_DIR}/testFromJava.o: testFromJava.cc
	g++ ${COPTS} -c -I.. -o ${LIBRECORDIO_TEST_DIR}/testFromJava.o testFromJava.cc

${LIBRECORDIO_TEST_DIR}/test.jr.o: test.jr.cc
	g++ ${COPTS} -c -I.. -o ${LIBRECORDIO_TEST_DIR}/test.jr.o test.jr.cc

%.jr.cc %.jr.hh: %.jr
	${HADOOP_PREFIX}/bin/rcc --language c++ $<

%: %.o
%: %.cc

test.cc: test.hh
test.hh: test.jr.hh ../recordio.hh ../filestream.hh

clean:
	rm -f ${LIBRECORDIO_TEST_DIR}/*~ ${LIBRECORDIO_TEST_DIR}/*.o ${LIBRECORDIO_TEST_DIR}/test \
	${LIBRECORDIO_TEST_DIR}/testFromJava ${LIBRECORDIO_TEST_DIR}/*.jr.*

@ -1,309 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "test.hh"
|
||||
#include <vector>
|
||||
|
||||
int main()
|
||||
{
|
||||
org::apache::hadoop::record::test::RecRecord1 r1;
|
||||
org::apache::hadoop::record::test::RecRecord1 r2;
|
||||
{
|
||||
hadoop::FileOutStream ostream;
|
||||
ostream.open("/tmp/hadooptmp.dat", true);
|
||||
hadoop::RecordWriter writer(ostream, hadoop::kBinary);
|
||||
r1.setBoolVal(true);
|
||||
r1.setByteVal((int8_t)0x66);
|
||||
r1.setFloatVal(3.145);
|
||||
r1.setDoubleVal(1.5234);
|
||||
r1.setIntVal(4567);
|
||||
r1.setLongVal(0x5a5a5a5a5a5aLL);
|
||||
std::string& s = r1.getStringVal();
|
||||
s = "random text";
|
||||
writer.write(r1);
|
||||
ostream.close();
|
||||
hadoop::FileInStream istream;
|
||||
istream.open("/tmp/hadooptmp.dat");
|
||||
hadoop::RecordReader reader(istream, hadoop::kBinary);
|
||||
reader.read(r2);
|
||||
if (r1 == r2) {
|
||||
printf("Binary archive test passed.\n");
|
||||
} else {
|
||||
printf("Binary archive test failed.\n");
|
||||
}
|
||||
istream.close();
|
||||
}
|
||||
{
|
||||
hadoop::FileOutStream ostream;
|
||||
ostream.open("/tmp/hadooptmp.txt", true);
|
||||
hadoop::RecordWriter writer(ostream, hadoop::kCSV);
|
||||
r1.setBoolVal(true);
|
||||
r1.setByteVal((int8_t)0x66);
|
||||
r1.setFloatVal(3.145);
|
||||
r1.setDoubleVal(1.5234);
|
||||
r1.setIntVal(4567);
|
||||
r1.setLongVal(0x5a5a5a5a5a5aLL);
|
||||
std::string& s = r1.getStringVal();
|
||||
s = "random text";
|
||||
writer.write(r1);
|
||||
ostream.close();
|
||||
hadoop::FileInStream istream;
|
||||
istream.open("/tmp/hadooptmp.txt");
|
||||
hadoop::RecordReader reader(istream, hadoop::kCSV);
|
||||
reader.read(r2);
|
||||
if (r1 == r2) {
|
||||
printf("CSV archive test passed.\n");
|
||||
} else {
|
||||
printf("CSV archive test failed.\n");
|
||||
}
|
||||
istream.close();
|
||||
}
|
||||
{
|
||||
hadoop::FileOutStream ostream;
|
||||
ostream.open("/tmp/hadooptmp.xml", true);
|
||||
hadoop::RecordWriter writer(ostream, hadoop::kXML);
|
||||
r1.setBoolVal(true);
|
||||
r1.setByteVal((int8_t)0x66);
|
||||
r1.setFloatVal(3.145);
|
||||
r1.setDoubleVal(1.5234);
|
||||
r1.setIntVal(4567);
|
||||
r1.setLongVal(0x5a5a5a5a5a5aLL);
|
||||
std::string& s = r1.getStringVal();
|
||||
s = "random text";
|
||||
writer.write(r1);
|
||||
ostream.close();
|
||||
hadoop::FileInStream istream;
|
||||
istream.open("/tmp/hadooptmp.xml");
|
||||
hadoop::RecordReader reader(istream, hadoop::kXML);
|
||||
reader.read(r2);
|
||||
if (r1 == r2) {
|
||||
printf("XML archive test passed.\n");
|
||||
} else {
|
||||
printf("XML archive test failed.\n");
|
||||
}
|
||||
istream.close();
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests to check for versioning functionality
|
||||
*/
|
||||
|
||||
// basic test
|
||||
// write out a record and its type info, read it back using its typeinfo
|
||||
{
|
||||
hadoop::FileOutStream ostream, ortistream;
|
||||
ostream.open("/tmp/hadooptmp.dat", true);
|
||||
ortistream.open("/tmp/hadooprti.dat", true);
|
||||
hadoop::RecordWriter writer(ostream, hadoop::kBinary);
|
||||
hadoop::RecordWriter writerRti(ortistream, hadoop::kBinary);
|
||||
r1.setBoolVal(true);
|
||||
r1.setByteVal((int8_t)0x66);
|
||||
r1.setFloatVal(3.145);
|
||||
r1.setDoubleVal(1.5234);
|
||||
r1.setIntVal(4567);
|
||||
r1.setLongVal(0x5a5a5a5a5a5aLL);
|
||||
std::string& s = r1.getStringVal();
|
||||
s = "random text";
|
||||
writer.write(r1);
|
||||
ostream.close();
|
||||
// write out rti info
|
||||
writerRti.write(org::apache::hadoop::record::test::RecRecord1::getTypeInfo());
|
||||
ortistream.close();
|
||||
|
||||
// read
|
||||
hadoop::FileInStream istream;
|
||||
istream.open("/tmp/hadooptmp.dat");
|
||||
hadoop::RecordReader reader(istream, hadoop::kBinary);
|
||||
hadoop::FileInStream irtistream;
|
||||
irtistream.open("/tmp/hadooprti.dat");
|
||||
hadoop::RecordReader readerRti(irtistream, hadoop::kBinary);
|
||||
hadoop::RecordTypeInfo rti;
|
||||
readerRti.read(rti);
|
||||
irtistream.close();
|
||||
org::apache::hadoop::record::test::RecRecord1::setTypeFilter(rti);
|
||||
reader.read(r2);
|
||||
if (r1 == r2) {
|
||||
printf("Basic versioning test passed.\n");
|
||||
} else {
|
||||
printf("Basic versioning test failed.\n");
|
||||
}
|
||||
istream.close();
|
||||
}
|
||||
|
||||
// versioning:write out a record and its type info, read back a similar record using the written record's typeinfo
|
||||
{
|
||||
hadoop::FileOutStream ostream, ortistream;
|
||||
ostream.open("/tmp/hadooptmp.dat", true);
|
||||
ortistream.open("/tmp/hadooprti.dat", true);
|
||||
hadoop::RecordWriter writer(ostream, hadoop::kBinary);
|
||||
hadoop::RecordWriter writerRti(ortistream, hadoop::kBinary);
|
||||
|
||||
// we create an array of records to write
|
||||
std::vector<org::apache::hadoop::record::test::RecRecordOld*> recsWrite;
|
||||
int i, j, k, l;
|
||||
char buf[1000];
|
||||
for (i=0; i<5; i++) {
|
||||
org::apache::hadoop::record::test::RecRecordOld* ps1Rec =
|
||||
new org::apache::hadoop::record::test::RecRecordOld();
|
||||
sprintf(buf, "This is record s1: %d", i);
|
||||
ps1Rec->getName().assign(buf);
|
||||
|
||||
for (j=0; j<3; j++) {
|
||||
ps1Rec->getIvec().push_back((int64_t)(i+j));
|
||||
}
|
||||
|
||||
for (j=0; j<2; j++) {
|
||||
std::vector<org::apache::hadoop::record::test::RecRecord0>* pVec =
|
||||
new std::vector<org::apache::hadoop::record::test::RecRecord0>();
|
||||
for (k=0; k<3; k++) {
|
||||
org::apache::hadoop::record::test::RecRecord0 *psRec =
|
||||
new org::apache::hadoop::record::test::RecRecord0();
|
||||
sprintf(buf, "This is record s: (%d: %d)", j, k);
|
||||
psRec->getStringVal().assign(buf);
|
||||
}
|
||||
ps1Rec->getSvec().push_back(*pVec);
|
||||
}
|
||||
|
||||
sprintf(buf, "This is record s: %d", i);
|
||||
ps1Rec->getInner().getStringVal().assign(buf);
|
||||
|
||||
for (l=0; l<2; l++) {
|
||||
std::vector<std::vector<std::string> >* ppVec =
|
||||
new std::vector<std::vector<std::string> >();
|
||||
for (j=0; j<2; j++) {
|
||||
std::vector< std::string >* pVec =
|
||||
new std::vector< std::string >();
|
||||
for (k=0; k<3; k++) {
|
||||
sprintf(buf, "THis is a nested string: (%d: %d: %d)", l, j, k);
|
||||
std::string* s = new std::string((const char*)buf);
|
||||
pVec->push_back(*s);
|
||||
}
|
||||
}
|
||||
ps1Rec->getStrvec().push_back(*ppVec);
|
||||
}
|
||||
|
||||
ps1Rec->setI1(100+i);
|
||||
|
||||
ps1Rec->getMap1()[23] = "23";
|
||||
ps1Rec->getMap1()[11] = "11";
|
||||
|
||||
std::map<int32_t, int64_t>* m1 = new std::map<int32_t, int64_t>();
|
||||
std::map<int32_t, int64_t>* m2 = new std::map<int32_t, int64_t>();
|
||||
(*m1)[5] = 5;
|
||||
(*m1)[10] = 10;
|
||||
(*m2)[15] = 15;
|
||||
(*m2)[20] = 20;
|
||||
ps1Rec->getMvec1().push_back(*m1);
|
||||
ps1Rec->getMvec1().push_back(*m2);
|
||||
ps1Rec->getMvec2().push_back(*m1);
|
||||
|
||||
recsWrite.push_back(ps1Rec);
|
||||
}
|
||||
|
||||
// write out to file
|
||||
for (unsigned int i=0; i<recsWrite.size(); i++) {
|
||||
writer.write(*(recsWrite[i]));
|
||||
}
|
||||
ostream.close();
|
||||
// write out rti info
|
||||
writerRti.write(org::apache::hadoop::record::test::RecRecordOld::getTypeInfo());
|
||||
ortistream.close();
|
||||
|
||||
// read
|
||||
hadoop::FileInStream istream;
|
||||
istream.open("/tmp/hadooptmp.dat");
|
||||
hadoop::RecordReader reader(istream, hadoop::kBinary);
|
||||
hadoop::FileInStream irtistream;
|
||||
irtistream.open("/tmp/hadooprti.dat");
|
||||
hadoop::RecordReader readerRti(irtistream, hadoop::kBinary);
|
||||
hadoop::RecordTypeInfo rti;
|
||||
readerRti.read(rti);
|
||||
irtistream.close();
|
||||
org::apache::hadoop::record::test::RecRecordNew::setTypeFilter(rti);
|
||||
|
||||
// read records
|
||||
std::vector<org::apache::hadoop::record::test::RecRecordNew*> recsRead;
|
||||
for (unsigned int i=0; i<recsWrite.size(); i++) {
|
||||
org::apache::hadoop::record::test::RecRecordNew* ps2Rec =
|
||||
new org::apache::hadoop::record::test::RecRecordNew();
|
||||
reader.read(*ps2Rec);
|
||||
recsRead.push_back(ps2Rec);
|
||||
}
|
||||
istream.close();
|
||||
|
||||
// compare
|
||||
bool pass = true;
|
||||
for (unsigned int i=0; i<recsRead.size(); i++) {
|
||||
org::apache::hadoop::record::test::RecRecordNew* ps2In = recsRead[i];
|
||||
org::apache::hadoop::record::test::RecRecordOld* ps1Out = recsWrite[i];
|
||||
|
||||
if (!ps2In->getName2().empty()) {
|
||||
printf("Error in s2: name2\n");
|
||||
pass = false;
|
||||
}
|
||||
|
||||
if (!(ps2In->getInner() == ps1Out->getInner())) {
|
||||
printf("error in s2: s1 struct\n");
|
||||
pass = false;
|
||||
}
|
||||
|
||||
if (0 != ps2In->getIvec().size()) {
|
||||
printf("error in s2: ivec\n");
|
||||
pass = false;
|
||||
}
|
||||
|
||||
if (0 != ps2In->getSvec().size()) {
|
||||
printf("error in s2: svec\n");
|
||||
pass = false;
|
||||
}
|
||||
|
||||
for (unsigned int j=0; j<ps2In->getStrvec().size(); j++) {
|
||||
::std::vector< ::std::vector< ::std::string > >& ss2Vec = ps2In->getStrvec()[j];
|
||||
::std::vector< ::std::vector< ::std::string > >& ss1Vec = ps1Out->getStrvec()[j];
|
||||
for (unsigned int k=0; k<ss2Vec.size(); k++) {
|
||||
::std::vector< ::std::string >& s2Vec = ss2Vec[k];
|
||||
::std::vector< ::std::string >& s1Vec = ss1Vec[k];
|
||||
for (unsigned int l=0; l<s2Vec.size(); l++) {
|
||||
if (s2Vec[l] != s1Vec[l]) {
|
||||
printf("Error in s2: s2Vec\n");
|
||||
pass = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 != ps2In->getMap1().size()) {
|
||||
printf("Error in s2: map1\n");
|
||||
pass = false;
|
||||
}
|
||||
|
||||
for (unsigned int j=0; j<ps2In->getMvec2().size(); j++) {
|
||||
if (ps2In->getMvec2()[j] != ps1Out->getMvec2()[j]) {
|
||||
printf("Error in s2: mvec2\n");
|
||||
pass = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pass)
|
||||
printf("Versioning test passed.\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TEST_HH_
#define TEST_HH_

#include "recordio.hh"
#include "filestream.hh"
#include "test.jr.hh"

#endif /*TEST_HH_*/
@ -1,63 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

module org.apache.hadoop.record.test {
  class RecRecord0 {
    ustring StringVal;
  }

  class RecRecord1 {
    boolean BoolVal;
    byte ByteVal;
    int IntVal;
    long LongVal;
    float FloatVal;
    double DoubleVal;
    ustring StringVal;
    buffer BufferVal;
    vector<ustring> VectorVal;
    map<ustring, ustring> MapVal;
  }

  class RecRecordOld {
    ustring name;
    vector<long> ivec;
    vector<vector<RecRecord0>> svec;
    RecRecord0 inner;
    vector<vector<vector<ustring>>> strvec;
    float i1;
    map<byte, ustring> map1;
    vector<map<int, long>> mvec1;
    vector<map<int, long>> mvec2;
  }

  /* RecRecordNew is a lot like RecRecordOld. Helps test for versioning. */
  class RecRecordNew {
    ustring name2;
    RecRecord0 inner;
    vector<int> ivec;
    vector<vector<int>> svec;
    vector<vector<vector<ustring>>> strvec;
    int i1;
    map<long, ustring> map1;
    vector<map<int, long>> mvec2;
  }

}
@ -1,71 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "test.hh"

int main()
{
  org::apache::hadoop::record::test::RecRecord1 r1;
  org::apache::hadoop::record::test::RecRecord1 r2;
  r1.setBoolVal(true);
  r1.setByteVal((int8_t)0x66);
  r1.setFloatVal(3.145);
  r1.setDoubleVal(1.5234);
  r1.setIntVal(4567);
  r1.setLongVal(0x5a5a5a5a5a5aLL);
  std::string& s = r1.getStringVal();
  s = "random text";
  {
    hadoop::FileInStream istream;
    istream.open("/tmp/hadooptemp.dat");
    hadoop::RecordReader reader(istream, hadoop::kBinary);
    reader.read(r2);
    if (r1 == r2) {
      printf("Binary archive test passed.\n");
    } else {
      printf("Binary archive test failed.\n");
    }
    istream.close();
  }
  {
    hadoop::FileInStream istream;
    istream.open("/tmp/hadooptemp.txt");
    hadoop::RecordReader reader(istream, hadoop::kCSV);
    reader.read(r2);
    if (r1 == r2) {
      printf("CSV archive test passed.\n");
    } else {
      printf("CSV archive test failed.\n");
    }
    istream.close();
  }
  {
    hadoop::FileInStream istream;
    istream.open("/tmp/hadooptemp.xml");
    hadoop::RecordReader reader(istream, hadoop::kXML);
    reader.read(r2);
    if (r1 == r2) {
      printf("XML archive test passed.\n");
    } else {
      printf("XML archive test failed.\n");
    }
    istream.close();
  }
  return 0;
}
@ -1,26 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TEST_HH_
#define TEST_HH_

#include "recordio.hh"
#include "filestream.hh"
#include "test.jr.hh"

#endif /*TEST_HH_*/
@ -1,274 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "typeIDs.hh"

using namespace hadoop;

void TypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
  a_.serialize(typeVal, tag);
}

bool TypeID::operator==(const TypeID& peer_) const
{
  return (this->typeVal == peer_.typeVal);
}

void TypeID::print(int space) const
{
  for (int i=0; i<space; i++) {
    printf(" ");
  }
  printf("typeID(%lx) = %d\n", (long)this, typeVal);
}


/*StructTypeID::StructTypeID(const char *p): TypeID(RIOTYPE_STRUCT)
{
  pName = new std::string(p);
}

StructTypeID::StructTypeID(std::string* p): TypeID(RIOTYPE_STRUCT)
{
  this->pName = p;
}*/

StructTypeID::StructTypeID(const std::vector<FieldTypeInfo*>& vec) :
  TypeID(RIOTYPE_STRUCT)
{
  // we need to copy object clones into our own vector
  for (unsigned int i=0; i<vec.size(); i++) {
    typeInfos.push_back(vec[i]->clone());
  }
}

/*StructTypeID::StructTypeID(const StructTypeID& ti) :
  TypeID(RIOTYPE_STRUCT)
{
  // we need to copy object clones into our own vector
  for (unsigned int i=0; i<ti.typeInfos.size(); i++) {
    typeInfos.push_back(ti.typeInfos[i]->clone());
  }
} */

StructTypeID::~StructTypeID()
{
  for (unsigned int i=0; i<typeInfos.size(); i++) {
    delete typeInfos[i];
  }
}

void StructTypeID::add(FieldTypeInfo *pti)
{
  typeInfos.push_back(pti);
}

// return the StructTypeID, if any, of the given field
StructTypeID* StructTypeID::findStruct(const char *pStructName)
{
  // walk through the list, searching. Not the most efficient way, but this
  // is intended to be used rarely, so we keep it simple.
  // As an optimization, we can keep a hashmap of record name to its RTI, for later.
  for (unsigned int i=0; i<typeInfos.size(); i++) {
    if ((0 == typeInfos[i]->getFieldID()->compare(pStructName)) &&
        (typeInfos[i]->getTypeID()->getTypeVal()==RIOTYPE_STRUCT)) {
      return (StructTypeID*)(typeInfos[i]->getTypeID()->clone());
    }
  }
  return NULL;
}

void StructTypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
  a_.serialize(typeVal, tag);
  serializeRest(a_, tag);
}

/*
 * Writes rest of the struct (excluding type value).
 * As an optimization, this method is directly called by RTI
 * for the top level record so that we don't write out the byte
 * indicating that this is a struct (since top level records are
 * always structs).
 */
void StructTypeID::serializeRest(::hadoop::OArchive& a_, const char* tag) const
{
  a_.serialize((int32_t)typeInfos.size(), tag);
  for (unsigned int i=0; i<typeInfos.size(); i++) {
    typeInfos[i]->serialize(a_, tag);
  }
}


/*
 * deserialize ourselves. Called by RTI.
 */
void StructTypeID::deserialize(::hadoop::IArchive& a_, const char* tag)
{
  // number of elements
  int numElems;
  a_.deserialize(numElems, tag);
  for (int i=0; i<numElems; i++) {
    typeInfos.push_back(genericReadTypeInfo(a_, tag));
  }
}

// generic reader: reads the next TypeInfo object from stream and returns it
FieldTypeInfo* StructTypeID::genericReadTypeInfo(::hadoop::IArchive& a_, const char* tag)
{
  // read name of field
  std::string* pName = new std::string();
  a_.deserialize(*pName, tag);
  TypeID* pti = genericReadTypeID(a_, tag);
  return new FieldTypeInfo(pName, pti);
}

// generic reader: reads the next TypeID object from stream and returns it
TypeID* StructTypeID::genericReadTypeID(::hadoop::IArchive& a_, const char* tag)
{
  int8_t typeVal;
  a_.deserialize(typeVal, tag);
  switch(typeVal) {
  case RIOTYPE_BOOL:
  case RIOTYPE_BUFFER:
  case RIOTYPE_BYTE:
  case RIOTYPE_DOUBLE:
  case RIOTYPE_FLOAT:
  case RIOTYPE_INT:
  case RIOTYPE_LONG:
  case RIOTYPE_STRING:
    return new TypeID(typeVal);
  case RIOTYPE_STRUCT:
    {
      StructTypeID* pstID = new StructTypeID();
      int numElems;
      a_.deserialize(numElems, tag);
      for (int i=0; i<numElems; i++) {
        pstID->add(genericReadTypeInfo(a_, tag));
      }
      return pstID;
    }
  case RIOTYPE_VECTOR:
    {
      TypeID* pti = genericReadTypeID(a_, tag);
      return new VectorTypeID(pti);
    }
  case RIOTYPE_MAP:
    {
      TypeID* ptiKey = genericReadTypeID(a_, tag);
      TypeID* ptiValue = genericReadTypeID(a_, tag);
      return new MapTypeID(ptiKey, ptiValue);
    }
  default:
    // shouldn't be here
    return NULL;
  }
}

void StructTypeID::print(int space) const
{
  TypeID::print(space);
  for (int i=0; i<space; i++) {
    printf(" ");
  }
  printf("StructTypeInfo(%lx): \n", (long)&typeInfos);
  for (unsigned int i=0; i<typeInfos.size(); i++) {
    typeInfos[i]->print(space+2);
  }
}


VectorTypeID::~VectorTypeID()
{
  delete ptiElement;
}

VectorTypeID::VectorTypeID(const VectorTypeID& ti): TypeID(RIOTYPE_VECTOR)
{
  ptiElement = ti.ptiElement->clone();
}

void VectorTypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
  a_.serialize(typeVal, tag);
  ptiElement->serialize(a_, tag);
}

bool VectorTypeID::operator==(const TypeID& peer_) const
{
  if (typeVal != peer_.getTypeVal()) {
    return false;
  }
  // this must be a vector type id
  return (*ptiElement) == (*((VectorTypeID&)peer_).ptiElement);
}

void VectorTypeID::print(int space) const
{
  TypeID::print(space);
  for (int i=0; i<space; i++) {
    printf(" ");
  }
  printf("VectorTypeInfo(%lx): \n", (long)this);
  ptiElement->print(space+2);
}


MapTypeID::~MapTypeID()
{
  delete ptiKey;
  delete ptiValue;
}

MapTypeID::MapTypeID(const MapTypeID& ti): TypeID(RIOTYPE_MAP)
{
  ptiKey = ti.ptiKey->clone();
  ptiValue = ti.ptiValue->clone();
}

void MapTypeID::serialize(::hadoop::OArchive& a_, const char* tag) const
{
  a_.serialize(typeVal, tag);
  ptiKey->serialize(a_, tag);
  ptiValue->serialize(a_, tag);
}

bool MapTypeID::operator==(const TypeID& peer_) const
{
  if (typeVal != peer_.getTypeVal()) {
    return false;
  }
  // this must be a map type id
  MapTypeID& mti = (MapTypeID&) peer_;
  if (!(*ptiKey == *(mti.ptiKey))) {
    return false;
  }
  return ((*ptiValue == *(mti.ptiValue)));
}

void MapTypeID::print(int space) const
{
  TypeID::print(space);
  for (int i=0; i<space; i++) {
    printf(" ");
  }
  printf("MapTypeInfo(%lx): \n", (long)this);
  ptiKey->print(space+2);
  ptiValue->print(space+2);
}
@ -1,169 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TYPEIDS_HH_
#define TYPEIDS_HH_

#include "recordio.hh"
#include "fieldTypeInfo.hh"

namespace hadoop {

class FieldTypeInfo;

/*
 * enum of types. We assign values to individual bytes, rather
 * than use enums, because we want to keep the values consistent with
 * the Java code, so we need to control the values.
 */
const int8_t RIOTYPE_BOOL = 1;
const int8_t RIOTYPE_BUFFER = 2;
const int8_t RIOTYPE_BYTE = 3;
const int8_t RIOTYPE_DOUBLE = 4;
const int8_t RIOTYPE_FLOAT = 5;
const int8_t RIOTYPE_INT = 6;
const int8_t RIOTYPE_LONG = 7;
const int8_t RIOTYPE_MAP = 8;
const int8_t RIOTYPE_STRING = 9;
const int8_t RIOTYPE_STRUCT = 10;
const int8_t RIOTYPE_VECTOR = 11;


/*
 * Represents typeID for basic types.
 * Serializes just the single int8_t.
 */
class TypeID {

public:

  TypeID(int8_t typeVal) {this->typeVal = typeVal;}
  TypeID(const TypeID& t) {this->typeVal = t.typeVal;}
  virtual ~TypeID() {}

  int8_t getTypeVal() const {return typeVal;}
  virtual void serialize(::hadoop::OArchive& a_, const char* tag) const;

  virtual bool operator==(const TypeID& peer_) const;
  virtual TypeID* clone() const {return new TypeID(*this);}

  virtual void print(int space=0) const;

protected:
  int8_t typeVal;
};


/*
 * no predefined TypeID objects, since memory management becomes difficult.
 * If some TypeID objects are consts and others are new-ed, it becomes hard to
 * destroy const objects without reference counting.
 */
/*const TypeID TID_BoolTypeID(RIOTYPE_BOOL);
const TypeID TID_BufferTypeID(RIOTYPE_BUFFER);
const TypeID TID_ByteTypeID(RIOTYPE_BYTE);
const TypeID TID_DoubleTypeID(RIOTYPE_DOUBLE);
const TypeID TID_FloatTypeID(RIOTYPE_FLOAT);
const TypeID TID_IntTypeID(RIOTYPE_INT);
const TypeID TID_LongTypeID(RIOTYPE_LONG);
const TypeID TID_StringTypeID(RIOTYPE_STRING);*/


/*
 * TypeID for structures
 */
class StructTypeID : public TypeID {

private:
  // note: we own the memory mgmt of TypeInfo objects stored in the vector
  std::vector<FieldTypeInfo*> typeInfos;
  FieldTypeInfo* genericReadTypeInfo(::hadoop::IArchive& a_, const char* tag);
  TypeID* genericReadTypeID(::hadoop::IArchive& a_, const char* tag);

public:
  /*StructTypeID(const char* p);
  StructTypeID(std::string* p);
  StructTypeID(const StructTypeID& ti);*/
  StructTypeID(): TypeID(RIOTYPE_STRUCT) {};
  StructTypeID(const std::vector<FieldTypeInfo*>& vec);
  virtual ~StructTypeID();

  void add(FieldTypeInfo *pti);
  std::vector<FieldTypeInfo*>& getFieldTypeInfos() {return typeInfos;}
  StructTypeID* findStruct(const char *pStructName);
  void serialize(::hadoop::OArchive& a_, const char* tag) const;
  void serializeRest(::hadoop::OArchive& a_, const char* tag) const;
  void deserialize(::hadoop::IArchive& a_, const char* tag);
  virtual TypeID* clone() const {return new StructTypeID(*this);}

  virtual void print(int space=0) const;

};


/*
 * TypeID for vectors
 */
class VectorTypeID : public TypeID {

private:
  // ptiElement's memory mgmt is owned by class
  TypeID* ptiElement;

public:
  VectorTypeID(TypeID* ptiElement): TypeID(RIOTYPE_VECTOR), ptiElement(ptiElement) {}
  VectorTypeID(const VectorTypeID& ti);
  virtual ~VectorTypeID();

  const TypeID* getElementTypeID() {return ptiElement;}
  virtual TypeID* clone() const {return new VectorTypeID(*this);}
  void serialize(::hadoop::OArchive& a_, const char* tag) const;
  virtual bool operator==(const TypeID& peer_) const;

  virtual void print(int space=0) const;
};

/*
 * TypeID for maps
 */
class MapTypeID : public TypeID {

private:
  // ptiKey and ptiValue's memory mgmt is owned by class
  TypeID* ptiKey;
  TypeID* ptiValue;

public:
  MapTypeID(TypeID* ptiKey, TypeID* ptiValue):
    TypeID(RIOTYPE_MAP), ptiKey(ptiKey), ptiValue(ptiValue) {}
  MapTypeID(const MapTypeID& ti);
  virtual ~MapTypeID();

  const TypeID* getKeyTypeID() {return ptiKey;}
  const TypeID* getValueTypeID() {return ptiValue;}
  virtual TypeID* clone() const {return new MapTypeID(*this);}
  void serialize(::hadoop::OArchive& a_, const char* tag) const;
  virtual bool operator==(const TypeID& peer_) const;

  virtual void print(int space=0) const;
};

}
#endif // TYPEIDS_HH_
@ -1,69 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "typeInfo.hh"

using namespace hadoop;

TypeInfo::~TypeInfo()
{
  delete pFieldID;
  delete pTypeID;
}

/*TypeInfo& TypeInfo::operator =(const TypeInfo& ti) {
  pFieldID = ti.pFieldID;
  pTypeID = ti.pTypeID;
  return *this;
}*/

TypeInfo::TypeInfo(const TypeInfo& ti)
{
  pFieldID = new std::string(*ti.pFieldID);
  pTypeID = ti.pTypeID->clone();
}


void TypeInfo::serialize(::hadoop::OArchive& a_, const char* tag) const
{
  a_.serialize(*pFieldID, tag);
  pTypeID->serialize(a_, tag);
}

bool TypeInfo::operator==(const TypeInfo& peer_) const
{
  // first check if fieldID matches
  if (0 != pFieldID->compare(*(peer_.pFieldID))) {
    return false;
  }
  // now see if typeID matches
  return (*pTypeID == *(peer_.pTypeID));
}

void TypeInfo::print(int space) const
{
  for (int i=0; i<space; i++) {
    printf(" ");
  }
  printf("TypeInfo(%lx):\n", (long)this);
  for (int i=0; i<space+2; i++) {
    printf(" ");
  }
  printf("field = \"%s\"\n", pFieldID->c_str());
  pTypeID->print(space+2);
}
@ -1,56 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TYPEINFO_HH_
#define TYPEINFO_HH_

#include "recordio.hh"
#include "typeIDs.hh"

namespace hadoop {

class TypeID;

class TypeInfo {

private:
  // we own memory mgmt of these vars
  const std::string* pFieldID;
  const TypeID* pTypeID;

public:
  TypeInfo(const std::string* pFieldID, const TypeID* pTypeID) :
    pFieldID(pFieldID), pTypeID(pTypeID) {}
  TypeInfo(const TypeInfo& ti);
  virtual ~TypeInfo();

  const TypeID* getTypeID() const {return pTypeID;}
  const std::string* getFieldID() const {return pFieldID;}
  void serialize(::hadoop::OArchive& a_, const char* tag) const;
  bool operator==(const TypeInfo& peer_) const;
  TypeInfo* clone() const {return new TypeInfo(*this);}

  //TypeInfo& operator =(const TypeInfo& ti);
  void print(int space=0) const;

};

}

#endif // TYPEINFO_HH_
@ -1,109 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "utils.hh"
#include "recordTypeInfo.hh"

using namespace hadoop;

void Utils::skip(IArchive& a, const char* tag, const TypeID& typeID)
{
  bool b;
  size_t len=0;
  ::std::string str;
  int8_t bt;
  double d;
  float f;
  int32_t i;
  int64_t l;

  switch(typeID.getTypeVal()) {
  case RIOTYPE_BOOL:
    a.deserialize(b, tag);
    break;
  case RIOTYPE_BUFFER:
    a.deserialize(str, len, tag);
    break;
  case RIOTYPE_BYTE:
    a.deserialize(bt, tag);
    break;
  case RIOTYPE_DOUBLE:
    a.deserialize(d, tag);
    break;
  case RIOTYPE_FLOAT:
    a.deserialize(f, tag);
    break;
  case RIOTYPE_INT:
    a.deserialize(i, tag);
    break;
  case RIOTYPE_LONG:
    a.deserialize(l, tag);
    break;
  case RIOTYPE_MAP:
    {
      // since we don't know the key, value types,
      // we need to deserialize in a generic manner
      Index* idx = a.startMap(tag);
      MapTypeID& mtID = (MapTypeID&) typeID;
      while (!idx->done()) {
        skip(a, tag, *(mtID.getKeyTypeID()));
        skip(a, tag, *(mtID.getValueTypeID()));
        idx->incr();
      }
      a.endMap(idx, tag);
    }
    break;
  case RIOTYPE_STRING:
    a.deserialize(str, tag);
    break;
  case RIOTYPE_STRUCT:
    {
      // since we don't know the key, value types,
      // we need to deserialize in a generic manner
      // we need to pass a record in, though it's never used
      RecordTypeInfo rec;
      a.startRecord(rec, tag);
      StructTypeID& stID = (StructTypeID&) typeID;
      std::vector<FieldTypeInfo*>& typeInfos = stID.getFieldTypeInfos();
      for (unsigned int i=0; i<typeInfos.size(); i++) {
        skip(a, tag, *(typeInfos[i]->getTypeID()));
      }
      a.endRecord(rec, tag);
    }
    break;
  case RIOTYPE_VECTOR:
    {
      // since we don't know the key, value types,
      // we need to deserialize in a generic manner
      Index* idx = a.startVector(tag);
      VectorTypeID& vtID = (VectorTypeID&) typeID;
      while (!idx->done()) {
        skip(a, tag, *(vtID.getElementTypeID()));
        idx->incr();
      }
      a.endVector(idx, tag);
    }
    break;
  default:
    // shouldn't be here
    throw new IOException("Unknown typeID when skipping bytes");
    break;
  };

}
@ -1,50 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef UTILS_HH_
#define UTILS_HH_

#include "recordio.hh"
#include "typeIDs.hh"


namespace hadoop {

/**
 * Various utility functions for the Hadoop record I/O platform.
 */

class Utils {

private:
  /** Cannot create a new instance of Utils */
  Utils() {};

public:

  /**
   * read/skip bytes from stream based on a type
   */
  static void skip(IArchive& a, const char* tag, const TypeID& typeID);

};


}
#endif // UTILS_HH_
@ -1,431 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "xmlarchive.hh"
|
||||
#include <stdlib.h>
|
||||
|
||||
using namespace hadoop;
|
||||
|
||||
void hadoop::MySAXHandler::startElement(const XMLCh* const name, AttributeList& attr)
|
||||
{
|
||||
charsValid = false;
|
||||
char* qname = XMLString::transcode(name);
|
||||
if(std::string("boolean") == qname ||
|
||||
std::string("ex:i1") == qname ||
|
||||
std::string("i4") == qname ||
|
||||
std::string("int") == qname ||
|
||||
std::string("ex:i8") == qname ||
|
||||
std::string("ex:float") == qname ||
|
||||
std::string("double") == qname ||
|
||||
std::string("string") == qname) {
|
||||
std::string s(qname);
|
||||
Value v(s);
|
||||
vlist.push_back(v);
|
||||
charsValid = true;
|
||||
} else if(std::string("struct") == qname ||
|
||||
std::string("array") == qname) {
|
||||
std::string s(qname);
|
||||
Value v(s);
|
||||
vlist.push_back(v);
|
||||
}
|
||||
XMLString::release(&qname);
|
||||
}
|
||||
|
||||
void hadoop::MySAXHandler::endElement(const XMLCh* const name)
|
||||
{
|
||||
charsValid = false;
|
||||
char* qname = XMLString::transcode(name);
|
||||
if(std::string("struct") == qname ||
|
||||
std::string("array") == qname) {
|
||||
std::string s = "/";
|
||||
Value v(s + qname);
|
||||
vlist.push_back(v);
|
||||
}
|
||||
XMLString::release(&qname);
|
||||
}
|
||||
|
||||
void hadoop::MySAXHandler::characters(const XMLCh* const buf, const unsigned int len)
|
||||
{
|
||||
if (charsValid) {
|
||||
char *cstr = XMLString::transcode(buf);
|
||||
Value& v = vlist.back();
|
||||
v.addChars(cstr, strlen(cstr));
|
||||
XMLString::release(&cstr);
|
||||
}
|
||||
}
|
||||
|
||||
static char hexchars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'A', 'B', 'C', 'D', 'E', 'F' };
|
||||
|
||||
static std::string toXMLString(std::string s)
|
||||
{
|
||||
std::string r;
|
||||
size_t len = s.length();
|
||||
size_t i;
|
||||
const char* data = s.data();
|
||||
for (i=0; i<len; i++, data++) {
|
||||
char ch = *data;
|
||||
if (ch == '<') {
|
||||
r.append("<");
|
||||
} else if (ch == '&') {
|
||||
r.append("&");
|
||||
} else if (ch == '%') {
|
||||
r.append("%0025");
|
||||
} else if (ch < 0x20) {
|
||||
uint8_t* pb = (uint8_t*) &ch;
|
||||
char ch1 = hexchars[*pb/16];
|
||||
char ch2 = hexchars[*pb%16];
|
||||
r.push_back('%');
|
||||
r.push_back('0');
|
||||
r.push_back('0');
|
||||
r.push_back(ch1);
|
||||
r.push_back(ch2);
|
||||
} else {
|
||||
r.push_back(ch);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static uint8_t h2b(char ch) {
|
||||
if ((ch >= '0') || (ch <= '9')) {
|
||||
return ch - '0';
|
||||
}
|
||||
if ((ch >= 'a') || (ch <= 'f')) {
|
||||
return ch - 'a' + 10;
|
||||
}
|
||||
if ((ch >= 'A') || (ch <= 'F')) {
|
||||
return ch - 'A' + 10;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static std::string fromXMLString(std::string s)
|
||||
{
|
||||
std::string r;
|
||||
size_t len = s.length();
|
||||
size_t i;
|
||||
uint8_t* pb = (uint8_t*) s.data();
|
||||
for (i = 0; i < len; i++) {
|
||||
uint8_t b = *pb;
|
||||
if (b == '%') {
|
||||
char *pc = (char*) (pb+1);
|
||||
// ignore the first two characters, which are always '0'
|
||||
*pc++;
|
||||
*pc++;;
|
||||
char ch1 = *pc++;
|
||||
char ch2 = *pc++;
|
||||
pb += 4;
|
||||
uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
|
||||
pc = (char*) &cnv;
|
||||
r.push_back(*pc);
|
||||
} else {
|
||||
char *pc = (char*) pb;
|
||||
r.push_back(*pc);
|
||||
}
|
||||
pb++;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static std::string toXMLBuffer(std::string s, size_t len)
|
||||
{
|
||||
std::string r;
|
||||
size_t i;
|
||||
uint8_t* data = (uint8_t*) s.data();
|
||||
for (i=0; i<len; i++, data++) {
|
||||
uint8_t b = *data;
|
||||
char ch1 = hexchars[b/16];
|
||||
char ch2 = hexchars[b%16];
|
||||
r.push_back(ch1);
|
||||
r.push_back(ch2);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static std::string fromXMLBuffer(std::string s, size_t& len)
|
||||
{
|
||||
len = s.length();
|
||||
if (len%2 == 1) { // len is guaranteed to be even
|
||||
throw new IOException("Errror deserializing buffer.");
|
||||
}
|
||||
len = len >> 1;
|
||||
std::string t;
|
||||
for (size_t idx = 0; idx < len; idx++) {
|
||||
char buf[3];
|
||||
buf[0] = s[2*idx];
|
||||
buf[1] = s[2*idx+1];
|
||||
buf[2] = '\0';
|
||||
int i;
|
||||
if (1 != sscanf(buf, "%2x", &i)) {
|
||||
throw new IOException("Errror deserializing buffer.");
|
||||
}
|
||||
t.push_back((char) i);
|
||||
}
|
||||
len = t.length();
|
||||
return t;
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(int8_t& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "ex:i1") {
|
||||
throw new IOException("Error deserializing byte");
|
||||
}
|
||||
t = (int8_t) strtol(v.getValue().c_str(), NULL, 10);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(bool& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "boolean") {
|
||||
throw new IOException("Error deserializing boolean");
|
||||
}
|
||||
t = (v.getValue() == "1");
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(int32_t& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "i4" && v.getType() != "int") {
|
||||
throw new IOException("Error deserializing int");
|
||||
}
|
||||
t = (int32_t) strtol(v.getValue().c_str(), NULL, 10);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(int64_t& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "ex:i8") {
|
||||
throw new IOException("Error deserializing long");
|
||||
}
|
||||
t = strtoll(v.getValue().c_str(), NULL, 10);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(float& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "ex:float") {
|
||||
throw new IOException("Error deserializing float");
|
||||
}
|
||||
t = strtof(v.getValue().c_str(), NULL);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(double& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "double") {
|
||||
throw new IOException("Error deserializing double");
|
||||
}
|
||||
t = strtod(v.getValue().c_str(), NULL);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(std::string& t, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "string") {
|
||||
throw new IOException("Error deserializing string");
|
||||
}
|
||||
t = fromXMLString(v.getValue());
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::deserialize(std::string& t, size_t& len, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "string") {
|
||||
throw new IOException("Error deserializing buffer");
|
||||
}
|
||||
t = fromXMLBuffer(v.getValue(), len);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::startRecord(Record& s, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "struct") {
|
||||
throw new IOException("Error deserializing record");
|
||||
}
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::endRecord(Record& s, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "/struct") {
|
||||
throw new IOException("Error deserializing record");
|
||||
}
|
||||
}
|
||||
|
||||
Index* hadoop::IXmlArchive::startVector(const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "array") {
|
||||
throw new IOException("Error deserializing vector");
|
||||
}
|
||||
return new XmlIndex(vlist, vidx);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::endVector(Index* idx, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "/array") {
|
||||
throw new IOException("Error deserializing vector");
|
||||
}
|
||||
delete idx;
|
||||
}
|
||||
|
||||
Index* hadoop::IXmlArchive::startMap(const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "array") {
|
||||
throw new IOException("Error deserializing map");
|
||||
}
|
||||
return new XmlIndex(vlist, vidx);
|
||||
}
|
||||
|
||||
void hadoop::IXmlArchive::endMap(Index* idx, const char* tag)
|
||||
{
|
||||
Value v = next();
|
||||
if (v.getType() != "/array") {
|
||||
throw new IOException("Error deserializing map");
|
||||
}
|
||||
delete idx;
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(int8_t t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<ex:i1>");
|
||||
char sval[5];
|
||||
sprintf(sval, "%d", t);
|
||||
p(sval);
|
||||
p("</ex:i1>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(bool t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<boolean>");
|
||||
p(t ? "1" : "0");
|
||||
p("</boolean>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(int32_t t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<i4>");
|
||||
char sval[128];
|
||||
sprintf(sval, "%d", t);
|
||||
p(sval);
|
||||
p("</i4>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(int64_t t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<ex:i8>");
|
||||
char sval[128];
|
||||
sprintf(sval, "%lld", t);
|
||||
p(sval);
|
||||
p("</ex:i8>");
|
||||
printEndEnvelope(tag);
|
||||
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(float t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<ex:float>");
|
||||
char sval[128];
|
||||
sprintf(sval, "%f", t);
|
||||
p(sval);
|
||||
p("</ex:float>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(double t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<double>");
|
||||
char sval[128];
|
||||
sprintf(sval, "%lf", t);
|
||||
p(sval);
|
||||
p("</double>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(const std::string& t, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<string>");
|
||||
std::string s = toXMLString(t);
|
||||
stream.write(s.data(), s.length());
|
||||
p("</string>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::serialize(const std::string& t, size_t len, const char* tag)
|
||||
{
|
||||
printBeginEnvelope(tag);
|
||||
p("<string>");
|
||||
std::string s = toXMLBuffer(t, len);
|
||||
stream.write(s.data(), s.length());
|
||||
p("</string>");
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::startRecord(const Record& s, const char* tag)
|
||||
{
|
||||
insideRecord(tag);
|
||||
p("<struct>\n");
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::endRecord(const Record& s, const char* tag)
|
||||
{
|
||||
p("</struct>\n");
|
||||
outsideRecord(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::startVector(size_t len, const char* tag)
|
||||
{
|
||||
insideVector(tag);
|
||||
p("<array>\n");
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::endVector(size_t len, const char* tag)
|
||||
{
|
||||
p("</array>\n");
|
||||
outsideVector(tag);
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::startMap(size_t len, const char* tag)
|
||||
{
|
||||
insideMap(tag);
|
||||
p("<array>\n");
|
||||
}
|
||||
|
||||
void hadoop::OXmlArchive::endMap(size_t len, const char* tag)
|
||||
{
|
||||
p("</array>\n");
|
||||
outsideMap(tag);
|
||||
}
|
||||
|
||||
hadoop::OXmlArchive::~OXmlArchive()
|
||||
{
|
||||
}
|
|
@ -1,265 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef XMLARCHIVE_HH_
|
||||
#define XMLARCHIVE_HH_
|
||||
|
||||
#include <xercesc/parsers/SAXParser.hpp>
|
||||
#include <xercesc/util/PlatformUtils.hpp>
|
||||
#include <xercesc/util/BinInputStream.hpp>
|
||||
#include <xercesc/sax/HandlerBase.hpp>
|
||||
#include <xercesc/sax/InputSource.hpp>
|
||||
#include "recordio.hh"
|
||||
|
||||
XERCES_CPP_NAMESPACE_USE
|
||||
|
||||
namespace hadoop {
|
||||
|
||||
class Value {
|
||||
private:
|
||||
std::string type;
|
||||
std::string value;
|
||||
public:
|
||||
Value(const std::string& t) { type = t; }
|
||||
void addChars(const char* buf, unsigned int len) {
|
||||
value += std::string(buf, len);
|
||||
}
|
||||
const std::string& getType() const { return type; }
|
||||
const std::string& getValue() const { return value; }
|
||||
};
|
||||
|
||||
class MySAXHandler : public HandlerBase {
|
||||
private:
|
||||
std::vector<Value>& vlist;
|
||||
bool charsValid;
|
||||
public:
|
||||
MySAXHandler(std::vector<Value>& list) : vlist(list) {charsValid = false;}
|
||||
void startElement(const XMLCh* const name, AttributeList& attr);
|
||||
void endElement(const XMLCh* const name);
|
||||
void characters(const XMLCh* const buf, unsigned int len);
|
||||
};
|
||||
|
||||
class XmlIndex : public Index {
|
||||
private:
|
||||
std::vector<Value>& vlist;
|
||||
unsigned int& vidx;
|
||||
public:
|
||||
XmlIndex(std::vector<Value>& list, unsigned int& idx) : vlist(list), vidx(idx) {}
|
||||
bool done() {
|
||||
Value v = vlist[vidx];
|
||||
return (v.getType() == "/array") ? true : false;
|
||||
}
|
||||
void incr() {}
|
||||
~XmlIndex() {}
|
||||
};
|
||||
|
||||
class MyBinInputStream : public BinInputStream {
|
||||
private:
|
||||
InStream& stream;
|
||||
unsigned int pos;
|
||||
public:
|
||||
MyBinInputStream(InStream& s) : stream(s) { pos = 0; }
|
||||
virtual unsigned int curPos() const { return pos; }
|
||||
virtual unsigned int readBytes(XMLByte* const toFill,
|
||||
const unsigned int maxToRead) {
|
||||
ssize_t nread = stream.read(toFill, maxToRead);
|
||||
if (nread < 0) {
|
||||
return 0;
|
||||
} else {
|
||||
pos += nread;
|
||||
return nread;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class MyInputSource : public InputSource {
|
||||
private:
|
||||
InStream& stream;
|
||||
public:
|
||||
MyInputSource(InStream& s) : stream(s) { }
|
||||
virtual BinInputStream* makeStream() const {
|
||||
return new MyBinInputStream(stream);
|
||||
}
|
||||
virtual const XMLCh* getEncoding() const {
|
||||
return XMLString::transcode("UTF-8");
|
||||
}
|
||||
virtual ~MyInputSource() {}
|
||||
};
|
||||
|
||||
class IXmlArchive : public IArchive {
|
||||
private:
|
||||
std::vector<Value> vlist;
|
||||
unsigned int vidx;
|
||||
MySAXHandler *docHandler;
|
||||
SAXParser *parser;
|
||||
MyInputSource* src;
|
||||
Value next() {
|
||||
Value v = vlist[vidx];
|
||||
vidx++;
|
||||
return v;
|
||||
}
|
||||
public:
|
||||
IXmlArchive(InStream& _stream) {
|
||||
vidx = 0;
|
||||
try {
|
||||
XMLPlatformUtils::Initialize();
|
||||
} catch (const XMLException& e) {
|
||||
throw new IOException("Unable to initialize XML Parser.");
|
||||
}
|
||||
parser = new SAXParser();
|
||||
docHandler = new MySAXHandler(vlist);
|
||||
parser->setDocumentHandler(docHandler);
|
||||
src = new MyInputSource(_stream);
|
||||
try {
|
||||
parser->parse(*src);
|
||||
} catch (const XMLException& e) {
|
||||
throw new IOException("Unable to parse XML stream.");
|
||||
} catch (const SAXParseException& e) {
|
||||
throw new IOException("Unable to parse XML stream.");
|
||||
}
|
||||
delete parser;
|
||||
delete docHandler;
|
||||
}
|
||||
virtual void deserialize(int8_t& t, const char* tag);
|
||||
virtual void deserialize(bool& t, const char* tag);
|
||||
virtual void deserialize(int32_t& t, const char* tag);
|
||||
virtual void deserialize(int64_t& t, const char* tag);
|
||||
virtual void deserialize(float& t, const char* tag);
|
||||
virtual void deserialize(double& t, const char* tag);
|
||||
virtual void deserialize(std::string& t, const char* tag);
|
||||
virtual void deserialize(std::string& t, size_t& len, const char* tag);
|
||||
virtual void startRecord(Record& s, const char* tag);
|
||||
virtual void endRecord(Record& s, const char* tag);
|
||||
virtual Index* startVector(const char* tag);
|
||||
virtual void endVector(Index* idx, const char* tag);
|
||||
virtual Index* startMap(const char* tag);
|
||||
virtual void endMap(Index* idx, const char* tag);
|
||||
virtual ~IXmlArchive() {
|
||||
XMLPlatformUtils::Terminate();
|
||||
}
|
||||
};
|
||||
|
||||
class OXmlArchive : public OArchive {
|
||||
private:
|
||||
OutStream& stream;
|
||||
|
||||
std::vector<std::string> cstack;
|
||||
|
||||
void insideRecord(const char* tag) {
|
||||
printBeginEnvelope(tag);
|
||||
cstack.push_back("record");
|
||||
}
|
||||
|
||||
void outsideRecord(const char* tag) {
|
||||
std::string s = cstack.back();
|
||||
cstack.pop_back();
|
||||
if (s != "record") {
|
||||
throw new IOException("Error deserializing record.");
|
||||
}
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void insideVector(const char* tag) {
|
||||
printBeginEnvelope(tag);
|
||||
cstack.push_back("vector");
|
||||
}
|
||||
|
||||
void outsideVector(const char* tag) {
|
||||
std::string s = cstack.back();
|
||||
cstack.pop_back();
|
||||
if (s != "vector") {
|
||||
throw new IOException("Error deserializing vector.");
|
||||
}
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void insideMap(const char* tag) {
|
||||
printBeginEnvelope(tag);
|
||||
cstack.push_back("map");
|
||||
}
|
||||
|
||||
void outsideMap(const char* tag) {
|
||||
std::string s = cstack.back();
|
||||
cstack.pop_back();
|
||||
if (s != "map") {
|
||||
throw new IOException("Error deserializing map.");
|
||||
}
|
||||
printEndEnvelope(tag);
|
||||
}
|
||||
|
||||
void p(const char* cstr) {
|
||||
stream.write(cstr, strlen(cstr));
|
||||
}
|
||||
|
||||
void printBeginEnvelope(const char* tag) {
|
||||
if (cstack.size() != 0) {
|
||||
std::string s = cstack.back();
|
||||
if ("record" == s) {
|
||||
p("<member>\n");
|
||||
p("<name>");
|
||||
p(tag);
|
||||
p("</name>\n");
|
||||
p("<value>");
|
||||
} else if ("vector" == s) {
|
||||
p("<value>");
|
||||
} else if ("map" == s) {
|
||||
p("<value>");
|
||||
}
|
||||
} else {
|
||||
p("<value>");
|
||||
}
|
||||
}
|
||||
|
||||
void printEndEnvelope(const char* tag) {
|
||||
if (cstack.size() != 0) {
|
||||
std::string s = cstack.back();
|
||||
if ("record" == s) {
|
||||
p("</value>\n");
|
||||
p("</member>\n");
|
||||
} else if ("vector" == s) {
|
||||
p("</value>\n");
|
||||
} else if ("map" == s) {
|
||||
p("</value>\n");
|
||||
}
|
||||
} else {
|
||||
p("</value>\n");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
OXmlArchive(OutStream& _stream) : stream(_stream) {}
|
||||
virtual void serialize(int8_t t, const char* tag);
|
||||
virtual void serialize(bool t, const char* tag);
|
||||
virtual void serialize(int32_t t, const char* tag);
|
||||
virtual void serialize(int64_t t, const char* tag);
|
||||
virtual void serialize(float t, const char* tag);
|
||||
virtual void serialize(double t, const char* tag);
|
||||
virtual void serialize(const std::string& t, const char* tag);
|
||||
virtual void serialize(const std::string& t, size_t len, const char* tag);
|
||||
virtual void startRecord(const Record& s, const char* tag);
|
||||
virtual void endRecord(const Record& s, const char* tag);
|
||||
virtual void startVector(size_t len, const char* tag);
|
||||
virtual void endVector(size_t len, const char* tag);
|
||||
virtual void startMap(size_t len, const char* tag);
|
||||
virtual void endMap(size_t len, const char* tag);
|
||||
virtual ~OXmlArchive();
|
||||
};
|
||||
|
||||
}
|
||||
#endif /*XMLARCHIVE_HH_*/
|
|
@ -1,25 +0,0 @@
|
|||
This contribution consists of two components designed to make it easier to find information about lost or corrupt blocks.
|
||||
|
||||
The first is a MapReduce job designed to search for one or more block ids in a set of log files. It is implemented in org.apache.hadoop.blockforensics.BlockSearch. Building this contribution generates a jar file that can be executed using:
|
||||
|
||||
bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir] [comma delimited list of block ids]
|
||||
|
||||
For example, the command:
|
||||
bin/hadoop jar /foo/bar/hadoop-0.1-block_forensics.jar /input/* /output 2343,45245,75823
|
||||
... searches for any of blocks 2343, 45245, or 75823 in any of the files
|
||||
contained in the /input/ directory.
|
||||
|
||||
|
||||
The output will be any line containing one of the provided block ids. While this tool is designed to be used with block ids, it can also be used for general text searching.
|
||||
|
||||
The second component is a standalone Java program that repeatedly queries the namenode at a given interval, looking for corrupt replicas. If it finds a corrupt replica, it launches the above MapReduce job. The syntax is:
|
||||
|
||||
java BlockForensics http://[namenode]:[port]/corrupt_replicas_xml.jsp [sleep time between namenode query for corrupt blocks (in milliseconds)] [mapred jar location] [hdfs input path]
|
||||
|
||||
For example, the command:
|
||||
java BlockForensics http://localhost:50070/corrupt_replicas_xml.jsp 30000
|
||||
/foo/bar/hadoop-0.1-block_forensics.jar /input/*
|
||||
... queries the namenode at localhost:50070 for corrupt replicas every 30
|
||||
seconds and runs /foo/bar/hadoop-0.1-block_forensics.jar if any are found.
|
||||
|
||||
The MapReduce job jar and the BlockForensics class can be found in your build/contrib/block_forensics and build/contrib/block_forensics/classes directories, respectively.
|
|
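A minimal end-to-end sketch of how the two block_forensics components above fit together, assuming the contrib jar has already been built into build/contrib/block_forensics and that HADOOP_PREFIX points at a local Hadoop install; /opt/hadoop, the /logs input path and the /block-search-output directory are illustrative placeholders rather than values from this change, while the namenode address and the 30000 ms interval mirror the README's own example:

  export HADOOP_PREFIX=/opt/hadoop        # assumption: local install location
  cd $HADOOP_PREFIX
  # one-off search for two block ids in logs already copied into HDFS
  bin/hadoop jar build/contrib/block_forensics/hadoop-0.1-block_forensics.jar \
      /logs/* /block-search-output 2343,45245
  # continuous monitoring: poll the namenode every 30000 ms and launch the
  # search job for any newly reported corrupt replicas
  java -cp build/contrib/block_forensics/classes BlockForensics \
      http://localhost:50070/corrupt_replicas_xml.jsp 30000 \
      build/contrib/block_forensics/hadoop-0.1-block_forensics.jar /logs/*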
@ -1,66 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Before you can run these subtargets directly, you need
|
||||
to call at top-level: ant deploy-contrib compile-core-test
|
||||
-->
|
||||
<project name="block_forensics" default="jar">
|
||||
<property name="version" value="0.1"/>
|
||||
<import file="../build-contrib.xml"/>
|
||||
|
||||
<!-- create the list of files to add to the classpath -->
|
||||
<fileset dir="${hadoop.root}/lib" id="class.path">
|
||||
<include name="**/*.jar" />
|
||||
<exclude name="**/excluded/" />
|
||||
</fileset>
|
||||
|
||||
<!-- Override jar target to specify main class -->
|
||||
<target name="jar" depends="compile">
|
||||
<jar
|
||||
jarfile="${build.dir}/hadoop-${version}-${name}.jar"
|
||||
basedir="${build.classes}"
|
||||
>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.apache.hadoop.blockforensics.BlockSearch"/>
|
||||
</manifest>
|
||||
</jar>
|
||||
|
||||
<javac srcdir="client" destdir="${build.classes}"/>
|
||||
|
||||
</target>
|
||||
|
||||
<!-- Run only pure-Java unit tests. superdottest -->
|
||||
<target name="test">
|
||||
<antcall target="hadoopbuildcontrib.test">
|
||||
</antcall>
|
||||
</target>
|
||||
|
||||
<!-- Run all unit tests
|
||||
This is not called as part of the nightly build
|
||||
because it will only run on platforms that have standard
|
||||
Unix utilities available.
|
||||
-->
|
||||
<target name="test-unix">
|
||||
<antcall target="hadoopbuildcontrib.test">
|
||||
</antcall>
|
||||
</target>
|
||||
|
||||
|
||||
</project>
|
|
@ -1,186 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.lang.Runtime;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.TreeSet;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* This class repeatedly queries a namenode looking for corrupt replicas. If
|
||||
* any are found, a provided Hadoop job is launched and the output is printed
|
||||
* to stdout.
|
||||
*
|
||||
* The syntax is:
|
||||
*
|
||||
* java BlockForensics http://[namenode]:[port]/corrupt_replicas_xml.jsp
|
||||
* [sleep time between namenode query for corrupt blocks
|
||||
* (in milliseconds)] [mapred jar location] [hdfs input path]
|
||||
*
|
||||
* All arguments are required.
|
||||
*/
|
||||
public class BlockForensics {
|
||||
|
||||
public static String join(List<?> l, String sep) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Iterator it = l.iterator();
|
||||
|
||||
while(it.hasNext()){
|
||||
sb.append(it.next());
|
||||
if (it.hasNext()) {
|
||||
sb.append(sep);
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
// runs hadoop command and prints output to stdout
|
||||
public static void runHadoopCmd(String ... args)
|
||||
throws IOException {
|
||||
String hadoop_home = System.getenv("HADOOP_PREFIX");
|
||||
|
||||
List<String> l = new LinkedList<String>();
|
||||
l.add("bin/hadoop");
|
||||
l.addAll(Arrays.asList(args));
|
||||
|
||||
ProcessBuilder pb = new ProcessBuilder(l);
|
||||
|
||||
if (hadoop_home != null) {
|
||||
pb.directory(new File(hadoop_home));
|
||||
}
|
||||
|
||||
pb.redirectErrorStream(true);
|
||||
|
||||
Process p = pb.start();
|
||||
|
||||
BufferedReader br = new BufferedReader(
|
||||
new InputStreamReader(p.getInputStream()));
|
||||
String line;
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
System.out.println(line);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args)
|
||||
throws SAXException, ParserConfigurationException,
|
||||
InterruptedException, IOException {
|
||||
|
||||
if (System.getenv("HADOOP_PREFIX") == null) {
|
||||
System.err.println("The environmental variable HADOOP_PREFIX is undefined");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
|
||||
if (args.length < 4) {
|
||||
System.out.println("Usage: java BlockForensics [http://namenode:port/"
|
||||
+ "corrupt_replicas_xml.jsp] [sleep time between "
|
||||
+ "requests (in milliseconds)] [mapred jar location] "
|
||||
+ "[hdfs input path]");
|
||||
return;
|
||||
}
|
||||
|
||||
int sleepTime = 30000;
|
||||
|
||||
try {
|
||||
sleepTime = Integer.parseInt(args[1]);
|
||||
} catch (NumberFormatException e) {
|
||||
System.out.println("The sleep time entered is invalid, "
|
||||
+ "using default value: "+sleepTime+"ms");
|
||||
}
|
||||
|
||||
Set<Long> blockIds = new TreeSet<Long>();
|
||||
|
||||
while (true) {
|
||||
InputStream xml = new URL(args[0]).openConnection().getInputStream();
|
||||
|
||||
DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder builder = fact.newDocumentBuilder();
|
||||
Document doc = builder.parse(xml);
|
||||
|
||||
NodeList corruptReplicaNodes = doc.getElementsByTagName("block_id");
|
||||
|
||||
List<Long> searchBlockIds = new LinkedList<Long>();
|
||||
for(int i=0; i<corruptReplicaNodes.getLength(); i++) {
|
||||
Long blockId = new Long(corruptReplicaNodes.item(i)
|
||||
.getFirstChild()
|
||||
.getNodeValue());
|
||||
if (!blockIds.contains(blockId)) {
|
||||
blockIds.add(blockId);
|
||||
searchBlockIds.add(blockId);
|
||||
}
|
||||
}
|
||||
|
||||
if (searchBlockIds.size() > 0) {
|
||||
String blockIdsStr = BlockForensics.join(searchBlockIds, ",");
|
||||
System.out.println("\nSearching for: " + blockIdsStr);
|
||||
String tmpDir =
|
||||
new String("/tmp-block-forensics-" +
|
||||
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
|
||||
|
||||
System.out.println("Using temporary dir: "+tmpDir);
|
||||
|
||||
// delete tmp dir
|
||||
BlockForensics.runHadoopCmd("fs", "-rmr", tmpDir);
|
||||
|
||||
// launch mapred job
|
||||
BlockForensics.runHadoopCmd("jar",
|
||||
args[2], // jar location
|
||||
args[3], // input dir
|
||||
tmpDir, // output dir
|
||||
blockIdsStr// comma delimited list of blocks
|
||||
);
|
||||
// cat output
|
||||
BlockForensics.runHadoopCmd("fs", "-cat", tmpDir+"/part*");
|
||||
|
||||
// delete temp dir
|
||||
BlockForensics.runHadoopCmd("fs", "-rmr", tmpDir);
|
||||
|
||||
int sleepSecs = (int)(sleepTime/1000.);
|
||||
System.out.print("Sleeping for "+sleepSecs
|
||||
+ " second"+(sleepSecs == 1?"":"s")+".");
|
||||
}
|
||||
|
||||
System.out.print(".");
|
||||
Thread.sleep(sleepTime);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
<?xml version="1.0" ?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<ivy-module version="1.0">
|
||||
<info organisation="org.apache.hadoop" module="${ant.project.name}">
|
||||
<license name="Apache 2.0"/>
|
||||
<ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
|
||||
<description>
|
||||
Apache Hadoop
|
||||
</description>
|
||||
</info>
|
||||
<configurations defaultconfmapping="default">
|
||||
<!--these match the Maven configurations-->
|
||||
<conf name="default" extends="master,runtime"/>
|
||||
<conf name="master" description="contains the artifact but no dependencies"/>
|
||||
<conf name="runtime" description="runtime but not the artifact" />
|
||||
|
||||
<conf name="common" visibility="private"
|
||||
extends="runtime"
|
||||
description="artifacts needed to compile/test the application"/>
|
||||
<conf name="test" visibility="private" extends="runtime"/>
|
||||
</configurations>
|
||||
|
||||
<publications>
|
||||
<!--get the artifact from our module name-->
|
||||
<artifact conf="master"/>
|
||||
</publications>
|
||||
<dependencies>
|
||||
<dependency org="org.apache.hadoop" name="hadoop-common"
|
||||
rev="${hadoop-common.version}" conf="common->default"/>
|
||||
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
|
||||
rev="${yarn.version}" conf="common->default"/>
|
||||
<dependency org="log4j" name="log4j" rev="${log4j.version}"
|
||||
conf="common->master"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
|
@ -1,21 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
#This properties file lists the versions of the various artifacts used by block_forensics.
|
||||
#It drives ivy and the generation of a maven POM
|
||||
|
||||
#Please list the dependencies name with version if they are different from the ones
|
||||
#listed in the global libraries.properties file (in alphabetical order)
|
|
@ -1,136 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.blockforensics;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.StringTokenizer;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
|
||||
/**
|
||||
* BlockSearch is a MapReduce job designed to search its input for appearances
|
||||
* of strings.
|
||||
*
|
||||
* The syntax is:
|
||||
*
|
||||
* bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir]
|
||||
* [comma delimited list of block ids]
|
||||
*
|
||||
* All arguments are required.
|
||||
*
|
||||
* This tool is designed to search for one or more block ids in log
|
||||
* files but can be used for general text search, assuming the search strings
|
||||
* don't contain tokens. It assumes only one search string will appear per line.
|
||||
*/
|
||||
public class BlockSearch extends Configured implements Tool {
|
||||
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
|
||||
private Text blockIdText = new Text();
|
||||
private Text valText = new Text();
|
||||
private List<String> blockIds = null;
|
||||
|
||||
protected void setup(Context context)
|
||||
throws IOException, InterruptedException {
|
||||
Configuration conf = context.getConfiguration();
|
||||
StringTokenizer st = new StringTokenizer(conf.get("blockIds"), ",");
|
||||
blockIds = new LinkedList<String>();
|
||||
while (st.hasMoreTokens()) {
|
||||
String blockId = st.nextToken();
|
||||
blockIds.add(blockId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void map(LongWritable key, Text value, Context context)
|
||||
throws IOException, InterruptedException {
|
||||
if (blockIds == null) {
|
||||
System.err.println("Error: No block ids specified");
|
||||
} else {
|
||||
String valStr = value.toString();
|
||||
|
||||
for(String blockId: blockIds) {
|
||||
if (valStr.indexOf(blockId) != -1) {
|
||||
blockIdText.set(blockId);
|
||||
valText.set(valStr);
|
||||
context.write(blockIdText, valText);
|
||||
break; // assume only one block id appears per line
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static class Reduce extends Reducer<Text, Text, Text, Text> {
|
||||
private Text val = new Text();
|
||||
public void reduce(Text key, Iterator<Text> values, Context context)
|
||||
throws IOException, InterruptedException {
|
||||
while (values.hasNext()) {
|
||||
context.write(key, values.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int run(String[] args) throws Exception {
|
||||
if (args.length < 3) {
|
||||
System.out.println("BlockSearch <inLogs> <outDir> <comma delimited list of blocks>");
|
||||
ToolRunner.printGenericCommandUsage(System.out);
|
||||
return 2;
|
||||
}
|
||||
|
||||
Configuration conf = getConf();
|
||||
conf.set("blockIds", args[2]);
|
||||
|
||||
Job job = new Job(conf);
|
||||
|
||||
job.setCombinerClass(Reduce.class);
|
||||
job.setJarByClass(BlockSearch.class);
|
||||
job.setJobName("BlockSearch");
|
||||
job.setMapperClass(Map.class);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(Text.class);
|
||||
job.setReducerClass(Reduce.class);
|
||||
|
||||
FileInputFormat.setInputPaths(job, new Path(args[0]));
|
||||
FileOutputFormat.setOutputPath(job, new Path(args[1]));
|
||||
|
||||
return job.waitForCompletion(true) ? 0 : 1;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
int res = ToolRunner.run(new Configuration(), new BlockSearch(), args);
|
||||
System.exit(res);
|
||||
}
|
||||
}
|
|
@ -1,531 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- Imported by contrib/*/build.xml files to share generic targets. -->
|
||||
|
||||
<project name="hadoopbuildcontrib" xmlns:ivy="antlib:org.apache.ivy.ant">
|
||||
|
||||
<import file="../../build-utils.xml" />
|
||||
|
||||
<property name="name" value="${ant.project.name}"/>
|
||||
<property name="root" value="${basedir}"/>
|
||||
<property name="hadoop.root" location="${root}/../../../"/>
|
||||
|
||||
<!-- Load all the default properties, and any the user wants -->
|
||||
<!-- to contribute (without having to type -D or edit this file -->
|
||||
<property file="${user.home}/${name}.build.properties" />
|
||||
<property file="${root}/build.properties" />
|
||||
<property file="${hadoop.root}/build.properties" />
|
||||
|
||||
<property name="src.dir" location="${root}/src/java"/>
|
||||
<property name="src.test" location="${root}/src/test"/>
|
||||
<property name="src.test.data" location="${root}/src/test/data"/>
|
||||
<property name="src.examples" location="${root}/src/examples"/>
|
||||
<property name="build-fi.dir" location="${hadoop.root}/build-fi"/>
|
||||
<property name="system-test-build-dir" location="${build-fi.dir}/system"/>
|
||||
<!-- Property added for contrib system tests -->
|
||||
<property name="src.test.system" location="${root}/src/test/system"/>
|
||||
|
||||
<available file="${src.examples}" type="dir" property="examples.available"/>
|
||||
<available file="${src.test}" type="dir" property="test.available"/>
|
||||
<!-- Property added for contrib system tests -->
|
||||
<available file="${src.test.system}" type="dir"
|
||||
property="test.system.available"/>
|
||||
|
||||
<property name="conf.dir" location="${hadoop.root}/conf"/>
|
||||
<property name="test.junit.output.format" value="plain"/>
|
||||
<property name="test.output" value="no"/>
|
||||
<property name="test.timeout" value="900000"/>
|
||||
<property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
|
||||
<property name="build.dir" location="${hadoop.root}/build/contrib/${name}"/>
|
||||
<property name="build.classes" location="${build.dir}/classes"/>
|
||||
<property name="build.test" location="${build.dir}/test"/>
|
||||
<property name="test.build.extraconf" value="${build.test}/extraconf"/>
|
||||
<property name="build.examples" location="${build.dir}/examples"/>
|
||||
<property name="hadoop.log.dir" location="${build.dir}/test/logs"/>
|
||||
<!-- all jars together -->
|
||||
<property name="javac.deprecation" value="off"/>
|
||||
<property name="javac.debug" value="on"/>
|
||||
<property name="build.ivy.lib.dir" value="${hadoop.root}/build/ivy/lib"/>
|
||||
|
||||
<property name="javadoc.link"
|
||||
value="http://java.sun.com/j2se/1.4/docs/api/"/>
|
||||
|
||||
<property name="build.encoding" value="ISO-8859-1"/>
|
||||
<property name="dest.jar" value="hadoop-${version}-${name}.jar"/>
|
||||
|
||||
<fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
|
||||
<!-- Property added for contrib system tests -->
|
||||
<property name="build.test.system" location="${build.dir}/system"/>
|
||||
<property name="build.system.classes"
|
||||
location="${build.test.system}/classes"/>
|
||||
|
||||
|
||||
<!-- IVY properties set here -->
|
||||
<property name="ivy.dir" location="ivy" />
|
||||
<property name="ivysettings.xml" location="${hadoop.root}/ivy/ivysettings.xml"/>
|
||||
<loadproperties srcfile="${ivy.dir}/libraries.properties"/>
|
||||
<loadproperties srcfile="${hadoop.root}/ivy/libraries.properties"/>
|
||||
<property name="ivy.jar" location="${hadoop.root}/ivy/ivy-${ivy.version}.jar"/>
|
||||
<property name="ivy_repo_url"
|
||||
value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
|
||||
<property name="build.ivy.dir" location="${hadoop.root}/build/ivy" />
|
||||
<property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
|
||||
<property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
|
||||
<property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
|
||||
|
||||
<!--this is the naming policy for artifacts we want pulled down-->
|
||||
<property name="ivy.artifact.retrieve.pattern"
|
||||
value="${ant.project.name}/[conf]/[artifact]-[revision](-[classifier]).[ext]"/>
|
||||
|
||||
<!-- the normal classpath -->
|
||||
<path id="contrib-classpath">
|
||||
<pathelement location="${build.classes}"/>
|
||||
<fileset refid="lib.jars"/>
|
||||
<pathelement location="${hadoop.root}/build/classes"/>
|
||||
<pathelement location="${system-test-build-dir}/classes"/>
|
||||
<pathelement location="${system-test-build-dir}/tools"/>
|
||||
<pathelement location="${hadoop.root}/build/tools"/>
|
||||
<fileset dir="${hadoop.root}/lib">
|
||||
<include name="**/*.jar" />
|
||||
</fileset>
|
||||
<path refid="${ant.project.name}.common-classpath"/>
|
||||
<pathelement path="${clover.jar}"/>
|
||||
</path>
|
||||
|
||||
<!-- the unit test classpath -->
|
||||
<path id="test.classpath">
|
||||
<pathelement location="${build.test}" />
|
||||
<pathelement location="${test.build.extraconf}" />
|
||||
<pathelement location="${hadoop.root}/build/test/classes"/>
|
||||
<pathelement location="${hadoop.root}/build/test/core/classes"/>
|
||||
<pathelement location="${hadoop.root}/build/test/hdfs/classes"/>
|
||||
<pathelement location="${hadoop.root}/build/test/mapred/classes"/>
|
||||
<pathelement location="${hadoop.root}/src/contrib/test"/>
|
||||
<pathelement location="${conf.dir}"/>
|
||||
<pathelement location="${hadoop.root}/build"/>
|
||||
<pathelement location="${build.examples}"/>
|
||||
<pathelement location="${hadoop.root}/build/examples"/>
|
||||
<path refid="${ant.project.name}.test-classpath"/>
|
||||
<path refid="contrib-classpath"/>
|
||||
</path>
|
||||
|
||||
<!-- The system test classpath -->
|
||||
<path id="test.system.classpath">
|
||||
<pathelement location="${hadoop.root}/src/contrib/${name}/src/test/system" />
|
||||
<pathelement location="${build.test.system}" />
|
||||
<pathelement location="${build.test.system}/classes"/>
|
||||
<pathelement location="${build.examples}"/>
|
||||
<pathelement location="${system-test-build-dir}/classes" />
|
||||
<pathelement location="${system-test-build-dir}/test/mapred/classes" />
|
||||
<pathelement location="${system-test-build-dir}" />
|
||||
<pathelement location="${system-test-build-dir}/tools" />
|
||||
<pathelement location="${hadoop.home}"/>
|
||||
<pathelement location="${hadoop.conf.dir}"/>
|
||||
<pathelement location="${hadoop.conf.dir.deployed}"/>
|
||||
<pathelement location="${hadoop.root}/build"/>
|
||||
<pathelement location="${hadoop.root}/build/examples"/>
|
||||
<pathelement location="${hadoop.root}/build/test/classes" />
|
||||
<path refid="contrib-classpath"/>
|
||||
<fileset dir="${system-test-build-dir}">
|
||||
<include name="**/*.jar" />
|
||||
<exclude name="**/excluded/" />
|
||||
</fileset>
|
||||
<fileset dir="${system-test-build-dir}/test/mapred/testjar">
|
||||
<include name="**/*.jar" />
|
||||
<exclude name="**/excluded/" />
|
||||
</fileset>
|
||||
<fileset dir="${hadoop.root}/build/contrib/${name}">
|
||||
<include name="**/*.jar" />
|
||||
<exclude name="**/excluded/" />
|
||||
</fileset>
|
||||
</path>
|
||||
|
||||
<!-- to be overridden by sub-projects -->
|
||||
<target name="check-contrib"/>
|
||||
<target name="init-contrib"/>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Stuff needed by all targets -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="init" depends="check-contrib" unless="skip.contrib">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<mkdir dir="${build.dir}"/>
|
||||
<mkdir dir="${build.classes}"/>
|
||||
<mkdir dir="${build.test}"/>
|
||||
<mkdir dir="${build.test}/extraconf"/>
|
||||
<mkdir dir="${build.examples}"/>
|
||||
<mkdir dir="${hadoop.log.dir}"/>
|
||||
<!-- The below two tags added for contrib system tests -->
|
||||
<mkdir dir="${build.test.system}"/>
|
||||
<mkdir dir="${build.system.classes}"/>
|
||||
<antcall target="init-contrib"/>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Compile a Hadoop contrib's files -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="compile" depends="init, ivy-retrieve-common" unless="skip.contrib">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<javac
|
||||
encoding="${build.encoding}"
|
||||
srcdir="${src.dir}"
|
||||
includes="**/*.java"
|
||||
excludes="system/**/*.java"
|
||||
destdir="${build.classes}"
|
||||
debug="${javac.debug}"
|
||||
deprecation="${javac.deprecation}">
|
||||
<classpath refid="contrib-classpath"/>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ======================================================= -->
|
||||
<!-- Compile a Hadoop contrib's example files (if available) -->
|
||||
<!-- ======================================================= -->
|
||||
<target name="compile-examples" depends="compile, ivy-retrieve-common" if="examples.available">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<javac
|
||||
encoding="${build.encoding}"
|
||||
srcdir="${src.examples}"
|
||||
includes="**/*.java"
|
||||
destdir="${build.examples}"
|
||||
debug="${javac.debug}">
|
||||
<classpath refid="contrib-classpath"/>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- Compile test code -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="compile-test" depends="compile-examples, ivy-retrieve-test" if="test.available">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<javac
|
||||
encoding="${build.encoding}"
|
||||
srcdir="${src.test}"
|
||||
includes="**/*.java"
|
||||
excludes="system/**/*.java"
|
||||
destdir="${build.test}"
|
||||
debug="${javac.debug}">
|
||||
<classpath refid="test.classpath"/>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- Compile system test code -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="compile-test-system" depends="compile-examples, ivy-retrieve-test"
|
||||
if="test.system.available">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<javac
|
||||
encoding="${build.encoding}"
|
||||
srcdir="${src.test.system}"
|
||||
includes="**/*.java"
|
||||
destdir="${build.system.classes}"
|
||||
debug="${javac.debug}">
|
||||
<classpath refid="test.system.classpath"/>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Make a Hadoop contrib's jar -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="jar" depends="compile" unless="skip.contrib">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<jar
|
||||
jarfile="${build.dir}/${dest.jar}"
|
||||
basedir="${build.classes}"
|
||||
/>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Make a Hadoop contrib's examples jar -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="jar-examples" depends="compile-examples"
|
||||
if="examples.available" unless="skip.contrib">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<jar jarfile="${build.dir}/hadoop-${version}-${name}-examples.jar">
|
||||
<fileset dir="${build.classes}">
|
||||
</fileset>
|
||||
<fileset dir="${build.examples}">
|
||||
</fileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Package a Hadoop contrib -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="package" depends="jar, jar-examples" unless="skip.contrib">
|
||||
<mkdir dir="${dist.dir}/contrib/${name}"/>
|
||||
<copy todir="${dist.dir}/contrib/${name}" includeEmptyDirs="false" flatten="true">
|
||||
<fileset dir="${build.dir}">
|
||||
<include name="${dest.jar}" />
|
||||
</fileset>
|
||||
</copy>
|
||||
|
||||
<!-- copy the dependency libraries into the contrib/lib dir -->
|
||||
<mkdir dir="${dist.dir}/contrib/${name}/lib"/>
|
||||
<copy todir="${dist.dir}/contrib/${name}/lib" includeEmptyDirs="false" flatten="true">
|
||||
<fileset dir="${common.ivy.lib.dir}">
|
||||
<!-- except for those already present due to Hadoop -->
|
||||
<present present="srconly" targetdir="${dist.dir}/lib" />
|
||||
</fileset>
|
||||
</copy>
|
||||
<!-- if the lib dir is empty, remove it. -->
|
||||
<delete dir="${dist.dir}/contrib/${name}/lib" includeEmptyDirs="true" excludes="*.jar" />
|
||||
</target>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- Run unit tests -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="test" depends="compile-test, compile" if="test.available">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<delete dir="${hadoop.log.dir}"/>
|
||||
<mkdir dir="${hadoop.log.dir}"/>
|
||||
<junit
|
||||
printsummary="yes" showoutput="${test.output}"
|
||||
haltonfailure="no" fork="yes" maxmemory="512m"
|
||||
errorProperty="tests.failed" failureProperty="tests.failed"
|
||||
timeout="${test.timeout}">
|
||||
|
||||
<assertions><enable/></assertions>
|
||||
<sysproperty key="test.build.data" value="${build.test}/data"/>
|
||||
<sysproperty key="build.test" value="${build.test}"/>
|
||||
<sysproperty key="test.build.extraconf" value="${test.build.extraconf}" />
|
||||
<sysproperty key="src.test.data" value="${src.test.data}"/>
|
||||
<sysproperty key="contrib.name" value="${name}"/>
|
||||
|
||||
<!-- requires fork=yes for:
|
||||
relative File paths to use the specified user.dir
|
||||
classpath to use build/contrib/*.jar
|
||||
-->
|
||||
<sysproperty key="user.dir" value="${build.test}/data"/>
|
||||
|
||||
<sysproperty key="fs.default.name" value="${fs.default.name}"/>
|
||||
<sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
|
||||
<sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
|
||||
<sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
|
||||
<sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}"/>
|
||||
<classpath refid="test.classpath"/>
|
||||
<formatter type="${test.junit.output.format}" />
|
||||
<batchtest todir="${build.test}" unless="testcase">
|
||||
<fileset dir="${src.test}"
|
||||
includes="**/Test*.java" excludes="**/${test.exclude}.java, system/**/*.java" />
|
||||
</batchtest>
|
||||
<batchtest todir="${build.test}" if="testcase">
|
||||
<fileset dir="${src.test}" includes="**/${testcase}.java" excludes="system/**/*.java" />
|
||||
</batchtest>
|
||||
</junit>
|
||||
<antcall target="checkfailure"/>
|
||||
</target>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- Run system tests -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="test-system" depends="compile-test-system, jar"
|
||||
if="test.system.available">
|
||||
<delete dir="${build.test.system}/extraconf"/>
|
||||
<mkdir dir="${build.test.system}/extraconf"/>
|
||||
<property name="test.src.dir" location="${hadoop.root}/src/test"/>
|
||||
<property name="test.junit.printsummary" value="yes" />
|
||||
<property name="test.junit.haltonfailure" value="no" />
|
||||
<property name="test.junit.maxmemory" value="512m" />
|
||||
<property name="test.junit.fork.mode" value="perTest" />
|
||||
<property name="test.all.tests.file" value="${test.src.dir}/all-tests" />
|
||||
<property name="test.build.dir" value="${hadoop.root}/build/test"/>
|
||||
<property name="basedir" value="${hadoop.root}"/>
|
||||
<property name="test.timeout" value="900000"/>
|
||||
<property name="test.junit.output.format" value="plain"/>
|
||||
<property name="test.tools.input.dir" value="${basedir}/src/test/tools/data"/>
|
||||
<property name="c++.src" value="${basedir}/src/c++"/>
|
||||
<property name="test.include" value="Test*"/>
|
||||
<property name="c++.libhdfs.src" value="${c++.src}/libhdfs"/>
|
||||
<property name="test.build.data" value="${build.test.system}/data"/>
|
||||
<property name="test.cache.data" value="${build.test.system}/cache"/>
|
||||
<property name="test.debug.data" value="${build.test.system}/debug"/>
|
||||
<property name="test.log.dir" value="${build.test.system}/logs"/>
|
||||
<exec executable="sed" inputstring="${os.name}"
|
||||
outputproperty="nonspace.os">
|
||||
<arg value="s/ /_/g"/>
|
||||
</exec>
|
||||
<property name="build.platform"
|
||||
value="${nonspace.os}-${os.arch}-${sun.arch.data.model}"/>
|
||||
<property name="build.native"
|
||||
value="${hadoop.root}/build/native/${build.platform}"/>
|
||||
<property name="lib.dir" value="${hadoop.root}/lib"/>
|
||||
<property name="install.c++.examples"
|
||||
value="${hadoop.root}/build/c++-examples/${build.platform}"/>
|
||||
<condition property="tests.testcase">
|
||||
<and>
|
||||
<isset property="testcase" />
|
||||
</and>
|
||||
</condition>
|
||||
<property name="test.junit.jvmargs" value="-ea" />
|
||||
<macro-system-test-runner test.file="${test.all.tests.file}"
|
||||
classpath="test.system.classpath"
|
||||
test.dir="${build.test.system}"
|
||||
fileset.dir="${hadoop.root}/src/contrib/${name}/src/test/system"
|
||||
hadoop.conf.dir.deployed="${hadoop.conf.dir.deployed}">
|
||||
</macro-system-test-runner>
|
||||
</target>
|
||||
<macrodef name="macro-system-test-runner">
|
||||
<attribute name="test.file" />
|
||||
<attribute name="classpath" />
|
||||
<attribute name="test.dir" />
|
||||
<attribute name="fileset.dir" />
|
||||
<attribute name="hadoop.conf.dir.deployed" default="" />
|
||||
<sequential>
|
||||
<delete dir="@{test.dir}/data"/>
|
||||
<mkdir dir="@{test.dir}/data"/>
|
||||
<delete dir="@{test.dir}/logs"/>
|
||||
<mkdir dir="@{test.dir}/logs"/>
|
||||
<copy file="${test.src.dir}/hadoop-policy.xml"
|
||||
todir="@{test.dir}/extraconf" />
|
||||
<copy file="${test.src.dir}/fi-site.xml"
|
||||
todir="@{test.dir}/extraconf" />
|
||||
<junit showoutput="${test.output}"
|
||||
printsummary="${test.junit.printsummary}"
|
||||
haltonfailure="${test.junit.haltonfailure}"
|
||||
fork="yes"
|
||||
forkmode="${test.junit.fork.mode}"
|
||||
maxmemory="${test.junit.maxmemory}"
|
||||
dir="${basedir}" timeout="${test.timeout}"
|
||||
errorProperty="tests.failed" failureProperty="tests.failed">
|
||||
<jvmarg value="${test.junit.jvmargs}" />
|
||||
<sysproperty key="java.net.preferIPv4Stack" value="true"/>
|
||||
<sysproperty key="test.build.data" value="@{test.dir}/data"/>
|
||||
<sysproperty key="test.tools.input.dir" value = "${test.tools.input.dir}"/>
|
||||
<sysproperty key="test.cache.data" value="${test.cache.data}"/>
|
||||
<sysproperty key="test.debug.data" value="${test.debug.data}"/>
|
||||
<sysproperty key="hadoop.log.dir" value="@{test.dir}/logs"/>
|
||||
<sysproperty key="test.src.dir" value="@{fileset.dir}"/>
|
||||
<sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
|
||||
<sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}"/>
|
||||
<sysproperty key="test.build.extraconf" value="@{test.dir}/extraconf" />
|
||||
<sysproperty key="hadoop.policy.file" value="hadoop-policy.xml"/>
|
||||
<sysproperty key="java.library.path"
|
||||
value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
|
||||
<sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
|
||||
<syspropertyset dynamic="no">
|
||||
<propertyref name="hadoop.tmp.dir"/>
|
||||
</syspropertyset>
|
||||
<!-- set compile.c++ in the child jvm only if it is set -->
|
||||
<syspropertyset dynamic="no">
|
||||
<propertyref name="compile.c++"/>
|
||||
</syspropertyset>
|
||||
|
||||
<!-- Pass probability specifications to the spawn JVM -->
|
||||
<syspropertyset id="FaultProbabilityProperties">
|
||||
<propertyref regex="fi.*"/>
|
||||
</syspropertyset>
|
||||
<sysproperty key="test.system.hdrc.deployed.hadoopconfdir"
|
||||
value="@{hadoop.conf.dir.deployed}" />
|
||||
<classpath refid="@{classpath}"/>
|
||||
<formatter type="${test.junit.output.format}" />
|
||||
<batchtest todir="@{test.dir}" unless="testcase">
|
||||
<fileset dir="@{fileset.dir}"
|
||||
excludes="**/${test.exclude}.java aop/** system/**">
|
||||
<patternset>
|
||||
<includesfile name="@{test.file}"/>
|
||||
</patternset>
|
||||
</fileset>
|
||||
</batchtest>
|
||||
<batchtest todir="@{test.dir}" if="testcase">
|
||||
<fileset dir="@{fileset.dir}" includes="**/${testcase}.java"/>
|
||||
</batchtest>
|
||||
</junit>
|
||||
<antcall target="checkfailure"/>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=<base of Apache Forrest installation> on the command line." if="forrest.home">
|
||||
<!-- Nothing by default -->
|
||||
</target>
|
||||
|
||||
<target name="checkfailure" if="tests.failed">
|
||||
<touch file="${build.contrib.dir}/testsfailed"/>
|
||||
<fail unless="continueOnFailure">Contrib Tests failed!</fail>
|
||||
</target>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<!-- Clean. Delete the build files, and their directories -->
|
||||
<!-- ================================================================== -->
|
||||
<target name="clean">
|
||||
<echo message="contrib: ${name}"/>
|
||||
<delete dir="${build.dir}"/>
|
||||
</target>
|
||||
|
||||
<target name="ivy-probe-antlib" >
|
||||
<condition property="ivy.found">
|
||||
<typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
|
||||
<target name="ivy-download" description="To download ivy " unless="offline">
|
||||
<get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
|
||||
</target>
|
||||
|
||||
<target name="ivy-init-antlib" depends="ivy-download,ivy-probe-antlib" unless="ivy.found">
|
||||
<typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
|
||||
loaderRef="ivyLoader">
|
||||
<classpath>
|
||||
<pathelement location="${ivy.jar}"/>
|
||||
</classpath>
|
||||
</typedef>
|
||||
<fail >
|
||||
<condition >
|
||||
<not>
|
||||
<typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
|
||||
</not>
|
||||
</condition>
|
||||
You need Apache Ivy 2.0 or later from http://ant.apache.org/
|
||||
It could not be loaded from ${ivy_repo_url}
|
||||
</fail>
|
||||
</target>
|
||||
|
||||
<target name="ivy-init" depends="ivy-init-antlib">
|
||||
<ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}"/>
|
||||
</target>
|
||||
|
||||
<target name="ivy-resolve-common" depends="ivy-init">
|
||||
<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common" />
|
||||
</target>
|
||||
|
||||
<target name="ivy-retrieve-common" depends="ivy-resolve-common"
|
||||
description="Retrieve Ivy-managed artifacts for the compile/test configurations">
|
||||
<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
|
||||
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
|
||||
<ivy:cachepath pathid="${ant.project.name}.common-classpath" conf="common" />
|
||||
</target>
|
||||
|
||||
<target name="ivy-resolve-test" depends="ivy-init">
|
||||
<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="test" />
|
||||
</target>
|
||||
|
||||
<target name="ivy-retrieve-test" depends="ivy-resolve-test"
|
||||
description="Retrieve Ivy-managed artifacts for the test configuration">
|
||||
<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
|
||||
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
|
||||
<ivy:cachepath pathid="${ant.project.name}.test-classpath" conf="test" />
|
||||
</target>
|
||||
</project>
|
|
@ -1,100 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project name="hadoopcontrib" default="compile" basedir=".">
|
||||
|
||||
<!-- In case one of the contrib subdirectories -->
|
||||
<!-- fails the build or test targets and you cannot fix it: -->
|
||||
<!-- Then add to fileset: excludes="badcontrib/build.xml" -->
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Compile contribs. -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="compile">
|
||||
<subant target="compile">
|
||||
<fileset dir="." includes="*/build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Compile contrib test code. -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="compile-test">
|
||||
<subant target="compile-test">
|
||||
<fileset dir="." includes="*/build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Package contrib jars. -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="package">
|
||||
<subant target="package">
|
||||
<fileset dir="." includes="*/build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Test all the contribs. -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="test">
|
||||
<property name="hadoop.root" location="${root}/../../../"/>
|
||||
<property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
|
||||
<delete file="${build.contrib.dir}/testsfailed"/>
|
||||
<subant target="test">
|
||||
<property name="continueOnFailure" value="true"/>
|
||||
<fileset dir="." includes="streaming/build.xml"/>
|
||||
<fileset dir="." includes="gridmix/build.xml"/>
|
||||
<fileset dir="." includes="vertica/build.xml"/>
|
||||
</subant>
|
||||
<available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
|
||||
<fail if="testsfailed">Tests failed!</fail>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Test all the contrib system tests -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="test-system-contrib">
|
||||
<property name="hadoop.root" location="${root}/../../../"/>
|
||||
<property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
|
||||
<delete file="${build.contrib.dir}/testsfailed"/>
|
||||
<subant target="test-system">
|
||||
<property name="continueOnFailure" value="true"/>
|
||||
<property name="hadoop.home" value="${hadoop.home}"/>
|
||||
<property name="hadoop.conf.dir" value="${hadoop.conf.dir}"/>
|
||||
<property name="hadoop.conf.dir.deployed"
|
||||
value="${hadoop.conf.dir.deployed}"/>
|
||||
<fileset dir="." includes="hdfsproxy/build.xml"/>
|
||||
<fileset dir="." includes="streaming/build.xml"/>
|
||||
<fileset dir="." includes="gridmix/build.xml"/>
|
||||
</subant>
|
||||
<available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
|
||||
<fail if="testsfailed">Tests failed!</fail>
|
||||
</target>
|
||||
|
||||
<!-- ====================================================== -->
|
||||
<!-- Clean all the contribs. -->
|
||||
<!-- ====================================================== -->
|
||||
<target name="clean">
|
||||
<subant target="clean">
|
||||
<fileset dir="." includes="*/build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
</project>
|
@ -1,45 +0,0 @@
<?xml version="1.0"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!--
Before you can run these subtargets directly, you need
to call at top-level: ant deploy-contrib compile-core-test
-->
<project name="datajoin" default="jar">

  <import file="../build-contrib.xml"/>

  <!-- Override jar target to specify main class -->
  <target name="jar" depends="compile">
    <jar
      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
      basedir="${build.classes}"
    >
      <manifest>
        <attribute name="Main-Class" value="org.apache.hadoop.contrib.utils.join.DataJoinJob"/>
      </manifest>
    </jar>
  </target>

  <target name="jar-examples" depends="jar">
    <antcall target="hadoopbuildcontrib.jar-examples">
    </antcall>
  </target>

</project>
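For context: the datajoin build file above overrides jar only to stamp a Main-Class into the manifest, so the resulting jar can be launched directly (for example via hadoop jar). A stand-alone sketch of that idiom, with placeholder paths and a hypothetical entry-point class:

    <?xml version="1.0"?>
    <project name="jar-with-main-class" default="jar" basedir=".">

      <property name="build.dir"     location="build"/>
      <property name="build.classes" location="${build.dir}/classes"/>

      <target name="jar">
        <mkdir dir="${build.classes}"/>
        <!-- Package the compiled classes and record the entry point in the
             manifest; "example.Main" is a placeholder, not a Hadoop class. -->
        <jar jarfile="${build.dir}/example.jar" basedir="${build.classes}">
          <manifest>
            <attribute name="Main-Class" value="example.Main"/>
          </manifest>
        </jar>
      </target>

    </project>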
@ -1,68 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
  <info organisation="org.apache.hadoop" module="${ant.project.name}">
    <license name="Apache 2.0"/>
    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
    <description>
      Apache Hadoop
    </description>
  </info>
  <configurations defaultconfmapping="default">
    <!--these match the Maven configurations-->
    <conf name="default" extends="master,runtime"/>
    <conf name="master" description="contains the artifact but no dependencies"/>
    <conf name="runtime" description="runtime but not the artifact" />

    <conf name="common" visibility="private"
          extends="runtime"
          description="artifacts needed to compile the application"/>
    <conf name="test" visibility="private" extends="runtime"/>
  </configurations>

  <publications>
    <!--get the artifact from our module name-->
    <artifact conf="master"/>
  </publications>
  <dependencies>
    <dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="common->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-common" rev="${hadoop-common.version}" conf="common->default">
      <artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests"/>
      <artifact name="hadoop-common" ext="jar"/>
    </dependency>
    <dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="${hadoop-hdfs.version}" conf="common->default">
      <artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
      <artifact name="hadoop-hdfs" ext="jar"/>
    </dependency>
    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
                rev="${yarn.version}" conf="common->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-yarn-common"
                rev="${yarn.version}" conf="common->default"/>
    <dependency org="commons-logging" name="commons-logging" rev="${commons-logging.version}" conf="common->default"/>
    <dependency org="log4j" name="log4j" rev="${log4j.version}" conf="common->master"/>
    <dependency org="junit" name="junit" rev="${junit.version}" conf="common->default"/>

    <!-- Exclusions for transitive dependencies pulled in by log4j -->
    <exclude org="com.sun.jdmk"/>
    <exclude org="com.sun.jmx"/>
    <exclude org="javax.jms"/>
    <exclude org="javax.mail"/>

  </dependencies>
</ivy-module>
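For context: an Ivy descriptor like the one above declares private configurations (common, test) that mirror Maven scopes; the conf="common->default" mappings say which configuration of each dependency to pull, and the m:classifier="tests" artifacts bring in the companion test jars. A rough sketch of how a build file would consume such a descriptor with the Ivy Ant tasks; the ivy.jar location, the presence of an ivy.xml with its ${*.version} properties, and the repository settings are all assumptions:

    <?xml version="1.0"?>
    <project name="ivy-consumer" default="resolve"
             xmlns:ivy="antlib:org.apache.ivy.ant">

      <!-- Assumes ivy.jar has been dropped into ./lib. -->
      <taskdef resource="org/apache/ivy/ant/antlib.xml"
               uri="antlib:org.apache.ivy.ant"
               classpath="lib/ivy.jar"/>

      <target name="resolve">
        <!-- Reads ./ivy.xml and downloads the "common" configuration. -->
        <ivy:resolve conf="common"/>
        <!-- Exposes the resolved jars as a classpath for other targets. -->
        <ivy:cachepath pathid="common.classpath" conf="common"/>
      </target>

    </project>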
@ -1,17 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#This properties file lists the versions of the various artifacts used by streaming.
#It drives ivy and the generation of a maven POM

#Please list the dependencies name with version if they are different from the ones
#listed in the global libraries.properties file (in alphabetical order)
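For context: each contrib keeps a small libraries.properties like the one above so it can pin artifact versions that differ from the global list, and Ivy resolution plus POM generation read the resulting properties. Because Ant properties are write-once, whichever file is loaded first wins, so loading the module file before the global one is what lets the override take effect. A tiny illustration of that ordering (the file locations and the log4j key are hypothetical):

    <?xml version="1.0"?>
    <project name="version-properties" default="show">

      <!-- Module-specific overrides, e.g. a line such as log4j.version=1.2.17. -->
      <property file="ivy/libraries.properties"/>
      <!-- Global defaults shared by all modules; cannot overwrite the above. -->
      <property file="../../ivy/libraries.properties"/>

      <target name="show">
        <echo message="log4j.version=${log4j.version}"/>
      </target>

    </project>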
@ -1,26 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<classpath>
  <classpathentry excluding="org/apache/hadoop/eclipse/server/CopyOfHadoopServer.java" kind="src" path="src/java"/>
  <classpathentry exported="true" kind="lib" path="classes" sourcepath="classes"/>
  <classpathentry kind="lib" path="lib/hadoop-core.jar" sourcepath="/hadoop-socks/src/java"/>
  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
  <classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
  <classpathentry kind="output" path="classes"/>
</classpath>
@ -1,45 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<projectDescription>
  <name>MapReduceTools</name>
  <comment></comment>
  <projects>
  </projects>
  <buildSpec>
    <buildCommand>
      <name>org.eclipse.jdt.core.javabuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
    <buildCommand>
      <name>org.eclipse.pde.ManifestBuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
    <buildCommand>
      <name>org.eclipse.pde.SchemaBuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
  </buildSpec>
  <natures>
    <nature>org.eclipse.pde.PluginNature</nature>
    <nature>org.eclipse.jdt.core.javanature</nature>
  </natures>
</projectDescription>
@ -1,274 +0,0 @@
#Sat Oct 13 13:37:43 CEST 2007
# Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
#
eclipse.preferences.version=1
instance/org.eclipse.core.net/org.eclipse.core.net.hasMigrated=true
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_assignment=16
org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
org.eclipse.jdt.core.formatter.blank_lines_after_package=1
org.eclipse.jdt.core.formatter.blank_lines_before_field=1
org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
org.eclipse.jdt.core.formatter.blank_lines_before_method=1
org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
org.eclipse.jdt.core.formatter.blank_lines_before_package=0
org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
org.eclipse.jdt.core.formatter.comment.format_block_comments=true
org.eclipse.jdt.core.formatter.comment.format_header=false
org.eclipse.jdt.core.formatter.comment.format_html=true
org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
org.eclipse.jdt.core.formatter.comment.format_line_comments=true
org.eclipse.jdt.core.formatter.comment.format_source_code=true
org.eclipse.jdt.core.formatter.comment.indent_parameter_description=false
org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
org.eclipse.jdt.core.formatter.comment.line_length=77
org.eclipse.jdt.core.formatter.compact_else_if=true
org.eclipse.jdt.core.formatter.continuation_indentation=2
org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
org.eclipse.jdt.core.formatter.indent_empty_lines=false
org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
org.eclipse.jdt.core.formatter.indentation.size=4
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=insert
org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
org.eclipse.jdt.core.formatter.lineSplit=77
org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
org.eclipse.jdt.core.formatter.tabulation.char=space
org.eclipse.jdt.core.formatter.tabulation.size=2
org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
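For context: the file above is the Eclipse plugin's workspace formatter configuration (2-space indentation, 77-column lines, end-of-line braces). If memory serves, Eclipse ships a headless JDT formatter application that can apply such a prefs file outside the IDE; the sketch below wires it up from Ant, and the eclipse.home location, the application id, and its flags should all be treated as assumptions to verify against the Eclipse installation at hand:

    <?xml version="1.0"?>
    <project name="format-sources" default="format">

      <!-- Assumed location of an Eclipse SDK install. -->
      <property name="eclipse.home" location="/opt/eclipse"/>

      <target name="format">
        <!-- Invoke what is believed to be Eclipse's stand-alone JDT formatter
             with the prefs file shown above; "src" is a placeholder directory. -->
        <exec executable="${eclipse.home}/eclipse" failonerror="false">
          <arg value="-nosplash"/>
          <arg value="-application"/>
          <arg value="org.eclipse.jdt.core.JavaCodeFormatter"/>
          <arg value="-config"/>
          <arg value=".settings/org.eclipse.jdt.core.prefs"/>
          <arg value="src"/>
        </exec>
      </target>

    </project>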