HADOOP-7136. Remove failmon contrib component. (nigel)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1070008 13f79535-47bb-0310-9956-ffa450edef68
Nigel Daley 2011-02-12 01:58:08 +00:00
parent ab611ed091
commit 96dd151504
33 changed files with 0 additions and 3969 deletions

View File

@ -46,7 +46,6 @@
<!-- ====================================================== -->
<target name="test">
<subant target="test">
<fileset dir="." includes="failmon/build.xml"/>
<fileset dir="." includes="hod/build.xml"/>
</subant>
</target>

View File

@ -1,97 +0,0 @@
****************** FailMon Quick Start Guide ***********************
This document is a guide to quickly setting up and running FailMon.
For more information and details please see the FailMon User Manual.
***** Building FailMon *****
Normally, FailMon lies under <hadoop-dir>/src/contrib/failmon, where
<hadoop-dir> is the Hadoop project root folder. To compile it,
one can either run ant for the whole Hadoop project, i.e.:
$ cd <hadoop-dir>
$ ant
or run ant only for FailMon:
$ cd <hadoop-dir>/src/contrib/failmon
$ ant
The above will compile FailMon and place all class files under
<hadoop-dir>/build/contrib/failmon/classes.
By invoking:
$ cd <hadoop-dir>/src/contrib/failmon
$ ant tar
FailMon is packaged as a standalone jar application in
<hadoop-dir>/src/contrib/failmon/failmon.tar.gz.
***** Deploying FailMon *****
There are two ways FailMon can be deployed in a cluster:
a) Within Hadoop, in which case the whole Hadoop package is uploaded
to the cluster nodes. In that case, nothing else needs to be done on
individual nodes.
b) Independently of the Hadoop deployment, i.e., by uploading
failmon.tar.gz to all nodes and uncompressing it. In that case, the
bin/failmon.sh script needs to be edited; the environment variable
HADOOPDIR should point to the root directory of the Hadoop
distribution. Also, the location of the Hadoop configuration files
should be specified by the property 'hadoop.conf.path' in the file
conf/failmon.properties. Note that these files refer to the HDFS in
which we want to store the FailMon data (which can potentially be
different from the one on the cluster we are monitoring).
We assume that, either way, FailMon is placed in the same directory on
all nodes, which is typical for most clusters. If this is not
feasible, one should create the same symbolic link on all nodes of the
cluster, pointing to the FailMon directory on each node.
One should also edit the conf/failmon.properties file on each node to
set the desired property values. However, the default values are
expected to serve most practical cases. Refer to the FailMon User Manual
for details about the various properties and configuration parameters.
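As a rough illustration of case (b), deploying on a single node might
look like the following; all paths shown here are placeholders, not
defaults, and must be adapted to the actual installation:
$ tar xzf failmon.tar.gz
$ cd failmon
# then, in bin/failmon.sh, point HADOOPDIR to the Hadoop root, e.g.:
HADOOPDIR=/opt/hadoop
# and, in conf/failmon.properties, point to the Hadoop config files, e.g.:
hadoop.conf.path = /opt/hadoop/conf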
***** Running FailMon *****
In order to run FailMon using a node to do the ad-hoc scheduling of
monitoring jobs, one needs to edit the hosts.list file to specify the
list of machine hostnames on which FailMon is to be run. Also, in the
file conf/global.config the username used to connect to the machines has
to be specified (passwordless SSH is assumed) in the property 'ssh.username'.
In the property 'failmon.dir', the path to the FailMon folder has to be
specified as well (it is assumed to be the same on all machines in the
cluster). Then one only needs to invoke the command:
$ cd <hadoop-dir>
$ bin/scheduler.py
to start the system.
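For example, using the sample files shipped under conf/, the relevant
entries could look like this (the username, directory and hostnames are
illustrative and must match the actual cluster):
# conf/global.config
[Default]
ssh.username = user
max.connections = 2
failmon.dir = /home/user/hadoop-core-trunk/src/contrib/failmon
# conf/hosts.list
host00
host01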
***** Merging HDFS files *****
For the purpose of merging the files created on HDFS by FailMon, the
following command can be used:
$ cd <hadoop-dir>
$ bin/failmon.sh --mergeFiles
This will concatenate all files in the HDFS folder (pointed to by the
'hdfs.upload.dir' property in the conf/failmon.properties file) into a
single file, which will be placed in the same folder. Also, the
location of the Hadoop configuration files should be specified by the
property 'hadoop.conf.path' in the file conf/failmon.properties. Note
that these files refer to the HDFS in which we have stored the FailMon
data (which can potentially be different from the one on the cluster we
are monitoring). Also, the scheduler.py script can be set up to merge
the HDFS files when their number surpasses a configurable limit (see
the 'conf/global.config' file).
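For example, the sample conf/global.config caps the number of FailMon
files on HDFS through the 'hdfs.files.max' property; once more files
than this exist, they are concatenated into one:
hdfs.files.max = 100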
Please refer to the FailMon User Manual for more details.

View File

@ -1,54 +0,0 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# First we need to determine whether Failmon has been distributed with
# Hadoop, or as standalone. In the latter case failmon.jar will lie in
# the current directory.
JARNAME="failmon.jar"
HADOOPDIR=""
CLASSPATH=""
if [ `ls -l | grep src | wc -l` == 0 ]
then
# standalone binary
if [ -n "$1" ] && [ "$1" == "--mergeFiles" ]
then
jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.HDFSMerger
java -jar $JARNAME
else
jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.RunOnce
java -jar $JARNAME $*
fi
else
# distributed with Hadoop
HADOOPDIR=`pwd`/../../../
CLASSPATH=$CLASSPATH:$HADOOPDIR/build/contrib/failmon/classes
CLASSPATH=$CLASSPATH:$HADOOPDIR/build/classes
CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-api-1*.jar`
CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-1*.jar`
CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/log4j-*.jar`
# echo $CLASSPATH
if [ -n "$1" ] && [ "$1" == "--mergeFiles" ]
then
java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.HDFSMerger
else
java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.RunOnce $*
fi
fi

View File

@ -1,235 +0,0 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Schedule FailMon execution for nodes of file hosts.list, according to
# the properties file conf/global.config.
import time
import ConfigParser
import subprocess
import threading
import random
jobs = []
username = "user"
connections = 10
failmonDir = ""
maxFiles = 100
# This class represents a thread that connects to a set of cluster
# nodes to locally execute monitoring jobs. These jobs are specified
# as a shell command in the constructor.
class sshThread (threading.Thread):
def __init__(self, threadname, username, command, failmonDir):
threading.Thread.__init__(self)
self.name = threadname
self.username = username
self.command = command
self.failmonDir = failmonDir
self.hosts = []
def addHost(self, host):
self.hosts.append(host)
def run (self):
for host in self.hosts:
toRun = ["ssh", self.username + "@" + host, "cd " + self.failmonDir + " ; " + self.command]
print "Thread", self.name, "invoking command on", host, ":\t", toRun, "...",
subprocess.check_call(toRun)
print "Done!"
# This class represents a monitoring job. The param member is a string
# that can be passed in the '--only' list of jobs given to the Java
# class org.apache.hadoop.contrib.failmon.RunOnce for execution on a
# node.
class Job:
def __init__(self, param, interval):
self.param = param
self.interval = interval
self.counter = interval
return
def reset(self):
self.counter = self.interval
# This function reads the configuration file to get the values of the
# configuration parameters.
def getJobs(file):
global username
global connections
global jobs
global failmonDir
global maxFiles
conf = ConfigParser.SafeConfigParser()
conf.read(file)
username = conf.get("Default", "ssh.username")
connections = int(conf.get("Default", "max.connections"))
failmonDir = conf.get("Default", "failmon.dir")
maxFiles = int(conf.get("Default", "hdfs.files.max"))
# Hadoop Log
interval = int(conf.get("Default", "log.hadoop.interval"))
if interval != 0:
jobs.append(Job("hadoopLog", interval))
# System Log
interval = int(conf.get("Default", "log.system.interval"))
if interval != 0:
jobs.append(Job("systemLog", interval))
# NICs
interval = int(conf.get("Default", "nics.interval"))
if interval != 0:
jobs.append(Job("nics", interval))
# CPU
interval = int(conf.get("Default", "cpu.interval"))
if interval != 0:
jobs.append(Job("cpu", interval))
# Disks
interval = int(conf.get("Default", "disks.interval"))
if interval != 0:
jobs.append(Job("disks", interval))
# sensors
interval = int(conf.get("Default", "sensors.interval"))
if interval != 0:
jobs.append(Job("sensors", interval))
# upload
interval = int(conf.get("Default", "upload.interval"))
if interval != 0:
jobs.append(Job("upload", interval))
return
# Compute the gcd (Greatest Common Divisor) of two integers
def GCD(a, b):
assert isinstance(a, int)
assert isinstance(b, int)
while a:
a, b = b%a, a
return b
# Compute the gcd (Greatest Common Divisor) of a list of integers
def listGCD(joblist):
assert isinstance(joblist, list)
if (len(joblist) == 1):
return joblist[0].interval
g = GCD(joblist[0].interval, joblist[1].interval)
for i in range (2, len(joblist)):
g = GCD(g, joblist[i].interval)
return g
# Merge all failmon files created on the HDFS into a single file
def mergeFiles():
global username
global failmonDir
hostList = []
hosts = open('./conf/hosts.list', 'r')
for host in hosts:
hostList.append(host.strip().rstrip())
randomHost = random.sample(hostList, 1)
mergeCommand = "bin/failmon.sh --mergeFiles"
toRun = ["ssh", username + "@" + randomHost[0], "cd " + failmonDir + " ; " + mergeCommand]
print "Invoking command on", randomHost, ":\t", mergeCommand, "...",
subprocess.check_call(toRun)
print "Done!"
return
# The actual scheduling is done here
def main():
getJobs("./conf/global.config")
for job in jobs:
print "Configuration: ", job.param, "every", job.interval, "seconds"
globalInterval = listGCD(jobs)
while True :
time.sleep(globalInterval)
params = []
for job in jobs:
job.counter -= globalInterval
if (job.counter <= 0):
params.append(job.param)
job.reset()
if (len(params) == 0):
continue
onlyStr = "--only " + params[0]
for i in range(1, len(params)):
onlyStr += ',' + params[i]
command = "bin/failmon.sh " + onlyStr
# execute on all nodes
hosts = open('./conf/hosts.list', 'r')
threadList = []
# create a thread for every connection
for i in range(0, connections):
threadList.append(sshThread(i, username, command, failmonDir))
# assign some hosts to every thread/connection
cur = 0
for host in hosts:
threadList[cur].addHost(host.strip().rstrip())
cur += 1
if (cur == len(threadList)):
cur = 0
for ready in threadList:
ready.start()
for ssht in threading.enumerate():
if ssht != threading.currentThread():
ssht.join()
# if an upload has been done, then maybe we need to merge the
# HDFS files
if "upload" in params:
mergeFiles()
return
if __name__ == '__main__':
main()

View File

@ -1,120 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="failmon" default="compile">
<import file="../build-contrib.xml"/>
<property name="jarfile" value="${build.dir}/${name}.jar"/>
<target name="jar" depends="compile" unless="skip.contrib">
<!-- Make sure that the hadoop jar has been created -->
<!-- This works, but causes findbugs to fail
<subant antfile="build.xml" target="jar">
<fileset dir="../../.." includes="build.xml"/>
</subant>
-->
<!-- Copy the required files so that the jar can run independently
of Hadoop source code -->
<!-- create the list of files to add to the classpath -->
<fileset dir="${hadoop.root}/lib" id="class.path">
<include name="**/*.jar" />
<exclude name="**/excluded/" />
</fileset>
<pathconvert pathsep=" " property="failmon-class-path" refid="class.path">
<map from="${basedir}/" to=""/>
</pathconvert>
<echo message="contrib: ${name}"/>
<jar jarfile="${jarfile}" basedir="${build.classes}">
<manifest>
<attribute name="Main-Class" value="org.apache.hadoop.contrib.failmon.RunOnce"/>
<attribute name="Class-Path" value="${failmon-class-path}"/>
</manifest>
</jar>
</target>
<!-- Override test target to copy sample data -->
<target name="test" depends="compile-test, compile, compile-examples" if="test.available">
<echo message="contrib: ${name}"/>
<delete dir="${hadoop.log.dir}"/>
<mkdir dir="${hadoop.log.dir}"/>
<delete dir="${build.test}/sample"/>
<mkdir dir="${build.test}/sample"/>
<copy todir="${build.test}/sample">
<fileset dir="${root}/sample"/>
</copy>
<junit
printsummary="yes" showoutput="${test.output}"
haltonfailure="no" fork="yes" maxmemory="256m"
errorProperty="tests.failed" failureProperty="tests.failed"
timeout="${test.timeout}">
<sysproperty key="test.build.data" value="${build.test}/data"/>
<sysproperty key="build.test" value="${build.test}"/>
<sysproperty key="contrib.name" value="${name}"/>
<!-- requires fork=yes for:
relative File paths to use the specified user.dir
classpath to use build/contrib/*.jar
-->
<sysproperty key="user.dir" value="${build.test}/data"/>
<sysproperty key="fs.default.name" value="${fs.default.name}"/>
<sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
<sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
<classpath refid="test.classpath"/>
<formatter type="${test.junit.output.format}" />
<batchtest todir="${build.test}" unless="testcase">
<fileset dir="${src.test}"
includes="**/Test*.java" excludes="**/${test.exclude}.java" />
</batchtest>
<batchtest todir="${build.test}" if="testcase">
<fileset dir="${src.test}" includes="**/${testcase}.java"/>
</batchtest>
</junit>
<fail if="tests.failed">Tests failed!</fail>
</target>
<target name="tar" depends="jar">
<copy todir=".">
<fileset dir="${hadoop.root}/build/contrib/failmon/"
includes="failmon.jar"/>
</copy>
<tar tarfile="${name}.tar"
basedir=".."
includes="${name}/**"
excludes="${name}/${name}.tar.gz, ${name}/src/**, ${name}/logs/**, ${name}/build.xml*"/>
<gzip zipfile="${name}.tar.gz" src="${name}.tar"/>
<delete file="${name}.tar"/>
<delete file="${name}.jar"/>
<move file="${name}.tar.gz" todir="${build.dir}"/>
<echo message= "${hadoop.root}/build/contrib/failmon/${name}.jar"/>
</target>
</project>

View File

@ -1,25 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#Logging Implementation
#Log4J
org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger
#JDK Logger
#org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger

View File

@ -1,80 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# HDFS properties
hdfs.upload.dir = /failmon
hadoop.conf.path = ../../../conf
# Hadoop Log file properties
log.hadoop.enabled = true
log.hadoop.filenames = /home/hadoop/hadoop-0.17.0/logs/
# set to non-zero only for continuous mode:
log.hadoop.interval = 0
log.hadoop.dateformat = \\d{4}-\\d{2}-\\d{2}
log.hadoop.timeformat = \\d{2}:\\d{2}:\\d{2}
# System Log file properties
log.system.enabled = true
log.system.filenames = /var/log/messages
# set to non-zero only for continuous mode:
log.system.interval = 0
log.system.dateformat = (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)
log.system.timeformat = \\d{2}:\\d{2}:\\d{2}
# Network Interfaces
nic.enabled = true
nic.list = eth0, eth1
# set to non-zero only for continuous mode:
nic.interval = 0
# CPUs & Motherboard
cpu.enabled = true
# set to non-zero only for continuous mode:
cpu.interval = 0
# Disk devices. For all devices listed under disks.list, the corresponding
# property disks./dev/xxx.source specifies where the output of
# "sudo smartctl --all /dev/xxx" can be read by a user. If this property is
# missing, super-user privileges are assumed and the smartctl command itself
# will be invoked.
disks.enabled = true
disks.list = /dev/sda, /dev/sdb, /dev/sdc, /dev/sdd, /dev/hda, /dev/hdb, /dev/hdc, /dev/hdd
#disks./dev/sda.source = hda.smart
# set to non-zero only for continuous mode:
disks.interval = 0
# lm-sensors polling
sensors.enabled = true
# set to non-zero only for continuous mode:
sensors.interval = 0
# Executor thread properties
executor.interval.min = 1
# Anonymization properties
anonymizer.hash.hostnames = false
anonymizer.hash.ips = false
anonymizer.hash.filenames = false
anonymizer.hostname.suffix = apache.org
# Local files options
local.tmp.filename = failmon.dat
local.tmp.compression = false
# set to non-zero only for continuous mode:
local.upload.interval = 0

View File

@ -1,39 +0,0 @@
[Default]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# general settings
# the username to use to connect to cluster nodes
ssh.username = user
# the maximum number of SSH connections to keep open at any time
max.connections = 2
# the directory in which FailMon lies
failmon.dir = /home/user/hadoop-core-trunk/src/contrib/failmon
# the maximum number of HDFS files to allow FailMon to create. After
# this limit is surpassed, all HDFS files will be concatenated into
# one file.
hdfs.files.max = 100
# iteration intervals
log.hadoop.interval = 0
log.system.interval = 0
nics.interval = 10
cpu.interval = 10
disks.interval = 0
sensors.interval = 0
upload.interval = 20

View File

@ -1,10 +0,0 @@
host00
host01
host02
host03
host04
host05
host06
host07
host08
host09

View File

@ -1,40 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Define some default values that can be overridden by system properties
failmon.log.dir=logs
failmon.log.file=failmon.log
log4j.rootLogger= INFO, simpleFile, console
# Logging Threshold
log4j.threshhold=ALL
#
# console
# Add "console" to rootlogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
log4j.appender.simpleFile=org.apache.log4j.FileAppender
log4j.appender.simpleFile.layout=org.apache.log4j.PatternLayout
log4j.appender.simpleFile.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.appender.simpleFile.file= ${failmon.log.dir}/${failmon.log.file}

View File

@ -1,52 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<ivy-module version="1.0">
<info organisation="org.apache.hadoop" module="${ant.project.name}">
<license name="Apache 2.0"/>
<ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
<description>
Apache Hadoop
</description>
</info>
<configurations defaultconfmapping="default">
<!--these match the Maven configurations-->
<conf name="default" extends="master,runtime"/>
<conf name="master" description="contains the artifact but no dependencies"/>
<conf name="runtime" description="runtime but not the artifact" />
<conf name="common" visibility="private"
extends="runtime"
description="artifacts needed compile/test the application"/>
<conf name="test" visibility="private" extends="runtime"/>
</configurations>
<publications>
<!--get the artifact from our module name-->
<artifact conf="master"/>
</publications>
<dependencies>
<dependency org="commons-logging"
name="commons-logging"
rev="${commons-logging.version}"
conf="common->default"/>
<dependency org="log4j"
name="log4j"
rev="${log4j.version}"
conf="common->master"/>
</dependencies>
</ivy-module>

View File

@ -1,17 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#This properties file lists the versions of the various artifacts used by failmon.
#It drives ivy and the generation of a maven POM
#Please list the dependencies name with version if they are different from the ones
#listed in the global libraries.properties file (in alphabetical order)

View File

@ -1,154 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**********************************************************
* This class provides anonymization to SerializedRecord objects. It
* anonymizes all hostnames, ip addresses and file names/paths
* that appear in EventRecords gathered from the logs
* and other system utilities. Such values are hashed using a
* cryptographically safe one-way-hash algorithm (MD5).
*
**********************************************************/
public class Anonymizer {
/**
* Anonymize hostnames, ip addresses and file names/paths
* that appear in fields of a SerializedRecord.
*
* @param sr the input SerializedRecord
*
* @return the anonymized SerializedRecord
*/
public static SerializedRecord anonymize(SerializedRecord sr)
throws Exception {
String hostname = sr.get("hostname");
if (hostname == null)
throw new Exception("Malformed SerializedRecord: no hostname found");
if ("true".equalsIgnoreCase(Environment
.getProperty("anonymizer.hash.hostnames"))) {
// hash the node's hostname
anonymizeField(sr, "message", hostname, "_hn_");
anonymizeField(sr, "hostname", hostname, "_hn_");
// hash all other hostnames
String suffix = Environment.getProperty("anonymizer.hostname.suffix");
if (suffix != null)
anonymizeField(sr, "message", "(\\S+\\.)*" + suffix, "_hn_");
}
if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.ips"))) {
// hash all ip addresses
String ipPattern = "(\\d{1,3}\\.){3}\\d{1,3}";
anonymizeField(sr, "message", ipPattern, "_ip_");
anonymizeField(sr, "ips", ipPattern, "_ip_");
// if multiple ips are present for a node:
int i = 0;
while (sr.get("ips" + "#" + i) != null)
anonymizeField(sr, "ips" + "#" + i++, ipPattern, "_ip_");
if ("NIC".equalsIgnoreCase(sr.get("type")))
anonymizeField(sr, "ipAddress", ipPattern, "_ip_");
}
if ("true".equalsIgnoreCase(Environment
.getProperty("anonymizer.hash.filenames"))) {
// hash every filename present in messages
anonymizeField(sr, "message", "\\s+/(\\S+/)*[^:\\s]*", " _fn_");
anonymizeField(sr, "message", "\\s+hdfs://(\\S+/)*[^:\\s]*",
" hdfs://_fn_");
}
return sr;
}
/**
* Anonymize hostnames, ip addresses and file names/paths
* that appear in fields of an EventRecord, after it gets
* serialized into a SerializedRecord.
*
* @param er the input EventRecord
*
* @return the anonymized SerializedRecord
*/
public static SerializedRecord anonymize(EventRecord er) throws Exception {
return anonymize(new SerializedRecord(er));
}
private static String anonymizeField(SerializedRecord sr, String fieldName,
String pattern, String prefix) {
String txt = sr.get(fieldName);
if (txt == null)
return null;
else {
String anon = getMD5Hash(pattern);
sr.set(fieldName, txt.replaceAll(pattern, (prefix == null ? "" : prefix)
+ anon));
return txt;
}
}
/**
* Create the MD5 digest of an input text.
*
* @param text the input text
*
* @return the hexadecimal representation of the MD5 digest
*/
public static String getMD5Hash(String text) {
MessageDigest md;
byte[] md5hash = new byte[32];
try {
md = MessageDigest.getInstance("MD5");
md.update(text.getBytes("iso-8859-1"), 0, text.length());
md5hash = md.digest();
} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return convertToHex(md5hash);
}
private static String convertToHex(byte[] data) {
StringBuilder buf = new StringBuilder();
for (int i = 0; i < data.length; i++) {
int halfbyte = (data[i] >>> 4) & 0x0F;
int two_halfs = 0;
do {
if ((0 <= halfbyte) && (halfbyte <= 9))
buf.append((char) ('0' + halfbyte));
else
buf.append((char) ('a' + (halfbyte - 10)));
halfbyte = data[i] & 0x0F;
} while (two_halfs++ < 1);
}
return buf.toString();
}
}

View File

@ -1,101 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.util.Calendar;
/**********************************************************
* Objects of this class parse the /proc/cpuinfo file to
* gather information about present processors in the system.
*
**********************************************************/
public class CPUParser extends ShellParser {
/**
* Constructs a CPUParser
*/
public CPUParser() {
super();
}
/**
* Reads and parses /proc/cpuinfo and creates an appropriate
* EventRecord that holds the desirable information.
*
* @param s unused parameter
*
* @return the EventRecord created
*/
public EventRecord query(String s) throws Exception {
CharSequence sb = Environment.runCommandGeneric("cat /proc/cpuinfo");
EventRecord retval = new EventRecord(InetAddress.getLocalHost()
.getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
.getHostName()), Calendar.getInstance(), "CPU", "Unknown", "CPU", "-");
retval.set("processors", findAll("\\s*processor\\s*:\\s*(\\d+)", sb
.toString(), 1, ", "));
retval.set("model name", findPattern("\\s*model name\\s*:\\s*(.+)", sb
.toString(), 1));
retval.set("frequency", findAll("\\s*cpu\\s*MHz\\s*:\\s*(\\d+)", sb
.toString(), 1, ", "));
retval.set("physical id", findAll("\\s*physical\\s*id\\s*:\\s*(\\d+)", sb
.toString(), 1, ", "));
retval.set("core id", findAll("\\s*core\\s*id\\s*:\\s*(\\d+)", sb
.toString(), 1, ", "));
return retval;
}
/**
* Invokes query() to do the parsing and handles parsing errors.
*
* @return an array of EventRecords that holds one element that represents
* the current state of /proc/cpuinfo
*/
public EventRecord[] monitor() {
EventRecord[] recs = new EventRecord[1];
try {
recs[0] = query(null);
} catch (Exception e) {
e.printStackTrace();
}
return recs;
}
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
return ("CPU Info parser");
}
}

View File

@ -1,41 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.IOException;
/**********************************************************
* This class runs FailMon in a continuous mode on the local
* node.
*
**********************************************************/
public class Continuous {
public static void main(String[] args) {
Environment.prepare("failmon.properties");
Executor ex = new Executor(null);
new Thread(ex).start();
}
}

View File

@ -1,486 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.*;
import org.apache.log4j.PropertyConfigurator;
/**********************************************************
* This class provides various methods for interaction with
* the configuration and the operating system environment. Also
* provides some helper methods for use by other classes in
* the package.
**********************************************************/
public class Environment {
public static final int DEFAULT_LOG_INTERVAL = 3600;
public static final int DEFAULT_POLL_INTERVAL = 360;
public static int MIN_INTERVAL = 5;
public static final int MAX_OUTPUT_LENGTH = 51200;
public static Log LOG;
static Properties fmProperties = new Properties();
static boolean superuser = false;
static boolean ready = false;
/**
* Initializes structures needed by other methods. Also determines
* whether the executing user has superuser privileges.
*
*/
public static void prepare(String fname) {
if (!"Linux".equalsIgnoreCase(System.getProperty("os.name"))) {
System.err.println("Linux system required for FailMon. Exiting...");
System.exit(0);
}
System.setProperty("log4j.configuration", "conf/log4j.properties");
PropertyConfigurator.configure("conf/log4j.properties");
LOG = LogFactory.getLog("org.apache.hadoop.contrib.failmon");
logInfo("********** FailMon started ***********");
// read parseState file
PersistentState.readState("conf/parsing.state");
try {
FileInputStream propFile = new FileInputStream(fname);
fmProperties.load(propFile);
propFile.close();
} catch (FileNotFoundException e1) {
e1.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
ready = true;
try {
String sudo_prompt = "passwd_needed:";
String echo_txt = "access_ok";
Process p = Runtime.getRuntime().exec("sudo -S -p " + sudo_prompt + " echo " + echo_txt );
InputStream inps = p.getInputStream();
InputStream errs = p.getErrorStream();
while (inps.available() < echo_txt.length() && errs.available() < sudo_prompt.length())
Thread.sleep(100);
byte [] buf;
String s;
if (inps.available() >= echo_txt.length()) {
buf = new byte[inps.available()];
inps.read(buf);
s = new String(buf);
if (s.startsWith(echo_txt)) {
superuser = true;
logInfo("Superuser privileges found!");
} else {
// no need to read errs
superuser = false;
logInfo("Superuser privileges not found.");
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
/**
* Fetches the value of a property from the configuration file.
*
* @param key the name of the property
*
* @return the value of the property, if it exists and
* null otherwise
*/
public static String getProperty(String key) {
if (!ready)
prepare("conf/failmon.properties");
return fmProperties.getProperty(key);
}
/**
* Sets the value of a property in the configuration file.
*
* @param key the name of the property
* @param value the new value for the property
*
*/
public static void setProperty(String key, String value) {
fmProperties.setProperty(key, value);
}
/**
* Scans the configuration file to determine which monitoring
* utilities are available in the system. For each one of them, a
* job is created. All such jobs are scheduled and executed by
* Executor.
*
* @return an ArrayList that contains jobs to be executed by the Executor.
*/
public static ArrayList<MonitorJob> getJobs() {
ArrayList<MonitorJob> monitors = new ArrayList<MonitorJob>();
int timeInt = 0;
// for Hadoop Log parsing
String [] fnames_r = getProperty("log.hadoop.filenames").split(",\\s*");
String tmp = getProperty("log.hadoop.enabled");
String [] fnames = expandDirs(fnames_r, ".*(.log).*");
timeInt = setValue("log.hadoop.interval", DEFAULT_LOG_INTERVAL);
if ("true".equalsIgnoreCase(tmp) && fnames[0] != null)
for (String fname : fnames) {
File f = new File(fname);
if (f.exists() && f.canRead()) {
monitors.add(new MonitorJob(new HadoopLogParser(fname), "hadoopLog", timeInt));
logInfo("Created Monitor for Hadoop log file: " + f.getAbsolutePath());
} else if (!f.exists())
logInfo("Skipping Hadoop log file " + fname + " (file not found)");
else
logInfo("Skipping Hadoop log file " + fname + " (permission denied)");
}
// for System Log parsing
fnames_r = getProperty("log.system.filenames").split(",\\s*");
tmp = getProperty("log.system.enabled");
fnames = expandDirs(fnames_r, ".*(messages).*");
timeInt = setValue("log.system.interval", DEFAULT_LOG_INTERVAL);
if ("true".equalsIgnoreCase(tmp))
for (String fname : fnames) {
File f = new File(fname);
if (f.exists() && f.canRead()) {
monitors.add(new MonitorJob(new SystemLogParser(fname), "systemLog", timeInt));
logInfo("Created Monitor for System log file: " + f.getAbsolutePath());
} else if (!f.exists())
logInfo("Skipping system log file " + fname + " (file not found)");
else
logInfo("Skipping system log file " + fname + " (permission denied)");
}
// for network interfaces
tmp = getProperty("nic.enabled");
timeInt = setValue("nics.interval", DEFAULT_POLL_INTERVAL);
if ("true".equalsIgnoreCase(tmp)) {
monitors.add(new MonitorJob(new NICParser(), "nics", timeInt));
logInfo("Created Monitor for NICs");
}
// for cpu
tmp = getProperty("cpu.enabled");
timeInt = setValue("cpu.interval", DEFAULT_POLL_INTERVAL);
if ("true".equalsIgnoreCase(tmp)) {
monitors.add(new MonitorJob(new CPUParser(), "cpu", timeInt));
logInfo("Created Monitor for CPUs");
}
// for disks
tmp = getProperty("disks.enabled");
timeInt = setValue("disks.interval", DEFAULT_POLL_INTERVAL);
if ("true".equalsIgnoreCase(tmp)) {
// check privileges if a disk with no disks./dev/xxx.source is found
boolean smart_present = checkExistence("smartctl");
int disks_ok = 0;
String devicesStr = getProperty("disks.list");
String[] devices = null;
if (devicesStr != null)
devices = devicesStr.split(",\\s*");
for (int i = 0; i< devices.length; i++) {
boolean file_present = false;
boolean disk_present = false;
String fileloc = getProperty("disks." + devices[i] + ".source");
if (fileloc != null && fileloc.equalsIgnoreCase("true"))
file_present = true;
if (!file_present)
if (superuser) {
CharSequence sb = runCommandGeneric("sudo smartctl -i " + devices[i]);
String patternStr = "[(failed)(device not supported)]";
Pattern pattern = Pattern.compile(patternStr);
Matcher matcher = pattern.matcher(sb.toString());
if (matcher.find(0))
disk_present = false;
else
disk_present = true;
}
if (file_present || (disk_present && smart_present)) {
disks_ok++;
} else
devices[i] = null;
}
// now remove disks that don't exist
StringBuilder resetSB = new StringBuilder();
for (int j = 0; j < devices.length; j++) {
resetSB.append(devices[j] == null ? "" : devices[j] + ", ");
if (devices[j] != null)
logInfo("Found S.M.A.R.T. attributes for disk " + devices[j]);
}
// fix the property
if (resetSB.length() >= 2)
setProperty("disks.list", resetSB.substring(0, resetSB.length() - 2));
if (disks_ok > 0) {
monitors.add(new MonitorJob(new SMARTParser(), "disks", timeInt));
logInfo("Created Monitor for S.M.A.R.T disk attributes");
}
}
// for lm-sensors
tmp = getProperty("sensors.enabled");
timeInt = setValue("sensors.interval", DEFAULT_POLL_INTERVAL);
if ("true".equalsIgnoreCase(tmp) && checkExistence("sensors")) {
monitors.add(new MonitorJob(new SensorsParser(), "sensors", timeInt));
logInfo("Created Monitor for lm-sensors output");
}
return monitors;
}
/**
* Determines the minimum interval at which the executor thread
* needs to wake up to execute jobs. Essentially, this interval
* equals the GCD of the intervals of all scheduled jobs.
*
* @param monitors the list of scheduled jobs
*
* @return the minimum interval between two scheduled jobs
*/
public static int getInterval(ArrayList<MonitorJob> monitors) {
String tmp = getProperty("executor.interval.min");
if (tmp != null)
MIN_INTERVAL = Integer.parseInt(tmp);
int[] monIntervals = new int[monitors.size()];
for (int i = 0; i < monitors.size(); i++)
monIntervals[i] = monitors.get(i).interval;
return Math.max(MIN_INTERVAL, gcd(monIntervals));
}
/**
* Checks whether a specific shell command is available
* in the system.
*
* @param cmd the command to check against
*
* @return true, if the command is available, false otherwise
*/
public static boolean checkExistence(String cmd) {
CharSequence sb = runCommandGeneric("which " + cmd);
if (sb.length() > 1)
return true;
return false;
}
/**
* Runs a shell command in the system and provides a StringBuilder
* with the output of the command.
* <p>This method is deprecated. See the related method that returns a CharSequence as opposed to a StringBuffer.
*
* @param cmd an array of string that form the command to run
*
* @return a text that contains the output of the command
* @see #runCommandGeneric(String[])
* @deprecated
*/
public static StringBuffer runCommand(String[] cmd) {
return new StringBuffer(runCommandGeneric(cmd));
}
/**
* Runs a shell command in the system and provides a StringBuilder
* with the output of the command.
*
* @param cmd an array of string that form the command to run
*
* @return a text that contains the output of the command
*/
public static CharSequence runCommandGeneric(String[] cmd) {
StringBuilder retval = new StringBuilder(MAX_OUTPUT_LENGTH);
Process p;
try {
p = Runtime.getRuntime().exec(cmd);
InputStream tmp = p.getInputStream();
p.waitFor();
int c;
while ((c = tmp.read()) != -1)
retval.append((char) c);
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
return retval;
}
/**
* Runs a shell command in the system and provides a StringBuilder
* with the output of the command.
* <p>This method is deprecated in favor of the one that returns CharSequence as opposed to StringBuffer
* @param cmd the command to run
*
* @return a text that contains the output of the command
* @see #runCommandGeneric(String)
* @deprecated
*/
public static StringBuffer runCommand(String cmd) {
return new StringBuffer(runCommandGeneric(cmd));
}
/**
* Runs a shell command in the system and provides a StringBuilder
* with the output of the command.
*
* @param cmd the command to run
*
* @return a text that contains the output of the command
*/
public static CharSequence runCommandGeneric(String cmd) {
return runCommandGeneric(cmd.split("\\s+"));
}
/**
* Determines the greatest common divisor (GCD) of two integers.
*
* @param m the first integer
* @param n the second integer
*
* @return the greatest common divisor of m and n
*/
public static int gcd(int m, int n) {
if (m == 0 && n == 0)
return 0;
if (m < n) {
int t = m;
m = n;
n = t;
}
int r = m % n;
if (r == 0) {
return n;
} else {
return gcd(n, r);
}
}
/**
* Determines the greatest common divisor (GCD) of a list
* of integers.
*
* @param numbers the list of integers to process
*
* @return the greatest common divisor of all numbers
*/
public static int gcd(int[] numbers) {
if (numbers.length == 1)
return numbers[0];
int g = gcd(numbers[0], numbers[1]);
for (int i = 2; i < numbers.length; i++)
g = gcd(g, numbers[i]);
return g;
}
private static String [] expandDirs(String [] input, String patternStr) {
ArrayList<String> fnames = new ArrayList<String>();
Pattern pattern = Pattern.compile(patternStr);
Matcher matcher;
File f;
for (String fname : input) {
f = new File(fname);
if (f.exists()) {
if (f.isDirectory()) {
// add all matching files
File [] fcs = f.listFiles();
for (File fc : fcs) {
matcher = pattern.matcher(fc.getName());
if (matcher.find() && fc.isFile())
fnames.add(fc.getAbsolutePath());
}
} else {
// normal file, just add to output
fnames.add(f.getAbsolutePath());
}
}
}
return fnames.toArray(input);
}
private static int setValue(String propname, int defaultValue) {
String v = getProperty(propname);
if (v != null)
return Integer.parseInt(v);
else
return defaultValue;
}
public static void logInfo(String str) {
LOG.info(str);
}
}

View File

@ -1,151 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.HashMap;
/**********************************************************
* Objects of this class represent metrics collected for
* a specific hardware source. Each EventRecord contains a HashMap of
* (key, value) pairs, each of which represents a property of
* the metered value. For instance, when parsing a log file, an
* EventRecord is created for each log entry, which contains
* the hostname and the ip addresses of the node, timestamp of
* the log entry, the actual message etc. Each and every EventRecord
* contains the hostname of the machine on which it was collected,
* its IP address and the time of collection.
*
* The main purpose of this class is to provide a uniform format
* for records collected from various system components (logs,
* ifconfig, smartmontools, lm-sensors etc). All metric values are
* converted into this format after they are collected by a
* Monitored object.
*
**********************************************************/
public class EventRecord {
HashMap<String, Object> fields;
/**
* Create the EventRecord given the most common properties
* among different metric types.
*/
public EventRecord(String _hostname, Object [] _ips, Calendar _timestamp,
String _type, String _logLevel, String _source, String _message) {
fields = new HashMap<String, Object>();
fields.clear();
set("hostname", _hostname);
set("ips", _ips);
set("timestamp", _timestamp);
set("type", _type);
set("logLevel", _logLevel);
set("source", _source);
set("message", _message);
}
/**
* Create the EventRecord with no fields other than "invalid" as
* the hostname. This is only used as a dummy.
*/
public EventRecord() {
// creates an invalid record
fields = new HashMap<String, Object>();
fields.clear();
set("hostname", "invalid");
}
/**
* Return the HashMap of properties of the EventRecord.
*
* @return a HashMap that contains all properties of the record.
*/
public final HashMap<String, Object> getMap() {
return fields;
}
/**
* Set the value of a property of the EventRecord.
*
* @param fieldName the name of the property to set
* @param fieldValue the value of the property to set
*
*/
public void set(String fieldName, Object fieldValue) {
if (fieldValue != null)
fields.put(fieldName, fieldValue);
}
/**
* Get the value of a property of the EventRecord.
* If the property with the specific key is not found,
* null is returned.
*
* @param fieldName the name of the property to get.
*/
public Object get(String fieldName) {
return fields.get(fieldName);
}
/**
* Check if the EventRecord is a valid one, i.e., whether
* it represents meaningful metric values.
*
* @return true if the EventRecord is a valid one, false otherwise.
*/
public boolean isValid() {
return !("invalid".equalsIgnoreCase((String) fields.get("hostname")));
}
/**
* Creates and returns a string representation of the object.
*
* @return a String representation of the object
*/
public String toString() {
String retval = "";
ArrayList<String> keys = new ArrayList<String>(fields.keySet());
Collections.sort(keys);
for (int i = 0; i < keys.size(); i++) {
Object value = fields.get(keys.get(i));
if (value == null)
retval += keys.get(i) + ":\tnull\n";
else if (value instanceof String)
retval += keys.get(i) + ":\t" + value + "\n";
else if (value instanceof Calendar)
retval += keys.get(i) + ":\t" + ((Calendar) value).getTime() + "\n";
else if (value instanceof InetAddress[] || value instanceof String []) {
retval += "Known IPs:\t";
for (InetAddress ip : ((InetAddress[]) value))
retval += ip.getHostAddress() + " ";
retval += "\n";
} else {
retval += keys.get(i) + ":\t" + value.toString() + "\n";
}
}
return retval;
}
}

View File

@ -1,120 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
/**********************************************************
* This class executes monitoring jobs on all nodes of the
* cluster, on which we intend to gather failure metrics.
* It is basically a thread that sleeps and periodically wakes
* up to execute monitoring jobs and ship all gathered data to
* a "safe" location, which in most cases will be the HDFS
* filesystem of the monitored cluster.
*
**********************************************************/
public class Executor implements Runnable {
public static final int DEFAULT_LOG_INTERVAL = 3600;
public static final int DEFAULT_POLL_INTERVAL = 360;
public static int MIN_INTERVAL = 5;
public static int instances = 0;
LocalStore lstore;
ArrayList<MonitorJob> monitors;
int interval;
int upload_interval;
int upload_counter;
/**
* Create an instance of the class and read the configuration
* file to determine the set of jobs that will be run and the
* maximum interval for which the thread can sleep before it
* wakes up to execute a monitoring job on the node.
*
*/
public Executor(Configuration conf) {
Environment.prepare("conf/failmon.properties");
String localTmpDir;
if (conf == null) {
// running as a stand-alone application
localTmpDir = System.getProperty("java.io.tmpdir");
Environment.setProperty("local.tmp.dir", localTmpDir);
} else {
// running from within Hadoop
localTmpDir = conf.get("hadoop.tmp.dir");
String hadoopLogPath = System.getProperty("hadoop.log.dir") + "/" + System.getProperty("hadoop.log.file");
Environment.setProperty("hadoop.log.file", hadoopLogPath);
Environment.setProperty("local.tmp.dir", localTmpDir);
}
monitors = Environment.getJobs();
interval = Environment.getInterval(monitors);
upload_interval = LocalStore.UPLOAD_INTERVAL;
lstore = new LocalStore();
if (Environment.getProperty("local.upload.interval") != null)
upload_interval = Integer.parseInt(Environment.getProperty("local.upload.interval"));
instances++;
}
public void run() {
upload_counter = upload_interval;
Environment.logInfo("Failmon Executor thread started successfully.");
while (true) {
try {
Thread.sleep(interval * 1000);
for (int i = 0; i < monitors.size(); i++) {
monitors.get(i).counter -= interval;
if (monitors.get(i).counter <= 0) {
monitors.get(i).reset();
Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
monitors.get(i).job.monitor(lstore);
}
}
upload_counter -= interval;
if (upload_counter <= 0) {
lstore.upload();
upload_counter = upload_interval;
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
public void cleanup() {
instances--;
}
}

View File

@ -1,154 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.zip.CRC32;
import java.util.zip.CheckedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;
public class HDFSMerger {
Configuration hadoopConf;
FileSystem hdfs;
String hdfsDir;
FileStatus [] inputFiles;
Path outputFilePath;
FSDataOutputStream outputFile;
boolean compress;
FileWriter fw;
BufferedWriter writer;
public HDFSMerger() throws IOException {
String hadoopConfPath;
if (Environment.getProperty("hadoop.conf.path") == null)
hadoopConfPath = "../../../conf";
else
hadoopConfPath = Environment.getProperty("hadoop.conf.path");
// Read the configuration for the Hadoop environment
Configuration hadoopConf = new Configuration();
hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));
// determine the local output file name
if (Environment.getProperty("local.tmp.filename") == null)
Environment.setProperty("local.tmp.filename", "failmon.dat");
// determine the upload location
hdfsDir = Environment.getProperty("hdfs.upload.dir");
if (hdfsDir == null)
hdfsDir = "/failmon";
hdfs = FileSystem.get(hadoopConf);
Path hdfsDirPath = new Path(hadoopConf.get("fs.default.name") + hdfsDir);
try {
if (!hdfs.getFileStatus(hdfsDirPath).isDir()) {
Environment.logInfo("HDFSMerger: Not an HDFS directory: " + hdfsDirPath.toString());
System.exit(0);
}
} catch (FileNotFoundException e) {
Environment.logInfo("HDFSMerger: Directory not found: " + hdfsDirPath.toString());
}
inputFiles = hdfs.listStatus(hdfsDirPath);
outputFilePath = new Path(hdfsDirPath.toString() + "/" + "merge-"
+ Calendar.getInstance().getTimeInMillis() + ".dat");
outputFile = hdfs.create(outputFilePath);
for (FileStatus fstatus : inputFiles) {
appendFile(fstatus.getPath());
hdfs.delete(fstatus.getPath(), true);
}
outputFile.close();
Environment.logInfo("HDFS file merging complete!");
}
private void appendFile (Path inputPath) throws IOException {
FSDataInputStream anyInputFile = hdfs.open(inputPath);
InputStream inputFile;
byte buffer[] = new byte[4096];
if (inputPath.toString().endsWith(LocalStore.COMPRESSION_SUFFIX)) {
// the file is compressed
inputFile = new ZipInputStream(anyInputFile);
((ZipInputStream) inputFile).getNextEntry();
} else {
inputFile = anyInputFile;
}
try {
int bytesRead = 0;
while ((bytesRead = inputFile.read(buffer)) > 0) {
outputFile.write(buffer, 0, bytesRead);
}
} catch (IOException e) {
Environment.logInfo("Error while copying file:" + inputPath.toString());
} finally {
inputFile.close();
}
}
public static void main(String [] args) {
Environment.prepare("./conf/failmon.properties");
try {
new HDFSMerger();
} catch (IOException e) {
e.printStackTrace();
}
}
}

View File

@ -1,136 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.IOException;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
* An object of this class parses a Hadoop log file to create
* appropriate EventRecords. The log file can either be the log
* of a NameNode or JobTracker or DataNode or TaskTracker.
*
**********************************************************/
public class HadoopLogParser extends LogParser {
/**
* Create a new parser object and try to find the hostname
* of the node that generated the log
*/
public HadoopLogParser(String fname) {
super(fname);
if ((dateformat = Environment.getProperty("log.hadoop.dateformat")) == null)
dateformat = "\\d{4}-\\d{2}-\\d{2}";
if ((timeformat = Environment.getProperty("log.hadoop.timeformat")) == null)
timeformat = "\\d{2}:\\d{2}:\\d{2}";
findHostname();
}
/**
* Parses one line of the log. If the line contains a valid
* log entry, then an appropriate EventRecord is returned, after all
* relevant fields have been parsed.
*
* @param line the log line to be parsed
*
* @return the EventRecord representing the log entry of the line. If
* the line does not contain a valid log entry, then the EventRecord
* returned has isValid() = false. When the end-of-file has been reached,
* null is returned to the caller.
*/
public EventRecord parseLine(String line) throws IOException {
EventRecord retval = null;
if (line != null) {
// process line
String patternStr = "(" + dateformat + ")";
patternStr += "\\s+";
patternStr += "(" + timeformat + ")";
patternStr += ".{4}\\s(\\w*)\\s"; // for logLevel
patternStr += "\\s*([\\w+\\.?]+)"; // for source
patternStr += ":\\s+(.+)"; // for the message
Pattern pattern = Pattern.compile(patternStr);
Matcher matcher = pattern.matcher(line);
if (matcher.find(0) && matcher.groupCount() >= 5) {
retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
matcher.group(2)),
"HadoopLog",
matcher.group(3), // loglevel
matcher.group(4), // source
matcher.group(5)); // message
} else {
retval = new EventRecord();
}
}
return retval;
}
/**
* Parse a date found in the Hadoop log.
*
* @return a Calendar representing the date
*/
protected Calendar parseDate(String strDate, String strTime) {
Calendar retval = Calendar.getInstance();
// set date
String[] fields = strDate.split("-");
retval.set(Calendar.YEAR, Integer.parseInt(fields[0]));
retval.set(Calendar.MONTH, Integer.parseInt(fields[1]) - 1); // Calendar months are zero-based
retval.set(Calendar.DATE, Integer.parseInt(fields[2]));
// set time
fields = strTime.split(":");
retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
return retval;
}
/**
* Attempt to determine the hostname of the node that created the
* log file. This information can be found in the STARTUP_MSG lines
* of the Hadoop log, which are emitted when the node starts.
*
*/
private void findHostname() {
String startupInfo = Environment.runCommandGeneric(
"grep --max-count=1 STARTUP_MSG:\\s*host " + file.getName()).toString();
Pattern pattern = Pattern.compile("\\s+(\\w+/.+)\\s+");
Matcher matcher = pattern.matcher(startupInfo);
if (matcher.find(0)) {
hostname = matcher.group(1).split("/")[0];
ips = new String[1];
ips[0] = matcher.group(1).split("/")[1];
}
}
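/* Illustrative sketch: the grep above is meant to match a startup line of the
* form (hostname and address below are hypothetical)
*
*   STARTUP_MSG:   host = node01/10.0.0.1
*
* which is then split on '/' to recover the hostname and its IP address.
*/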
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
return ("Hadoop Log Parser for file: " + file.getName());
}
}

View File

@ -1,282 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.zip.CRC32;
import java.util.zip.CheckedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**********************************************************
* This class takes care of the temporary local storage of
* gathered metrics before they get uploaded into HDFS. It writes
* Serialized Records as lines in a temporary file and then
* compresses and uploads it into HDFS.
*
**********************************************************/
public class LocalStore {
public final static char FIELD_SEPARATOR = '|';
public final static char RECORD_SEPARATOR = '\n';
public final static String COMPRESSION_SUFFIX = ".zip";
public final static int UPLOAD_INTERVAL = 600;
String filename;
String hdfsDir;
boolean compress;
FileWriter fw;
BufferedWriter writer;
/**
* Create an instance of the class and read the configuration
* file to determine some output parameters. Then, initialize the
* structures needed for the buffered I/O (so that small appends
* can be handled efficiently).
*
*/
public LocalStore() {
// determine the local output file name
if (Environment.getProperty("local.tmp.filename") == null)
Environment.setProperty("local.tmp.filename", "failmon.dat");
// local.tmp.dir has been set by the Executor
if (Environment.getProperty("local.tmp.dir") == null)
Environment.setProperty("local.tmp.dir", System.getProperty("java.io.tmpdir"));
filename = Environment.getProperty("local.tmp.dir") + "/" +
Environment.getProperty("local.tmp.filename");
// determine the upload location
hdfsDir = Environment.getProperty("hdfs.upload.dir");
if (hdfsDir == null)
hdfsDir = "/failmon";
// determine if compression is enabled
compress = true;
if ("false".equalsIgnoreCase(Environment
.getProperty("local.tmp.compression")))
compress = false;
try {
fw = new FileWriter(filename, true);
writer = new BufferedWriter(fw);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Insert an EventRecord to the local storage, after it
* gets serialized and anonymized.
*
* @param er the EventRecord to be inserted
*/
public void insert(EventRecord er) {
SerializedRecord sr = new SerializedRecord(er);
try {
Anonymizer.anonymize(sr);
} catch (Exception e) {
e.printStackTrace();
}
append(sr);
}
/**
* Insert an array of EventRecords to the local storage, after they
* get serialized and anonymized.
*
* @param ers the array of EventRecords to be inserted
*/
public void insert(EventRecord[] ers) {
for (EventRecord er : ers)
insert(er);
}
private void append(SerializedRecord sr) {
try {
writer.write(pack(sr).toString());
writer.write(RECORD_SEPARATOR);
// writer.flush();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Pack a SerializedRecord into an array of bytes
* <p>
* This method is deprecated.
* @param sr the SerializedRecord to be packed
* @return Packed representation of the SerializedRecord
* @see #packConcurrent(SerializedRecord)
* @deprecated
*/
public static StringBuffer pack(SerializedRecord sr) {
return new StringBuffer(packConcurrent(sr));
}
/**
* Pack a SerializedRecord into an array of bytes
*
* @param sr the SerializedRecord to be packed
* @return Packed representation of the SerializedRecord
*/
public static CharSequence packConcurrent(SerializedRecord sr) {
StringBuilder sb = new StringBuilder();
ArrayList<String> keys = new ArrayList<String>(sr.fields.keySet());
if (sr.isValid())
SerializedRecord.arrangeKeys(keys);
for (int i = 0; i < keys.size(); i++) {
String value = sr.fields.get(keys.get(i));
sb.append(keys.get(i) + ":" + value);
sb.append(FIELD_SEPARATOR);
}
return sb;
}
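/* Illustrative sketch of the packed output (field values are hypothetical):
* after arrangeKeys(), a single record is one FIELD_SEPARATOR-delimited line
* such as
*
*   timestamp:October 1, 2008 1:00:00 PM GMT|hostname:node01|type:NIC|ipAddress:10.0.0.1|
*
* which is terminated by RECORD_SEPARATOR when appended to the local store.
*/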
/**
* Upload the local file store into HDFS, after
* compressing it. Then a new local file is created
* as a temporary record store.
*
*/
public void upload() {
try {
writer.flush();
if (compress)
zipCompress(filename);
String remoteName = "failmon-";
if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.hostnames")))
remoteName += Anonymizer.getMD5Hash(InetAddress.getLocalHost().getCanonicalHostName()) + "-";
else
remoteName += InetAddress.getLocalHost().getCanonicalHostName() + "-";
remoteName += Calendar.getInstance().getTimeInMillis();
if (compress)
copyToHDFS(filename + COMPRESSION_SUFFIX, hdfsDir + "/" + remoteName + COMPRESSION_SUFFIX);
else
copyToHDFS(filename, hdfsDir + "/" + remoteName);
} catch (IOException e) {
e.printStackTrace();
}
// delete and re-open
try {
fw.close();
fw = new FileWriter(filename);
writer = new BufferedWriter(fw);
} catch (IOException e) {
e.printStackTrace();
}
}
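/* Illustrative sketch of the resulting HDFS path (host and timestamp are
* hypothetical), assuming the default hdfs.upload.dir and compression enabled:
*
*   /failmon/failmon-node01.example.com-1224084000000.zip
*
* With anonymizer.hash.hostnames set to true, the hostname part is replaced
* by its MD5 hash.
*/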
/**
* Compress a text file using the ZIP compressing algorithm.
*
* @param filename the path to the file to be compressed
*/
public static void zipCompress(String filename) throws IOException {
FileOutputStream fos = new FileOutputStream(filename + COMPRESSION_SUFFIX);
CheckedOutputStream csum = new CheckedOutputStream(fos, new CRC32());
ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(csum));
out.setComment("Failmon records.");
BufferedReader in = new BufferedReader(new FileReader(filename));
out.putNextEntry(new ZipEntry(new File(filename).getName()));
int c;
while ((c = in.read()) != -1)
out.write(c);
in.close();
out.finish();
out.close();
}
/**
* Copy a local file to HDFS
*
* @param localFile the filename of the local file
* @param hdfsFile the HDFS filename to copy to
*/
public static void copyToHDFS(String localFile, String hdfsFile) throws IOException {
String hadoopConfPath;
if (Environment.getProperty("hadoop.conf.path") == null)
hadoopConfPath = "../../../conf";
else
hadoopConfPath = Environment.getProperty("hadoop.conf.path");
// Read the configuration for the Hadoop environment
Configuration hadoopConf = new Configuration();
hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));
// System.out.println(hadoopConf.get("hadoop.tmp.dir"));
// System.out.println(hadoopConf.get("fs.default.name"));
FileSystem fs = FileSystem.get(hadoopConf);
// HadoopDFS deals with Path
Path inFile = new Path("file://" + localFile);
Path outFile = new Path(hadoopConf.get("fs.default.name") + hdfsFile);
// Read from and write to new file
Environment.logInfo("Uploading to HDFS (file " + outFile + ") ...");
fs.copyFromLocalFile(false, inFile, outFile);
}
/**
* Close the temporary local file
*
*/
public void close() {
try {
writer.flush();
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

View File

@ -1,214 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Calendar;
/**********************************************************
* This class represents objects that provide log parsing
* functionality. Typically, such objects read log files line
* by line and for each log entry they identify, they create a
* corresponding EventRecord. In this way, disparate log files
* can be merged using the uniform format of EventRecords and can,
* thus, be processed in a uniform way.
*
**********************************************************/
public abstract class LogParser implements Monitored {
File file;
BufferedReader reader;
String hostname;
Object [] ips;
String dateformat;
String timeformat;
private String firstLine;
private long offset;
/**
* Create a parser that will read from the specified log file.
*
* @param fname the filename of the log file to be read
*/
public LogParser(String fname) {
file = new File(fname);
ParseState ps = PersistentState.getState(file.getAbsolutePath());
firstLine = ps.firstLine;
offset = ps.offset;
try {
reader = new BufferedReader(new FileReader(file));
checkForRotation();
Environment.logInfo("Checked for rotation...");
reader.skip(offset);
} catch (FileNotFoundException e) {
System.err.println(e.getMessage());
e.printStackTrace();
} catch (IOException e) {
System.err.println(e.getMessage());
e.printStackTrace();
}
setNetworkProperties();
}
protected void setNetworkProperties() {
// determine hostname and ip addresses for the node
try {
// Get hostname
hostname = InetAddress.getLocalHost().getCanonicalHostName();
// Get all associated ip addresses
ips = InetAddress.getAllByName(hostname);
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
/**
* Insert all EventRecords that can be extracted for
* the represented hardware component into a LocalStore.
*
* @param ls the LocalStore into which the EventRecords
* are to be stored.
*/
public void monitor(LocalStore ls) {
int in = 0;
EventRecord er = null;
Environment.logInfo("Started processing log...");
while ((er = getNext()) != null) {
// Environment.logInfo("Processing log line:\t" + in++);
if (er.isValid()) {
ls.insert(er);
}
}
PersistentState.updateState(file.getAbsolutePath(), firstLine, offset);
PersistentState.writeState("conf/parsing.state");
}
/**
* Get an array of all EventRecords that can be extracted for
* the represented hardware component.
*
* @return The array of EventRecords
*/
public EventRecord[] monitor() {
ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
EventRecord er;
while ((er = getNext()) != null)
recs.add(er);
EventRecord[] T = new EventRecord[recs.size()];
return recs.toArray(T);
}
/**
* Continue parsing the log file until a valid log entry is identified.
* When one such entry is found, parse it and return a corresponding EventRecord.
*
*
* @return The EventRecord corresponding to the next log entry
*/
public EventRecord getNext() {
try {
String line = reader.readLine();
if (line != null) {
if (firstLine == null)
firstLine = new String(line);
offset += line.length() + 1;
return parseLine(line);
}
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
/**
* Return the BufferedReader, that reads the log file
*
* @return The BufferedReader that reads the log file
*/
public BufferedReader getReader() {
return reader;
}
/**
* Check whether the log file has been rotated. If so,
* start reading the file from the beginning.
*
*/
public void checkForRotation() {
try {
BufferedReader probe = new BufferedReader(new FileReader(file.getAbsoluteFile()));
if (firstLine == null || (!firstLine.equals(probe.readLine()))) {
probe.close();
// start reading the file from the beginning
reader.close();
reader = new BufferedReader(new FileReader(file.getAbsoluteFile()));
firstLine = null;
offset = 0;
}
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Parses one line of the log. If the line contains a valid
* log entry, then an appropriate EventRecord is returned, after all
* relevant fields have been parsed.
*
* @param line the log line to be parsed
*
* @return the EventRecord representing the log entry of the line. If
* the line does not contain a valid log entry, then the EventRecord
* returned has isValid() = false. When the end-of-file has been reached,
* null is returned to the caller.
*/
abstract public EventRecord parseLine(String line) throws IOException;
/**
* Parse a date found in Hadoop log file.
*
* @return a Calendar representing the date
*/
abstract protected Calendar parseDate(String strDate, String strTime);
}

View File

@ -1,43 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
/**********************************************************
* This class is a wrapper for a monitoring job.
*
**********************************************************/
public class MonitorJob {
Monitored job;
String type;
int interval;
int counter;
public MonitorJob(Monitored _job, String _type, int _interval) {
job = _job;
type = _type;
interval = _interval;
counter = _interval;
}
public void reset() {
counter = interval;
}
}

View File

@ -1,53 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
/**********************************************************
* Represents objects that monitor specific hardware resources and
* can query them to get EventRecords describing the state of these
* resources.
*
**********************************************************/
public interface Monitored {
/**
* Get an array of all EventRecords that can be extracted for
* the represented hardware component.
*
* @return The array of EventRecords
*/
public EventRecord[] monitor();
/**
* Inserts all EventRecords that can be extracted for
* the represented hardware component into a LocalStore.
*
* @param ls the LocalStore into which the EventRecords
* are to be stored.
*/
public void monitor(LocalStore ls);
/**
* Return a String with information about the implementing
* class
*
* @return A String describing the implementing class
*/
public String getInfo();
}

View File

@ -1,140 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Calendar;
/**********************************************************
* Objects of this class parse the output of ifconfig to
* gather information about present Network Interface Cards
* in the system. The list of NICs to poll is specified in the
* configuration file.
*
**********************************************************/
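/* Illustrative sketch of the classic Linux ifconfig output this parser
* expects (values are hypothetical):
*
*   eth0  Link encap:Ethernet  HWaddr 00:1A:2B:3C:4D:5E
*         inet addr:10.0.0.1  Bcast:10.0.0.255  Mask:255.255.255.0
*         RX packets:123456 errors:0 dropped:0 overruns:0 frame:0
*         TX packets:65432 errors:0 dropped:0 overruns:0 carrier:0
*         collisions:0 txqueuelen:1000
*         RX bytes:987654  TX bytes:456789
*
* from which fields such as hwAddress, ipAddress and the RX/TX counters
* are extracted by the regular expressions in query().
*/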
public class NICParser extends ShellParser {
String[] nics;
/**
* Constructs a NICParser and reads the list of NICs to query
*/
public NICParser() {
super();
nics = Environment.getProperty("nic.list").split(",\\s*");
}
/**
* Reads and parses the output of ifconfig for a specified NIC and
* creates an appropriate EventRecord that holds the desirable
* information for it.
*
* @param device the NIC device name to query
*
* @return the EventRecord created
*/
public EventRecord query(String device) throws UnknownHostException {
CharSequence sb = Environment.runCommandGeneric("/sbin/ifconfig " + device);
EventRecord retval = new EventRecord(InetAddress.getLocalHost()
.getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
.getHostName()), Calendar.getInstance(), "NIC", "Unknown", device, "-");
retval.set("hwAddress", findPattern("HWaddr\\s*([\\S{2}:]{17})", sb
.toString(), 1));
retval.set("ipAddress", findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb
.toString(), 1));
String tmp = findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb.toString(), 1);
retval.set("status", (tmp == null) ? "DOWN" : "UP");
if (tmp != null)
retval.set("ipAddress", tmp);
retval.set("rxPackets", findPattern("RX\\s*packets\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("rxErrors", findPattern("RX.+errors\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("rxDropped", findPattern("RX.+dropped\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("rxOverruns", findPattern("RX.+overruns\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("rxFrame", findPattern("RX.+frame\\s*:\\s*(\\d+)",
sb.toString(), 1));
retval.set("txPackets", findPattern("TX\\s*packets\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("txErrors", findPattern("TX.+errors\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("txDropped", findPattern("TX.+dropped\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("txOverruns", findPattern("TX.+overruns\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("txCarrier", findPattern("TX.+carrier\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("collisions", findPattern("\\s+collisions\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("rxBytes", findPattern("RX\\s*bytes\\s*:\\s*(\\d+)", sb
.toString(), 1));
retval.set("txBytes", findPattern("TX\\s*bytes\\s*:\\s*(\\d+)", sb
.toString(), 1));
return retval;
}
/**
* Invokes query() to do the parsing and handles parsing errors for
* each one of the NICs specified in the configuration.
*
* @return an array of EventRecords that holds one element that represents
* the current state of network interfaces.
*/
public EventRecord[] monitor() {
ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
for (String nic : nics) {
try {
recs.add(query(nic));
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
EventRecord[] T = new EventRecord[recs.size()];
return recs.toArray(T);
}
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
String retval = "ifconfig parser for interfaces: ";
for (String nic : nics)
retval += nic + " ";
return retval;
}
}

View File

@ -1,132 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
/**********************************************************
* This class can be used to anonymize logs independently of
* Hadoop and the Executor. It parses the specified log file to
* create log records for it and then passes them to the Anonymizer.
* After they are anonymized, they are written to a local file,
* which is then compressed and stored locally.
*
**********************************************************/
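/* Illustrative invocation (log path is hypothetical), assuming the failmon
* classes and their dependencies are on the classpath:
*
*   java org.apache.hadoop.contrib.failmon.OfflineAnonymizer -hadoop /var/log/hadoop/namenode.log
*
* The anonymized records are written to namenode.log.anonymized in the
* working directory and then compressed to namenode.log.anonymized.zip.
*/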
public class OfflineAnonymizer {
public enum LogType {
HADOOP, SYSTEM
};
LogType logtype;
File logfile;
LogParser parser;
/**
* Creates an OfflineAnonymizer for a specific log file.
*
* @param logtype the type of the log file. This can either be
* LogType.HADOOP or LogType.SYSTEM
* @param filename the path to the log file
*
*/
public OfflineAnonymizer(LogType logtype, String filename) {
logfile = new File(filename);
if (!logfile.exists()) {
System.err.println("Input file does not exist!");
System.exit(0);
}
if (logtype == LogType.HADOOP)
parser = new HadoopLogParser(filename);
else
parser = new SystemLogParser(filename);
}
/**
* Performs anonymization for the log file. Log entries are
* read one by one and EventRecords are created, which are then
* anonymized and written to the output.
*
*/
public void anonymize() throws Exception {
EventRecord er = null;
SerializedRecord sr = null;
BufferedWriter bfw = new BufferedWriter(new FileWriter(logfile.getName()
+ ".anonymized"));
System.out.println("Anonymizing log records...");
while ((er = parser.getNext()) != null) {
if (er.isValid()) {
sr = new SerializedRecord(er);
Anonymizer.anonymize(sr);
bfw.write(LocalStore.pack(sr).toString());
bfw.write(LocalStore.RECORD_SEPARATOR);
}
}
bfw.flush();
bfw.close();
System.out.println("Anonymized log records written to " + logfile.getName()
+ ".anonymized");
System.out.println("Compressing output file...");
LocalStore.zipCompress(logfile.getName() + ".anonymized");
System.out.println("Compressed output file written to " + logfile.getName()
+ ".anonymized" + LocalStore.COMPRESSION_SUFFIX);
}
public static void main(String[] args) {
if (args.length < 2) {
System.out.println("Usage: OfflineAnonymizer <log_type> <filename>");
System.out
.println("where <log_type> is either \"hadoop\" or \"system\" and <filename> is the path to the log file");
System.exit(0);
}
LogType logtype = null;
if (args[0].equalsIgnoreCase("-hadoop"))
logtype = LogType.HADOOP;
else if (args[0].equalsIgnoreCase("-system"))
logtype = LogType.SYSTEM;
else {
System.err.println("Invalid first argument.");
System.exit(0);
}
OfflineAnonymizer oa = new OfflineAnonymizer(logtype, args[1]);
try {
oa.anonymize();
} catch (Exception e) {
e.printStackTrace();
}
return;
}
}

View File

@ -1,163 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.util.Properties;
import java.util.Calendar;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
/**********************************************************
* This class takes care of the information that needs to be
* persistently stored locally on nodes. Bookkeeping is done for the
* state of parsing of log files, so that the portion of the file that
* has already been parsed in previous calls will not be parsed again.
* For each log file, we maintain the byte offset of the last
* character parsed in previous passes. Also, the first entry in the
* log file is stored, so that FailMon can determine when a log file
* has been rotated (and thus parsing needs to start from the
* beginning of the file). We use a property file to store that
* information. For each log file we create a property keyed by the
* filename, the value of which contains the first log entry and the byte
* offset, separated by a SEPARATOR.
*
**********************************************************/
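/* Illustrative sketch of one entry's logical layout (values are hypothetical):
*
*   <log file path> = <first log line>###<byte offset>
*   e.g. /var/log/messages, "Oct  1 12:00:01 node01 kernel: boot" and 4096
*
* java.util.Properties applies its usual character escaping when the state
* file is written to disk.
*/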
public class PersistentState {
private final static String SEPARATOR = "###";
static String filename;
static Properties persData = new Properties();
/**
* Read the state of parsing for all open log files from a property
* file.
*
* @param fname the filename of the property file to be read
*/
public static void readState(String fname) {
filename = fname;
try {
persData.load(new FileInputStream(filename));
} catch (FileNotFoundException e1) {
// ignore
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Read and return the state of parsing for a particular log file.
*
* @param fname the log file for which to read the state
*/
public static ParseState getState(String fname) {
String [] fields = persData.getProperty(fname, "null" + SEPARATOR + "0").split(SEPARATOR, 2);
String firstLine;
long offset;
if (fields.length < 2) {
System.err.println("Malformed persistent state data found");
Environment.logInfo("Malformed persistent state data found");
firstLine = null;
offset = 0;
} else {
firstLine = (fields[0].equals("null") ? null : fields[0]);
offset = Long.parseLong(fields[1]);
}
return new ParseState(fname, firstLine, offset);
}
/**
* Set the state of parsing for a particular log file.
*
* @param state the ParseState to set
*/
public static void setState(ParseState state) {
if (state == null) {
System.err.println("Null state found");
Environment.logInfo("Null state found");
}
persData.setProperty(state.filename, state.firstLine + SEPARATOR + state.offset);
}
/**
* Update the state of parsing for a particular log file.
*
* @param filename the log file for which to update the state
* @param firstLine the first line of the log file currently
* @param offset the byte offset of the last character parsed
*/
public static void updateState(String filename, String firstLine, long offset) {
ParseState ps = getState(filename);
if (firstLine != null)
ps.firstLine = firstLine;
ps.offset = offset;
setState(ps);
}
/**
* Write the state of parsing for all open log files to a property
* file on disk.
*
* @param fname the filename of the property file to write to
*/
public static void writeState(String fname) {
try {
persData.store(new FileOutputStream(fname), Calendar.getInstance().getTime().toString());
} catch (FileNotFoundException e1) {
e1.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**********************************************************
* This class represents the state of parsing for a particular log
* file.
*
**********************************************************/
class ParseState {
public String filename;
public String firstLine;
public long offset;
public ParseState(String _filename, String _firstLine, long _offset) {
this.filename = _filename;
this.firstLine = _firstLine;
this.offset = _offset;
}
}

View File

@ -1,120 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.util.ArrayList;
/**********************************************************
* Runs a set of monitoring jobs once for the local node. The set of
* jobs to be run is the intersection of the jobs specified in the
* configuration file and the set of jobs specified in the --only
* command line argument.
**********************************************************/
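/* Illustrative invocation (the job type names below are assumptions, not an
* exhaustive list), assuming the failmon classes are on the classpath:
*
*   java org.apache.hadoop.contrib.failmon.RunOnce --config ./conf/failmon.properties --only nics,upload
*
* The special type "upload" keeps the HDFS upload step enabled; without it,
* records are only written to the local store.
*/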
public class RunOnce {
LocalStore lstore;
ArrayList<MonitorJob> monitors;
boolean uploading = true;
public RunOnce(String confFile) {
Environment.prepare(confFile);
String localTmpDir;
// running as a stand-alone application
localTmpDir = System.getProperty("java.io.tmpdir");
Environment.setProperty("local.tmp.dir", localTmpDir);
monitors = Environment.getJobs();
lstore = new LocalStore();
uploading = true;
}
private void filter (String [] ftypes) {
ArrayList<MonitorJob> filtered = new ArrayList<MonitorJob>();
boolean found;
// filter out unwanted monitor jobs
for (MonitorJob job : monitors) {
found = false;
for (String ftype : ftypes)
if (job.type.equalsIgnoreCase(ftype))
found = true;
if (found)
filtered.add(job);
}
// disable uploading if not requested
found = false;
for (String ftype : ftypes)
if (ftype.equalsIgnoreCase("upload"))
found = true;
if (!found)
uploading = false;
monitors = filtered;
}
private void run() {
Environment.logInfo("Failmon started successfully.");
for (int i = 0; i < monitors.size(); i++) {
Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
monitors.get(i).job.monitor(lstore);
}
if (uploading)
lstore.upload();
lstore.close();
}
public void cleanup() {
// nothing to be done
}
public static void main (String [] args) {
String configFilePath = "./conf/failmon.properties";
String [] onlyList = null;
// Parse command-line parameters
for (int i = 0; i < args.length - 1; i++) {
if (args[i].equalsIgnoreCase("--config"))
configFilePath = args[i + 1];
else if (args[i].equalsIgnoreCase("--only"))
onlyList = args[i + 1].split(",");
}
RunOnce ro = new RunOnce(configFilePath);
// only keep the requested types of jobs
if (onlyList != null)
ro.filter(onlyList);
// run once only
ro.run();
}
}

View File

@ -1,206 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
* Objects of this class parse the output of smartmontools to
* gather information about the state of disks in the system. The
* smartmontools utility reads the S.M.A.R.T. attributes from
* the disk devices and reports them to the user. Note that since
* running smartctl requires superuser privileges, one should
* grant sudo privileges to the running user for the command smartctl
* (without a password). Alternatively, one can set up a cron job that
* periodically dumps the output of smartctl into a user-readable file.
* See the configuration file for details.
*
**********************************************************/
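/* One way to grant the passwordless sudo mentioned above is a sudoers entry
* along these lines (the user name and smartctl path are assumptions):
*
*   failmon  ALL=(root) NOPASSWD: /usr/sbin/smartctl
*/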
public class SMARTParser extends ShellParser {
String[] devices;
/**
* Constructs a SMARTParser and reads the list of disk
* devices to query
*/
public SMARTParser() {
super();
String devicesStr = Environment.getProperty("disks.list");
System.out.println("skato " + devicesStr);
if (devicesStr != null)
devices = devicesStr.split(",\\s*");
}
/**
* Reads and parses the output of smartctl for a specified disk and
* creates an appropriate EventRecord that holds the desirable
* information for it. Since the output of smartctl is different for
* different kinds of disks, we try to identify as many attributes as
* possible for all known output formats.
*
* @param device the disk device name to query
*
* @return the EventRecord created
*/
public EventRecord query(String device) throws Exception {
String conf = Environment.getProperty("disks." + device + ".source");
CharSequence sb;
if (conf == null)
sb = Environment.runCommandGeneric("sudo smartctl --all " + device);
else
sb = Environment.runCommandGeneric("cat " + conf);
EventRecord retval = new EventRecord(InetAddress.getLocalHost()
.getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
.getHostName()), Calendar.getInstance(), "SMART", "Unknown",
(conf == null ? "sudo smartctl --all " + device : "file " + conf), "-");
// IBM SCSI disks
retval.set("model", findPattern("Device\\s*:\\s*(.*)", sb.toString(), 1));
retval.set("serial", findPattern("Serial\\s+Number\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("firmware", findPattern("Firmware\\s+Version\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("capacity", findPattern("User\\s+Capacity\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("status", findPattern("SMART\\s*Health\\s*Status:\\s*(.*)", sb
.toString(), 1));
retval.set("current_temperature", findPattern(
"Current\\s+Drive\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
retval.set("trip_temperature", findPattern(
"Drive\\s+Trip\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
retval.set("start_stop_count", findPattern(
"start\\s+stop\\s+count\\s*:\\s*(\\d*)", sb.toString(), 1));
String[] var = { "read", "write", "verify" };
for (String s : var) {
retval.set(s + "_ecc_fast", findPattern(s + "\\s*:\\s*(\\d*)", sb
.toString(), 1));
retval.set(s + "_ecc_delayed", findPattern(s
+ "\\s*:\\s*(\\d+\\s+){1}(\\d+)", sb.toString(), 2));
retval.set(s + "_rereads", findPattern(
s + "\\s*:\\s*(\\d+\\s+){2}(\\d+)", sb.toString(), 2));
retval.set(s + "_GBs", findPattern(s
+ "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*)", sb.toString(), 2));
retval.set(s + "_uncorrected",
findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*){1}\\s+(\\d+)", sb
.toString(), 3));
}
// Hitachi IDE, SATA
retval.set("model", findPattern("Device\\s*Model\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("serial", findPattern("Serial\\s+number\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("protocol", findPattern("Transport\\s+protocol\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("status", "PASSED".equalsIgnoreCase(findPattern(
"test\\s*result\\s*:\\s*(.*)", sb.toString(), 1)) ? "OK" : "FAILED");
readColumns(retval, sb);
return retval;
}
/**
* Reads attributes in the following format:
*
* ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
* 3 Spin_Up_Time 0x0027 180 177 063 Pre-fail Always - 10265
* 4 Start_Stop_Count 0x0032 253 253 000 Old_age Always - 34
* 5 Reallocated_Sector_Ct 0x0033 253 253 063 Pre-fail Always - 0
* 6 Read_Channel_Margin 0x0001 253 253 100 Pre-fail Offline - 0
* 7 Seek_Error_Rate 0x000a 253 252 000 Old_age Always - 0
* 8 Seek_Time_Performance 0x0027 250 224 187 Pre-fail Always - 53894
* 9 Power_On_Minutes 0x0032 210 210 000 Old_age Always - 878h+00m
* 10 Spin_Retry_Count 0x002b 253 252 157 Pre-fail Always - 0
* 11 Calibration_Retry_Count 0x002b 253 252 223 Pre-fail Always - 0
* 12 Power_Cycle_Count 0x0032 253 253 000 Old_age Always - 49
* 192 PowerOff_Retract_Count 0x0032 253 253 000 Old_age Always - 0
* 193 Load_Cycle_Count 0x0032 253 253 000 Old_age Always - 0
* 194 Temperature_Celsius 0x0032 037 253 000 Old_age Always - 37
* 195 Hardware_ECC_Recovered 0x000a 253 252 000 Old_age Always - 2645
*
* This format is mostly found in IDE and SATA disks.
*
* @param er the EventRecord in which to store attributes found
* @param sb the text to parse
*
* @return the EventRecord in which new attributes are stored.
*/
private EventRecord readColumns(EventRecord er, CharSequence sb) {
Pattern pattern = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$",
Pattern.MULTILINE);
Matcher matcher = pattern.matcher(sb);
while (matcher.find()) {
String[] tokens = matcher.group(1).split("\\s+");
boolean failed = false;
// check if this attribute is a failed one
if (!tokens[8].equals("-"))
failed = true;
er.set(tokens[1].toLowerCase(), (failed ? "FAILED:" : "") + tokens[9]);
}
return er;
}
/**
* Invokes query() to do the parsing and handles parsing errors for
* each one of the disks specified in the configuration.
*
* @return an array of EventRecords that holds one element that represents
* the current state of the disk devices.
*/
public EventRecord[] monitor() {
ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
for (String device : devices) {
try {
recs.add(query(device));
} catch (Exception e) {
e.printStackTrace();
}
}
EventRecord[] T = new EventRecord[recs.size()];
return recs.toArray(T);
}
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
String retval = "S.M.A.R.T. disk attributes parser for disks ";
for (String device : devices)
retval += device + " ";
return retval;
}
}

View File

@ -1,112 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
* Objects of this class parse the output of the lm-sensors utility
* to gather information about fan speed, temperatures for cpus
* and motherboard etc.
*
**********************************************************/
public class SensorsParser extends ShellParser {
/**
* Reads and parses the output of the 'sensors' command
* and creates an appropriate EventRecord that holds
* the desirable information.
*
* @param s unused parameter
*
* @return the EventRecord created
*/
public EventRecord query(String s) throws Exception {
CharSequence sb;
sb = Environment.runCommandGeneric("sensors -A");
EventRecord retval = new EventRecord(InetAddress.getLocalHost()
.getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
.getHostName()), Calendar.getInstance(), "lm-sensors", "Unknown",
"sensors -A", "-");
readGroup(retval, sb, "fan");
readGroup(retval, sb, "in");
readGroup(retval, sb, "temp");
readGroup(retval, sb, "Core");
return retval;
}
/**
* Reads and parses lines that provide the output
* of a group of sensors with the same functionality.
*
* @param er the EventRecord to which the new attributes are added
* @param sb the text to parse
* @param prefix a String prefix specifying the common prefix of the
* sensors' names in the group (e.g. "fan", "in", "temp")
*
* @return the EventRecord created
*/
private EventRecord readGroup(EventRecord er, CharSequence sb, String prefix) {
Pattern pattern = Pattern.compile(".*(" + prefix
+ "\\s*\\d*)\\s*:\\s*(\\+?\\d+)", Pattern.MULTILINE);
Matcher matcher = pattern.matcher(sb);
while (matcher.find())
er.set(matcher.group(1), matcher.group(2));
return er;
}
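/* Illustrative lines of 'sensors -A' output that the pattern above matches
* (readings are hypothetical):
*
*   fan1:       3000 RPM
*   temp1:       +42 C
*   Core 0:      +45 C
*
* The sensor name (group 1) becomes the attribute name and the integer
* reading (group 2) its value.
*/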
/**
* Invokes query() to do the parsing and handles parsing errors.
*
* @return an array of EventRecords that holds one element that represents
* the current state of the hardware sensors
*/
public EventRecord[] monitor() {
EventRecord[] recs = new EventRecord[1];
try {
recs[0] = query(null);
} catch (Exception e) {
e.printStackTrace();
}
return recs;
}
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
return ("lm-sensors parser");
}
}

View File

@ -1,163 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.text.DateFormat;
/**********************************************************
* Objects of this class hold the serialized representations
* of EventRecords. A SerializedRecord is essentially an EventRecord
* with all its property values converted to strings. It also provides
* some convenience methods for printing the property fields in a
* more readable way.
*
**********************************************************/
public class SerializedRecord {
HashMap<String, String> fields;
private static DateFormat dateFormatter =
DateFormat.getDateTimeInstance(DateFormat.LONG, DateFormat.LONG);
/**
* Create the SerializedRecord given an EventRecord.
*/
public SerializedRecord(EventRecord source) {
fields = new HashMap<String, String>();
fields.clear();
for (String k : source.getMap().keySet()) {
ArrayList<String> strs = getStrings(source.getMap().get(k));
if (strs.size() == 1)
fields.put(k, strs.get(0));
else
for (int i = 0; i < strs.size(); i++)
fields.put(k + "#" + i, strs.get(i));
}
}
/**
* Extract String representations from an Object.
*
* @param o the input object
*
* @return an ArrayList that contains Strings found in o
*/
private ArrayList<String> getStrings(Object o) {
ArrayList<String> retval = new ArrayList<String>();
retval.clear();
if (o == null)
retval.add("null");
else if (o instanceof String)
retval.add((String) o);
else if (o instanceof Calendar)
retval.add(dateFormatter.format(((Calendar) o).getTime()));
else if (o instanceof InetAddress[])
for (InetAddress ip : ((InetAddress[]) o))
retval.add(ip.getHostAddress());
else if (o instanceof String[])
for (String s : (String []) o)
retval.add(s);
else
retval.add(o.toString());
return retval;
}
/**
* Set the value of a property of the EventRecord.
*
* @param fieldName the name of the property to set
* @param fieldValue the value of the property to set
*
*/
public void set(String fieldName, String fieldValue) {
fields.put(fieldName, fieldValue);
}
/**
* Get the value of a property of the EventRecord.
* If the property with the specific key is not found,
* null is returned.
*
* @param fieldName the name of the property to get.
*/
public String get(String fieldName) {
return fields.get(fieldName);
}
/**
* Arrange the keys to provide a more readable printing order:
* first goes the timestamp, then the hostname and then the type, followed
* by all other keys found.
*
* @param keys The input ArrayList of keys to re-arrange.
*/
public static void arrangeKeys(ArrayList<String> keys) {
move(keys, "timestamp", 0);
move(keys, "hostname", 1);
move(keys, "type", 2);
}
private static void move(ArrayList<String> keys, String key, int position) {
int cur = keys.indexOf(key);
if (cur == -1)
return;
keys.set(cur, keys.get(position));
keys.set(position, key);
}
/**
* Check if the SerializedRecord is a valid one, i.e., whether
* it represents meaningful metric values.
*
* @return true if the EventRecord is a valid one, false otherwise.
*/
public boolean isValid() {
return !("invalid".equalsIgnoreCase(fields.get("hostname")));
}
/**
* Creates and returns a string representation of the object
*
* @return a String representing the object
*/
public String toString() {
String retval = "";
ArrayList<String> keys = new ArrayList<String>(fields.keySet());
arrangeKeys(keys);
for (int i = 0; i < keys.size(); i++) {
String value = fields.get(keys.get(i));
if (value == null)
retval += keys.get(i) + ":\tnull\n";
else
retval += keys.get(i) + ":\t" + value + "\n";
}
return retval;
}
}

View File

@ -1,102 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
* Objects of this class parse the output of system command-line
* utilities that can give information about the state of
* various hardware components in the system. Typically, each such
* object either invokes a command and reads its output or reads the
* output of one such command from a file on the disk. Currently
* supported utilities include ifconfig, smartmontools, lm-sensors,
* /proc/cpuinfo.
*
**********************************************************/
public abstract class ShellParser implements Monitored {
/**
* Find the first occurrence of a pattern in a piece of text
* and return a specific group.
*
* @param strPattern the regular expression to match
* @param text the text to search
* @param grp the number of the matching group to return
*
* @return a String containing the matched group of the regular expression
*/
protected String findPattern(String strPattern, String text, int grp) {
Pattern pattern = Pattern.compile(strPattern, Pattern.MULTILINE);
Matcher matcher = pattern.matcher(text);
if (matcher.find(0))
return matcher.group(grp);
return null;
}
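/* Illustrative use (the regular expression is borrowed from NICParser):
*
*   String ip = findPattern("inet\\s+addr:\\s*([\\w.?]*)", ifconfigOutput, 1);
*
* returns the first captured IP address in ifconfigOutput, or null when
* nothing matches.
*/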
/**
* Finds all occurrences of a pattern in a piece of text and returns
* the matching groups.
*
* @param strPattern the regular expression to match
* @param text the text to search
* @param grp the number of the matching group to return
* @param separator the string that separates occurrences in the returned value
*
* @return a String that contains all occurrences of strPattern in text,
* separated by separator
*/
protected String findAll(String strPattern, String text, int grp,
String separator) {
String retval = "";
boolean firstTime = true;
Pattern pattern = Pattern.compile(strPattern);
Matcher matcher = pattern.matcher(text);
while (matcher.find()) {
retval += (firstTime ? "" : separator) + matcher.group(grp);
firstTime = false;
}
return retval;
}
/**
* Insert all EventRecords that can be extracted for
* the represented hardware component into a LocalStore.
*
* @param ls the LocalStore into which the EventRecords
* are to be stored.
*/
public void monitor(LocalStore ls) {
ls.insert(monitor());
}
abstract public EventRecord[] monitor();
abstract public EventRecord query(String s) throws Exception;
}

View File

@ -1,126 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.IOException;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
* An object of this class parses a Unix system log file to create
* appropriate EventRecords. Currently, only the syslogd logging
* daemon is supported.
*
**********************************************************/
public class SystemLogParser extends LogParser {
static String[] months = { "January", "February", "March", "April", "May",
"June", "July", "August", "September", "October", "November", "December" };
/**
* Create a new parser object.
*/
public SystemLogParser(String fname) {
super(fname);
if ((dateformat = Environment.getProperty("log.system.dateformat")) == null)
dateformat = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)";
if ((timeformat = Environment.getProperty("log.system.timeformat")) == null)
timeformat = "\\d{2}:\\d{2}:\\d{2}";
}
/**
* Parses one line of the log. If the line contains a valid
* log entry, then an appropriate EventRecord is returned, after all
* relevant fields have been parsed.
*
* @param line the log line to be parsed
*
* @return the EventRecord representing the log entry of the line. If
* the line does not contain a valid log entry, then the EventRecord
* returned has isValid() = false. When the end-of-file has been reached,
* null is returned to the caller.
*/
public EventRecord parseLine(String line) throws IOException {
EventRecord retval = null;
if (line != null) {
// process line
String patternStr = "(" + dateformat + ")";
patternStr += "\\s+";
patternStr += "(" + timeformat + ")";
patternStr += "\\s+(\\S*)\\s"; // for hostname
// patternStr += "\\s*([\\w+\\.?]+)"; // for source
patternStr += ":?\\s*(.+)"; // for the message
Pattern pattern = Pattern.compile(patternStr);
Matcher matcher = pattern.matcher(line);
if (matcher.find() && matcher.groupCount() >= 6) {
retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
matcher.group(4)), "SystemLog", "Unknown", // loglevel
"Unknown", // source
matcher.group(6)); // message
} else {
retval = new EventRecord();
}
}
return retval;
}
/**
* Parse a date found in the system log.
*
* @return a Calendar representing the date
*/
protected Calendar parseDate(String strDate, String strTime) {
Calendar retval = Calendar.getInstance();
// set date
String[] fields = strDate.split("\\s+");
retval.set(Calendar.MONTH, parseMonth(fields[0]));
retval.set(Calendar.DATE, Integer.parseInt(fields[1]));
// set time
fields = strTime.split(":");
retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
return retval;
}
/**
* Convert the name of a month to the corresponding int value.
*
* @return the int representation of the month.
*/
private int parseMonth(String month) {
for (int i = 0; i < months.length; i++)
if (months[i].startsWith(month))
return i;
return -1;
}
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
return ("System Log Parser for file : " + file.getAbsoluteFile());
}
}
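
For illustration, a minimal, self-contained sketch (hypothetical class SyslogLineDemo, not part of FailMon) of how the default dateformat and timeformat regexes above split a syslog line into date, time, hostname and message, and how the Calendar fields are then filled in, mirroring parseLine and parseDate. The sample log line and the hard-coded month lookup are assumptions for this example.

import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SyslogLineDemo {

  // Same default formats SystemLogParser falls back to when no properties are set.
  static final String DATE = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)";
  static final String TIME = "\\d{2}:\\d{2}:\\d{2}";

  public static void main(String[] args) {
    // Hypothetical syslog line; group numbers follow the pattern built in parseLine.
    String line = "Feb 12 01:58:08 node17 kernel: eth0: link up";
    String patternStr = "(" + DATE + ")\\s+(" + TIME + ")\\s+(\\S*)\\s:?\\s*(.+)";
    Matcher m = Pattern.compile(patternStr).matcher(line);
    if (m.find()) {
      String date = m.group(1);    // "Feb 12"
      String time = m.group(4);    // "01:58:08"
      String host = m.group(5);    // "node17"
      String msg  = m.group(6);    // "kernel: eth0: link up"

      // Calendar assembly mirrors parseDate: month and day, then hh:mm:ss.
      Calendar c = Calendar.getInstance();
      String[] d = date.split("\\s+");
      c.set(Calendar.MONTH, Calendar.FEBRUARY);        // parseMonth("Feb") == 1
      c.set(Calendar.DATE, Integer.parseInt(d[1]));
      String[] t = time.split(":");
      c.set(Calendar.HOUR_OF_DAY, Integer.parseInt(t[0]));
      c.set(Calendar.MINUTE, Integer.parseInt(t[1]));
      c.set(Calendar.SECOND, Integer.parseInt(t[2]));

      System.out.println(host + " @ " + c.getTime() + " : " + msg);
    }
  }
}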