HADOOP-7136. Remove failmon contrib component. (nigel)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1070008 13f79535-47bb-0310-9956-ffa450edef68
Parent: ab611ed091
Commit: 96dd151504
@@ -46,7 +46,6 @@
 <!-- ====================================================== -->
 <target name="test">
 <subant target="test">
-<fileset dir="." includes="failmon/build.xml"/>
 <fileset dir="." includes="hod/build.xml"/>
 </subant>
 </target>

@@ -1,97 +0,0 @@
****************** FailMon Quick Start Guide ***********************

This document is a guide to quickly setting up and running FailMon.
For more information and details please see the FailMon User Manual.

***** Building FailMon *****

Normally, FailMon lies under <hadoop-dir>/src/contrib/failmon, where
<hadoop-dir> is the Hadoop project root folder. To compile it,
one can either run ant for the whole Hadoop project, i.e.:

$ cd <hadoop-dir>
$ ant

or run ant only for FailMon:

$ cd <hadoop-dir>/src/contrib/failmon
$ ant

The above will compile FailMon and place all class files under
<hadoop-dir>/build/contrib/failmon/classes.

By invoking:

$ cd <hadoop-dir>/src/contrib/failmon
$ ant tar

FailMon is packaged as a standalone jar application in
<hadoop-dir>/src/contrib/failmon/failmon.tar.gz.


***** Deploying FailMon *****

There are two ways FailMon can be deployed in a cluster:

a) Within Hadoop, in which case the whole Hadoop package is uploaded
to the cluster nodes. In that case, nothing else needs to be done on
individual nodes.

b) Independently of the Hadoop deployment, i.e., by uploading
failmon.tar.gz to all nodes and uncompressing it. In that case, the
bin/failmon.sh script needs to be edited; the environment variable
HADOOPDIR should point to the root directory of the Hadoop
distribution. Also, the location of the Hadoop configuration files
should be pointed to by the property 'hadoop.conf.path' in file
conf/failmon.properties. Note that these files refer to the HDFS in
which we want to store the FailMon data (which can potentially be
different from the one on the cluster we are monitoring).

We assume that either way FailMon is placed in the same directory on
all nodes, which is typical for most clusters. If this is not
feasible, one should create the same symbolic link on all nodes of
the cluster, pointing to the FailMon directory of each node.

One should also edit the conf/failmon.properties file on each node to
set their own property values. However, the default values are expected
to serve most practical cases. Refer to the FailMon User Manual about
the various properties and configuration parameters.


***** Running FailMon *****

In order to run FailMon using a node to do the ad-hoc scheduling of
monitoring jobs, one needs to edit the hosts.list file to specify the
list of machine hostnames on which FailMon is to be run. Also, in file
conf/global.config the username used to connect to the machines has to
be specified (passwordless SSH is assumed) in property 'ssh.username'.
In property 'failmon.dir', the path to the FailMon folder has to be
specified as well (it is assumed to be the same on all machines in the
cluster). Then one only needs to invoke the command:

$ cd <hadoop-dir>
$ bin/scheduler.py

to start the system.


***** Merging HDFS files *****

For the purpose of merging the files created on HDFS by FailMon, the
following command can be used:

$ cd <hadoop-dir>
$ bin/failmon.sh --mergeFiles

This will concatenate all files in the HDFS folder (pointed to by the
'hdfs.upload.dir' property in the conf/failmon.properties file) into a
single file, which will be placed in the same folder. Also, the
location of the Hadoop configuration files should be pointed to by the
property 'hadoop.conf.path' in file conf/failmon.properties. Note that
these files refer to the HDFS in which we have stored the FailMon data
(which can potentially be different from the one on the cluster we are
monitoring). Also, the scheduler.py script can be set up to merge the
HDFS files when their number surpasses a configurable limit (see the
'conf/global.config' file).

Please refer to the FailMon User Manual for more details.

@@ -1,54 +0,0 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# First we need to determine whether FailMon has been distributed with
# Hadoop, or as standalone. In the latter case failmon.jar will lie in
# the current directory.

JARNAME="failmon.jar"
HADOOPDIR=""
CLASSPATH=""

if [ `ls -l | grep src | wc -l` == 0 ]
then
    # standalone binary
    # quote $1 so the test is false when no argument is given
    if [ -n "$1" ] && [ "$1" == "--mergeFiles" ]
    then
        # rewrite the jar's entry point in the manifest, then run it
        jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.HDFSMerger
        java -jar $JARNAME
    else
        jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.RunOnce
        java -jar $JARNAME $*
    fi
else
    # distributed with Hadoop
    HADOOPDIR=`pwd`/../../../
    CLASSPATH=$CLASSPATH:$HADOOPDIR/build/contrib/failmon/classes
    CLASSPATH=$CLASSPATH:$HADOOPDIR/build/classes
    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-api-1*.jar`
    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-1*.jar`
    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/log4j-*.jar`
    # echo $CLASSPATH
    if [ -n "$1" ] && [ "$1" == "--mergeFiles" ]
    then
        java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.HDFSMerger
    else
        java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.RunOnce $*
    fi
fi

@@ -1,235 +0,0 @@
#!/usr/bin/python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Schedule FailMon execution for nodes of file hosts.list, according to
# the properties file conf/global.config.

import time
import ConfigParser
import subprocess
import threading
import random

jobs = []
username = "user"
connections = 10
failmonDir = ""
maxFiles = 100

# This class represents a thread that connects to a set of cluster
# nodes to locally execute monitoring jobs. These jobs are specified
# as a shell command in the constructor.
class sshThread(threading.Thread):

    def __init__(self, threadname, username, command, failmonDir):
        threading.Thread.__init__(self)
        self.name = threadname
        self.username = username
        self.command = command
        self.failmonDir = failmonDir
        self.hosts = []

    def addHost(self, host):
        self.hosts.append(host)

    def run(self):
        for host in self.hosts:
            toRun = ["ssh", self.username + "@" + host, "cd " + self.failmonDir + " ; " + self.command]
            print "Thread", self.name, "invoking command on", host, ":\t", toRun, "...",
            subprocess.check_call(toRun)
            print "Done!"

# This class represents a monitoring job. The param member is a string
# that can be passed in the '--only' list of jobs given to the Java
# class org.apache.hadoop.contrib.failmon.RunOnce for execution on a
# node.
class Job:
    def __init__(self, param, interval):
        self.param = param
        self.interval = interval
        self.counter = interval
        return

    def reset(self):
        self.counter = self.interval

# This function reads the configuration file to get the values of the
# configuration parameters.
def getJobs(file):
    global username
    global connections
    global jobs
    global failmonDir
    global maxFiles

    conf = ConfigParser.SafeConfigParser()
    conf.read(file)

    username = conf.get("Default", "ssh.username")
    connections = int(conf.get("Default", "max.connections"))
    failmonDir = conf.get("Default", "failmon.dir")
    maxFiles = int(conf.get("Default", "hdfs.files.max"))

    # Hadoop Log
    interval = int(conf.get("Default", "log.hadoop.interval"))

    if interval != 0:
        jobs.append(Job("hadoopLog", interval))

    # System Log
    interval = int(conf.get("Default", "log.system.interval"))

    if interval != 0:
        jobs.append(Job("systemLog", interval))

    # NICs
    interval = int(conf.get("Default", "nics.interval"))

    if interval != 0:
        jobs.append(Job("nics", interval))

    # CPU
    interval = int(conf.get("Default", "cpu.interval"))

    if interval != 0:
        jobs.append(Job("cpu", interval))

    # Disks
    interval = int(conf.get("Default", "disks.interval"))

    if interval != 0:
        jobs.append(Job("disks", interval))

    # sensors
    interval = int(conf.get("Default", "sensors.interval"))

    if interval != 0:
        jobs.append(Job("sensors", interval))

    # upload
    interval = int(conf.get("Default", "upload.interval"))

    if interval != 0:
        jobs.append(Job("upload", interval))

    return


# Compute the gcd (Greatest Common Divisor) of two integers
def GCD(a, b):
    assert isinstance(a, int)
    assert isinstance(b, int)

    while a:
        a, b = b % a, a

    return b

# Compute the gcd (Greatest Common Divisor) of a list of integers
def listGCD(joblist):
    assert isinstance(joblist, list)

    if (len(joblist) == 1):
        return joblist[0].interval

    g = GCD(joblist[0].interval, joblist[1].interval)

    for i in range(2, len(joblist)):
        g = GCD(g, joblist[i].interval)

    return g

# Merge all failmon files created on the HDFS into a single file
def mergeFiles():
    global username
    global failmonDir
    hostList = []
    hosts = open('./conf/hosts.list', 'r')
    for host in hosts:
        hostList.append(host.strip())
    randomHost = random.sample(hostList, 1)
    mergeCommand = "bin/failmon.sh --mergeFiles"
    toRun = ["ssh", username + "@" + randomHost[0], "cd " + failmonDir + " ; " + mergeCommand]
    print "Invoking command on", randomHost, ":\t", mergeCommand, "...",
    subprocess.check_call(toRun)
    print "Done!"
    return

# The actual scheduling is done here
def main():
    getJobs("./conf/global.config")

    for job in jobs:
        print "Configuration: ", job.param, "every", job.interval, "seconds"

    globalInterval = listGCD(jobs)

    while True:
        time.sleep(globalInterval)
        params = []

        for job in jobs:
            job.counter -= globalInterval

            if (job.counter <= 0):
                params.append(job.param)
                job.reset()

        if (len(params) == 0):
            continue

        onlyStr = "--only " + params[0]
        for i in range(1, len(params)):
            onlyStr += ',' + params[i]

        command = "bin/failmon.sh " + onlyStr

        # execute on all nodes
        hosts = open('./conf/hosts.list', 'r')
        threadList = []
        # create a thread for every connection
        for i in range(0, connections):
            threadList.append(sshThread(i, username, command, failmonDir))

        # assign hosts to every thread, round-robin
        cur = 0
        for host in hosts:
            threadList[cur].addHost(host.strip())
            cur += 1
            if (cur == len(threadList)):
                cur = 0

        for ready in threadList:
            ready.start()

        for ssht in threading.enumerate():
            if ssht != threading.currentThread():
                ssht.join()

        # if an upload has been done, then maybe we need to merge the
        # HDFS files
        if "upload" in params:
            mergeFiles()

    return


if __name__ == '__main__':
    main()

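The scheduler above sleeps for the GCD of all job intervals and fires a job
whenever its counter drains to zero. A minimal sketch of that arithmetic (not
part of the removed script), using the non-zero intervals from the sample
conf/global.config removed later in this commit (nics = 10, cpu = 10,
upload = 20):

    def gcd(a, b):
        while a:
            a, b = b % a, a
        return b

    intervals = [10, 10, 20]
    wake = reduce(gcd, intervals)
    print wake  # 10: the main loop sleeps 10 seconds per iteration
    # "nics" and "cpu" fire on every wake-up; "upload" fires on every
    # second one, once its counter of 20 has been decremented twice.
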
@@ -1,120 +0,0 @@
<?xml version="1.0"?>

<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<project name="failmon" default="compile">

  <import file="../build-contrib.xml"/>

  <property name="jarfile" value="${build.dir}/${name}.jar"/>

  <target name="jar" depends="compile" unless="skip.contrib">
    <!-- Make sure that the hadoop jar has been created -->
    <!-- This works, but causes findbugs to fail
    <subant antfile="build.xml" target="jar">
      <fileset dir="../../.." includes="build.xml"/>
    </subant>
    -->
    <!-- Copy the required files so that the jar can run independently
         of Hadoop source code -->

    <!-- create the list of files to add to the classpath -->
    <fileset dir="${hadoop.root}/lib" id="class.path">
      <include name="**/*.jar" />
      <exclude name="**/excluded/" />
    </fileset>

    <pathconvert pathsep=" " property="failmon-class-path" refid="class.path">
      <map from="${basedir}/" to=""/>
    </pathconvert>

    <echo message="contrib: ${name}"/>
    <jar jarfile="${jarfile}" basedir="${build.classes}">
      <manifest>
        <attribute name="Main-Class" value="org.apache.hadoop.contrib.failmon.RunOnce"/>
        <attribute name="Class-Path" value="${failmon-class-path}"/>
      </manifest>
    </jar>

  </target>


  <!-- Override test target to copy sample data -->
  <target name="test" depends="compile-test, compile, compile-examples" if="test.available">
    <echo message="contrib: ${name}"/>
    <delete dir="${hadoop.log.dir}"/>
    <mkdir dir="${hadoop.log.dir}"/>
    <delete dir="${build.test}/sample"/>
    <mkdir dir="${build.test}/sample"/>
    <copy todir="${build.test}/sample">
      <fileset dir="${root}/sample"/>
    </copy>
    <junit
      printsummary="yes" showoutput="${test.output}"
      haltonfailure="no" fork="yes" maxmemory="256m"
      errorProperty="tests.failed" failureProperty="tests.failed"
      timeout="${test.timeout}">

      <sysproperty key="test.build.data" value="${build.test}/data"/>
      <sysproperty key="build.test" value="${build.test}"/>
      <sysproperty key="contrib.name" value="${name}"/>

      <!-- requires fork=yes for:
           relative File paths to use the specified user.dir
           classpath to use build/contrib/*.jar
      -->
      <sysproperty key="user.dir" value="${build.test}/data"/>

      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
      <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
      <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
      <classpath refid="test.classpath"/>
      <formatter type="${test.junit.output.format}" />
      <batchtest todir="${build.test}" unless="testcase">
        <fileset dir="${src.test}"
                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
      </batchtest>
      <batchtest todir="${build.test}" if="testcase">
        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
      </batchtest>
    </junit>
    <fail if="tests.failed">Tests failed!</fail>

  </target>

  <target name="tar" depends="jar">

    <copy todir=".">
      <fileset dir="${hadoop.root}/build/contrib/failmon/"
               includes="failmon.jar"/>
    </copy>

    <tar tarfile="${name}.tar"
         basedir=".."
         includes="${name}/**"
         excludes="${name}/${name}.tar.gz, ${name}/src/**, ${name}/logs/**, ${name}/build.xml*"/>
    <gzip zipfile="${name}.tar.gz" src="${name}.tar"/>
    <delete file="${name}.tar"/>
    <delete file="${name}.jar"/>

    <move file="${name}.tar.gz" todir="${build.dir}"/>
    <echo message="${hadoop.root}/build/contrib/failmon/${name}.jar"/>

  </target>

</project>

@@ -1,25 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#Logging Implementation

#Log4J
org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger

#JDK Logger
#org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger

@@ -1,80 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# HDFS properties
hdfs.upload.dir = /failmon
hadoop.conf.path = ../../../conf

# Hadoop Log file properties
log.hadoop.enabled = true
log.hadoop.filenames = /home/hadoop/hadoop-0.17.0/logs/
# set to non-zero only for continuous mode:
log.hadoop.interval = 0
log.hadoop.dateformat = \\d{4}-\\d{2}-\\d{2}
log.hadoop.timeformat = \\d{2}:\\d{2}:\\d{2}

# System Log file properties
log.system.enabled = true
log.system.filenames = /var/log/messages
# set to non-zero only for continuous mode:
log.system.interval = 0
log.system.dateformat = (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)
log.system.timeformat = \\d{2}:\\d{2}:\\d{2}

# Network Interfaces
nic.enabled = true
nic.list = eth0, eth1
# set to non-zero only for continuous mode:
nic.interval = 0

# CPUs & Motherboard
cpu.enabled = true
# set to non-zero only for continuous mode:
cpu.interval = 0

# Disk devices. For all devices listed under disks.list, the corresponding
# property disk./dev/xxx.source specifies where the output of
# "sudo smartctl --all /dev/xxx" can be read by a user. If this property is
# missing, super-user privileges are assumed and the smartctl command will be
# invoked itself.

disks.enabled = true
disks.list = /dev/sda, /dev/sdb, /dev/sdc, /dev/sdd, /dev/hda, /dev/hdb, /dev/hdc, /dev/hdd
#disks./dev/sda.source = hda.smart
# set to non-zero only for continuous mode:
disks.interval = 0

# lm-sensors polling
sensors.enabled = true
# set to non-zero only for continuous mode:
sensors.interval = 0

# Executor thread properties
executor.interval.min = 1

# Anonymization properties
anonymizer.hash.hostnames = false
anonymizer.hash.ips = false
anonymizer.hash.filenames = false
anonymizer.hostname.suffix = apache.org

# Local files options
local.tmp.filename = failmon.dat
local.tmp.compression = false
# set to non-zero only for continuous mode:
local.upload.interval = 0

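The dateformat/timeformat values above are regular expressions (with
backslashes doubled for the properties-file syntax) that the log parsers use
to pull timestamps out of log lines. A small sketch of how the system-log
patterns match; the sample /var/log/messages line is made up for
illustration, not taken from FailMon:

    import re

    # the same patterns with the properties-file escaping undone
    dateformat = r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d+)"
    timeformat = r"\d{2}:\d{2}:\d{2}"

    line = "Jan  5 14:02:33 node01 kernel: eth0: link up"  # hypothetical
    print re.search(dateformat, line).group(0)  # "Jan  5"
    print re.search(timeformat, line).group(0)  # "14:02:33"
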
@@ -1,39 +0,0 @@
[Default]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# general settings

# the username to use to connect to cluster nodes
ssh.username = user
# the maximum number of SSH connections to keep open at any time
max.connections = 2
# the directory in which FailMon lies
failmon.dir = /home/user/hadoop-core-trunk/src/contrib/failmon
# the maximum number of HDFS files to allow FailMon to create. After
# this limit is surpassed, all HDFS files will be concatenated into
# one file.
hdfs.files.max = 100

# iteration intervals
log.hadoop.interval = 0
log.system.interval = 0
nics.interval = 10
cpu.interval = 10
disks.interval = 0
sensors.interval = 0
upload.interval = 20

@@ -1,10 +0,0 @@
host00
host01
host02
host03
host04
host05
host06
host07
host08
host09

@@ -1,40 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Define some default values that can be overridden by system properties
failmon.log.dir=logs
failmon.log.file=failmon.log

log4j.rootLogger=INFO, simpleFile, console

# Logging Threshold
log4j.threshhold=ALL

#
# console
# Add "console" to rootLogger above if you want to use this
#

log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n

log4j.appender.simpleFile=org.apache.log4j.FileAppender
log4j.appender.simpleFile.layout=org.apache.log4j.PatternLayout
log4j.appender.simpleFile.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.appender.simpleFile.file=${failmon.log.dir}/${failmon.log.file}

@@ -1,52 +0,0 @@
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<ivy-module version="1.0">
  <info organisation="org.apache.hadoop" module="${ant.project.name}">
    <license name="Apache 2.0"/>
    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
    <description>
        Apache Hadoop
    </description>
  </info>
  <configurations defaultconfmapping="default">
    <!--these match the Maven configurations-->
    <conf name="default" extends="master,runtime"/>
    <conf name="master" description="contains the artifact but no dependencies"/>
    <conf name="runtime" description="runtime but not the artifact" />

    <conf name="common" visibility="private"
          extends="runtime"
          description="artifacts needed to compile/test the application"/>
    <conf name="test" visibility="private" extends="runtime"/>
  </configurations>

  <publications>
    <!--get the artifact from our module name-->
    <artifact conf="master"/>
  </publications>
  <dependencies>
    <dependency org="commons-logging"
                name="commons-logging"
                rev="${commons-logging.version}"
                conf="common->default"/>
    <dependency org="log4j"
                name="log4j"
                rev="${log4j.version}"
                conf="common->master"/>
  </dependencies>
</ivy-module>

@@ -1,17 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#This properties file lists the versions of the various artifacts used by failmon.
#It drives ivy and the generation of a maven POM

#Please list the dependency names with versions if they are different from the ones
#listed in the global libraries.properties file (in alphabetical order)

@@ -1,154 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**********************************************************
 * This class provides anonymization to SerializedRecord objects. It
 * anonymizes all hostnames, ip addresses and file names/paths
 * that appear in EventRecords gathered from the logs
 * and other system utilities. Such values are hashed using a
 * cryptographically safe one-way-hash algorithm (MD5).
 *
 **********************************************************/

public class Anonymizer {

  /**
   * Anonymize hostnames, ip addresses and file names/paths
   * that appear in fields of a SerializedRecord.
   *
   * @param sr the input SerializedRecord
   *
   * @return the anonymized SerializedRecord
   */
  public static SerializedRecord anonymize(SerializedRecord sr)
      throws Exception {

    String hostname = sr.get("hostname");

    if (hostname == null)
      throw new Exception("Malformed SerializedRecord: no hostname found");

    if ("true".equalsIgnoreCase(Environment
        .getProperty("anonymizer.hash.hostnames"))) {
      // hash the node's hostname
      anonymizeField(sr, "message", hostname, "_hn_");
      anonymizeField(sr, "hostname", hostname, "_hn_");
      // hash all other hostnames
      String suffix = Environment.getProperty("anonymizer.hostname.suffix");
      if (suffix != null)
        anonymizeField(sr, "message", "(\\S+\\.)*" + suffix, "_hn_");
    }

    if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.ips"))) {
      // hash all ip addresses
      String ipPattern = "(\\d{1,3}\\.){3}\\d{1,3}";
      anonymizeField(sr, "message", ipPattern, "_ip_");
      anonymizeField(sr, "ips", ipPattern, "_ip_");
      // if multiple ips are present for a node:
      int i = 0;
      while (sr.get("ips" + "#" + i) != null)
        anonymizeField(sr, "ips" + "#" + i++, ipPattern, "_ip_");

      if ("NIC".equalsIgnoreCase(sr.get("type")))
        anonymizeField(sr, "ipAddress", ipPattern, "_ip_");
    }

    if ("true".equalsIgnoreCase(Environment
        .getProperty("anonymizer.hash.filenames"))) {
      // hash every filename present in messages
      anonymizeField(sr, "message", "\\s+/(\\S+/)*[^:\\s]*", " _fn_");
      anonymizeField(sr, "message", "\\s+hdfs://(\\S+/)*[^:\\s]*",
          " hdfs://_fn_");
    }

    return sr;
  }

  /**
   * Anonymize hostnames, ip addresses and file names/paths
   * that appear in fields of an EventRecord, after it gets
   * serialized into a SerializedRecord.
   *
   * @param er the input EventRecord
   *
   * @return the anonymized SerializedRecord
   */
  public static SerializedRecord anonymize(EventRecord er) throws Exception {
    return anonymize(new SerializedRecord(er));
  }


  private static String anonymizeField(SerializedRecord sr, String fieldName,
      String pattern, String prefix) {
    String txt = sr.get(fieldName);

    if (txt == null)
      return null;
    else {
      // note: the digest is computed over the pattern itself, so every
      // match of the same pattern is replaced by the same token
      String anon = getMD5Hash(pattern);
      sr.set(fieldName, txt.replaceAll(pattern, (prefix == null ? "" : prefix)
          + anon));
      return txt;
    }
  }

  /**
   * Create the MD5 digest of an input text.
   *
   * @param text the input text
   *
   * @return the hexadecimal representation of the MD5 digest
   */
  public static String getMD5Hash(String text) {
    MessageDigest md;
    byte[] md5hash = new byte[32];
    try {
      md = MessageDigest.getInstance("MD5");
      md.update(text.getBytes("iso-8859-1"), 0, text.length());
      md5hash = md.digest();
    } catch (NoSuchAlgorithmException e) {
      e.printStackTrace();
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    }
    return convertToHex(md5hash);
  }

  private static String convertToHex(byte[] data) {
    StringBuilder buf = new StringBuilder();
    for (int i = 0; i < data.length; i++) {
      int halfbyte = (data[i] >>> 4) & 0x0F;
      int two_halfs = 0;
      do {
        if ((0 <= halfbyte) && (halfbyte <= 9))
          buf.append((char) ('0' + halfbyte));
        else
          buf.append((char) ('a' + (halfbyte - 10)));
        halfbyte = data[i] & 0x0F;
      } while (two_halfs++ < 1);
    }
    return buf.toString();
  }

}

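For reference, the hashing scheme above is easy to reproduce outside Java. A
rough Python equivalent of getMD5Hash()/anonymizeField() (a sketch, not part
of the removed code): every match of a pattern is replaced by a prefix plus
the MD5 hex digest of the pattern itself, so all matches of one pattern
collapse to the same token.

    import hashlib
    import re

    def anonymize_field(text, pattern, prefix):
        anon = hashlib.md5(pattern).hexdigest()
        return re.sub(pattern, prefix + anon, text)

    msg = "node reported 10.0.0.5 unreachable"  # hypothetical record text
    print anonymize_field(msg, r"(\d{1,3}\.){3}\d{1,3}", "_ip_")
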
@@ -1,101 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.net.InetAddress;
import java.util.Calendar;

/**********************************************************
 * Objects of this class parse the /proc/cpuinfo file to
 * gather information about present processors in the system.
 *
 **********************************************************/

public class CPUParser extends ShellParser {

  /**
   * Constructs a CPUParser
   */
  public CPUParser() {
    super();
  }

  /**
   * Reads and parses /proc/cpuinfo and creates an appropriate
   * EventRecord that holds the desirable information.
   *
   * @param s unused parameter
   *
   * @return the EventRecord created
   */
  public EventRecord query(String s) throws Exception {
    CharSequence sb = Environment.runCommandGeneric("cat /proc/cpuinfo");
    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
        .getHostName()), Calendar.getInstance(), "CPU", "Unknown", "CPU", "-");

    retval.set("processors", findAll("\\s*processor\\s*:\\s*(\\d+)", sb
        .toString(), 1, ", "));

    retval.set("model name", findPattern("\\s*model name\\s*:\\s*(.+)", sb
        .toString(), 1));

    retval.set("frequency", findAll("\\s*cpu\\s*MHz\\s*:\\s*(\\d+)", sb
        .toString(), 1, ", "));

    retval.set("physical id", findAll("\\s*physical\\s*id\\s*:\\s*(\\d+)", sb
        .toString(), 1, ", "));

    retval.set("core id", findAll("\\s*core\\s*id\\s*:\\s*(\\d+)", sb
        .toString(), 1, ", "));

    return retval;
  }

  /**
   * Invokes query() to do the parsing and handles parsing errors.
   *
   * @return an array of EventRecords that holds one element that represents
   * the current state of /proc/cpuinfo
   */
  public EventRecord[] monitor() {

    EventRecord[] recs = new EventRecord[1];

    try {
      recs[0] = query(null);
    } catch (Exception e) {
      e.printStackTrace();
    }

    return recs;
  }

  /**
   * Return a String with information about this class
   *
   * @return A String describing this class
   */
  public String getInfo() {
    return ("CPU Info parser");
  }

}

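The queries above are plain regular-expression scans over /proc/cpuinfo. A
rough Python equivalent of CPUParser.query() (a sketch assuming a Linux
host, not part of the removed code):

    import re

    cpuinfo = open("/proc/cpuinfo").read()

    # same patterns as the findAll()/findPattern() calls above
    processors = re.findall(r"processor\s*:\s*(\d+)", cpuinfo)
    frequency = re.findall(r"cpu\s*MHz\s*:\s*(\d+)", cpuinfo)
    model = re.search(r"model name\s*:\s*(.+)", cpuinfo)

    print "processors:", ", ".join(processors)
    print "frequency:", ", ".join(frequency)
    if model:
        print "model name:", model.group(1).strip()
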
@@ -1,41 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.IOException;

/**********************************************************
 * This class runs FailMon in a continuous mode on the local
 * node.
 *
 **********************************************************/

public class Continuous {

  public static void main(String[] args) {

    Environment.prepare("failmon.properties");

    Executor ex = new Executor(null);
    new Thread(ex).start();

  }

}

@@ -1,486 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.*;
import org.apache.log4j.PropertyConfigurator;

/**********************************************************
 * This class provides various methods for interaction with
 * the configuration and the operating system environment. Also
 * provides some helper methods for use by other classes in
 * the package.
 **********************************************************/

public class Environment {

  public static final int DEFAULT_LOG_INTERVAL = 3600;

  public static final int DEFAULT_POLL_INTERVAL = 360;

  public static int MIN_INTERVAL = 5;

  public static final int MAX_OUTPUT_LENGTH = 51200;

  public static Log LOG;

  static Properties fmProperties = new Properties();

  static boolean superuser = false;

  static boolean ready = false;

  /**
   * Initializes structures needed by other methods. Also determines
   * whether the executing user has superuser privileges.
   *
   */
  public static void prepare(String fname) {

    if (!"Linux".equalsIgnoreCase(System.getProperty("os.name"))) {
      System.err.println("Linux system required for FailMon. Exiting...");
      System.exit(0);
    }

    System.setProperty("log4j.configuration", "conf/log4j.properties");
    PropertyConfigurator.configure("conf/log4j.properties");
    LOG = LogFactory.getLog("org.apache.hadoop.contrib.failmon");
    logInfo("********** FailMon started ***********");

    // read parseState file
    PersistentState.readState("conf/parsing.state");

    try {
      FileInputStream propFile = new FileInputStream(fname);
      fmProperties.load(propFile);
      propFile.close();
    } catch (FileNotFoundException e1) {
      e1.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
    ready = true;

    try {
      String sudo_prompt = "passwd_needed:";
      String echo_txt = "access_ok";

      Process p = Runtime.getRuntime().exec("sudo -S -p " + sudo_prompt + " echo " + echo_txt);
      InputStream inps = p.getInputStream();
      InputStream errs = p.getErrorStream();

      while (inps.available() < echo_txt.length() && errs.available() < sudo_prompt.length())
        Thread.sleep(100);

      byte[] buf;
      String s;

      if (inps.available() >= echo_txt.length()) {
        buf = new byte[inps.available()];
        inps.read(buf);
        s = new String(buf);
        if (s.startsWith(echo_txt)) {
          superuser = true;
          logInfo("Superuser privileges found!");
        } else {
          // no need to read errs
          superuser = false;
          logInfo("Superuser privileges not found.");
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }

  /**
   * Fetches the value of a property from the configuration file.
   *
   * @param key the name of the property
   *
   * @return the value of the property, if it exists, and
   * null otherwise
   */
  public static String getProperty(String key) {
    if (!ready)
      prepare("conf/failmon.properties");
    return fmProperties.getProperty(key);
  }

  /**
   * Sets the value of a property in the configuration file.
   *
   * @param key the name of the property
   * @param value the new value for the property
   *
   */
  public static void setProperty(String key, String value) {
    fmProperties.setProperty(key, value);
  }

  /**
   * Scans the configuration file to determine which monitoring
   * utilities are available in the system. For each one of them, a
   * job is created. All such jobs are scheduled and executed by
   * Executor.
   *
   * @return an ArrayList that contains jobs to be executed by the Executor.
   */
  public static ArrayList<MonitorJob> getJobs() {

    ArrayList<MonitorJob> monitors = new ArrayList<MonitorJob>();
    int timeInt = 0;

    // for Hadoop Log parsing
    String[] fnames_r = getProperty("log.hadoop.filenames").split(",\\s*");
    String tmp = getProperty("log.hadoop.enabled");

    String[] fnames = expandDirs(fnames_r, ".*(.log).*");

    timeInt = setValue("log.hadoop.interval", DEFAULT_LOG_INTERVAL);

    if ("true".equalsIgnoreCase(tmp) && fnames[0] != null)
      for (String fname : fnames) {
        File f = new File(fname);
        if (f.exists() && f.canRead()) {
          monitors.add(new MonitorJob(new HadoopLogParser(fname), "hadoopLog", timeInt));
          logInfo("Created Monitor for Hadoop log file: " + f.getAbsolutePath());
        } else if (!f.exists())
          logInfo("Skipping Hadoop log file " + fname + " (file not found)");
        else
          logInfo("Skipping Hadoop log file " + fname + " (permission denied)");
      }


    // for System Log parsing
    fnames_r = getProperty("log.system.filenames").split(",\\s*");
    tmp = getProperty("log.system.enabled");

    fnames = expandDirs(fnames_r, ".*(messages).*");

    timeInt = setValue("log.system.interval", DEFAULT_LOG_INTERVAL);

    if ("true".equalsIgnoreCase(tmp))
      for (String fname : fnames) {
        File f = new File(fname);
        if (f.exists() && f.canRead()) {
          monitors.add(new MonitorJob(new SystemLogParser(fname), "systemLog", timeInt));
          logInfo("Created Monitor for System log file: " + f.getAbsolutePath());
        } else if (!f.exists())
          logInfo("Skipping system log file " + fname + " (file not found)");
        else
          logInfo("Skipping system log file " + fname + " (permission denied)");
      }


    // for network interfaces
    tmp = getProperty("nic.enabled");

    timeInt = setValue("nics.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      monitors.add(new MonitorJob(new NICParser(), "nics", timeInt));
      logInfo("Created Monitor for NICs");
    }

    // for cpu
    tmp = getProperty("cpu.enabled");

    timeInt = setValue("cpu.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      monitors.add(new MonitorJob(new CPUParser(), "cpu", timeInt));
      logInfo("Created Monitor for CPUs");
    }

    // for disks
    tmp = getProperty("disks.enabled");

    timeInt = setValue("disks.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      // check privileges if a disk with no disks./dev/xxx/.source is found
      boolean smart_present = checkExistence("smartctl");
      int disks_ok = 0;
      String devicesStr = getProperty("disks.list");
      String[] devices = null;

      if (devicesStr != null)
        devices = devicesStr.split(",\\s*");

      for (int i = 0; i < devices.length; i++) {
        boolean file_present = false;
        boolean disk_present = false;

        String fileloc = getProperty("disks." + devices[i] + ".source");
        if (fileloc != null && fileloc.equalsIgnoreCase("true"))
          file_present = true;

        if (!file_present)
          if (superuser) {
            CharSequence sb = runCommandGeneric("sudo smartctl -i " + devices[i]);
            String patternStr = "[(failed)(device not supported)]";
            Pattern pattern = Pattern.compile(patternStr);
            Matcher matcher = pattern.matcher(sb.toString());
            if (matcher.find(0))
              disk_present = false;
            else
              disk_present = true;
          }
        if (file_present || (disk_present && smart_present)) {
          disks_ok++;
        } else
          devices[i] = null;
      }

      // now remove disks that don't exist
      StringBuilder resetSB = new StringBuilder();
      for (int j = 0; j < devices.length; j++) {
        resetSB.append(devices[j] == null ? "" : devices[j] + ", ");
        if (devices[j] != null)
          logInfo("Found S.M.A.R.T. attributes for disk " + devices[j]);
      }
      // fix the property
      if (resetSB.length() >= 2)
        setProperty("disks.list", resetSB.substring(0, resetSB.length() - 2));

      if (disks_ok > 0) {
        monitors.add(new MonitorJob(new SMARTParser(), "disks", timeInt));
        logInfo("Created Monitor for S.M.A.R.T disk attributes");
      }
    }

    // for lm-sensors
    tmp = getProperty("sensors.enabled");

    timeInt = setValue("sensors.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp) && checkExistence("sensors")) {
      monitors.add(new MonitorJob(new SensorsParser(), "sensors", timeInt));
      logInfo("Created Monitor for lm-sensors output");
    }

    return monitors;
  }

  /**
   * Determines the minimum interval at which the executor thread
   * needs to wake up to execute jobs. Essentially, this interval
   * equals the GCD of the intervals of all scheduled jobs.
   *
   * @param monitors the list of scheduled jobs
   *
   * @return the minimum interval between two scheduled jobs
   */
  public static int getInterval(ArrayList<MonitorJob> monitors) {
    String tmp = getProperty("executor.interval.min");
    if (tmp != null)
      MIN_INTERVAL = Integer.parseInt(tmp);

    int[] monIntervals = new int[monitors.size()];

    for (int i = 0; i < monitors.size(); i++)
      monIntervals[i] = monitors.get(i).interval;

    return Math.max(MIN_INTERVAL, gcd(monIntervals));
  }

  /**
   * Checks whether a specific shell command is available
   * in the system.
   *
   * @param cmd the command to check against
   *
   * @return true, if the command is available, false otherwise
   */
  public static boolean checkExistence(String cmd) {
    CharSequence sb = runCommandGeneric("which " + cmd);
    if (sb.length() > 1)
      return true;

    return false;
  }

  /**
   * Runs a shell command in the system and provides a StringBuilder
   * with the output of the command.
   * <p>This method is deprecated. See the related method that returns a
   * CharSequence as opposed to a StringBuffer.
   *
   * @param cmd an array of strings that form the command to run
   *
   * @return a text that contains the output of the command
   * @see #runCommandGeneric(String[])
   * @deprecated
   */
  public static StringBuffer runCommand(String[] cmd) {
    return new StringBuffer(runCommandGeneric(cmd));
  }

  /**
   * Runs a shell command in the system and provides a StringBuilder
   * with the output of the command.
   *
   * @param cmd an array of strings that form the command to run
   *
   * @return a text that contains the output of the command
   */
  public static CharSequence runCommandGeneric(String[] cmd) {
    StringBuilder retval = new StringBuilder(MAX_OUTPUT_LENGTH);
    Process p;
    try {
      p = Runtime.getRuntime().exec(cmd);
      InputStream tmp = p.getInputStream();
      p.waitFor();
      int c;
      while ((c = tmp.read()) != -1)
        retval.append((char) c);
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }

    return retval;
  }

  /**
   * Runs a shell command in the system and provides a StringBuilder
   * with the output of the command.
   * <p>This method is deprecated in favor of the one that returns CharSequence as opposed to StringBuffer
|
|
||||||
* @param cmd the command to run
|
|
||||||
*
|
|
||||||
* @return a text that contains the output of the command
|
|
||||||
* @see #runCommandGeneric(String)
|
|
||||||
* @deprecated
|
|
||||||
*/
|
|
||||||
public static StringBuffer runCommand(String cmd) {
|
|
||||||
return new StringBuffer(runCommandGeneric(cmd));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Runs a shell command in the system and provides a StringBuilder
|
|
||||||
* with the output of the command.
|
|
||||||
*
|
|
||||||
* @param cmd the command to run
|
|
||||||
*
|
|
||||||
* @return a text that contains the output of the command
|
|
||||||
*/
|
|
||||||
public static CharSequence runCommandGeneric(String cmd) {
|
|
||||||
return runCommandGeneric(cmd.split("\\s+"));
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Determines the greatest common divisor (GCD) of two integers.
|
|
||||||
*
|
|
||||||
* @param m the first integer
|
|
||||||
* @param n the second integer
|
|
||||||
*
|
|
||||||
* @return the greatest common divisor of m and n
|
|
||||||
*/
|
|
||||||
public static int gcd(int m, int n) {
|
|
||||||
if (m == 0 && n == 0)
|
|
||||||
return 0;
|
|
||||||
if (m < n) {
|
|
||||||
int t = m;
|
|
||||||
m = n;
|
|
||||||
n = t;
|
|
||||||
}
|
|
||||||
int r = m % n;
|
|
||||||
if (r == 0) {
|
|
||||||
return n;
|
|
||||||
} else {
|
|
||||||
return gcd(n, r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Determines the greatest common divisor (GCD) of a list
|
|
||||||
* of integers.
|
|
||||||
*
|
|
||||||
* @param numbers the list of integers to process
|
|
||||||
*
|
|
||||||
* @return the greatest common divisor of all numbers
|
|
||||||
*/
|
|
||||||
public static int gcd(int[] numbers) {
|
|
||||||
|
|
||||||
if (numbers.length == 1)
|
|
||||||
return numbers[0];
|
|
||||||
|
|
||||||
int g = gcd(numbers[0], numbers[1]);
|
|
||||||
|
|
||||||
for (int i = 2; i < numbers.length; i++)
|
|
||||||
g = gcd(g, numbers[i]);
|
|
||||||
|
|
||||||
return g;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String [] expandDirs(String [] input, String patternStr) {
|
|
||||||
|
|
||||||
ArrayList<String> fnames = new ArrayList<String>();
|
|
||||||
Pattern pattern = Pattern.compile(patternStr);
|
|
||||||
Matcher matcher;
|
|
||||||
File f;
|
|
||||||
|
|
||||||
for (String fname : input) {
|
|
||||||
f = new File(fname);
|
|
||||||
if (f.exists()) {
|
|
||||||
if (f.isDirectory()) {
|
|
||||||
// add all matching files
|
|
||||||
File [] fcs = f.listFiles();
|
|
||||||
for (File fc : fcs) {
|
|
||||||
matcher = pattern.matcher(fc.getName());
|
|
||||||
if (matcher.find() && fc.isFile())
|
|
||||||
fnames.add(fc.getAbsolutePath());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// normal file, just add to output
|
|
||||||
fnames.add(f.getAbsolutePath());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fnames.toArray(input);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int setValue(String propname, int defaultValue) {
|
|
||||||
|
|
||||||
String v = getProperty(propname);
|
|
||||||
|
|
||||||
if (v != null)
|
|
||||||
return Integer.parseInt(v);
|
|
||||||
else
|
|
||||||
return defaultValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static void logInfo(String str) {
|
|
||||||
LOG.info(str);
|
|
||||||
}
|
|
||||||
}
|
|
|
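The interval logic above boils down to a GCD: the executor wakes at the coarsest rate that still hits every job's period exactly, floored at MIN_INTERVAL. A minimal sketch of the arithmetic, using the gcd() defined above with made-up interval values:

    // Jobs polling every 360s, 600s and 3600s share a GCD of 120s;
    // the executor sleeps 120s per cycle and each job's counter
    // decides whether it actually fires in that cycle.
    int[] intervals = { 360, 600, 3600 };
    System.out.println(Environment.gcd(intervals));   // prints 120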
@@ -1,151 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.HashMap;

/**********************************************************
 * Objects of this class represent metrics collected for
 * a specific hardware source. Each EventRecord contains a HashMap of
 * (key, value) pairs, each of which represents a property of
 * the metered value. For instance, when parsing a log file, an
 * EventRecord is created for each log entry, which contains
 * the hostname and the ip addresses of the node, the timestamp of
 * the log entry, the actual message etc. Each and every EventRecord
 * contains the hostname of the machine on which it was collected,
 * its IP address and the time of collection.
 *
 * The main purpose of this class is to provide a uniform format
 * for records collected from various system components (logs,
 * ifconfig, smartmontools, lm-sensors etc). All metric values are
 * converted into this format after they are collected by a
 * Monitored object.
 *
 **********************************************************/

public class EventRecord {

  HashMap<String, Object> fields;

  /**
   * Create the EventRecord given the most common properties
   * among different metric types.
   */
  public EventRecord(String _hostname, Object[] _ips, Calendar _timestamp,
      String _type, String _logLevel, String _source, String _message) {
    fields = new HashMap<String, Object>();
    set("hostname", _hostname);
    set("ips", _ips);
    set("timestamp", _timestamp);
    set("type", _type);
    set("logLevel", _logLevel);
    set("source", _source);
    set("message", _message);
  }

  /**
   * Create the EventRecord with no fields other than "invalid" as
   * the hostname. This is only used as a dummy.
   */
  public EventRecord() {
    // creates an invalid record
    fields = new HashMap<String, Object>();
    set("hostname", "invalid");
  }

  /**
   * Return the HashMap of properties of the EventRecord.
   *
   * @return a HashMap that contains all properties of the record.
   */
  public final HashMap<String, Object> getMap() {
    return fields;
  }

  /**
   * Set the value of a property of the EventRecord.
   *
   * @param fieldName the name of the property to set
   * @param fieldValue the value of the property to set
   */
  public void set(String fieldName, Object fieldValue) {
    if (fieldValue != null)
      fields.put(fieldName, fieldValue);
  }

  /**
   * Get the value of a property of the EventRecord.
   * If the property with the specific key is not found,
   * null is returned.
   *
   * @param fieldName the name of the property to get.
   *
   * @return the value of the property, or null if it is not found.
   */
  public Object get(String fieldName) {
    return fields.get(fieldName);
  }

  /**
   * Check if the EventRecord is a valid one, i.e., whether
   * it represents meaningful metric values.
   *
   * @return true if the EventRecord is a valid one, false otherwise.
   */
  public boolean isValid() {
    return !("invalid".equalsIgnoreCase((String) fields.get("hostname")));
  }

  /**
   * Creates and returns a string representation of the object.
   *
   * @return a String representation of the object
   */
  public String toString() {
    String retval = "";
    ArrayList<String> keys = new ArrayList<String>(fields.keySet());
    Collections.sort(keys);

    for (int i = 0; i < keys.size(); i++) {
      Object value = fields.get(keys.get(i));
      if (value == null)
        retval += keys.get(i) + ":\tnull\n";
      else if (value instanceof String)
        retval += keys.get(i) + ":\t" + value + "\n";
      else if (value instanceof Calendar)
        retval += keys.get(i) + ":\t" + ((Calendar) value).getTime() + "\n";
      else if (value instanceof InetAddress[]) {
        retval += "Known IPs:\t";
        for (InetAddress ip : (InetAddress[]) value)
          retval += ip.getHostAddress() + " ";
        retval += "\n";
      } else if (value instanceof String[]) {
        // IPs may also be stored as plain strings, so handle them
        // separately instead of casting to InetAddress[]
        retval += "Known IPs:\t";
        for (String ip : (String[]) value)
          retval += ip + " ";
        retval += "\n";
      } else {
        retval += keys.get(i) + ":\t" + value.toString() + "\n";
      }
    }
    return retval;
  }

}
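A short usage sketch of the record API above (host name and field values are made up; exception handling for getAllByName is elided):

    EventRecord er = new EventRecord("node17.example.com",
        InetAddress.getAllByName("node17.example.com"),
        Calendar.getInstance(), "NIC", "Unknown", "eth0", "-");
    er.set("status", "UP");           // any extra (key, value) property
    if (er.isValid())
      System.out.print(er);           // one sorted "key:<TAB>value" line per field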
@@ -1,120 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;

/**********************************************************
 * This class executes monitoring jobs on all nodes of the
 * cluster, on which we intend to gather failure metrics.
 * It is basically a thread that sleeps and periodically wakes
 * up to execute monitoring jobs and ship all gathered data to
 * a "safe" location, which in most cases will be the HDFS
 * filesystem of the monitored cluster.
 *
 **********************************************************/

public class Executor implements Runnable {

  public static final int DEFAULT_LOG_INTERVAL = 3600;

  public static final int DEFAULT_POLL_INTERVAL = 360;

  public static int MIN_INTERVAL = 5;

  public static int instances = 0;

  LocalStore lstore;

  ArrayList<MonitorJob> monitors;

  int interval;

  int upload_interval;
  int upload_counter;

  /**
   * Create an instance of the class and read the configuration
   * file to determine the set of jobs that will be run and the
   * maximum interval for which the thread can sleep before it
   * wakes up to execute a monitoring job on the node.
   */
  public Executor(Configuration conf) {

    Environment.prepare("conf/failmon.properties");

    String localTmpDir;

    if (conf == null) {
      // running as a stand-alone application
      localTmpDir = System.getProperty("java.io.tmpdir");
      Environment.setProperty("local.tmp.dir", localTmpDir);
    } else {
      // running from within Hadoop
      localTmpDir = conf.get("hadoop.tmp.dir");
      String hadoopLogPath = System.getProperty("hadoop.log.dir") + "/"
          + System.getProperty("hadoop.log.file");
      Environment.setProperty("hadoop.log.file", hadoopLogPath);
      Environment.setProperty("local.tmp.dir", localTmpDir);
    }

    monitors = Environment.getJobs();
    interval = Environment.getInterval(monitors);
    upload_interval = LocalStore.UPLOAD_INTERVAL;
    lstore = new LocalStore();

    if (Environment.getProperty("local.upload.interval") != null)
      upload_interval = Integer.parseInt(Environment.getProperty("local.upload.interval"));

    instances++;
  }

  public void run() {
    upload_counter = upload_interval;

    Environment.logInfo("Failmon Executor thread started successfully.");
    while (true) {
      try {
        Thread.sleep(interval * 1000);
        for (int i = 0; i < monitors.size(); i++) {
          monitors.get(i).counter -= interval;
          if (monitors.get(i).counter <= 0) {
            monitors.get(i).reset();
            Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
            monitors.get(i).job.monitor(lstore);
          }
        }
        upload_counter -= interval;
        if (upload_counter <= 0) {
          lstore.upload();
          upload_counter = upload_interval;
        }
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }

  public void cleanup() {
    instances--;
  }
}
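Executor is a plain Runnable, so wiring it up amounts to starting a thread; a minimal stand-alone launch sketch (the null Configuration selects stand-alone mode, as the constructor above shows):

    Executor executor = new Executor(null);            // stand-alone mode
    new Thread(executor, "failmon-executor").start();  // run() loops forever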
@@ -1,154 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.zip.CRC32;
import java.util.zip.CheckedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;

public class HDFSMerger {

  Configuration hadoopConf;
  FileSystem hdfs;

  String hdfsDir;

  FileStatus[] inputFiles;

  Path outputFilePath;
  FSDataOutputStream outputFile;

  boolean compress;

  FileWriter fw;

  BufferedWriter writer;

  public HDFSMerger() throws IOException {

    String hadoopConfPath;

    if (Environment.getProperty("hadoop.conf.path") == null)
      hadoopConfPath = "../../../conf";
    else
      hadoopConfPath = Environment.getProperty("hadoop.conf.path");

    // Read the configuration for the Hadoop environment
    // (assign the field rather than shadowing it with a local)
    hadoopConf = new Configuration();
    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));

    // determine the local output file name
    if (Environment.getProperty("local.tmp.filename") == null)
      Environment.setProperty("local.tmp.filename", "failmon.dat");

    // determine the upload location
    hdfsDir = Environment.getProperty("hdfs.upload.dir");
    if (hdfsDir == null)
      hdfsDir = "/failmon";

    hdfs = FileSystem.get(hadoopConf);

    Path hdfsDirPath = new Path(hadoopConf.get("fs.default.name") + hdfsDir);

    try {
      if (!hdfs.getFileStatus(hdfsDirPath).isDir()) {
        Environment.logInfo("HDFSMerger: Not an HDFS directory: " + hdfsDirPath.toString());
        System.exit(0);
      }
    } catch (FileNotFoundException e) {
      Environment.logInfo("HDFSMerger: Directory not found: " + hdfsDirPath.toString());
    }

    inputFiles = hdfs.listStatus(hdfsDirPath);

    outputFilePath = new Path(hdfsDirPath.toString() + "/" + "merge-"
        + Calendar.getInstance().getTimeInMillis() + ".dat");
    outputFile = hdfs.create(outputFilePath);

    for (FileStatus fstatus : inputFiles) {
      appendFile(fstatus.getPath());
      hdfs.delete(fstatus.getPath(), true);
    }

    outputFile.close();

    Environment.logInfo("HDFS file merging complete!");
  }

  private void appendFile(Path inputPath) throws IOException {

    FSDataInputStream anyInputFile = hdfs.open(inputPath);
    InputStream inputFile;
    byte[] buffer = new byte[4096];

    if (inputPath.toString().endsWith(LocalStore.COMPRESSION_SUFFIX)) {
      // the file is compressed
      inputFile = new ZipInputStream(anyInputFile);
      ((ZipInputStream) inputFile).getNextEntry();
    } else {
      inputFile = anyInputFile;
    }

    try {
      int bytesRead = 0;
      while ((bytesRead = inputFile.read(buffer)) > 0) {
        outputFile.write(buffer, 0, bytesRead);
      }
    } catch (IOException e) {
      Environment.logInfo("Error while copying file: " + inputPath.toString());
    } finally {
      inputFile.close();
    }
  }

  public static void main(String[] args) {

    Environment.prepare("./conf/failmon.properties");

    try {
      new HDFSMerger();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
@@ -1,136 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.IOException;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**********************************************************
 * An object of this class parses a Hadoop log file to create
 * appropriate EventRecords. The log file can either be the log
 * of a NameNode or JobTracker or DataNode or TaskTracker.
 *
 **********************************************************/

public class HadoopLogParser extends LogParser {

  /**
   * Create a new parser object and try to find the hostname
   * of the node that generated the log
   */
  public HadoopLogParser(String fname) {
    super(fname);
    if ((dateformat = Environment.getProperty("log.hadoop.dateformat")) == null)
      dateformat = "\\d{4}-\\d{2}-\\d{2}";
    if ((timeformat = Environment.getProperty("log.hadoop.timeformat")) == null)
      timeformat = "\\d{2}:\\d{2}:\\d{2}";
    findHostname();
  }

  /**
   * Parses one line of the log. If the line contains a valid
   * log entry, then an appropriate EventRecord is returned, after all
   * relevant fields have been parsed.
   *
   * @param line the log line to be parsed
   *
   * @return the EventRecord representing the log entry of the line. If
   * the line does not contain a valid log entry, then the EventRecord
   * returned has isValid() = false. When the end-of-file has been reached,
   * null is returned to the caller.
   */
  public EventRecord parseLine(String line) throws IOException {
    EventRecord retval = null;

    if (line != null) {
      // process line
      String patternStr = "(" + dateformat + ")";
      patternStr += "\\s+";
      patternStr += "(" + timeformat + ")";
      patternStr += ".{4}\\s(\\w*)\\s"; // for logLevel
      patternStr += "\\s*([\\w+\\.?]+)"; // for source
      patternStr += ":\\s+(.+)"; // for the message
      Pattern pattern = Pattern.compile(patternStr);
      Matcher matcher = pattern.matcher(line);

      if (matcher.find(0) && matcher.groupCount() >= 5) {
        retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
                                 matcher.group(2)),
                                 "HadoopLog",
                                 matcher.group(3), // loglevel
                                 matcher.group(4), // source
                                 matcher.group(5)); // message
      } else {
        retval = new EventRecord();
      }
    }

    return retval;
  }

  /**
   * Parse a date found in the Hadoop log.
   *
   * @return a Calendar representing the date
   */
  protected Calendar parseDate(String strDate, String strTime) {
    Calendar retval = Calendar.getInstance();
    // set date
    String[] fields = strDate.split("-");
    retval.set(Calendar.YEAR, Integer.parseInt(fields[0]));
    // Calendar months are zero-based, so subtract one
    retval.set(Calendar.MONTH, Integer.parseInt(fields[1]) - 1);
    retval.set(Calendar.DATE, Integer.parseInt(fields[2]));
    // set time
    fields = strTime.split(":");
    retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
    retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
    retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
    return retval;
  }

  /**
   * Attempt to determine the hostname of the node that created the
   * log file. This information can be found in the STARTUP_MSG lines
   * of the Hadoop log, which are emitted when the node starts.
   */
  private void findHostname() {
    String startupInfo = Environment.runCommandGeneric(
        "grep --max-count=1 STARTUP_MSG:\\s*host " + file.getName()).toString();
    Pattern pattern = Pattern.compile("\\s+(\\w+/.+)\\s+");
    Matcher matcher = pattern.matcher(startupInfo);
    if (matcher.find(0)) {
      hostname = matcher.group(1).split("/")[0];
      ips = new String[1];
      ips[0] = matcher.group(1).split("/")[1];
    }
  }

  /**
   * Return a String with information about this class
   *
   * @return A String describing this class
   */
  public String getInfo() {
    return ("Hadoop Log Parser for file: " + file.getName());
  }

}
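For orientation, the pattern assembled in parseLine() targets the log4j layout Hadoop emitted at the time; a sketch against an invented log line (Pattern and Matcher are already imported in this file):

    String line =
        "2008-10-03 14:21:05,123 INFO org.apache.hadoop.dfs.DataNode: Starting DataNode";
    Pattern p = Pattern.compile("(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}:\\d{2}:\\d{2})"
        + ".{4}\\s(\\w*)\\s\\s*([\\w+\\.?]+):\\s+(.+)");
    Matcher m = p.matcher(line);
    if (m.find(0) && m.groupCount() >= 5)
      // groups: 1=date 2=time 3=log level 4=source 5=message
      System.out.println(m.group(3) + " " + m.group(4));  // INFO org.apache.hadoop.dfs.DataNode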
@@ -1,282 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.zip.CRC32;
import java.util.zip.CheckedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**********************************************************
 * This class takes care of the temporary local storage of
 * gathered metrics before they get uploaded into HDFS. It writes
 * serialized records as lines in a temporary file and then
 * compresses and uploads it into HDFS.
 *
 **********************************************************/

public class LocalStore {

  public final static char FIELD_SEPARATOR = '|';

  public final static char RECORD_SEPARATOR = '\n';

  public final static String COMPRESSION_SUFFIX = ".zip";

  public final static int UPLOAD_INTERVAL = 600;

  String filename;
  String hdfsDir;

  boolean compress;

  FileWriter fw;

  BufferedWriter writer;

  /**
   * Create an instance of the class and read the configuration
   * file to determine some output parameters. Then, initialize the
   * structures needed for the buffered I/O (so that small appends
   * can be handled efficiently).
   */
  public LocalStore() {
    // determine the local output file name
    if (Environment.getProperty("local.tmp.filename") == null)
      Environment.setProperty("local.tmp.filename", "failmon.dat");

    // local.tmp.dir has been set by the Executor
    if (Environment.getProperty("local.tmp.dir") == null)
      Environment.setProperty("local.tmp.dir", System.getProperty("java.io.tmpdir"));

    filename = Environment.getProperty("local.tmp.dir") + "/" +
        Environment.getProperty("local.tmp.filename");

    // determine the upload location
    hdfsDir = Environment.getProperty("hdfs.upload.dir");
    if (hdfsDir == null)
      hdfsDir = "/failmon";

    // determine if compression is enabled
    compress = true;
    if ("false".equalsIgnoreCase(Environment
        .getProperty("local.tmp.compression")))
      compress = false;

    try {
      fw = new FileWriter(filename, true);
      writer = new BufferedWriter(fw);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Insert an EventRecord to the local storage, after it
   * gets serialized and anonymized.
   *
   * @param er the EventRecord to be inserted
   */
  public void insert(EventRecord er) {
    SerializedRecord sr = new SerializedRecord(er);
    try {
      Anonymizer.anonymize(sr);
    } catch (Exception e) {
      e.printStackTrace();
    }
    append(sr);
  }

  /**
   * Insert an array of EventRecords to the local storage, after they
   * get serialized and anonymized.
   *
   * @param ers the array of EventRecords to be inserted
   */
  public void insert(EventRecord[] ers) {
    for (EventRecord er : ers)
      insert(er);
  }

  private void append(SerializedRecord sr) {
    try {
      writer.write(pack(sr).toString());
      writer.write(RECORD_SEPARATOR);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Pack a SerializedRecord into a character sequence.
   * <p>
   * This method is deprecated.
   * @param sr the SerializedRecord to be packed
   * @return the packed representation of the SerializedRecord
   * @see #packConcurrent(SerializedRecord)
   * @deprecated
   */
  public static StringBuffer pack(SerializedRecord sr) {
    return new StringBuffer(packConcurrent(sr));
  }

  /**
   * Pack a SerializedRecord into a character sequence.
   *
   * @param sr the SerializedRecord to be packed
   * @return the packed representation of the SerializedRecord
   */
  public static CharSequence packConcurrent(SerializedRecord sr) {
    StringBuilder sb = new StringBuilder();

    ArrayList<String> keys = new ArrayList<String>(sr.fields.keySet());

    if (sr.isValid())
      SerializedRecord.arrangeKeys(keys);

    for (int i = 0; i < keys.size(); i++) {
      String value = sr.fields.get(keys.get(i));
      sb.append(keys.get(i) + ":" + value);
      sb.append(FIELD_SEPARATOR);
    }
    return sb;
  }

  /**
   * Upload the local file store into HDFS, after compressing it.
   * Then a new local file is created as a temporary record store.
   */
  public void upload() {
    try {
      writer.flush();
      if (compress)
        zipCompress(filename);
      String remoteName = "failmon-";
      if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.hostnames")))
        remoteName += Anonymizer.getMD5Hash(InetAddress.getLocalHost().getCanonicalHostName()) + "-";
      else
        remoteName += InetAddress.getLocalHost().getCanonicalHostName() + "-";
      remoteName += Calendar.getInstance().getTimeInMillis();
      if (compress)
        copyToHDFS(filename + COMPRESSION_SUFFIX, hdfsDir + "/" + remoteName + COMPRESSION_SUFFIX);
      else
        copyToHDFS(filename, hdfsDir + "/" + remoteName);
    } catch (IOException e) {
      e.printStackTrace();
    }

    // delete and re-open
    try {
      fw.close();
      fw = new FileWriter(filename);
      writer = new BufferedWriter(fw);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Compress a text file using the ZIP compressing algorithm.
   *
   * @param filename the path to the file to be compressed
   */
  public static void zipCompress(String filename) throws IOException {
    FileOutputStream fos = new FileOutputStream(filename + COMPRESSION_SUFFIX);
    CheckedOutputStream csum = new CheckedOutputStream(fos, new CRC32());
    ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(csum));
    out.setComment("Failmon records.");

    BufferedReader in = new BufferedReader(new FileReader(filename));
    out.putNextEntry(new ZipEntry(new File(filename).getName()));
    int c;
    while ((c = in.read()) != -1)
      out.write(c);
    in.close();

    out.finish();
    out.close();
  }

  /**
   * Copy a local file to HDFS
   *
   * @param localFile the filename of the local file
   * @param hdfsFile the HDFS filename to copy to
   */
  public static void copyToHDFS(String localFile, String hdfsFile) throws IOException {

    String hadoopConfPath;

    if (Environment.getProperty("hadoop.conf.path") == null)
      hadoopConfPath = "../../../conf";
    else
      hadoopConfPath = Environment.getProperty("hadoop.conf.path");

    // Read the configuration for the Hadoop environment
    Configuration hadoopConf = new Configuration();
    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));

    FileSystem fs = FileSystem.get(hadoopConf);

    // HadoopDFS deals with Path
    Path inFile = new Path("file://" + localFile);
    Path outFile = new Path(hadoopConf.get("fs.default.name") + hdfsFile);

    // Read from and write to new file
    Environment.logInfo("Uploading to HDFS (file " + outFile + ") ...");
    fs.copyFromLocalFile(false, inFile, outFile);
  }

  /**
   * Close the temporary local file
   */
  public void close() {
    try {
      writer.flush();
      writer.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
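The wire format that packConcurrent() produces is one record per line, each field a key:value pair terminated by FIELD_SEPARATOR; a sketch of the round trip (er is an EventRecord as in the earlier sketch; the sample output line is illustrative, not verbatim):

    SerializedRecord sr = new SerializedRecord(er);
    try {
      Anonymizer.anonymize(sr);        // may hash the hostname and IPs
    } catch (Exception e) {
      e.printStackTrace();
    }
    CharSequence packed = LocalStore.packConcurrent(sr);
    // e.g. "hostname:node17.example.com|source:eth0|status:UP|...|"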
@@ -1,214 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Calendar;

/**********************************************************
 * This class represents objects that provide log parsing
 * functionality. Typically, such objects read log files line
 * by line and for each log entry they identify, they create a
 * corresponding EventRecord. In this way, disparate log files
 * can be merged using the uniform format of EventRecords and can,
 * thus, be processed in a uniform way.
 *
 **********************************************************/

public abstract class LogParser implements Monitored {

  File file;

  BufferedReader reader;

  String hostname;

  Object[] ips;

  String dateformat;

  String timeformat;

  private String firstLine;
  private long offset;

  /**
   * Create a parser that will read from the specified log file.
   *
   * @param fname the filename of the log file to be read
   */
  public LogParser(String fname) {
    file = new File(fname);

    ParseState ps = PersistentState.getState(file.getAbsolutePath());
    firstLine = ps.firstLine;
    offset = ps.offset;

    try {
      reader = new BufferedReader(new FileReader(file));
      checkForRotation();
      Environment.logInfo("Checked for rotation...");
      reader.skip(offset);
    } catch (FileNotFoundException e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
    } catch (IOException e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
    }

    setNetworkProperties();
  }

  protected void setNetworkProperties() {
    // determine hostname and ip addresses for the node
    try {
      // Get hostname
      hostname = InetAddress.getLocalHost().getCanonicalHostName();
      // Get all associated ip addresses
      ips = InetAddress.getAllByName(hostname);
    } catch (UnknownHostException e) {
      e.printStackTrace();
    }
  }

  /**
   * Insert all EventRecords that can be extracted for
   * the represented hardware component into a LocalStore.
   *
   * @param ls the LocalStore into which the EventRecords
   * are to be stored.
   */
  public void monitor(LocalStore ls) {
    EventRecord er = null;
    Environment.logInfo("Started processing log...");

    while ((er = getNext()) != null) {
      if (er.isValid()) {
        ls.insert(er);
      }
    }

    PersistentState.updateState(file.getAbsolutePath(), firstLine, offset);
    PersistentState.writeState("conf/parsing.state");
  }

  /**
   * Get an array of all EventRecords that can be extracted for
   * the represented hardware component.
   *
   * @return The array of EventRecords
   */
  public EventRecord[] monitor() {

    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
    EventRecord er;

    while ((er = getNext()) != null)
      recs.add(er);

    EventRecord[] T = new EventRecord[recs.size()];

    return recs.toArray(T);
  }

  /**
   * Continue parsing the log file until a valid log entry is identified.
   * When one such entry is found, parse it and return a corresponding EventRecord.
   *
   * @return The EventRecord corresponding to the next log entry
   */
  public EventRecord getNext() {
    try {
      String line = reader.readLine();
      if (line != null) {
        if (firstLine == null)
          firstLine = line;
        offset += line.length() + 1;
        return parseLine(line);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
    return null;
  }

  /**
   * Return the BufferedReader that reads the log file.
   *
   * @return The BufferedReader that reads the log file
   */
  public BufferedReader getReader() {
    return reader;
  }

  /**
   * Check whether the log file has been rotated. If so,
   * start reading the file from the beginning.
   */
  public void checkForRotation() {
    try {
      BufferedReader probe = new BufferedReader(new FileReader(file.getAbsoluteFile()));
      // close the probe reader in both paths, not just on rotation
      boolean rotated = (firstLine == null || !firstLine.equals(probe.readLine()));
      probe.close();
      if (rotated) {
        // start reading the file from the beginning
        reader.close();
        reader = new BufferedReader(new FileReader(file.getAbsoluteFile()));
        firstLine = null;
        offset = 0;
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Parses one line of the log. If the line contains a valid
   * log entry, then an appropriate EventRecord is returned, after all
   * relevant fields have been parsed.
   *
   * @param line the log line to be parsed
   *
   * @return the EventRecord representing the log entry of the line. If
   * the line does not contain a valid log entry, then the EventRecord
   * returned has isValid() = false. When the end-of-file has been reached,
   * null is returned to the caller.
   */
  abstract public EventRecord parseLine(String line) throws IOException;

  /**
   * Parse a date found in a Hadoop log file.
   *
   * @return a Calendar representing the date
   */
  abstract protected Calendar parseDate(String strDate, String strTime);

}
@@ -1,43 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

/**********************************************************
 * This class is a wrapper for a monitoring job.
 *
 **********************************************************/

public class MonitorJob {
  Monitored job;

  String type;
  int interval;
  int counter;

  public MonitorJob(Monitored _job, String _type, int _interval) {
    job = _job;
    type = _type;
    interval = _interval;
    counter = _interval;
  }

  public void reset() {
    counter = interval;
  }
}
@@ -1,53 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

/**********************************************************
 * Represents objects that monitor specific hardware resources and
 * can query them to get EventRecords describing the state of these
 * resources.
 *
 **********************************************************/

public interface Monitored {
  /**
   * Get an array of all EventRecords that can be extracted for
   * the represented hardware component.
   *
   * @return The array of EventRecords
   */
  public EventRecord[] monitor();

  /**
   * Inserts all EventRecords that can be extracted for
   * the represented hardware component into a LocalStore.
   *
   * @param ls the LocalStore into which the EventRecords
   * are to be stored.
   */
  public void monitor(LocalStore ls);

  /**
   * Return a String with information about the implementing
   * class
   *
   * @return A String describing the implementing class
   */
  public String getInfo();
}
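A minimal sketch of what implementing the interface takes — a hypothetical UptimeParser that is not part of FailMon, shown only to make the contract concrete:

    public class UptimeParser implements Monitored {

      public EventRecord[] monitor() {
        CharSequence out = Environment.runCommandGeneric("uptime");
        EventRecord er = new EventRecord();          // starts out invalid
        er.set("hostname", "node17.example.com");    // hypothetical; makes it valid
        er.set("message", out.toString().trim());
        return new EventRecord[] { er };
      }

      public void monitor(LocalStore ls) {
        ls.insert(monitor());
      }

      public String getInfo() {
        return "uptime parser";
      }
    }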
@ -1,140 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Calendar;

/**********************************************************
 * Objects of this class parse the output of ifconfig to
 * gather information about present Network Interface Cards
 * in the system. The list of NICs to poll is specified in the
 * configuration file.
 *
 **********************************************************/

public class NICParser extends ShellParser {

  String[] nics;

  /**
   * Constructs a NICParser and reads the list of NICs to query
   */
  public NICParser() {
    super();
    nics = Environment.getProperty("nic.list").split(",\\s*");
  }

  /**
   * Reads and parses the output of ifconfig for a specified NIC and
   * creates an appropriate EventRecord that holds the desirable
   * information for it.
   *
   * @param device the NIC device name to query
   *
   * @return the EventRecord created
   */
  public EventRecord query(String device) throws UnknownHostException {
    CharSequence sb = Environment.runCommandGeneric("/sbin/ifconfig " + device);
    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
        .getHostName()), Calendar.getInstance(), "NIC", "Unknown", device, "-");

    retval.set("hwAddress", findPattern("HWaddr\\s*([\\S{2}:]{17})", sb
        .toString(), 1));

    // an interface is considered UP iff ifconfig reports an IP address for it
    String tmp = findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb.toString(), 1);
    retval.set("status", (tmp == null) ? "DOWN" : "UP");
    if (tmp != null)
      retval.set("ipAddress", tmp);

    retval.set("rxPackets", findPattern("RX\\s*packets\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("rxErrors", findPattern("RX.+errors\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("rxDropped", findPattern("RX.+dropped\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("rxOverruns", findPattern("RX.+overruns\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("rxFrame", findPattern("RX.+frame\\s*:\\s*(\\d+)",
        sb.toString(), 1));

    retval.set("txPackets", findPattern("TX\\s*packets\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("txErrors", findPattern("TX.+errors\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("txDropped", findPattern("TX.+dropped\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("txOverruns", findPattern("TX.+overruns\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("txCarrier", findPattern("TX.+carrier\\s*:\\s*(\\d+)", sb
        .toString(), 1));

    retval.set("collisions", findPattern("\\s+collisions\\s*:\\s*(\\d+)", sb
        .toString(), 1));

    retval.set("rxBytes", findPattern("RX\\s*bytes\\s*:\\s*(\\d+)", sb
        .toString(), 1));
    retval.set("txBytes", findPattern("TX\\s*bytes\\s*:\\s*(\\d+)", sb
        .toString(), 1));

    return retval;
  }

  /**
   * Invokes query() to do the parsing and handles parsing errors for
   * each one of the NICs specified in the configuration.
   *
   * @return an array of EventRecords that holds one element for each
   * NIC specified in the configuration.
   */
  public EventRecord[] monitor() {
    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();

    for (String nic : nics) {
      try {
        recs.add(query(nic));
      } catch (UnknownHostException e) {
        e.printStackTrace();
      }
    }

    EventRecord[] T = new EventRecord[recs.size()];

    return recs.toArray(T);
  }

  /**
   * Return a String with information about this class
   *
   * @return A String describing this class
   */
  public String getInfo() {
    String retval = "ifconfig parser for interfaces: ";
    for (String nic : nics)
      retval += nic + " ";
    return retval;
  }
}
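For illustration, here is a minimal, self-contained sketch of the extraction style query() uses; the sample ifconfig text and the demo class name are invented, and the regular expression is the one the parser applies for the IP address:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class IfconfigPatternDemo {
  public static void main(String[] args) {
    // hypothetical ifconfig output for a single interface
    String sample = "eth0  Link encap:Ethernet  HWaddr 00:1A:2B:3C:4D:5E\n"
        + "      inet addr:192.168.1.10  Bcast:192.168.1.255\n"
        + "      RX packets:123456 errors:0 dropped:0 overruns:0 frame:0\n";
    // same extraction style as NICParser.query(): first match, group 1
    Pattern p = Pattern.compile("inet\\s+addr:\\s*([\\w.?]*)", Pattern.MULTILINE);
    Matcher m = p.matcher(sample);
    String ip = m.find() ? m.group(1) : null;
    System.out.println("status = " + (ip == null ? "DOWN" : "UP"));  // UP
    System.out.println("ipAddress = " + ip);                         // 192.168.1.10
  }
}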
@@ -1,132 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;

/**********************************************************
 * This class can be used to anonymize logs independently of
 * Hadoop and the Executor. It parses the specified log file to
 * create log records for it and then passes them to the Anonymizer.
 * After they are anonymized, they are written to a local file,
 * which is then compressed and stored locally.
 *
 **********************************************************/

public class OfflineAnonymizer {

  public enum LogType {
    HADOOP, SYSTEM
  };

  LogType logtype;

  File logfile;

  LogParser parser;

  /**
   * Creates an OfflineAnonymizer for a specific log file.
   *
   * @param logtype the type of the log file. This can either be
   * LogType.HADOOP or LogType.SYSTEM
   * @param filename the path to the log file
   *
   */
  public OfflineAnonymizer(LogType logtype, String filename) {

    logfile = new File(filename);

    if (!logfile.exists()) {
      System.err.println("Input file does not exist!");
      System.exit(1);
    }

    if (logtype == LogType.HADOOP)
      parser = new HadoopLogParser(filename);
    else
      parser = new SystemLogParser(filename);
  }

  /**
   * Performs anonymization for the log file. Log entries are
   * read one by one and EventRecords are created, which are then
   * anonymized and written to the output.
   *
   */
  public void anonymize() throws Exception {
    EventRecord er = null;
    SerializedRecord sr = null;

    BufferedWriter bfw = new BufferedWriter(new FileWriter(logfile.getName()
        + ".anonymized"));

    System.out.println("Anonymizing log records...");
    while ((er = parser.getNext()) != null) {
      if (er.isValid()) {
        sr = new SerializedRecord(er);
        Anonymizer.anonymize(sr);
        bfw.write(LocalStore.pack(sr).toString());
        bfw.write(LocalStore.RECORD_SEPARATOR);
      }
    }
    bfw.flush();
    bfw.close();
    System.out.println("Anonymized log records written to " + logfile.getName()
        + ".anonymized");

    System.out.println("Compressing output file...");
    LocalStore.zipCompress(logfile.getName() + ".anonymized");
    System.out.println("Compressed output file written to " + logfile.getName()
        + ".anonymized" + LocalStore.COMPRESSION_SUFFIX);
  }

  public static void main(String[] args) {

    if (args.length < 2) {
      System.out.println("Usage: OfflineAnonymizer <log_type> <filename>");
      System.out
          .println("where <log_type> is either \"-hadoop\" or \"-system\" and <filename> is the path to the log file");
      System.exit(1);
    }

    LogType logtype = null;

    if (args[0].equalsIgnoreCase("-hadoop"))
      logtype = LogType.HADOOP;
    else if (args[0].equalsIgnoreCase("-system"))
      logtype = LogType.SYSTEM;
    else {
      System.err.println("Invalid first argument.");
      System.exit(1);
    }

    OfflineAnonymizer oa = new OfflineAnonymizer(logtype, args[1]);

    try {
      oa.anonymize();
    } catch (Exception e) {
      e.printStackTrace();
    }

    return;
  }
}
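A hypothetical driver for the class above, assuming the failmon classes are on the classpath and /var/log/messages exists; it is equivalent to invoking the tool with "-system /var/log/messages" on the command line:

public class AnonymizeDriver {
  public static void main(String[] args) throws Exception {
    // parse, anonymize and compress a syslog file (path is illustrative)
    OfflineAnonymizer oa = new OfflineAnonymizer(
        OfflineAnonymizer.LogType.SYSTEM, "/var/log/messages");
    oa.anonymize();  // writes messages.anonymized, then compresses it
  }
}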
@@ -1,163 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.util.Properties;
import java.util.Calendar;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

/**********************************************************
 * This class takes care of the information that needs to be
 * persistently stored locally on nodes. Bookkeeping is done for the
 * state of parsing of log files, so that the portion of the file that
 * has already been parsed in previous calls will not be parsed again.
 * For each log file, we maintain the byte offset of the last
 * character parsed in previous passes. Also, the first entry in the
 * log file is stored, so that FailMon can determine when a log file
 * has been rotated (and thus parsing needs to start from the
 * beginning of the file). We use a property file to store that
 * information. For each log file we create a property keyed by the
 * filename, the value of which contains the first log entry and the
 * byte offset separated by a SEPARATOR.
 *
 **********************************************************/

public class PersistentState {

  private final static String SEPARATOR = "###";

  static String filename;
  static Properties persData = new Properties();

  /**
   * Read the state of parsing for all open log files from a property
   * file.
   *
   * @param fname the filename of the property file to be read
   */
  public static void readState(String fname) {

    filename = fname;

    try {
      persData.load(new FileInputStream(filename));
    } catch (FileNotFoundException e1) {
      // ignore; no state has been saved yet
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Read and return the state of parsing for a particular log file.
   *
   * @param fname the log file for which to read the state
   */
  public static ParseState getState(String fname) {
    String [] fields = persData.getProperty(fname, "null" + SEPARATOR + "0").split(SEPARATOR, 2);
    String firstLine;
    long offset;

    if (fields.length < 2) {
      System.err.println("Malformed persistent state data found");
      Environment.logInfo("Malformed persistent state data found");
      firstLine = null;
      offset = 0;
    } else {
      firstLine = (fields[0].equals("null") ? null : fields[0]);
      offset = Long.parseLong(fields[1]);
    }

    return new ParseState(fname, firstLine, offset);
  }

  /**
   * Set the state of parsing for a particular log file.
   *
   * @param state the ParseState to set
   */
  public static void setState(ParseState state) {

    if (state == null) {
      System.err.println("Null state found");
      Environment.logInfo("Null state found");
      return;
    }

    persData.setProperty(state.filename, state.firstLine + SEPARATOR + state.offset);
  }

  /**
   * Update the state of parsing for a particular log file.
   *
   * @param filename the log file for which to update the state
   * @param firstLine the first line of the log file currently
   * @param offset the byte offset of the last character parsed
   */
  public static void updateState(String filename, String firstLine, long offset) {

    ParseState ps = getState(filename);

    if (firstLine != null)
      ps.firstLine = firstLine;

    ps.offset = offset;

    setState(ps);
  }

  /**
   * Write the state of parsing for all open log files to a property
   * file on disk.
   *
   * @param fname the filename of the property file to write to
   */
  public static void writeState(String fname) {
    try {
      persData.store(new FileOutputStream(fname), Calendar.getInstance().getTime().toString());
    } catch (FileNotFoundException e1) {
      e1.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

}

/**********************************************************
 * This class represents the state of parsing for a particular log
 * file.
 *
 **********************************************************/

class ParseState {

  public String filename;
  public String firstLine;
  public long offset;

  public ParseState(String _filename, String _firstLine, long _offset) {
    this.filename = _filename;
    this.firstLine = _firstLine;
    this.offset = _offset;
  }
}
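A minimal, self-contained sketch of the on-disk entry format PersistentState keeps per log file (value = firstLine + "###" + offset); the log path and first line are made up:

import java.util.Properties;

public class ParseStateFormatDemo {
  public static void main(String[] args) {
    Properties persData = new Properties();
    // one property per log file, as setState() writes it
    persData.setProperty("/var/log/messages",
        "Jan  1 00:00:01 host syslogd: restart" + "###" + 4096L);
    // getState() recovers the two parts by splitting on the separator,
    // with a "null###0" default for files never seen before
    String[] fields = persData.getProperty("/var/log/messages",
        "null" + "###" + "0").split("###", 2);
    System.out.println("firstLine = " + fields[0]);
    System.out.println("offset    = " + Long.parseLong(fields[1]));
  }
}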
@@ -1,120 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.util.ArrayList;

/**********************************************************
 * Runs a set of monitoring jobs once for the local node. The set of
 * jobs to be run is the intersection of the jobs specified in the
 * configuration file and the set of jobs specified in the --only
 * command line argument.
 **********************************************************/

public class RunOnce {

  LocalStore lstore;

  ArrayList<MonitorJob> monitors;

  boolean uploading = true;

  public RunOnce(String confFile) {

    Environment.prepare(confFile);

    String localTmpDir;

    // running as a stand-alone application
    localTmpDir = System.getProperty("java.io.tmpdir");
    Environment.setProperty("local.tmp.dir", localTmpDir);

    monitors = Environment.getJobs();
    lstore = new LocalStore();
    uploading = true;
  }

  private void filter (String [] ftypes) {
    ArrayList<MonitorJob> filtered = new ArrayList<MonitorJob>();
    boolean found;

    // filter out unwanted monitor jobs
    for (MonitorJob job : monitors) {
      found = false;
      for (String ftype : ftypes)
        if (job.type.equalsIgnoreCase(ftype))
          found = true;
      if (found)
        filtered.add(job);
    }

    // disable uploading if not requested
    found = false;
    for (String ftype : ftypes)
      if (ftype.equalsIgnoreCase("upload"))
        found = true;

    if (!found)
      uploading = false;

    monitors = filtered;
  }

  private void run() {

    Environment.logInfo("Failmon started successfully.");

    for (int i = 0; i < monitors.size(); i++) {
      Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
      monitors.get(i).job.monitor(lstore);
    }

    if (uploading)
      lstore.upload();

    lstore.close();
  }

  public void cleanup() {
    // nothing to be done
  }


  public static void main (String [] args) {

    String configFilePath = "./conf/failmon.properties";
    String [] onlyList = null;

    // Parse command-line parameters
    for (int i = 0; i < args.length - 1; i++) {
      if (args[i].equalsIgnoreCase("--config"))
        configFilePath = args[i + 1];
      else if (args[i].equalsIgnoreCase("--only"))
        onlyList = args[i + 1].split(",");
    }

    RunOnce ro = new RunOnce(configFilePath);
    // only keep the requested types of jobs
    if (onlyList != null)
      ro.filter(onlyList);
    // run once only
    ro.run();
  }

}
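A tiny sketch of the --only convention implemented by filter() above: job type names are matched case-insensitively, and the special value "upload" keeps uploading enabled. The type names here are illustrative, not a definitive list:

public class OnlyListDemo {
  public static void main(String[] args) {
    // as passed on the command line: --only sensors,smart,upload
    String[] onlyList = "sensors,smart,upload".split(",");
    boolean uploading = false;
    for (String ftype : onlyList)
      if (ftype.equalsIgnoreCase("upload"))
        uploading = true;
    System.out.println("uploading = " + uploading);  // true
  }
}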
@@ -1,206 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**********************************************************
 * Objects of this class parse the output of smartmontools to
 * gather information about the state of disks in the system. The
 * smartmontools utility reads the S.M.A.R.T. attributes from
 * the disk devices and reports them to the user. Note that since
 * running smartctl requires superuser privileges, one should
 * grant sudo privileges to the running user for the command smartctl
 * (without a password). Alternatively, one can set up a cron job that
 * periodically dumps the output of smartctl into a user-readable file.
 * See the configuration file for details.
 *
 **********************************************************/

public class SMARTParser extends ShellParser {

  String[] devices;

  /**
   * Constructs a SMARTParser and reads the list of disk
   * devices to query
   */
  public SMARTParser() {
    super();
    String devicesStr = Environment.getProperty("disks.list");
    if (devicesStr != null)
      devices = devicesStr.split(",\\s*");
  }

  /**
   * Reads and parses the output of smartctl for a specified disk and
   * creates an appropriate EventRecord that holds the desirable
   * information for it. Since the output of smartctl is different for
   * different kinds of disks, we try to identify as many attributes as
   * possible for all known output formats.
   *
   * @param device the disk device name to query
   *
   * @return the EventRecord created
   */
  public EventRecord query(String device) throws Exception {
    String conf = Environment.getProperty("disks." + device + ".source");
    CharSequence sb;

    if (conf == null)
      sb = Environment.runCommandGeneric("sudo smartctl --all " + device);
    else
      sb = Environment.runCommandGeneric("cat " + conf);

    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
        .getHostName()), Calendar.getInstance(), "SMART", "Unknown",
        (conf == null ? "sudo smartctl --all " + device : "file " + conf), "-");
    // IBM SCSI disks
    retval.set("model", findPattern("Device\\s*:\\s*(.*)", sb.toString(), 1));
    retval.set("serial", findPattern("Serial\\s+Number\\s*:\\s*(.*)", sb
        .toString(), 1));
    retval.set("firmware", findPattern("Firmware\\s+Version\\s*:\\s*(.*)", sb
        .toString(), 1));
    retval.set("capacity", findPattern("User\\s+Capacity\\s*:\\s*(.*)", sb
        .toString(), 1));
    retval.set("status", findPattern("SMART\\s*Health\\s*Status:\\s*(.*)", sb
        .toString(), 1));
    retval.set("current_temperature", findPattern(
        "Current\\s+Drive\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
    retval.set("trip_temperature", findPattern(
        "Drive\\s+Trip\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
    retval.set("start_stop_count", findPattern(
        "start\\s+stop\\s+count\\s*:\\s*(\\d*)", sb.toString(), 1));

    String[] var = { "read", "write", "verify" };
    for (String s : var) {
      retval.set(s + "_ecc_fast", findPattern(s + "\\s*:\\s*(\\d*)", sb
          .toString(), 1));
      retval.set(s + "_ecc_delayed", findPattern(s
          + "\\s*:\\s*(\\d+\\s+){1}(\\d+)", sb.toString(), 2));
      retval.set(s + "_rereads", findPattern(
          s + "\\s*:\\s*(\\d+\\s+){2}(\\d+)", sb.toString(), 2));
      retval.set(s + "_GBs", findPattern(s
          + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*)", sb.toString(), 2));
      retval.set(s + "_uncorrected",
          findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*){1}\\s+(\\d+)", sb
              .toString(), 3));
    }

    // Hitachi IDE, SATA
    retval.set("model", findPattern("Device\\s*Model\\s*:\\s*(.*)", sb
        .toString(), 1));
    retval.set("serial", findPattern("Serial\\s+number\\s*:\\s*(.*)", sb
        .toString(), 1));
    retval.set("protocol", findPattern("Transport\\s+protocol\\s*:\\s*(.*)", sb
        .toString(), 1));
    retval.set("status", "PASSED".equalsIgnoreCase(findPattern(
        "test\\s*result\\s*:\\s*(.*)", sb.toString(), 1)) ? "OK" : "FAILED");

    readColumns(retval, sb);

    return retval;
  }

  /**
   * Reads attributes in the following format:
   *
   * ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
   * 3 Spin_Up_Time 0x0027 180 177 063 Pre-fail Always - 10265
   * 4 Start_Stop_Count 0x0032 253 253 000 Old_age Always - 34
   * 5 Reallocated_Sector_Ct 0x0033 253 253 063 Pre-fail Always - 0
   * 6 Read_Channel_Margin 0x0001 253 253 100 Pre-fail Offline - 0
   * 7 Seek_Error_Rate 0x000a 253 252 000 Old_age Always - 0
   * 8 Seek_Time_Performance 0x0027 250 224 187 Pre-fail Always - 53894
   * 9 Power_On_Minutes 0x0032 210 210 000 Old_age Always - 878h+00m
   * 10 Spin_Retry_Count 0x002b 253 252 157 Pre-fail Always - 0
   * 11 Calibration_Retry_Count 0x002b 253 252 223 Pre-fail Always - 0
   * 12 Power_Cycle_Count 0x0032 253 253 000 Old_age Always - 49
   * 192 PowerOff_Retract_Count 0x0032 253 253 000 Old_age Always - 0
   * 193 Load_Cycle_Count 0x0032 253 253 000 Old_age Always - 0
   * 194 Temperature_Celsius 0x0032 037 253 000 Old_age Always - 37
   * 195 Hardware_ECC_Recovered 0x000a 253 252 000 Old_age Always - 2645
   *
   * This format is mostly found in IDE and SATA disks.
   *
   * @param er the EventRecord in which to store attributes found
   * @param sb the text to parse
   *
   * @return the EventRecord in which new attributes are stored.
   */
  private EventRecord readColumns(EventRecord er, CharSequence sb) {

    Pattern pattern = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$",
        Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(sb);

    while (matcher.find()) {
      String[] tokens = matcher.group(1).split("\\s+");
      boolean failed = false;
      // check if this attribute is a failed one
      if (!tokens[8].equals("-"))
        failed = true;
      er.set(tokens[1].toLowerCase(), (failed ? "FAILED:" : "") + tokens[9]);
    }

    return er;
  }

  /**
   * Invokes query() to do the parsing and handles parsing errors for
   * each one of the disks specified in the configuration.
   *
   * @return an array of EventRecords that holds one element for each
   * disk device specified in the configuration.
   */
  public EventRecord[] monitor() {
    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();

    // devices is null if the disks.list property is unset
    if (devices != null) {
      for (String device : devices) {
        try {
          recs.add(query(device));
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }

    EventRecord[] T = new EventRecord[recs.size()];

    return recs.toArray(T);
  }

  /**
   * Return a String with information about this class
   *
   * @return A String describing this class
   */
  public String getInfo() {
    String retval = "S.M.A.R.T. disk attributes parser for disks ";
    for (String device : devices)
      retval += device + " ";
    return retval;
  }

}
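A self-contained sketch of the column parsing in readColumns(); the sample row follows the attribute format documented above, and the class name is invented:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SmartRowDemo {
  public static void main(String[] args) {
    // one attribute row as smartctl prints it for IDE/SATA disks
    String row = "  4 Start_Stop_Count        0x0032   253   253   000"
        + "    Old_age   Always       -       34";
    Pattern p = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$", Pattern.MULTILINE);
    Matcher m = p.matcher(row);
    if (m.find()) {
      String[] tokens = m.group(1).split("\\s+");
      boolean failed = !tokens[8].equals("-");   // WHEN_FAILED column
      System.out.println(tokens[1].toLowerCase() + " = "
          + (failed ? "FAILED:" : "") + tokens[9]);  // start_stop_count = 34
    }
  }
}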
@@ -1,112 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.net.InetAddress;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**********************************************************
 * Objects of this class parse the output of the lm-sensors utility
 * to gather information about fan speed, temperatures for cpus
 * and motherboard etc.
 *
 **********************************************************/

public class SensorsParser extends ShellParser {

  /**
   * Reads and parses the output of the 'sensors' command
   * and creates an appropriate EventRecord that holds
   * the desirable information.
   *
   * @param s unused parameter
   *
   * @return the EventRecord created
   */
  public EventRecord query(String s) throws Exception {
    CharSequence sb;

    sb = Environment.runCommandGeneric("sensors -A");

    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
        .getHostName()), Calendar.getInstance(), "lm-sensors", "Unknown",
        "sensors -A", "-");
    readGroup(retval, sb, "fan");
    readGroup(retval, sb, "in");
    readGroup(retval, sb, "temp");
    readGroup(retval, sb, "Core");

    return retval;
  }

  /**
   * Reads and parses lines that provide the output
   * of a group of sensors with the same functionality.
   *
   * @param er the EventRecord to which the new attributes are added
   * @param sb the text to parse
   * @param prefix a String prefix specifying the common prefix of the
   * sensors' names in the group (e.g. "fan", "in", "temp")
   *
   * @return the EventRecord with the new attributes added
   */
  private EventRecord readGroup(EventRecord er, CharSequence sb, String prefix) {

    Pattern pattern = Pattern.compile(".*(" + prefix
        + "\\s*\\d*)\\s*:\\s*(\\+?\\d+)", Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(sb);

    while (matcher.find())
      er.set(matcher.group(1), matcher.group(2));

    return er;
  }

  /**
   * Invokes query() to do the parsing and handles parsing errors.
   *
   * @return an array of EventRecords that holds one element that represents
   * the current state of the hardware sensors
   */
  public EventRecord[] monitor() {
    EventRecord[] recs = new EventRecord[1];

    try {
      recs[0] = query(null);
    } catch (Exception e) {
      e.printStackTrace();
    }

    return recs;
  }

  /**
   * Return a String with information about this class
   *
   * @return A String describing this class
   */
  public String getInfo() {
    return ("lm-sensors parser");
  }

}
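Note: the deleted file ran "cat sensors.out" with the real "sensors -A" invocation commented out, apparently a testing leftover; the listing above restores the actual command. A self-contained sketch of readGroup()'s extraction on made-up lm-sensors output:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SensorsGroupDemo {
  public static void main(String[] args) {
    // hypothetical 'sensors -A' output
    String sample = "fan1:  3750 RPM\ntemp1: +42 C\ntemp2: +39 C\n";
    // same pattern shape as readGroup(er, sb, "temp")
    Pattern p = Pattern.compile(".*(temp\\s*\\d*)\\s*:\\s*(\\+?\\d+)",
        Pattern.MULTILINE);
    Matcher m = p.matcher(sample);
    while (m.find())
      System.out.println(m.group(1) + " = " + m.group(2));  // temp1 = +42, temp2 = +39
  }
}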
@@ -1,163 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.text.DateFormat;

/**********************************************************
 * Objects of this class hold the serialized representations
 * of EventRecords. A SerializedRecord is essentially an EventRecord
 * with all its property values converted to strings. It also provides
 * some convenience methods for printing the property fields in a
 * more readable way.
 *
 **********************************************************/

public class SerializedRecord {

  HashMap<String, String> fields;
  private static DateFormat dateFormatter =
    DateFormat.getDateTimeInstance(DateFormat.LONG, DateFormat.LONG);

  /**
   * Create the SerializedRecord given an EventRecord.
   */
  public SerializedRecord(EventRecord source) {
    fields = new HashMap<String, String>();

    for (String k : source.getMap().keySet()) {
      ArrayList<String> strs = getStrings(source.getMap().get(k));
      if (strs.size() == 1)
        fields.put(k, strs.get(0));
      else
        for (int i = 0; i < strs.size(); i++)
          fields.put(k + "#" + i, strs.get(i));
    }

  }

  /**
   * Extract String representations from an Object.
   *
   * @param o the input object
   *
   * @return an ArrayList that contains Strings found in o
   */
  private ArrayList<String> getStrings(Object o) {
    ArrayList<String> retval = new ArrayList<String>();
    if (o == null)
      retval.add("null");
    else if (o instanceof String)
      retval.add((String) o);
    else if (o instanceof Calendar)
      retval.add(dateFormatter.format(((Calendar) o).getTime()));
    else if (o instanceof InetAddress[])
      for (InetAddress ip : ((InetAddress[]) o))
        retval.add(ip.getHostAddress());
    else if (o instanceof String[])
      for (String s : (String []) o)
        retval.add(s);
    else
      retval.add(o.toString());

    return retval;
  }

  /**
   * Set the value of a property of the EventRecord.
   *
   * @param fieldName the name of the property to set
   * @param fieldValue the value of the property to set
   *
   */
  public void set(String fieldName, String fieldValue) {
    fields.put(fieldName, fieldValue);
  }

  /**
   * Get the value of a property of the EventRecord.
   * If the property with the specific key is not found,
   * null is returned.
   *
   * @param fieldName the name of the property to get.
   */
  public String get(String fieldName) {
    return fields.get(fieldName);
  }

  /**
   * Arrange the keys to provide a more readable printing order:
   * first goes the timestamp, then the hostname and then the type, followed
   * by all other keys found.
   *
   * @param keys The input ArrayList of keys to re-arrange.
   */
  public static void arrangeKeys(ArrayList<String> keys) {
    move(keys, "timestamp", 0);
    move(keys, "hostname", 1);
    move(keys, "type", 2);
  }

  private static void move(ArrayList<String> keys, String key, int position) {
    int cur = keys.indexOf(key);
    if (cur == -1)
      return;
    keys.set(cur, keys.get(position));
    keys.set(position, key);
  }

  /**
   * Check if the SerializedRecord is a valid one, i.e., whether
   * it represents meaningful metric values.
   *
   * @return true if the EventRecord is a valid one, false otherwise.
   */
  public boolean isValid() {
    return !("invalid".equalsIgnoreCase(fields.get("hostname")));
  }


  /**
   * Creates and returns a string representation of the object
   *
   * @return a String representing the object
   */

  public String toString() {
    String retval = "";
    ArrayList<String> keys = new ArrayList<String>(fields.keySet());
    arrangeKeys(keys);

    for (int i = 0; i < keys.size(); i++) {
      String value = fields.get(keys.get(i));
      if (value == null)
        retval += keys.get(i) + ":\tnull\n";
      else
        retval += keys.get(i) + ":\t" + value + "\n";
    }
    return retval;
  }
}
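A short sketch of the key ordering arrangeKeys() produces, assuming the failmon classes are on the classpath; the key names are illustrative:

import java.util.ArrayList;
import java.util.Arrays;

public class ArrangeKeysDemo {
  public static void main(String[] args) {
    ArrayList<String> keys = new ArrayList<String>(
        Arrays.asList("ipAddress", "type", "hostname", "timestamp", "status"));
    SerializedRecord.arrangeKeys(keys);
    // timestamp, hostname and type move to the front; displaced keys take
    // the slots they vacated
    System.out.println(keys);  // [timestamp, hostname, type, ipAddress, status]
  }
}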
@@ -1,102 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**********************************************************
 * Objects of this class parse the output of system command-line
 * utilities that can give information about the state of
 * various hardware components in the system. Typically, each such
 * object either invokes a command and reads its output or reads the
 * output of one such command from a file on the disk. Currently
 * supported sources include ifconfig, smartmontools, lm-sensors
 * and /proc/cpuinfo.
 *
 **********************************************************/

public abstract class ShellParser implements Monitored {

  /**
   * Find the first occurrence of a pattern in a piece of text
   * and return a specific group.
   *
   * @param strPattern the regular expression to match
   * @param text the text to search
   * @param grp the number of the matching group to return
   *
   * @return a String containing the matched group of the regular
   * expression, or null if no match is found
   */
  protected String findPattern(String strPattern, String text, int grp) {

    Pattern pattern = Pattern.compile(strPattern, Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(text);

    if (matcher.find(0))
      return matcher.group(grp);

    return null;
  }

  /**
   * Finds all occurrences of a pattern in a piece of text and returns
   * the matching groups.
   *
   * @param strPattern the regular expression to match
   * @param text the text to search
   * @param grp the number of the matching group to return
   * @param separator the string that separates occurrences in the returned value
   *
   * @return a String that contains all occurrences of strPattern in text,
   * separated by separator
   */
  protected String findAll(String strPattern, String text, int grp,
      String separator) {

    String retval = "";
    boolean firstTime = true;

    Pattern pattern = Pattern.compile(strPattern);
    Matcher matcher = pattern.matcher(text);

    while (matcher.find()) {
      retval += (firstTime ? "" : separator) + matcher.group(grp);
      firstTime = false;
    }

    return retval;
  }

  /**
   * Insert all EventRecords that can be extracted for
   * the represented hardware component into a LocalStore.
   *
   * @param ls the LocalStore into which the EventRecords
   * are to be stored.
   */
  public void monitor(LocalStore ls) {
    ls.insert(monitor());
  }

  abstract public EventRecord[] monitor();

  abstract public EventRecord query(String s) throws Exception;

}
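A self-contained sketch of what findAll() computes: every occurrence of the group, joined by the separator. The input text and pattern are made up:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FindAllDemo {
  public static void main(String[] args) {
    String text = "eth0 eth1 eth2";
    Pattern p = Pattern.compile("(eth\\d)");
    Matcher m = p.matcher(text);
    String retval = "";
    boolean firstTime = true;
    while (m.find()) {                         // every occurrence, not just the first
      retval += (firstTime ? "" : ", ") + m.group(1);
      firstTime = false;
    }
    System.out.println(retval);                // eth0, eth1, eth2
  }
}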
@@ -1,126 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.contrib.failmon;

import java.io.IOException;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**********************************************************
 * An object of this class parses a Unix system log file to create
 * appropriate EventRecords. Currently, only the syslogd logging
 * daemon is supported.
 *
 **********************************************************/

public class SystemLogParser extends LogParser {

  static String[] months = { "January", "February", "March", "April", "May",
      "June", "July", "August", "September", "October", "November", "December" };

  /**
   * Create a new parser object.
   */
  public SystemLogParser(String fname) {
    super(fname);
    if ((dateformat = Environment.getProperty("log.system.dateformat")) == null)
      dateformat = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)";
    if ((timeformat = Environment.getProperty("log.system.timeformat")) == null)
      timeformat = "\\d{2}:\\d{2}:\\d{2}";
  }

  /**
   * Parses one line of the log. If the line contains a valid
   * log entry, then an appropriate EventRecord is returned, after all
   * relevant fields have been parsed.
   *
   * @param line the log line to be parsed
   *
   * @return the EventRecord representing the log entry of the line. If
   * the line does not contain a valid log entry, then the EventRecord
   * returned has isValid() = false. When the end-of-file has been reached,
   * null is returned to the caller.
   */
  public EventRecord parseLine(String line) throws IOException {

    EventRecord retval = null;

    if (line != null) {
      // process line
      String patternStr = "(" + dateformat + ")";
      patternStr += "\\s+";
      patternStr += "(" + timeformat + ")";
      patternStr += "\\s+(\\S*)\\s"; // for hostname
      // patternStr += "\\s*([\\w+\\.?]+)"; // for source
      patternStr += ":?\\s*(.+)"; // for the message
      Pattern pattern = Pattern.compile(patternStr);
      Matcher matcher = pattern.matcher(line);
      if (matcher.find()) {
        retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
            matcher.group(4)), "SystemLog", "Unknown", // loglevel
            "Unknown", // source
            matcher.group(6)); // message
      } else {
        retval = new EventRecord();
      }
    }

    return retval;
  }

  /**
   * Parse a date found in the system log.
   *
   * @return a Calendar representing the date
   */
  protected Calendar parseDate(String strDate, String strTime) {
    Calendar retval = Calendar.getInstance();
    // set date
    String[] fields = strDate.split("\\s+");
    retval.set(Calendar.MONTH, parseMonth(fields[0]));
    retval.set(Calendar.DATE, Integer.parseInt(fields[1]));
    // set time
    fields = strTime.split(":");
    retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
    retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
    retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
    return retval;
  }

  /**
   * Convert the name of a month to the corresponding int value.
   *
   * @return the int representation of the month.
   */
  private int parseMonth(String month) {
    for (int i = 0; i < months.length; i++)
      if (months[i].startsWith(month)) // matches full and abbreviated names
        return i;
    return -1;
  }

  /**
   * Return a String with information about this class
   *
   * @return A String describing this class
   */
  public String getInfo() {
    return ("System Log Parser for file: " + file.getAbsoluteFile());
  }
}
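A self-contained sketch of the line format parseLine() expects, using the default date/time patterns above; the sample syslog line is invented. (The deleted code also guarded the match with "matcher.groupCount() >= 0", which is always true, so the listing above drops it.)

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SyslogLineDemo {
  public static void main(String[] args) {
    String line = "Feb 11 09:15:02 node17 kernel: sda: drive cache: write back";
    String dateformat = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)";
    String timeformat = "\\d{2}:\\d{2}:\\d{2}";
    // same assembly as parseLine(): date, time, hostname, message
    Pattern p = Pattern.compile("(" + dateformat + ")\\s+(" + timeformat
        + ")\\s+(\\S*)\\s:?\\s*(.+)");
    Matcher m = p.matcher(line);
    if (m.find())
      System.out.println("date=" + m.group(1) + " time=" + m.group(4)
          + " host=" + m.group(5) + " msg=" + m.group(6));
  }
}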