#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Looks for any running zombies left over from old build runs.
# Reports stale processes and tries to take a stack trace of each so we
# can figure out how they are hung. Echoes state on STDERR as the script
# runs, but prints the final output on STDOUT, formatted so that it
# folds into the test result formatting done by test-patch.sh.
# This script is called from test-patch.sh but also after tests
# have run up on builds.apache.org.

# TODO: format output to suit context -- test-patch, jenkins or dev env

#set -x
# printenv

### Setup some variables.
# Directory this script was invoked from; "$0" is quoted so that paths
# containing whitespace survive word splitting.
bindir=$(dirname -- "$0")

# This key is set by our surefire configuration up in the main pom.xml
# This key needs to match the key we set up there.
HBASE_BUILD_ID_KEY="hbase.build.id="
JENKINS=

# External commands. Each can be overridden from the environment or with
# the matching --*-cmd= option (see printUsage). They are expanded
# unquoted on purpose so that a value may carry extra arguments.
PS=${PS:-ps}
AWK=${AWK:-awk}
WGET=${WGET:-wget}
GREP=${GREP:-grep}
JIRACLI=${JIRA:-jira}

###############################################################################
# Print command-line help on STDOUT.
printUsage() {
  echo "Usage: $0 [options]" BUILD_ID
  echo
  echo "Where:"
  echo "  BUILD_ID is build id to look for in process listing"
  echo
  echo "Options:"
  echo "--ps-cmd=CMD     The 'ps' command to use (default 'ps')"
  echo "--awk-cmd=CMD    The 'awk' command to use (default 'awk')"
  echo "--grep-cmd=CMD   The 'grep' command to use (default 'grep')"
  echo
  echo "Jenkins-only options:"
  echo "--jenkins        Run by Jenkins (runs tests and posts results to JIRA)"
  echo "--wget-cmd=CMD   The 'wget' command to use (default 'wget')"
  echo "--jira-cmd=CMD   The 'jira' command to use (default 'jira')"
}

###############################################################################
# Parse the command line.
# Arguments: the script's arguments; pass them as "$@".
# Globals written: JENKINS, PS, AWK, WGET, GREP, JIRACLI, BUILD_ID.
# Any argument that is not a recognised option is taken as the BUILD_ID
# (the last one wins). BUILD_ID is not reset first, so a value already
# present in the environment is accepted when none is given.
# Prints usage and exits 1 when BUILD_ID ends up empty.
parseArgs() {
  local arg
  for arg in "$@"; do
    case "${arg}" in
      --jenkins)
        JENKINS=true
        ;;
      --ps-cmd=*)
        PS=${arg#*=}
        ;;
      --awk-cmd=*)
        AWK=${arg#*=}
        ;;
      --wget-cmd=*)
        WGET=${arg#*=}
        ;;
      --grep-cmd=*)
        GREP=${arg#*=}
        ;;
      --jira-cmd=*)
        JIRACLI=${arg#*=}
        ;;
      *)
        BUILD_ID=${arg}
        ;;
    esac
  done
  if [[ -z "${BUILD_ID}" ]]; then
    printUsage
    exit 1
  fi
}

### Return list of the processes found with passed build id.
# Globals read: GREP, HBASE_BUILD_TAG.
# Outputs: the 'jps -v' lines for surefirebooter JVMs whose arguments
# contain HBASE_BUILD_TAG. The tag is matched as a fixed string (-F) so
# the dots in it are not treated as regex wildcards.
find_processes() {
  jps -v | ${GREP} surefirebooter | ${GREP} -F -e "${HBASE_BUILD_TAG}"
}

### Look for zombies
# Globals read: PS, AWK, GREP, HBASE_BUILD_TAG, JIRA_COMMENT.
# Outputs: progress on STDERR; one JIRA-markup verdict line on STDOUT.
# Returns: 0 when no zombie remains, 1 when at least one candidate is
# still running after the grace period. The function returns rather
# than exits so the caller can map the status to an exit code.
zombies() {
  local candidates count=0
  candidates=$(find_processes)
  if [[ -n "${candidates}" ]]; then
    # xargs strips the padding some 'wc' implementations put around the count.
    count=$(echo "${candidates}" | wc -l | xargs)
  fi

  if [[ "${count}" == 0 ]]; then
    echo "$(date) We're ok: there is no zombie test" >&2
    echo "$JIRA_COMMENT {color:green}+1 zombies{color}. No zombie tests found running at the end of the build."
    return 0
  fi

  local wait_secs=30
  echo "$(date) Found ${count} suspicious java process(es) listed below; waiting ${wait_secs}s to see if just slow to stop" >&2
  # Quoted so that each candidate stays on its own line.
  echo "${candidates}" >&2
  sleep "${wait_secs}"

  local pids pid ps_output stack stacks=""
  pids=$(echo "${candidates}" | ${AWK} '{print $1}')
  count=0
  for pid in ${pids}; do
    # Test our zombie still running (and that it still an hbase build item).
    # '-o args=' asks for the full command line without a header row, and
    # '-ww' stops it being truncated; the default 'ps -p' format may show
    # only the executable name, which can never contain the build tag.
    ps_output=$(${PS} -ww -o args= -p "${pid}" | ${GREP} -F -e "${HBASE_BUILD_TAG}")
    if [[ -n "${ps_output}" ]]; then
      echo "$(date) Zombie: ${pid} ${ps_output}" >&2
      count=$((count + 1))
      # Keep only the first few test-class frames of the stack dump.
      stack=$(jstack "${pid}" | ${GREP} -e "\.Test" | ${GREP} -e "\.java" | head -n 3)
      echo "${stack}" >&2
      # Literal \n here; it is expanded by 'echo -e' below.
      stacks="${stacks}\nPID=${pid} ${stack}"
    fi
  done

  if [[ "${count}" != 0 ]]; then
    echo "$(date) There are ${count} possible zombie test(s)." >&2
    # If JIRA_COMMENT in environment, append our findings to it
    echo -e "$JIRA_COMMENT {color:red}+1 zombies{color}. There are ${count} possible zombie test(s) ${stacks}"
    return 1
  fi

  echo "$(date) We're ok: there was a zombie candidate but it went away" >&2
  echo "$JIRA_COMMENT {color:green}+1 zombies{color}. No zombie tests found running at the end of the build (There were candidates but they seem to have gone away)."
  return 0
}

### Check if arguments to the script have been specified properly or not
parseArgs "$@"
HBASE_BUILD_TAG="${HBASE_BUILD_ID_KEY}${BUILD_ID}"
zombies
RESULT=$?
# Under Jenkins a zombie finding is reported with exit code 100;
# otherwise the status from zombies (0 or 1) is passed through.
if [[ "${JENKINS}" == "true" && "${RESULT}" != 0 ]]; then
  exit 100
fi
exit "${RESULT}"