#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Looks for any running zombies left over from old build runs.
# Will report and try to do stack trace on stale processes so can
# figure how they are hung. Echos state as the script runs
# on STDERR but prints final output on STDOUT formatted so it
# will fold into the test result formatting done by test-patch.sh.
# This script is called from test-patch.sh but also after tests
# have run up on builds.apache.org.

# TODO: format output to suit context -- test-patch, jenkins or dev env

#set -x
# printenv

### Setup some variables.  
bindir=$(dirname $0)

# This key is set by our surefire configuration up in the main pom.xml
# This key needs to match the key we set up there.
HBASE_BUILD_ID_KEY="hbase.build.id="
JENKINS=

PS=${PS:-ps}
AWK=${AWK:-awk}
WGET=${WGET:-wget}
GREP=${GREP:-grep}
JIRACLI=${JIRA:-jira}

###############################################################################
printUsage() {
  echo "Usage: $0 [options]" BUILD_ID
  echo
  echo "Where:"
  echo "  BUILD_ID is build id to look for in process listing"
  echo
  echo "Options:"
  echo "--ps-cmd=<cmd>         The 'ps' command to use (default 'ps')"
  echo "--awk-cmd=<cmd>        The 'awk' command to use (default 'awk')"
  echo "--grep-cmd=<cmd>       The 'grep' command to use (default 'grep')"
  echo
  echo "Jenkins-only options:"
  echo "--jenkins              Run by Jenkins (runs tests and posts results to JIRA)"
  echo "--wget-cmd=<cmd>       The 'wget' command to use (default 'wget')"
  echo "--jira-cmd=<cmd>       The 'jira' command to use (default 'jira')"
}

###############################################################################
parseArgs() {
  for i in $*
  do
    case $i in
    --jenkins)
      JENKINS=true
      ;;
    --ps-cmd=*)
      PS=${i#*=}
      ;;
    --awk-cmd=*)
      AWK=${i#*=}
      ;;
    --wget-cmd=*)
      WGET=${i#*=}
      ;;
    --grep-cmd=*)
      GREP=${i#*=}
      ;;
    --jira-cmd=*)
      JIRACLI=${i#*=}
      ;;
    *)
      BUILD_ID=$i
      ;;
    esac
  done
  if [ -z "$BUILD_ID" ]; then
    printUsage
    exit 1
  fi
}

### Return list of the processes found with passed build id.
find_processes () {
  jps -v | grep surefirebooter | grep -e "${HBASE_BUILD_TAG}"
}

### Look for zombies
zombies () {
  ZOMBIES=`find_processes`
  if [[ -z ${ZOMBIES} ]]
  then
    ZOMBIE_TESTS_COUNT=0
  else
    ZOMBIE_TESTS_COUNT=`echo "${ZOMBIES}"| wc -l| xargs`
  fi
  if [[ $ZOMBIE_TESTS_COUNT != 0 ]] ; then
    wait=30
    echo "`date` Found ${ZOMBIE_TESTS_COUNT} suspicious java process(es) listed below; waiting ${wait}s to see if just slow to stop" >&2
    echo ${ZOMBIES} >&2
    sleep ${wait}
    PIDS=`echo "${ZOMBIES}"|${AWK} '{print $1}'`
    ZOMBIE_TESTS_COUNT=0
    for pid in $PIDS
    do
      # Test our zombie still running (and that it still an hbase build item)
      PS_OUTPUT=`ps -p $pid | tail +2 | grep -e "${HBASE_BUILD_TAG}"`
      if [[ ! -z "${PS_OUTPUT}" ]]
      then
        echo "`date` Zombie: $PS_OUTPUT" >&2
        let "ZOMBIE_TESTS_COUNT+=1"
        PS_STACK=`jstack $pid | grep -e "\.Test" | grep -e "\.java"| head -3`
        echo "${PS_STACK}" >&2
        ZB_STACK="${ZB_STACK}\nPID=${pid} ${PS_STACK}"
      fi
    done
    if [[ $ZOMBIE_TESTS_COUNT != 0 ]]
    then
      echo "`date` There are ${ZOMBIE_TESTS_COUNT} possible zombie test(s)." >&2
      # If JIRA_COMMENT in environment, append our findings to it
      echo -e "$JIRA_COMMENT

    {color:red}+1 zombies{red}. There are ${ZOMBIE_TESTS_COUNT} possible zombie test(s)
        ${ZB_STACK}"
      # Exit with exit code of 1.
      exit 1
    else
      echo "`date` We're ok: there was a zombie candidate but it went away" >&2
      echo "$JIRA_COMMENT

    {color:green}+1 zombies{color}. No zombie tests found running at the end of the build (There were candidates but they seem to have gone away)."
    fi
  else
      echo "`date` We're ok: there is no zombie test" >&2
      echo "$JIRA_COMMENT

    {color:green}+1 zombies{color}. No zombie tests found running at the end of the build."
  fi
}

### Check if arguments to the script have been specified properly or not
parseArgs $@
HBASE_BUILD_TAG="${HBASE_BUILD_ID_KEY}${BUILD_ID}"
zombies
RESULT=$?
if [[ $JENKINS == "true" ]] ; then
  if [[ $RESULT != 0 ]] ; then
    exit 100
  fi
fi
RESULT=$?