#!/bin/bash
# process_chk.sh_cur - remote process check
#
# Written by: dufs
# E-mail:     fengshan.du@zznode.com
# Date:       2016-09-19 13:00
#
# Collects "ps -fu <user>" from every host listed in conf/appchk.conf over
# ssh, compares the java processes found with conf/process_cur.conf and
# raises alerts for duplicated or missing processes.

# Name this script was invoked as, without any directory part.
SH_NAME=$(basename "$0")
# Root of the monitoring installation (bin/, conf/, data/, log/, pyChk/).
SH_HOME=$HOME/zsjk
# Log file: the script name up to its last ".sh" plus ".log". The basename is
# used instead of raw $0 so that invoking the script through a path
# (e.g. /home/x/zsjk/bin/process_chk.sh_cur) does not inject directories
# into the log file name.
SH_Log=$SH_HOME/log/${SH_NAME%.sh*}.log
  #######################################
  # Append a timestamped message to the script log and rotate the log once
  # it grows past the size limit.
  # Globals:
  #   SH_Log            (read) path of the log file
  #   SH_LOG_MAX_BYTES  (read, optional) rotation threshold in bytes;
  #                     defaults to 10240000
  # Arguments:
  #   $@ - words of the message; they are joined with single spaces
  # Outputs:
  #   Appends "<YYYY-MM-DD HH:MM:SS> :  <message>" to $SH_Log. On rotation
  #   the log content is appended to $SH_Log.<YYYY-MM-DD>.gz and the live
  #   log is emptied.
  # Returns:
  #   0 always; logging problems never abort the caller.
  #######################################
  outlog() {
    local max_bytes=${SH_LOG_MAX_BYTES:-10240000}
    local log_size archive

    printf '%s :  %s\n' "$(date '+%Y-%m-%d %T')" "$*" >> "$SH_Log"

    # wc -c gives the size directly; no need to parse ls output.
    log_size=$(wc -c 2>/dev/null < "$SH_Log") || return 0
    if [ "${log_size:-0}" -gt "$max_bytes" ]; then
      archive="$SH_Log.$(date '+%Y-%m-%d').gz"
      # Appending a new gzip member keeps earlier rotations of the same day:
      # a plain "gzip file" refuses to overwrite an existing archive.
      # The live log is only emptied once its content is safely archived.
      if gzip -c "$SH_Log" >> "$archive"; then
        : > "$SH_Log"
      fi
    fi
    return 0
  }
  outlog =============================================================
  outlog start check

  # Prefix placed at the start of every alert message.
  sysapp="【集中故障】"

  # SQL fragments written before and after the quoted alert text when the
  # file handed to sendMsg.sh is assembled.
  msgTemp1=$SH_HOME/conf/sendmsg_temp_begin.sql
  msgTemp2=$SH_HOME/conf/sendmsg_temp_end.sql
  if [[ ! -f "$msgTemp1" || ! -f "$msgTemp2" ]]; then
    # The original logged "$$msgTemp2", i.e. the shell PID followed by the
    # literal text "msgTemp2", instead of the missing path.
    outlog "can not find $msgTemp1 or $msgTemp2, please check !!!"
    exit 0
  fi

  # Host list; the polling loop reads lines of the form ip,user,ssh_port.
  ip_list=$SH_HOME/conf/appchk.conf
  if [[ ! -f "$ip_list" ]]; then
    outlog "can not find $ip_list, please check !!!"
    exit 0
  fi

  # Accumulates the process keys found on all hosts.
  psInfo=""
  # Start each run with an empty per-host result file.
  : > "$SH_HOME/data/ps_info_cur.txt"

  # Run stamp (yymmddHHMM). %H is the hour; the former %h expanded to the
  # abbreviated month name. The value is not used by the visible code.
  sess=$(date +'%y%m%d%H%M')

  # The helper scripts are started as ./name, so bin/ must be the working
  # directory; continuing elsewhere would make every alert fail.
  if ! cd "$SH_HOME/bin"; then
    outlog "can not cd to $SH_HOME/bin, please check !!!"
    exit 0
  fi
  # Poll every host in $ip_list (one "ip,user,ssh_port" entry per line) and
  # record the java process keys it reports.
  # The list is read on fd 3 so ssh and the helper scripts keep the script's
  # own stdin. Reading whole lines (instead of word-splitting the file) keeps
  # the words of a "# comment with spaces" from being treated as hosts.
  while read -r line <&3 || [[ -n "$line" ]]; do
    # Skip blank lines and comment lines (first non-blank character is #).
    if [[ -z "$line" || "$line" == \#* ]]; then
      continue
    fi

    outlog -------------------------------------------------------------
    IFS=, read -r Local_IP chk_user ssh_port _ <<< "$line"
    outlog "read config: Local_IP $Local_IP"
    outlog "read config: chk_user $chk_user"
    outlog "read config: ssh_port $ssh_port"
    if [[ -z "$Local_IP" || -z "$chk_user" || -z "$ssh_port" ]]; then
      outlog "malformed line in $ip_list: $line, please check !!!"
      continue
    fi

    ps_file=$SH_HOME/data/ps.$Local_IP.$ssh_port
    ssh -t -p "$ssh_port" "$chk_user@$Local_IP" "ps -fu $chk_user" > "$ps_file"
    rowNum=$(wc -l < "$ps_file")

    if [ "${rowNum:-0}" -eq 0 ]; then
      # No output at all: the host could not be queried, so raise an alert.
      outlog "can not get 'ps -fu $chk_user' from $Local_IP, please check !!!"
      curT=$(date '+%Y-%m-%d %T')
      msg="${sysapp}${curT}获取${Local_IP}的ps -fu ${chk_user}信息失败,请检查。"

      # Build the SQL file like the other alerts in this script do
      # (begin template + quoted message + end template). The former sed
      # call read from the undefined variable $msgTemp, i.e. from stdin.
      sql_file=$SH_HOME/bin/process_chk_$Local_IP.$ssh_port.sql
      {
        cat "$msgTemp1"
        printf "'%s'\n" "$msg"
        cat "$msgTemp2"
      } > "$sql_file"
      send_out=$(./sendMsg.sh "process_chk_$Local_IP.$ssh_port.sql")
      # Keep the helper's output on a single log line.
      outlog "${send_out//$'\n'/ }"
      rm -f "$sql_file"
      ./zsjkAlarmIsert.sh "进程监控,1,$msg,10.102.52.9,zsjk/bin/process_chk.sh_cur"
      continue
    fi

    # For every "java -D..." line take the first word after "java ", turn
    # each "-D" into "D" and join the results with single spaces.
    # Carriage returns are dropped in case ssh -t allocated a terminal.
    psCur=$(awk -F'java ' '
      /java -D/ {
        n = split($2, w, " ")
        if (n > 0) {
          gsub(/\r/, "", w[1])
          gsub(/-D/, "D", w[1])
          printf "%s%s", sep, w[1]
          sep = " "
        }
      }' "$ps_file")
    psInfo="$psCur $psInfo"
    outlog "running process : $psCur"
    printf '%s_%s,%s\n' "$Local_IP" "$ssh_port" "$psCur" \
      >> "$SH_HOME/data/ps_info_cur.txt"
    outlog -------------------------------------------------------------
  done 3< "$ip_list"
  #outlog all running process : $psInfo
  # Write every process key collected in $psInfo to allps_cur.list, one per
  # line. awk does the whitespace splitting, so the keys are never exposed
  # to shell globbing; an empty $psInfo yields an empty file.
  printf '%s\n' "$psInfo" \
    | awk '{ for (i = 1; i <= NF; i++) print $i }' \
    > "$SH_HOME/data/allps_cur.list"
  # List of processes to watch; the key is the second ":"-separated field
  # of each non-comment line.
  ps_conf=$SH_HOME/conf/process_cur.conf

  # check 1: duplicated processes
  # psRept_cur.list receives "name,count" for every key seen more than once.
  sort "$SH_HOME/data/allps_cur.list" \
    | uniq -c \
    | awk '$1 > 1 { print $2 "," $1 }' \
    > "$SH_HOME/data/psRept_cur.list"

  # The list is read on fd 3 so the helper scripts keep the script's stdin.
  while IFS=, read -r psName psNum <&3 || [[ -n "$psName" ]]; do
    [[ -n "$psName" ]] || continue

    # Only alert for processes named on a non-comment line of $ps_conf.
    # -F matches the name literally; dots in it are not regex wildcards.
    isChk=$(grep -v '^#' "$ps_conf" | grep -Fwc -- "$psName")
    if [ "${isChk:-0}" -eq 0 ]; then
      continue
    fi

    # Space-separated list of the "ip_port" entries running this process.
    psPos=$(grep -Fw -- "$psName" "$SH_HOME/data/ps_info_cur.txt" \
      | awk -F, '{ printf "%s%s", sep, $1; sep = " " }')
    outlog "repeat process : $psName , repeat times : $psNum , running on $psPos"

    curT=$(date '+%Y-%m-%d %T')
    msg="${sysapp}${curT}监测到${psName}进程运行重复${psNum}次,该进程同时运行在${psPos},请检查。"
    outlog "send mesge : $msg"

    # SQL file = begin template + quoted message + end template.
    sql_file=$SH_HOME/bin/process_chk_rep_cur.sql
    {
      cat "$msgTemp1"
      printf "'%s'\n" "$msg"
      cat "$msgTemp2"
    } > "$sql_file"
    send_out=$(./sendMsg.sh process_chk_rep_cur.sql)
    # Keep the helper's output on a single log line.
    outlog "${send_out//$'\n'/ }"
    rm -f "$sql_file"
    ./zsjkAlarmIsert.sh "进程监控,1,$msg,10.102.52.9,zsjk/bin/process_chk.sh_cur"
  done 3< "$SH_HOME/data/psRept_cur.list"
  # check 2: missing processes
  if [[ ! -f "$ps_conf" ]]; then
    outlog "can not find $ps_conf , please check !!!"
    exit 0
  fi

  # Space-separated keys of configured processes found on no host.
  psNoR=""
  # "|| [[ -n ... ]]" also processes a final line without a newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Strip leading whitespace, then skip blank lines and comment lines.
    entry="${line#"${line%%[![:space:]]*}"}"
    if [[ -z "$entry" || "$entry" == \#* ]]; then
      continue
    fi

    # The process key is the second ":"-separated field.
    IFS=: read -r _ proKey _ <<< "$line"
    # A line without a key would otherwise be reported as a missing
    # process with an empty name.
    [[ -n "$proKey" ]] || continue

    # -F matches the key literally instead of as a regular expression.
    proNum=$(grep -Fwc -- "$proKey" "$SH_HOME/data/ps_info_cur.txt")
    if [ "${proNum:-0}" -eq 0 ]; then
      psNoR="$proKey $psNoR"
    fi
  done < "$ps_conf"
  # Report the outcome of check 2 and alert when processes are missing.
  if [[ -z "$psNoR" ]]; then
    outlog all process is running . it is OK.
  else
    # $psNoR ends with a separator space; drop it for the log line only.
    outlog "find not running process : ${psNoR% } please check !!"
    curT=$(date '+%Y-%m-%d %T')
    msg="${sysapp}${curT}监测到以下进程未运行:${psNoR}请检查。"
    outlog "send mesge : $msg"

    # SQL file = begin template + quoted message + end template.
    sql_file=$SH_HOME/bin/process_chk_notrun_cur.sql
    {
      cat "$msgTemp1"
      printf "'%s'\n" "$msg"
      cat "$msgTemp2"
    } > "$sql_file"
    # sendMsg.sh is started relative to the current directory.
    send_out=$(./sendMsg.sh process_chk_notrun_cur.sql)
    # Keep the helper's output on a single log line.
    outlog "${send_out//$'\n'/ }"

    # SMS recipient; override with PROCESS_CHK_SMS_TO, default unchanged.
    sms_to=${PROCESS_CHK_SMS_TO:-13730885681}
    # Subshell: a failed cd skips the SMS instead of running python in the
    # wrong directory, and the script's own working directory is untouched.
    (cd "$SH_HOME/pyChk/" && python Demo_sms.pyo "$sms_to" "$msg")

    rm -f "$sql_file"
    "$SH_HOME/bin/zsjkAlarmIsert.sh" "进程监控,1,$msg,10.102.52.9,zsjk/bin/process_chk.sh_cur"
  fi

  # Publish this run's per-host process list next to the configuration.
  cp "$SH_HOME/data/ps_info_cur.txt" "$SH_HOME/conf/ps_info_cur.txt"
  #rm $SH_HOME/data/ps.*
  outlog =============================================================
  exit 0