Zabbix如何自定义脚本监控RAC11g
2019-08-27 本文已影响0人
阿乐_822e
Zabbix上Rac的模板较少,必须得自己实现。大致思路是:每个node上分别监控CPU、Memory、Disk等指标,再创建一个SCAN_IP主机,主要用于监控RAC相关的指标。
提示:本文假设数据库版本为oracle11g,2个node,且所有机器的zabbix agent都安装在/usr/local/zabbix-agent目录下。
前期准备:在每个节点上,把zabbix用户加入到oracle的用户组oinstall,命令为:usermod -a -G oinstall zabbix(如果zabbix用户尚不存在,则改用 useradd -G oinstall zabbix 创建),随后查看一下:
1.png
下面开始安装:
1、 分别把四个sh文件拷到每个节点的/usr/local/zabbix-agent/scripts/目录下并加上执行权限,再逐一检查文件中的PATH路径是否正确,如不正确要修改好;
- SCAN_IP归属地:chk_scanip.sh文件
#!/bin/bash
# chk_scanip.sh: report which cluster node currently hosts the SCAN VIP.
#
# Prints the node name(s) extracted from `srvctl status scan` on one line, or
# the sentinel "missed" when nothing could be extracted. The template's
# triggers on rac.cluster.chkscanip look for that sentinel.
export PATH=$PATH:/u01/app/11.2.0/grid/bin

# Keep field 8 of every line containing "on" (same filter as the former
# grep/awk pipeline) and join the hits with spaces so the value stays on a
# single line even when several SCAN VIPs are listed.
# NOTE(review): field 8 is presumably the node name in
# "SCAN VIP scanN is running on node <name>" - confirm on the target cluster.
scan_master_node=$(srvctl status scan | awk '/on/ && $8 != "" { out = (out == "" ? $8 : out " " $8) } END { print out }')

# The variable is quoted so an empty or multi-word value cannot break the test.
# The assignment must not carry a leading "$": the previous
# `$scan_master_node="missed"` tried to run a command instead of assigning,
# so the sentinel was never emitted.
if [ -z "$scan_master_node" ]; then
    scan_master_node="missed"
fi

echo "$scan_master_node"
- 检查ASM状态:chk_asmstatus.sh
#!/bin/bash
# chk_asmstatus.sh: ASM status check.
#
# Prints the fifth whitespace-separated field of `srvctl status asm`.
# NOTE(review): with output such as "ASM is running on node1,node2" that field
# would be the node list - confirm on the target cluster.
#
# The interpreter line has to be the very first line of the file to take
# effect, so it now precedes this header comment.
export PATH=$PATH:/u01/app/11.2.0/grid/bin
srvctl status asm | awk '{print $5}'
- 检查在用实例:chk_instancestatus.sh
#!/bin/bash
# chk_instancestatus.sh <db_name>: list the nodes with a running instance.
#
# Argument:
#   $1  database name passed to `srvctl status database -d`.
# Output:
#   Node names joined with " , " (e.g. "rac1 , rac2"), or "none" when no
#   line reports a running instance.
#
# The interpreter line has to be the first line of the file to take effect,
# so it now precedes this header comment.
export PATH=$PATH:/u01/app/11.2.0/grid/bin
dbName=$1

if [ -z "$dbName" ]; then
    echo "usage: $0 <db_name>" >&2
    exit 1
fi

# Parse line by line instead of relying on fixed positions ($7 / $14) in the
# flattened output: a line such as "Instance X is not running on node Y" has
# one extra word, which shifted every later field and reported wrong values
# for the healthy node as well.
# NOTE(review): assumes srvctl prints "... is running on node <name>" with the
# node name as the last field - confirm on the target cluster.
# "none" is printed instead of an empty string so the agent always returns a
# value.
srvctl status database -d "$dbName" | awk '
    / is running on node / { out = (out == "" ? $NF : out " , " $NF) }
    END { print (out == "" ? "none" : out) }
'
- 检查节点状态:chk_nodestatus.sh
#!/bin/bash
# chk_nodestatus.sh <node_name>: print the status of one cluster node.
#
# Argument:
#   $1  node name as listed in the first column of `olsnodes -s`.
# Output:
#   The second column of the matching `olsnodes -s` line; nothing when the
#   node is not listed.
#
# The interpreter line has to be the first line of the file to take effect,
# so it now precedes this header comment.
export PATH=$PATH:/u01/app/11.2.0/grid/bin
node=$1

# Compare the first column exactly instead of using an unquoted substring
# grep: "rac1" no longer also matches "rac10", and an empty argument simply
# yields no output rather than a grep usage error.
olsnodes -s | awk -v node="$node" '$1 == node { print $2 }'
2、分别修改每个节点的zabbix_agentd.conf文件,添加自定义项,再重启agent;
# Custom RAC checks: each key runs one script from /usr/local/zabbix-agent/scripts/.
# chkscanip and chkasmstatus call their script without arguments;
# chknodestatus and chkinstancestatus forward the first key parameter ($1) to the script.
UserParameter=rac.cluster.chkscanip[*],/usr/local/zabbix-agent/scripts/chk_scanip.sh
UserParameter=rac.cluster.chknodestatus[*],/usr/local/zabbix-agent/scripts/chk_nodestatus.sh $1
UserParameter=rac.cluster.chkinstancestatus[*],/usr/local/zabbix-agent/scripts/chk_instancestatus.sh $1
UserParameter=rac.cluster.chkasmstatus[*],/usr/local/zabbix-agent/scripts/chk_asmstatus.sh
3、在zbx监控的网页上,创建一台RAC的SCAN_IP地址的机器,再在此机器上创建三个宏变量
{$DB_NAME} :填入srvctl config database命令的结果
{$NODE1_NAME}:填入第一个节点的hostname
{$NODE2_NAME}:填入第二个节点的hostname
2.png
4、将附件中的内容另存为RAC_Cluster_Check_11g.xml模板文件,先导入zabbix,再应用到步骤3创建的机器上
5、最后的图形显示如下:
3.png
附件:模板文件RAC_Cluster_Check_11g.xml
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<!-- Export metadata: format version 3.4, the export timestamp (UTC, "Z" suffix)
     and the host group "Templates" referenced by this export. -->
<version>3.4</version>
<date>2019-08-27T07:50:37Z</date>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<templates>
<template>
<!-- Template identity: technical name and visible name are both
     RAC_Cluster_Check_11g. The template is placed in the "Templates" group and
     declares one application, RAC_Cluster_Check, to which every item below is
     assigned. -->
<template>RAC_Cluster_Check_11g</template>
<name>RAC_Cluster_Check_11g</name>
<description/>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<applications>
<application>
<name>RAC_Cluster_Check</name>
</application>
</applications>
<items>
<item>
<!-- "Check ASM status": polls the agent key rac.cluster.chkasmstatus every
     minute (delay 1m), keeps 90 days of history and no trends. type 0 and
     value_type 1 are the Zabbix agent item type and the character value type
     in the Zabbix export format. The SNMP, IPMI, SSH and JMX fields are left
     at their empty or zero values. Member of the RAC_Cluster_Check
     application. -->
<name>检查ASM状态</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>rac.cluster.chkasmstatus</key>
<delay>1m</delay>
<history>90d</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>RAC_Cluster_Check</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<preprocessing/>
<jmx_endpoint/>
<master_item/>
</item>
<item>
<!-- "Check online instances": polls rac.cluster.chkinstancestatus with the
     host macro {$DB_NAME} as its key parameter, once a minute (delay 1m).
     History is kept for 90 days, trends are off. type 0 and value_type 1 are
     the Zabbix agent item type and the character value type in the Zabbix
     export format. Member of the RAC_Cluster_Check application. -->
<name>检查在线实例</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>rac.cluster.chkinstancestatus[{$DB_NAME}]</key>
<delay>1m</delay>
<history>90d</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>RAC_Cluster_Check</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<preprocessing/>
<jmx_endpoint/>
<master_item/>
</item>
<item>
<!-- "Check node 1 status": polls rac.cluster.chknodestatus with the host
     macro {$NODE1_NAME} as its key parameter, once a minute (delay 1m).
     History is kept for 90 days, trends are off. type 0 and value_type 1 are
     the Zabbix agent item type and the character value type in the Zabbix
     export format. Member of the RAC_Cluster_Check application. -->
<name>检查节点一状态</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>rac.cluster.chknodestatus[{$NODE1_NAME}]</key>
<delay>1m</delay>
<history>90d</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>RAC_Cluster_Check</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<preprocessing/>
<jmx_endpoint/>
<master_item/>
</item>
<item>
<!-- "Check node 2 status": polls rac.cluster.chknodestatus with the host
     macro {$NODE2_NAME} as its key parameter, once a minute (delay 1m).
     History is kept for 90 days, trends are off. type 0 and value_type 1 are
     the Zabbix agent item type and the character value type in the Zabbix
     export format. Member of the RAC_Cluster_Check application. -->
<name>检查节点二状态</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>rac.cluster.chknodestatus[{$NODE2_NAME}]</key>
<delay>1m</delay>
<history>90d</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>RAC_Cluster_Check</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<preprocessing/>
<jmx_endpoint/>
<master_item/>
</item>
<item>
<!-- "Check which machine owns the SCAN IP": polls the agent key
     rac.cluster.chkscanip every minute (delay 1m), keeps 90 days of history
     and no trends. type 0 and value_type 1 are the Zabbix agent item type and
     the character value type in the Zabbix export format. Two triggers in
     this export evaluate this item: one on value changes, one on the literal
     "missed". Member of the RAC_Cluster_Check application. -->
<name>检查scanip归属机器</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>rac.cluster.chkscanip</key>
<delay>1m</delay>
<history>90d</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>RAC_Cluster_Check</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<preprocessing/>
<jmx_endpoint/>
<master_item/>
</item>
</items>
<!-- The template defines no discovery rules, web scenarios, template-level
     macros, linked templates or screens. -->
<discovery_rules/>
<httptests/>
<macros/>
<templates/>
<screens/>
</template>
</templates>
<triggers>
<trigger>
<!-- "SCAN IP has drifted": fires when the latest rac.cluster.chkscanip value
     differs from the previous one (change()=1) and does not contain the
     string "missed" (str(missed)=0). Priority 2 is Warning on the Zabbix
     severity scale. No separate recovery expression (recovery_mode 0). -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chkscanip.change()}=1 and {RAC_Cluster_Check_11g:rac.cluster.chkscanip.str(missed)}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>SCANIP发生漂移</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>2</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "SCAN IP may be lost": fires when the latest rac.cluster.chkscanip value
     contains the string "missed" (str(missed)=1). Priority 4 is High on the
     Zabbix severity scale. No separate recovery expression
     (recovery_mode 0). -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chkscanip.str(missed)}=1</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>SCAN_IP可能丢失</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>4</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "ASM is not running on {$NODE1_NAME}": fires when the latest
     rac.cluster.chkasmstatus value does not contain the value of the
     {$NODE1_NAME} macro (str(...)=0). Priority 2 is Warning on the Zabbix
     severity scale. -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chkasmstatus.str({$NODE1_NAME})}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>{$NODE1_NAME} 上的asm未运行</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>2</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "Instance is not running on {$NODE1_NAME}": fires when the latest
     rac.cluster.chkinstancestatus[{$DB_NAME}] value does not contain the
     value of the {$NODE1_NAME} macro (str(...)=0). Priority 2 is Warning on
     the Zabbix severity scale. -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chkinstancestatus[{$DB_NAME}].str({$NODE1_NAME})}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>{$NODE1_NAME} 上的实例未运行</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>2</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "ASM is not running on {$NODE2_NAME}": fires when the latest
     rac.cluster.chkasmstatus value does not contain the value of the
     {$NODE2_NAME} macro (str(...)=0). Priority 2 is Warning on the Zabbix
     severity scale. -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chkasmstatus.str({$NODE2_NAME})}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>{$NODE2_NAME} 上的asm未运行</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>2</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "Instance is not running on {$NODE2_NAME}": fires when the latest
     rac.cluster.chkinstancestatus[{$DB_NAME}] value does not contain the
     value of the {$NODE2_NAME} macro (str(...)=0). Priority 2 is Warning on
     the Zabbix severity scale. -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chkinstancestatus[{$DB_NAME}].str({$NODE2_NAME})}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>{$NODE2_NAME} 上的实例未运行</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>2</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "Node {$NODE1_NAME} is offline": fires when the latest
     rac.cluster.chknodestatus[{$NODE1_NAME}] value does not contain the
     string "Active" (str(Active)=0). Priority 3 is Average on the Zabbix
     severity scale. -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chknodestatus[{$NODE1_NAME}].str(Active)}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>节点:{$NODE1_NAME} 下线</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>3</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
<trigger>
<!-- "Node {$NODE2_NAME} is offline": fires when the latest
     rac.cluster.chknodestatus[{$NODE2_NAME}] value does not contain the
     string "Active" (str(Active)=0). Priority 3 is Average on the Zabbix
     severity scale. The trigger name uses the same "macro, space, text"
     layout as the node 1 trigger so both alerts render consistently. -->
<expression>{RAC_Cluster_Check_11g:rac.cluster.chknodestatus[{$NODE2_NAME}].str(Active)}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>节点:{$NODE2_NAME} 下线</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>3</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger>
</triggers>
</zabbix_export>