Elves 自动化运维

2020-01-21  本文已影响0人  柘龙义

为什么需要自动化运维

 # 大批量机器靠人工运维有以下弊端(以配置更新为例):
    # 运维时间长: 每一台都需要远程上去更改
    # 容易出错: 人工输入命令极其容易出错
    # 结果反馈不明显: 需要靠人工自己判断
    # 回退麻烦: 出错回退无法保证
 # ps: 离职交接的时候极其繁琐

为什么选用Elves

# 安装简单
# 界面管理
# 扩展性极其强悍(使用编程)
# 上手快
# 运维范围广
# 安全管理

Elves安装

  # 没有安装docker可以安装(默认的docker版本会比较低),已经有docker跳过这一步
    centos:
      yum install -y yum-utils device-mapper-persistent-data lvm2
      yum-config-manager \
        --add-repo \
        https://download.docker.com/linux/centos/docker-ce.repo
      yum makecache fast
      yum install -y --setopt=obsoletes=0 \
        docker-ce-18.06.1.ce-3.el7

  # 没有安装docker-compose的可以安装,已经有docker-compose跳过这一步
    centos: yum install -y docker-compose
  # docker安装 Elves
    git clone https://github.com/elves-project/docker.git
    cd docker
    chmod u+x ./control
    ./control build     # 下载Base镜像并构建新镜像。 心细的小伙伴可以自己把镜像tag成自己的,再安装避免rebuild。
    ./control start     # 调用docker-compose启动各容器,也可以docker-compose up -d.
    ./control insertsql # 插入Elves 数据表结构. 失败了可以手动执行,手动查看标注
    ./control restart   # 组件依赖mysql,重启容器刷新程序.
    # 为了正常执行,更改ftp目录权限
    docker exec -it vsftp bash # 进入容器
       chown -R ftpuser:ftpuser /data/ # 修改权限
  # 标注
    Elves-Dashboard页面端口: 8004
    Elves-supervisor页面端口:9092 ; user/password: admin@gyyx.cn/admin
    Rabbitmq 页面端口:15672 ; user/password: admin/1q2w3e4r
    Nginx 页面端口:80
    Ftp 端口:21 ; user/passwd: ftpuser/1q2w3e4r

使用Elves

注册主机
git clone https://github.com/elves-project/agent.git
cd agent
cp conf/cfg.example.json conf/cfg.json
vi conf/cfg.json # 更改配置ip,asset以及服务器的配置
chmod u+x ./control
./control start # 在服务界面上查看添加结果,可能需要几分钟同步心跳。
访问 Elves-supervisor: 本机: 127.0.0.1:9092
elves-supervisor
 # 如上图:
 #  agent列表: 注册主机的列表
 #  app管理: 我们运维逻辑,zip包管理
 # auth管理: app 密钥,使用app的时候需要验证

上传app

app
# 如图:
  # 点击右上角 + 添加
  # 添加完成后点app右边的编辑图标(橙色小笔)上传app包(zip,制作参考 “app 制作”)
  # 上传成功后点击app中间(淡蓝色)选择版本启用
  # 启用成功后点击app最右边(深蓝色)选择需要运维的主机
  # 之后到auth管理设置app的密钥
  # 之后通过命令启用即可自动运维(启动命令参看 “app 启动”)

app 制作 (以machineCheck,开发语言选python2 为例)

# 需要特定的目录结构(这里选择实时反馈型)
  tree machineCheck
   ├── appcfg.json
   ├── app-worker.py
   └── machineCheck.py
appcfg.json 配置文件
{
    "Processor":{
        "Commnet"       :   "This Is Processor CFG , Do Not Use For Other",
        "Addr"          :   "127.0.0.1",
        "Port"          :   10010,
        "Timeout"       :   0
    }
}
app-worker.py app的入口
#!/usr/bin/python
# coding=utf-8  
# Author: toryzen  
#
#   app worker入口

import sys
import json
import base64
import os
import traceback
# Make modules that sit next to this worker script (the app's own module)
# importable. sys.path entries must be directories, so the script's directory
# is appended rather than the path of the script file itself.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

def agentExec(app,func,jsonParam=""):
    """Run one method of an Elves app and print its outcome.

    The module named ``app`` is imported, the attribute of the same name is
    taken from it and instantiated without arguments, and its method ``func``
    is called with the decoded parameter. The method must return a
    ``(flag, result)`` pair, which is handed to ``elvesPrint``.

    Args:
        app: Name of both the module to import and the class inside it.
        func: Name of the method to call on the class instance.
        jsonParam: Base64-encoded JSON text; an empty string means
            "no parameter" and the method receives ``""``.

    Any exception (bad payload, missing module/class/method, failure inside
    the method) is reported as flag ``"false"`` with the traceback as result.
    """
    flag = "false"
    try:
        param = ""
        if jsonParam != "":
            # Decode the payload to text and parse it directly. Going through
            # repr() and slicing off the quotes re-escapes backslashes, quotes
            # and non-ASCII bytes, which corrupts or rejects valid JSON.
            param = json.loads(base64.b64decode(jsonParam).decode("utf-8"))

        agentObj = __import__(app)
        agentClass = getattr(agentObj, app)  # class carrying the app logic
        obj = agentClass()
        mtd = getattr(obj, func)  # method requested by the caller
        # The method is always called with one argument, so every app method
        # must accept a parameter even when it ignores it.
        flag, result = mtd(param)
    except Exception:
        flag, result = "false", traceback.format_exc()
    elvesPrint(flag, result)
    
def elvesPrint(flag,result):
    """Print the outcome wrapped in the ElvesWFlag / ElvesWResult tags.

    Args:
        flag: Status value; converted with ``str()``.
        result: Result payload; converted with ``str()``.

    A single parenthesised argument prints the same line under Python 2 and
    Python 3, unlike the print statement.
    """
    print("<ElvesWFlag>"+str(flag)+"</ElvesWFlag> <ElvesWResult>"+str(result)+"</ElvesWResult>")
    
if __name__ == '__main__':
    # Expected command line: <app> <func> [<base64 json param>].
    # Anything else is reported as a parameter error.
    cli_args = sys.argv[1:]
    if len(cli_args) in (2, 3):
        agentExec(*cli_args)
    else:
        elvesPrint("false","param error")
machineCheck.py 实现逻辑
#!/usr/bin/python
# coding: utf-8

import logging
import socket
import traceback
import commands
import os
import json

# The file handler below opens info.log inside this directory, so the
# directory has to exist before the handler is created.
if not os.path.isdir('/var/log/elves/machinecheck'):
    os.makedirs('/var/log/elves/machinecheck')

# Module-level logger: INFO and above go to info.log, one
# "<timestamp> - <message>" line per record.
logger = logging.getLogger(__name__)
logger.setLevel(level=logging.INFO)
handler = logging.FileHandler('/var/log/elves/machinecheck/info.log')
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)


class machineCheck: # name must match the zip package prefix; the worker looks the class up by the module's name
    """Elves app that flags processes running too many threads.

    ``check`` is the method invoked through the Elves worker. The other public
    methods are static helpers it relies on.
    """

    # A process with more threads than this is reported as unhealthy.
    THREAD_LIMIT = 1500

    @staticmethod
    def _shell_output(command):
        """Run *command* through the shell and return its output as text."""
        # The helper lives in `commands` on Python 2 and in `subprocess` on
        # Python 3; resolving it here keeps the class usable on both.
        try:
            from subprocess import getoutput
        except ImportError:
            from commands import getoutput
        return getoutput(command)

    @staticmethod
    def _over_limit(pids, limit):
        """Return the distinct entries of *pids* that occur more than *limit* times."""
        from collections import Counter
        # One counting pass instead of list.count() per distinct pid.
        return [pid for pid, seen in Counter(pids).items() if seen > limit]

    @staticmethod
    def threads_over(limit=None):
        """Return the PIDs that appear on more than *limit* lines of ``ps -xH``.

        With ``H`` ps lists threads as separate lines, so the number of lines
        carrying a PID is that process's thread count.

        Args:
            limit: Threshold to exceed; defaults to ``THREAD_LIMIT``.

        Returns:
            List of PID strings, in no particular order.
        """
        if limit is None:
            limit = machineCheck.THREAD_LIMIT
        pids = machineCheck._shell_output("ps -xH|awk '{ print $1}'").split('\n')
        return machineCheck._over_limit(pids, limit)

    @staticmethod
    def write_log(flag, result):
        """Write the outcome to the module logger.

        Any flag other than ``'success'`` or ``'error'`` is logged as a generic
        execution failure. (Per the original author's note the result is also
        stored in the Elves server's MySQL.)
        """
        message = 'status: %s, message: %s' % (flag, result)

        if flag not in ('success', 'error'):
            message = 'status: error, message: Function exec failed!'

        logger.info(message)

    @staticmethod
    def _record_and_alert(over_threads):
        """Append the offending processes to the error file and send the alert."""
        over_threads_command = ["pid: " + pid + machineCheck._shell_output('cat /proc/%s/cmdline' % pid) + '\n' for pid in over_threads]
        with open('/var/log/elves/machinecheck/error', 'a') as f:
            json.dump(over_threads_command, f)
            # One JSON document per line, so repeated runs stay parseable.
            f.write('\n')
        # Resolved only when an alert is actually sent, so a resolver failure
        # cannot turn a healthy host into an error result.
        ip = socket.gethostbyname(socket.getfqdn(socket.gethostname()))
        # DingTalk robot webhook; access_token is a placeholder to replace.
        os.system("""curl 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxx'  -H 'Content-Type: application/json' -d '{"msgtype": "text","text": {"content": "threads too mush in %s"}}'""" % ip)

    def check(self, params=""):
        """Check the host's thread counts and report the outcome.

        Args:
            params: Unused; present because the worker always passes one
                argument to the method it calls.

        Returns:
            ``(flag, text)`` where flag is ``"true"`` for a healthy host and
            ``"false"`` otherwise, and text is
            ``'status: <status>, message: <message>!'``.
        """
        return_flag, return_result = ('error', 'Internal Error!')

        try:
            over_threads = machineCheck.threads_over()
            if over_threads:
                return_flag, return_result = ('error', 'have process\' threads more than %d! Check /var/log/elves/machinecheck/error' % machineCheck.THREAD_LIMIT)
                machineCheck._record_and_alert(over_threads)
            else:
                return_flag, return_result = ('success', 'host is healthy')
        except Exception:
            return_flag, return_result = ('error', traceback.format_exc())

        # Reporting happens after the try block rather than in a `finally`
        # that returns, so KeyboardInterrupt/SystemExit are no longer swallowed.
        machineCheck.write_log(return_flag, return_result)
        return_result = 'status: %s, message: %s!' % (return_flag, return_result)
        return ("true" if return_flag == "success" else "false", return_result)


# Nothing happens when this file is run directly: the app-worker script
# imports the module and calls the requested method on the class.
if __name__ == '__main__':
    pass

制作zip包

cd machineCheck
zip ../machineCheck_1.0.zip * # 这里1.0是版本,elves会自己获取。
chown 1000:1000 ../machineCheck_1.0.zip # 让浏览器可以加载(zip包生成在上一级目录)。

app 启动

这里还是以machineCheck为例,我写成了python3脚本调用。启动需要访问openapi(统一入口,8080端口),api详情查看 “Elves Api”

签名

# 调用前需要普及个概念,Elves交互时的签名认证
  # Elves 签名使用md5签名.
  # 拼凑签名字段:  请求路径 + ? + 参数(按字母排序,并且不带sign_type和sign) + auth_key(在服务端9092端口的auth管理界面)
  # 签名: hashlib.md5(签名字段.encode('utf-8')).hexdigest() # python中,下面案例详细讲解

运行app进行测试

machineCheck.py
#!/usr/bin/env python3
# coding: utf-8
import os
import json
import time
import hashlib
import requests
# Request settings for the real-time exec call.
server_url = "http://127.0.0.1:8080"  # base URL of the Elves openapi
ip = "10.1.9.173" # IP of the host to run the app on
path = "/api/v2/rt/exec"  # endpoint path; also the first part of the signed string
func = "check"  # method of the app to call
param = ""  # argument for the method; JSON-encoded before signing
app = "machineCheck"  # app name
auth_id = "0906DDE6518477A8" # id from the auth management page
authkey = "FF6DB1AB43393D3F" # key from the auth management page; appended when signing
sign_type = 'MD5'  # not read by the functions below, which hard-code sign_type=MD5 in the URL

def get_sign():
    """Build the query string for the exec request and its MD5 signature.

    The signed text is ``path + '?' + query + authkey``. The query lists its
    parameters in alphabetical order and leaves out ``sign_type`` and ``sign``.

    Returns:
        ``(query, signature)``: the query string without signature fields and
        the hex MD5 digest of the signed text.
    """
    now = int(time.time())
    encoded_param = json.dumps(param)
    query = "app=%s&auth_id=%s&func=%s&ip=%s&param=%s&timestamp=%s" % (app, auth_id, func, ip, encoded_param, now)
    digest = hashlib.md5()
    digest.update((path + '?' + query + authkey).encode('utf-8'))
    return query, digest.hexdigest()

def send_bytes(sign, params, timeout=60):
    """POST the signed request to the Elves openapi and return the reply text.

    Args:
        sign: Hex MD5 signature of the request.
        params: Query string that was signed (without sign_type/sign).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unreachable or stalled server blocks the call forever.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    url = server_url + path + "?%s&sign_type=MD5&sign=%s" % (params, sign)
    response = requests.post(url, timeout=timeout)
    return response.text

if __name__ == "__main__":
    # Sign the request, send it, and show the server's reply.
    query, signature = get_sign()
    reply = send_bytes(signature, query)
    print(reply)

使用定时任务来启动

add-cron.py
#!/usr/bin/env python3
import os
import json
import time
import hashlib
import requests
import sys
import json
import subprocess


# Base URL of the Elves openapi. Written as a regular dotted quad; the
# previous "127.0.01" only worked where the resolver accepts shorthand forms.
server_url = "http://127.0.0.1:8080"
# The target host IP is the first command-line argument; without it the
# script prints a usage hint and exits with status -1.
ip = sys.argv[1] if len(sys.argv) > 1 else None

if ip is None:
    print('Error: exp. python x.py $IP')
    sys.exit(-1)

path = "/api/v2/cron/add" # endpoint path; also the first part of the signed string
func = "check" # method of the app to schedule
app = "machineCheck" # app name
rule = "0 0 */1 * * ?" # cron rule in Quartz syntax (not Linux cron); presumably hourly - confirm
mode = "NP"  # NOTE(review): meaning of "NP" is not visible here - check the Elves API docs
auth_id = "0906DDE6518477A8" # id from the auth management page
authkey = "FF6DB1AB43393D3F" # key from the auth management page; appended when signing
timestamp = int(time.time()) # Unix timestamp in seconds, taken once at start-up
sign_type = 'MD5'  # not read by the functions below, which hard-code sign_type=MD5 in the URL

def get_sign():
    """Build the query string for the cron-add request and its MD5 signature.

    The signed text is ``path + '?' + query + authkey``. The query lists its
    parameters in alphabetical order and leaves out ``sign_type`` and ``sign``.

    Returns:
        ``(query, signature)``: the query string without signature fields and
        the hex MD5 digest of the signed text.
    """
    query = "app=%s&auth_id=%s&func=%s&ip=%s&mode=%s&rule=%s&timestamp=%s" % (app, auth_id, func, ip, mode, rule, timestamp)
    digest = hashlib.md5()
    digest.update((path + '?' + query + authkey).encode('utf-8'))
    return query, digest.hexdigest()

def send_bytes(sign, params, timeout=60):
    """POST the signed request to the Elves openapi and return the reply text.

    Args:
        sign: Hex MD5 signature of the request.
        params: Query string that was signed (without sign_type/sign).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unreachable or stalled server blocks the call forever.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    url = server_url + path + "?%s&sign_type=MD5&sign=%s" % (params, sign)
    response = requests.post(url, timeout=timeout)
    return response.text

if __name__ == "__main__":
    params, sign = get_sign()
    message = send_bytes(sign, params)
    print('Add cron:', message)

    # The new job's id is read from result.id of the JSON reply. A reply
    # without it (error response, non-JSON body) stops the script here instead
    # of crashing or recording and starting a bogus id.
    try:
        cron_id = json.loads(message)['result']['id']
    except (ValueError, KeyError, TypeError):
        print('Error: no cron id in the response; nothing recorded or started')
        sys.exit(-1)

    # Keep a local "<cron_id> <ip>" record. Written directly rather than via a
    # shell `echo`, so the command-line supplied ip never reaches a shell.
    with open('./cron_id', 'a') as record:
        record.write('%s %s\n' % (cron_id, ip))

    # Start the job with the companion script; an argument list avoids shell
    # parsing, and stderr is folded into the captured output.
    started = subprocess.run(
        ["python3", "./start-cron.py", str(cron_id)],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    output = started.stdout
    if output.endswith('\n'):
        output = output[:-1]
    print('Start cron:', output)
start-cron.py
#!/usr/bin/env python3
import os
import json
import time
import hashlib
import requests
import sys

# Endpoint of the cron-start call on the Elves openapi.
server_url = "http://127.0.0.1:8080"
path = "/api/v2/cron/start"

# The id of the cron job to start is the first command-line argument; without
# it the script prints a usage hint and exits with status -1.
cron_id = sys.argv[1] if len(sys.argv) > 1 else None

if cron_id is None:
    print('Error: exp. python x.py $cron_id')
    sys.exit(-1)

# Credentials used for signing, plus the request timestamp (Unix seconds).
auth_id = "0906DDE6518477A8"
authkey = "FF6DB1AB43393D3F"
timestamp = int(time.time())
sign_type = 'MD5'

def get_sign():
    """Build the query string for the cron-start request and its MD5 signature.

    The signed text is ``path + '?' + query + authkey``. The query lists its
    parameters in alphabetical order and leaves out ``sign_type`` and ``sign``.

    Returns:
        ``(query, signature)``: the query string without signature fields and
        the hex MD5 digest of the signed text.
    """
    query = "auth_id=%s&cron_id=%s&timestamp=%s" % (auth_id, cron_id, timestamp)
    digest = hashlib.md5()
    digest.update((path + '?' + query + authkey).encode('utf-8'))
    return query, digest.hexdigest()

def send_bytes(sign, params, timeout=60):
    """POST the signed request to the Elves openapi and return the reply text.

    Args:
        sign: Hex MD5 signature of the request.
        params: Query string that was signed (without sign_type/sign).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unreachable or stalled server blocks the call forever.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    url = server_url + path + "?%s&sign_type=MD5&sign=%s" % (params, sign)
    response = requests.post(url, timeout=timeout)
    return response.text

if __name__ == "__main__":
    # Sign the request, send it, and show the server's reply.
    query, signature = get_sign()
    reply = send_bytes(signature, query)
    print(reply)

标注

# 签名的时候一定要保证参数齐全且顺序排列正确,不然签名验证不通过
# cron规则如果报mysql字段范围错误,可以手动更改mysql字段
    alter table task_cron modify column mode varchar(6) not null;
# cron 规则遵循quartz cron,和linux cron有区别,周那里使用? 代替

参考文献

Elves 官网: [https://gy-games.gitbooks.io/elves/module/elves-app.html](https://gy-games.gitbooks.io/elves/module/elves-app.html)
Elves-Api: [https://gy-games.gitbooks.io/elves/api.html](https://gy-games.gitbooks.io/elves/api.html)
Elves-docker: [https://github.com/elves-project/docker](https://github.com/elves-project/docker)

上一篇下一篇

猜你喜欢

热点阅读