Python全栈工程师

26.3-代码合并和分析

2019-10-24  本文已影响0人  BeautifulSoulpy

load函数就是从日志中提取合格的数据的生成器函数。

它可以作为dispatcher函数的数据源。

原来写的handler函数(avg_handler)读取的是字典的'value'字段,而日志抽取函数extract返回的字典中没有这个字段,所以它不能直接处理extract返回的字典,需要提供一个新的handler函数!

代码跑起来

# !/usr/bin/env python
# encoding:utf-8
'''
@auther:administrator

'''

import random  # 产生随机数;
import re, datetime, time, threading
from queue import Queue


# Sample access-log line; it must be defined because `matcher` below is
# computed from it at import time.
line = '''183.69.210.164 - - [07/Apr/2017:09:32:40 +0800] "GET /index.php?m=login HTTP/1.1" 200 3661 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"'''

# Raw string so that regex escapes such as \d, \[ and \S are not treated as
# (invalid) Python string escapes.
# NOTE(review): the last group is spelled 'userggent' (presumably meant to be
# 'useragent'); it is kept unchanged because it becomes a key of the dicts
# built from groupdict().
pattern = r'(?P<remote>[\d.]{7,15}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<method>[^" ]+) (?P<url>[^" ]+) (?P<protocol>[^" ]+)" (?P<status>\d+) (?P<size>\d+) \S+ "(?P<userggent>[^"]*)"'

regex = re.compile(pattern)
matcher = regex.match(line)

# Per-field converters applied to the captured strings; fields without an
# entry here are left as plain strings.
ops = {
    'datetime': lambda dstr: datetime.datetime.strptime(dstr, '%d/%b/%Y:%H:%M:%S %z'),
    'status': int, 'size': int
}

# Extraction
def extract(line: str):
    """Parse one access-log line into a dict of converted fields.

    The line is matched against the module-level ``regex``. Every named group
    is passed through its converter from ``ops`` when one is registered there;
    otherwise the captured string is kept unchanged.

    Returns the field dict, or ``None`` when the line does not match.
    """
    matched = regex.match(line)
    if not matched:
        return None

    record = {}
    for name, text in matched.groupdict().items():
        convert = ops.get(name)
        record[name] = text if convert is None else convert(text)
    return record
# print(extract(line))

# Data source
def loadfile(filename: str, encoding='utf-8'):
    """Yield the parsed record of every matching line in one log file.

    Each line of ``filename`` (opened with ``encoding``) is passed to
    ``extract``. Lines that parse into a dict are yielded; any other line is
    reported on stdout and skipped.
    """
    with open(filename, encoding=encoding) as f:
        for line in f:
            fields = extract(line)
            if isinstance(fields, dict):
                yield fields
            else:
                # TODO: drop unparsable lines silently or send them to a log.
                # Show the offending line itself: `fields` carries no
                # information when parsing failed.
                print("No match.{!r}".format(line))

from pathlib import Path

# File and directory handling
def load(*paths, encoding='utf-8', ext='*.log', r=False):
    """Yield parsed log records from any mix of files and directories.

    Args:
        *paths: File or directory paths. Paths that are neither an existing
            file nor an existing directory are skipped.
        encoding: Text encoding forwarded to ``loadfile`` for every file.
        ext: A glob pattern, or an iterable of glob patterns, used to select
            files inside directories.
        r: When true, directories are searched recursively (``rglob``).

    Yields:
        The record dicts produced by ``loadfile``.
    """
    # Normalise once so that a single pattern and a collection of patterns
    # are handled the same way for every directory.
    patterns = [ext] if isinstance(ext, str) else list(ext)

    for p in paths:
        path = Path(p)
        if path.is_dir():
            for e in patterns:
                logs = path.rglob(e) if r else path.glob(e)
                for log in logs:
                    # A directory can match the glob too; only open files.
                    if log.is_file():
                        yield from loadfile(str(log.absolute()), encoding)
        elif path.is_file():
            yield from loadfile(str(path.absolute()), encoding)

# Demo: print every record parsed from test.log.
for record in load('test.log'):
    print(record)

# 生成器;

# Simulated data source
def source(seconds=1):
    """Endlessly yield fake samples, pausing ``seconds`` between them.

    Each sample is a dict with a timezone-aware ``'datetime'`` (UTC+8, taken
    at the moment the sample is produced) and a random integer ``'value'``
    between 1 and 100 inclusive.
    """
    utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))
    while True:
        sample = {
            'datetime': datetime.datetime.now(utc_plus_8),
            'value': random.randint(1, 100),
        }
        yield sample
        time.sleep(seconds)

s = source()
# Collect a few samples up front; `items` is the input of the avg_handler
# demo further down.
items = [next(s) for _ in range(3)]

# Analysis / handler functions
# Average
def avg_handler(iterable):
    """Return the arithmetic mean of the ``'value'`` field of the items.

    Args:
        iterable: Any iterable of mappings that each have a ``'value'`` key;
            lists as well as one-shot iterators/generators are accepted.

    Raises:
        ZeroDivisionError: If ``iterable`` yields no items.
        KeyError: If an item has no ``'value'`` key.
    """
    # Materialise once so the values can be both summed and counted, even
    # when the input does not support len().
    values = [item['value'] for item in iterable]
    return sum(values) / len(values)

# Demo: average the collected samples and print it with two decimals.
ret = avg_handler(items)
print(format(ret, '.2f'))



# Window function
def window(q: Queue, handler, width: int, interval: int):
    """Consume records from ``q`` forever and run ``handler`` on time windows.

    Every truthy record taken from the queue is buffered and its
    ``'datetime'`` becomes the current time. Whenever more than ``interval``
    seconds separate the current time from the start of the window,
    ``handler`` is called with the buffer and its result is printed with two
    decimals. The window start then moves to the current time and only
    records newer than ``current - (width - interval)`` seconds are kept.

    This function never returns; ``q.get()`` blocks while the queue is empty.
    """
    buf = []
    # Both timestamps start far in the past, so the first record always
    # exceeds the interval.
    epoch = datetime.datetime.strptime('19700101 00:00:01 +0800', '%Y%m%d %H:%M:%S %z')
    start = epoch
    current = epoch
    overlap = datetime.timedelta(seconds=width - interval)

    while True:
        data = q.get()  # blocks until a record is available
        if data:
            buf.append(data)
            current = data['datetime']
        print(current, start)

        elapsed = (current - start).total_seconds()
        if not elapsed > interval:
            continue

        print('~~~~~~~~~~~~~')
        ret = handler(buf)
        print('{:.2f}'.format(ret))
        print(threading.current_thread())
        start = current

        # Drop records that fell out of the overlap carried into the next
        # window.
        cutoff = current - overlap
        buf = [item for item in buf if item['datetime'] > cutoff]

# Dispatcher: feeds the data to every registered window
def dispatcher(src):
    """Build a fan-out from one data source to several window threads.

    Args:
        src: Iterable of records (an iterator/generator or any other
            iterable).

    Returns:
        A ``(reg, run)`` pair of functions:

        * ``reg(handler, width, interval)`` creates a queue and a thread that
          will run ``window`` on it with the given handler and sizes.
        * ``run()`` starts all registered threads and then puts every record
          of ``src`` into every queue. It returns when ``src`` is exhausted;
          the window threads are not stopped and keep waiting on their
          queues.
    """
    handlers = []  # thread objects, one per registered handler
    queues = []

    def reg(handler, width, interval):
        q = Queue()
        t = threading.Thread(target=window, args=(q, handler, width, interval))

        queues.append(q)
        handlers.append(t)

    def run():
        for t in handlers:
            t.start()

        # A for-loop ends cleanly on a finite source instead of letting
        # StopIteration escape from a bare next() call.
        for data in src:
            for q in queues:
                q.put(data)

    return reg, run

if __name__ == '__main__':
    src = load('test.log')
    # Do not loop over `s` here: source() never terminates, so iterating it
    # would block before the dispatcher is ever started.
    reg, run = dispatcher(src)

    # NOTE(review): avg_handler reads item['value'], but the named groups in
    # `pattern` produce no 'value' key, so records coming from load() need a
    # handler written for log fields — confirm which handler belongs here.
    reg(avg_handler, 10, 5)
    print(threading.current_thread())
    run()
#----------------------------------------------------
C:\Users\Administrator\PycharmProjects\learn\venv\Scripts\python.exe C:/Users/Administrator/PycharmProjects/learn/ttt3.py
test.log
123.125.71.36 - - [06/Apr/2017:18:09:25 +0800] "GET / HTTP/1.1" 200 8642 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"

112.64.118.97 - - [06/Apr/2017:19:13:59 +0800] "GET /favicon.ico HTTP/1.1" 200 4101 "-" "Dalvik/2.1.0 (Linux; U; Android 5.1.1; SM-G9250 Build/LMY47X)"

119.123.183.219 - - [06/Apr/2017:20:59:39 +0800] "GET /favicon.ico HTTP/1.1" 200 4101 "-" "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

140.205.201.44 - - [07/Apr/2017:08:11:06 +0800] "GET / HTTP/1.1" 200 8642 "http://job.magedu.com/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;Alibaba.Security.Heimdall.950384.p)"

183.69.210.164 - - [07/Apr/2017:09:32:39 +0800] "GET /member/ HTTP/1.1" 302 31 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:40 +0800] "GET /index.php?m=login HTTP/1.1" 200 3661 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/css.css HTTP/1.1" 200 8803 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/css.css HTTP/1.1" 200 8803 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/login.css HTTP/1.1" 200 3080 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/login.css HTTP/1.1" 200 3080 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /index.php?m=ajax&c=RedLoginHead HTTP/1.1" 200 294 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:53 +0800] "GET /app/include/authcode.inc.php HTTP/1.1" 200 384 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:54 +0800] "GET /js/layer/skin/layer.css HTTP/1.1" 200 1601 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

183.69.210.164 - - [07/Apr/2017:09:32:55 +0800] "GET /js/layer/skin/default/xubox_ico0.png HTTP/1.1" 200 32954 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
上一篇下一篇

猜你喜欢

热点阅读