Metis

重构Metis之数据库Model

2020-11-17  本文已影响0人  万州客

重构一下Metis,可以学好时间序列异常检测的技术,还有前后端知识。

URL:

https://git.code.tencent.com/Tencent_Open_Source/Metis

Models文件

from django.db import models
from django.contrib.auth import get_user_model

# Resolve the user model through Django's get_user_model() helper so the
# foreign keys below reference whatever model that helper returns.
User = get_user_model()


# 指标集
class ViewSet(models.Model):
    """Metric set: a named group that ``Attr`` rows reference.

    Stored in table ``ViewSet`` and listed most recently updated first.
    """

    # Identifier and display name of the metric set.
    view_id = models.CharField(verbose_name='指标集id', max_length=64)
    view_name = models.CharField(verbose_name='指标集名称', max_length=64)
    # Creation / last-modification timestamps, filled in by Django.
    create_date = models.DateTimeField(verbose_name='新建时间', auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间', auto_now=True)
    # Boolean status flag, True by default.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        ordering = ('-update_date', )
        db_table = 'ViewSet'

    def __str__(self):
        """Return the metric set's display name."""
        return self.view_name


# 指标
class Attr(models.Model):
    """A single metric that belongs to one ``ViewSet``.

    Stored in table ``Attr`` and listed most recently updated first.
    """

    # Identifier and display name of the metric.
    attr_id = models.CharField(verbose_name='指标id', max_length=64)
    attr_name = models.CharField(verbose_name='指标名称', max_length=64)
    # Owning metric set; deleting the set deletes its metrics (CASCADE).
    view_set = models.ForeignKey(
        ViewSet,
        verbose_name='指标集',
        on_delete=models.CASCADE,
        related_name='ra_attr',
    )
    # Creation / last-modification timestamps, filled in by Django.
    create_date = models.DateTimeField(verbose_name='新建时间', auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间', auto_now=True)
    # Boolean status flag, True by default.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        ordering = ('-update_date',)
        db_table = 'Attr'

    def __str__(self):
        """Return the metric's display name."""
        return self.attr_name


# 异常库
class Anomaly(models.Model):
    """Anomaly library entry: one detected anomaly on a metric.

    Stored in table ``Anomaly`` and listed most recently updated first.
    """

    # Metric the anomaly was detected on; removed together with it (CASCADE).
    attr = models.ForeignKey(
        Attr,
        verbose_name='指标',
        on_delete=models.CASCADE,
        related_name='ra_anomaly',
    )
    # Time of the anomaly detection.
    anomaly_time = models.DateTimeField(verbose_name='异常检测时间')
    # Three text series; per their labels: current day, one day earlier and
    # one week earlier (180 minutes each).
    data_a = models.TextField(verbose_name='当天180分钟数据')
    data_b = models.TextField(verbose_name='一天前180分钟数据')
    data_c = models.TextField(verbose_name='一周前180分钟数据')
    # Whether the anomaly has been labelled; False by default.
    mark_flag = models.BooleanField(verbose_name='是否标注', default=False)
    # User who created the record; removed together with that user (CASCADE).
    create_user = models.ForeignKey(
        User,
        verbose_name='创建者',
        on_delete=models.CASCADE,
        related_name='ra_anomaly',
    )
    # Creation / last-modification timestamps, filled in by Django.
    create_date = models.DateTimeField(verbose_name='新建时间', auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间', auto_now=True)
    # Boolean status flag, True by default.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        ordering = ('-update_date', )
        db_table = 'Anomaly'

    def __str__(self):
        """Return ``<metric name>-异常-<primary key>``."""
        label_parts = (self.attr.attr_name, '-异常-', str(self.id))
        return ''.join(label_parts)


# 样本库
class SampleSet(models.Model):
    """Sample library entry: one sample attached to a metric.

    Stored in table ``SampleSet`` and listed most recently updated first.
    """

    # Metric the sample belongs to; removed together with it (CASCADE).
    attr = models.ForeignKey(
        Attr,
        verbose_name='指标',
        on_delete=models.CASCADE,
        related_name='ra_sample_set',
    )
    # Origin of the sample, 'Metis' unless stated otherwise.
    source = models.CharField(verbose_name='来源', max_length=32, default='Metis')
    # Optional free-text markers: train/test split and positive/negative label.
    train_or_test = models.CharField(
        verbose_name='训练集OR测试集',
        max_length=32,
        null=True,
        blank=True,
    )
    positive_or_negative = models.CharField(
        verbose_name='正样本OR负样本',
        max_length=32,
        null=True,
        blank=True,
    )
    # Time window (integer) and time of the anomaly detection.
    window = models.IntegerField(verbose_name='时间窗口')
    anomaly_time = models.DateTimeField(verbose_name='异常检测时间')
    # Three text series; per their labels: current day, one day earlier and
    # one week earlier.
    data_a = models.TextField(verbose_name='当天180分钟数据')
    data_b = models.TextField(verbose_name='一天前360分钟数据')
    data_c = models.TextField(verbose_name='一周前360分钟数据')
    # Optional link to the anomaly record this sample is associated with.
    anomaly = models.ForeignKey(
        Anomaly,
        verbose_name='关联异常',
        on_delete=models.CASCADE,
        related_name='ra_sample_set',
        null=True,
        blank=True,
    )
    # Optional creator of the record.
    create_user = models.ForeignKey(
        User,
        verbose_name='创建者',
        on_delete=models.CASCADE,
        related_name='ra_sample_set',
        null=True,
        blank=True,
    )
    # Creation / last-modification timestamps, filled in by Django.
    create_date = models.DateTimeField(verbose_name='新建时间', auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间', auto_now=True)
    # Boolean status flag, True by default.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        ordering = ('-update_date', )
        db_table = 'SampleSet'

    def __str__(self):
        """Return ``<metric name>-样本-<primary key>``."""
        label_parts = (self.attr.attr_name, '-样本-', str(self.id))
        return ''.join(label_parts)


# 训练任务
class TrainTask(models.Model):
    """Training task record: sample counts, model name and timing of one run.

    Stored in table ``TrainTask`` and listed most recently updated first.
    """

    # Identifier of the training task.
    task_id = models.CharField(verbose_name='训练任务ID', max_length=255)
    # Total, positive and negative sample counts.
    sample_num = models.IntegerField(verbose_name='样本总量')
    positive_sample_num = models.IntegerField(verbose_name='正样本数量')
    negative_sample_num = models.IntegerField(verbose_name='负样本数量')
    # Time window (integer).
    window = models.IntegerField(verbose_name='时间窗口')
    # Name of the trained model and origin of the task ('Metis' by default).
    model_name = models.CharField(verbose_name='模型名称', max_length=64)
    source = models.CharField(verbose_name='来源', max_length=32, default='Metis')
    # Start and end time of the training run.
    start_date = models.DateTimeField(verbose_name='开始训练时间')
    end_date = models.DateTimeField(verbose_name='结束训练时间')
    # Optional creator of the record.
    create_user = models.ForeignKey(
        User,
        verbose_name='创建者',
        on_delete=models.CASCADE,
        related_name='ra_train_task',
        null=True,
        blank=True,
    )
    # Creation / last-modification timestamps, filled in by Django.
    create_date = models.DateTimeField(verbose_name='新建时间', auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间', auto_now=True)
    # Boolean status flag, True by default.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        ordering = ('-update_date', )
        db_table = 'TrainTask'

    def __str__(self):
        """Return the model name of the task."""
        return self.model_name



新增一个python manage.py自定义命令文件(import_db),用于导入模拟数据

from datetime import datetime
from datetime import timedelta
import random

from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone
import pytz

from MetisModels.models import ViewSet, Attr, Anomaly, SampleSet, TrainTask
from .utils import get_anomaly, get_sample_set

# User model as returned by Django's get_user_model().
User = get_user_model()

# Username of the mock account that the import command deletes and recreates;
# the mock anomalies, samples and training tasks are created under it.
user_name = 'Chen_Gang'


class Command(BaseCommand):
    """Management command that wipes and reloads the mock Metis data.

    Every ``add_*`` step deletes all rows of its table before inserting new
    ones, so the command is destructive by design and can be re-run at will.
    ``handle`` runs the steps inside a single transaction: if any step fails
    (for example because a SQL dump is missing) the previous contents are
    restored instead of leaving half-empty tables behind.
    """

    help = '将模拟数据导入数据库'

    # Mock metric sets as (view_id, view_name) pairs, in creation order.
    _VIEW_SETS = (
        ('1001', '系统性能'),
        ('1002', '网络流量'),
        ('1003', '用户登陆'),
        ('1004', '中间件连接'),
        ('1005', '数据库性能'),
    )

    # Mock metrics as (view_name, ((attr_id, attr_name), ...)) groups.
    _ATTRS = (
        ('系统性能', (('50001', 'CPU负载'), ('50002', '内存负载'))),
        ('网络流量', (('50003', '上海机房'), ('50004', '北京机房'))),
        ('用户登陆', (('50005', '登陆时长'),)),
        ('中间件连接', (('50007', 'Redis连接数'), ('50008', 'Kafka吞吐量'))),
    )

    def add_arguments(self, parser):
        """Register the required positional ``db_name`` argument."""
        parser.add_argument('db_name', type=str, help='导入所有数据')

    def handle(self, *args, **options):
        """Run all import steps in dependency order.

        ``options['db_name']`` is required on the command line but no step
        reads it, so it is deliberately not extracted here.
        """
        # One transaction for the whole import: all-or-nothing.
        with transaction.atomic():
            self.add_user()
            self.add_view_set()
            self.add_attr()
            self.add_anomaly()
            self.add_sample_set()
            self.add_train_task()
        self.stdout.write('模拟数据导入完成')

    @staticmethod
    def _hours_from_now(hours):
        """Return the current time shifted ``hours`` hours into the future."""
        return timezone.now() + timedelta(hours=hours)

    @staticmethod
    def _pick_attr(attrs):
        """Return a random metric from ``attrs``.

        Raises:
            CommandError: if ``attrs`` is empty, i.e. there is no metric the
                mock row could be attached to.
        """
        if not attrs:
            raise CommandError('Attr数据表为空,无法为模拟数据关联指标。')
        return random.choice(attrs)

    def add_user(self):
        """Delete the mock user if it exists, then create it again."""
        try:
            existing = User.objects.get(username=user_name)
        except User.DoesNotExist as exc:
            # Report through the command's own stream instead of print() so
            # the message follows the command's configured stdout.
            self.stdout.write(str(exc))
        else:
            existing.delete()
        # NOTE(review): hard-coded password on a superuser account; acceptable
        # only because this command loads throw-away mock data.
        User.objects.create_user(username=user_name,
                                 password='password',
                                 is_active=True,
                                 is_superuser=True)
        self.stdout.write('用户{}重建完成。'.format(user_name))

    def add_view_set(self):
        """Replace all metric sets with the mock ones from ``_VIEW_SETS``."""
        ViewSet.objects.all().delete()
        for view_id, view_name in self._VIEW_SETS:
            ViewSet.objects.create(view_id=view_id, view_name=view_name)
        self.stdout.write('ViewSet数据表删除并重建完成。')

    def add_attr(self):
        """Replace all metrics with the mock ones from ``_ATTRS``.

        Raises:
            ViewSet.DoesNotExist: if a referenced metric set is missing.
        """
        Attr.objects.all().delete()
        for view_name, attrs in self._ATTRS:
            view_set = ViewSet.objects.get(view_name=view_name)
            for attr_id, attr_name in attrs:
                Attr.objects.create(attr_id=attr_id,
                                    attr_name=attr_name,
                                    view_set=view_set)
        self.stdout.write('Attr数据表删除并重建完成。')

    def add_anomaly(self):
        """Replace all anomalies with rows built from ``get_anomaly()``.

        Each row is attached to a random metric and gets a detection time
        1 to 10 hours in the future.
        """
        Anomaly.objects.all().delete()
        data_a, data_b, data_c = get_anomaly()
        user = User.objects.get(username=user_name)
        # Load the metrics once instead of issuing a random-order query per row.
        attrs = list(Attr.objects.all())
        for a, b, c in zip(data_a, data_b, data_c):
            Anomaly.objects.create(
                attr=self._pick_attr(attrs),
                anomaly_time=self._hours_from_now(random.randint(1, 10)),
                data_a=a,
                data_b=b,
                data_c=c,
                create_user=user,
            )

        self.stdout.write('Anomaly数据表删除并重建完成。')

    def add_sample_set(self):
        """Replace all samples with rows built from ``get_sample_set()``.

        Each row is attached to a random metric, gets random train/test and
        positive/negative markers, a window of 180 and a detection time
        1 to 10 hours in the future.
        """
        SampleSet.objects.all().delete()

        data_a, data_b, data_c = get_sample_set()
        user = User.objects.get(username=user_name)
        # Load the metrics once instead of issuing a random-order query per row.
        attrs = list(Attr.objects.all())
        for a, b, c in zip(data_a, data_b, data_c):
            SampleSet.objects.create(
                attr=self._pick_attr(attrs),
                train_or_test=random.choice(['train', 'test']),
                positive_or_negative=random.choice(['positive', 'negative']),
                window=180,
                anomaly_time=self._hours_from_now(random.randint(1, 10)),
                data_a=a,
                data_b=b,
                data_c=c,
                create_user=user,
            )

        self.stdout.write('SampleSet数据表删除并重建完成。')

    def add_train_task(self):
        """Replace all training tasks with two unrelated mock records."""
        TrainTask.objects.all().delete()
        user = User.objects.get(username=user_name)
        now = timezone.now()
        # First task: finished now, started 1 to 10 hours ago.
        TrainTask.objects.create(
            task_id='1535790960079',
            sample_num=90675,
            positive_sample_num=45228,
            negative_sample_num=45447,
            window=180,
            model_name='xgb_default_model',
            source='Metis',
            start_date=now - timedelta(hours=random.randint(1, 10)),
            end_date=now,
            create_user=user,
        )
        # Second task: starts now, ends 1 to 10 hours from now.
        TrainTask.objects.create(
            task_id='1535790964836',
            sample_num=88675,
            positive_sample_num=44228,
            negative_sample_num=44447,
            window=180,
            model_name='xgb_2nd_model',
            source='Metis',
            start_date=now,
            end_date=now + timedelta(hours=random.randint(1, 10)),
            create_user=user,
        )
        self.stdout.write('TrainTask数据表删除并重建完成。')


def str_to_datetime(time_str):
    """Parse a ``'%Y-%m-%d %H:%M:%S'`` string as Asia/Shanghai local time.

    Args:
        time_str: timestamp text such as ``'2020-11-17 15:31:54'``.

    Returns:
        A timezone-aware ``datetime`` in the Asia/Shanghai zone.

    Raises:
        ValueError: if ``time_str`` does not match the expected format.
    """
    naive = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
    # A pytz zone must be attached with localize(). Passing it to
    # datetime.replace(tzinfo=...) silently uses the zone's first historical
    # offset (local mean time, +08:06 for Shanghai) instead of +08:00.
    return pytz.timezone('Asia/Shanghai').localize(naive)

导入数据时用到的utils.py文件(针对Metis原始源码中的anomaly.sql和sample_set.sql两个SQL文件设计)

# 从SQL文件中获取data_a, data_b, data_c,


def _read_insert_columns(sql_path, indexes):
    """Collect selected fields from the ``INSERT INTO`` lines of a SQL dump.

    Every line that starts with ``INSERT INTO`` is split on whitespace; for
    each position in ``indexes`` the token at that position is taken, its
    trailing commas are stripped and all single quotes are removed.

    Args:
        sql_path: path of the SQL dump to read (decoded as UTF-8).
        indexes: token positions to extract, one output list per position.

    Returns:
        A tuple with one list per entry of ``indexes``, in the same order.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if an ``INSERT INTO`` line has too few tokens.
    """
    columns = tuple([] for _ in indexes)
    # Explicit encoding so decoding does not depend on the process locale.
    with open(sql_path, 'r', encoding='utf-8') as sql_file:
        # Iterate the file lazily rather than loading every line up front.
        for line in sql_file:
            if not line.startswith('INSERT INTO'):
                continue
            tokens = line.split()
            for column, index in zip(columns, indexes):
                column.append(tokens[index].rstrip(',').replace("'", ''))
    return columns


def get_anomaly(
        sql_path='/MetisBackend/MetisModels/management/commands/anomaly.sql'):
    """Read ``data_a``, ``data_b`` and ``data_c`` from the anomaly SQL dump.

    Args:
        sql_path: dump to parse; defaults to the path used by the import
            command.

    Returns:
        ``(data_a, data_b, data_c)``: three lists taken from tokens 13, 12
        and 11 of each ``INSERT INTO`` line.
    """
    # NOTE(review): the positions are tied to the layout of that dump file.
    return _read_insert_columns(sql_path, (13, 12, 11))


def get_sample_set(
        sql_path='/MetisBackend/MetisModels/management/commands/sample_set.sql'):
    """Read ``data_a``, ``data_b`` and ``data_c`` from the sample-set SQL dump.

    Args:
        sql_path: dump to parse; defaults to the path used by the import
            command.

    Returns:
        ``(data_a, data_b, data_c)``: three lists taken from tokens 18, 17
        and 16 of each ``INSERT INTO`` line.
    """
    # NOTE(review): the positions are tied to the layout of that dump file.
    return _read_insert_columns(sql_path, (18, 17, 16))


if __name__ == '__main__':
    # Manual check when this module is run directly: parse the sample-set SQL
    # dump (the result is discarded). The anomaly variant is left disabled.
    # get_anomaly()
    get_sample_set()


运行导入数据的命令

python3 manage.py import_db all
输出

用户Chen_Gang重建完成。
ViewSet数据表删除并重建完成。
Attr数据表删除并重建完成。
Anomaly数据表删除并重建完成。
SampleSet数据表删除并重建完成。
TrainTask数据表删除并重建完成。
模拟数据导入完成

文件结构组织

截屏2020-11-17下午3.31.54.png
上一篇 下一篇

猜你喜欢

热点阅读