重构Metis之数据库Model
2020-11-17 本文已影响0人
万州客
通过重构Metis,可以学习时间序列异常检测技术,同时巩固前后端开发知识。
URL:
https://git.code.tencent.com/Tencent_Open_Source/Metis
Models文件
from django.db import models
from django.contrib.auth import get_user_model
# User model returned by Django's get_user_model(); used as the FK target for the create_user fields.
User = get_user_model()
# Metric set
class ViewSet(models.Model):
    """A named metric set; ``Attr`` rows point at it through ``view_set``."""

    # Identifier and display name of the metric set.
    view_id = models.CharField(verbose_name='指标集id', max_length=64)
    view_name = models.CharField(verbose_name='指标集名称', max_length=64)

    # Creation / last-modification timestamps, filled in automatically.
    create_date = models.DateTimeField(verbose_name='新建时间',
                                       auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间',
                                       auto_now=True)

    # Status flag; rows start out as True.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        db_table = 'ViewSet'
        # Most recently updated rows first.
        ordering = ('-update_date', )

    def __str__(self):
        """Return the metric set's display name."""
        return self.view_name
# Metric
class Attr(models.Model):
    """A single metric that belongs to one ``ViewSet``."""

    # Identifier and display name of the metric.
    attr_id = models.CharField(verbose_name='指标id', max_length=64)
    attr_name = models.CharField(verbose_name='指标名称', max_length=64)

    # Owning metric set; deleting the set deletes its metrics (CASCADE).
    # Reverse accessor on ViewSet: ``ra_attr``.
    view_set = models.ForeignKey(
        ViewSet,
        verbose_name='指标集',
        on_delete=models.CASCADE,
        related_name='ra_attr',
    )

    # Creation / last-modification timestamps, filled in automatically.
    create_date = models.DateTimeField(verbose_name='新建时间',
                                       auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间',
                                       auto_now=True)

    # Status flag; rows start out as True.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        db_table = 'Attr'
        # Most recently updated rows first.
        ordering = ('-update_date',)

    def __str__(self):
        """Return the metric's display name."""
        return self.attr_name
# Anomaly library
class Anomaly(models.Model):
    """One detected anomaly for a metric, with its three data series."""

    # Metric the anomaly was detected on; removed together with the metric.
    # Reverse accessor on Attr: ``ra_anomaly``.
    attr = models.ForeignKey(
        Attr,
        verbose_name='指标',
        on_delete=models.CASCADE,
        related_name='ra_anomaly',
    )

    # Time at which the anomaly was detected.
    anomaly_time = models.DateTimeField(verbose_name='异常检测时间')

    # Series stored as text: current day, one day earlier, one week earlier
    # (labels taken from the verbose names).
    data_a = models.TextField(verbose_name='当天180分钟数据')
    data_b = models.TextField(verbose_name='一天前180分钟数据')
    data_c = models.TextField(verbose_name='一周前180分钟数据')

    # Whether the anomaly has been labelled; starts out False.
    mark_flag = models.BooleanField(verbose_name='是否标注', default=False)

    # Creating user (required); removed together with the user.
    # Reverse accessor on User: ``ra_anomaly``.
    create_user = models.ForeignKey(
        User,
        verbose_name='创建者',
        on_delete=models.CASCADE,
        related_name='ra_anomaly',
    )

    # Creation / last-modification timestamps, filled in automatically.
    create_date = models.DateTimeField(verbose_name='新建时间',
                                       auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间',
                                       auto_now=True)

    # Status flag; rows start out as True.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        db_table = 'Anomaly'
        # Most recently updated rows first.
        ordering = ('-update_date', )

    def __str__(self):
        """Return ``<metric name>-异常-<primary key>``."""
        return '-'.join((self.attr.attr_name, '异常', str(self.id)))
# Sample library
class SampleSet(models.Model):
    """One sample for a metric, optionally linked to an ``Anomaly``."""

    # Metric the sample belongs to; removed together with the metric.
    # Reverse accessor on Attr: ``ra_sample_set``.
    attr = models.ForeignKey(
        Attr,
        verbose_name='指标',
        on_delete=models.CASCADE,
        related_name='ra_sample_set',
    )

    # Origin of the sample; defaults to 'Metis'.
    source = models.CharField(verbose_name='来源',
                              max_length=32,
                              default='Metis')

    # Optional free-text markers: train/test split and positive/negative label.
    train_or_test = models.CharField(
        verbose_name='训练集OR测试集',
        max_length=32,
        blank=True,
        null=True,
    )
    positive_or_negative = models.CharField(
        verbose_name='正样本OR负样本',
        max_length=32,
        blank=True,
        null=True,
    )

    # Time window of the sample.
    window = models.IntegerField(verbose_name='时间窗口')

    # Time at which the anomaly was detected.
    anomaly_time = models.DateTimeField(verbose_name='异常检测时间')

    # Series stored as text: current day, one day earlier, one week earlier
    # (labels taken from the verbose names).
    data_a = models.TextField(verbose_name='当天180分钟数据')
    data_b = models.TextField(verbose_name='一天前360分钟数据')
    data_c = models.TextField(verbose_name='一周前360分钟数据')

    # Optional link to the anomaly this sample is associated with.
    # Reverse accessor on Anomaly: ``ra_sample_set``.
    anomaly = models.ForeignKey(
        Anomaly,
        verbose_name='关联异常',
        on_delete=models.CASCADE,
        related_name='ra_sample_set',
        blank=True,
        null=True,
    )

    # Optional creating user. Reverse accessor on User: ``ra_sample_set``.
    create_user = models.ForeignKey(
        User,
        verbose_name='创建者',
        on_delete=models.CASCADE,
        related_name='ra_sample_set',
        blank=True,
        null=True,
    )

    # Creation / last-modification timestamps, filled in automatically.
    create_date = models.DateTimeField(verbose_name='新建时间',
                                       auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间',
                                       auto_now=True)

    # Status flag; rows start out as True.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        db_table = 'SampleSet'
        # Most recently updated rows first.
        ordering = ('-update_date', )

    def __str__(self):
        """Return ``<metric name>-样本-<primary key>``."""
        return '-'.join((self.attr.attr_name, '样本', str(self.id)))
# Training task
class TrainTask(models.Model):
    """Record of one model-training run and the sample counts it used."""

    # Identifier of the training task.
    task_id = models.CharField(verbose_name='训练任务ID', max_length=255)

    # Sample counts: total, positive and negative.
    sample_num = models.IntegerField(verbose_name='样本总量')
    positive_sample_num = models.IntegerField(verbose_name='正样本数量')
    negative_sample_num = models.IntegerField(verbose_name='负样本数量')

    # Time window used for training.
    window = models.IntegerField(verbose_name='时间窗口')

    # Name of the trained model.
    model_name = models.CharField(verbose_name='模型名称', max_length=64)

    # Origin of the task; defaults to 'Metis'.
    source = models.CharField(verbose_name='来源',
                              max_length=32,
                              default='Metis')

    # Start and end time of the training run.
    start_date = models.DateTimeField(verbose_name='开始训练时间')
    end_date = models.DateTimeField(verbose_name='结束训练时间')

    # Optional creating user. Reverse accessor on User: ``ra_train_task``.
    create_user = models.ForeignKey(
        User,
        verbose_name='创建者',
        on_delete=models.CASCADE,
        related_name='ra_train_task',
        blank=True,
        null=True,
    )

    # Creation / last-modification timestamps, filled in automatically.
    create_date = models.DateTimeField(verbose_name='新建时间',
                                       auto_now_add=True)
    update_date = models.DateTimeField(verbose_name='更新时间',
                                       auto_now=True)

    # Status flag; rows start out as True.
    status = models.BooleanField(verbose_name='状态', default=True)

    class Meta:
        db_table = 'TrainTask'
        # Most recently updated rows first.
        ordering = ('-update_date', )

    def __str__(self):
        """Return the model name of the task."""
        return self.model_name
新增一个自定义的 python manage.py 管理命令文件,用于导入模拟数据
import random
from datetime import datetime
from datetime import timedelta

import pytz
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone

from MetisModels.models import ViewSet, Attr, Anomaly, SampleSet, TrainTask
from .utils import get_anomaly, get_sample_set
# User model returned by Django's get_user_model().
User = get_user_model()
# Username of the demo account that the import command re-creates and
# records as the creator of the imported rows.
user_name = 'Chen_Gang'
class Command(BaseCommand):
    """Management command that wipes and re-creates the demo data.

    Every run deletes and rebuilds, in order: the demo user, the metric
    sets, the metrics, the anomalies, the samples and the training tasks.
    The whole rebuild runs inside one database transaction so that a
    failure midway (for example an unreadable SQL dump) does not leave the
    tables emptied.
    """

    help = '将模拟数据导入数据库'

    def add_arguments(self, parser):
        """Register the required positional ``db_name`` argument."""
        parser.add_argument('db_name', type=str, help='导入所有数据')

    def handle(self, *args, **options):
        """Rebuild every demo table and report completion.

        ``options['db_name']`` is required on the command line but is not
        consulted: all tables are always rebuilt.
        """
        # All steps are destructive; commit them together or not at all.
        with transaction.atomic():
            self.add_user()
            self.add_view_set()
            self.add_attr()
            self.add_anomaly()
            self.add_sample_set()
            self.add_train_task()
        self.stdout.write('模拟数据导入完成')

    @staticmethod
    def _random_hours():
        """Return a random whole-hour offset between 1 and 10 hours."""
        return timedelta(hours=random.randint(1, 10))

    @staticmethod
    def _pick_attr(attrs):
        """Return a random metric from ``attrs``.

        Raises:
            CommandError: if ``attrs`` is empty, i.e. no metric exists to
                attach a row to.
        """
        if not attrs:
            raise CommandError('Attr数据表为空,无法关联指标。')
        return random.choice(attrs)

    # Re-create the demo user.
    def add_user(self):
        """Delete the demo user if present, then create it afresh."""
        try:
            existing = User.objects.get(username=user_name)
        except User.DoesNotExist as exc:
            # First run: nothing to delete. Report through the command's
            # own output stream rather than print().
            self.stdout.write(str(exc))
        else:
            existing.delete()
        # NOTE(review): hard-coded demo password on a superuser account;
        # acceptable only for throw-away test data.
        User.objects.create_user(username=user_name,
                                 password='password',
                                 is_active=True,
                                 is_superuser=True)
        self.stdout.write('用户{}重建完成。'.format(user_name))

    # Re-create the metric sets.
    def add_view_set(self):
        """Delete all metric sets and insert the five demo ones."""
        ViewSet.objects.all().delete()
        demo_view_sets = (
            ('1001', '系统性能'),
            ('1002', '网络流量'),
            ('1003', '用户登陆'),
            ('1004', '中间件连接'),
            ('1005', '数据库性能'),
        )
        for view_id, view_name in demo_view_sets:
            ViewSet.objects.create(view_id=view_id, view_name=view_name)
        self.stdout.write('ViewSet数据表删除并重建完成。')

    # Re-create the metrics.
    def add_attr(self):
        """Delete all metrics and insert the demo ones per metric set."""
        Attr.objects.all().delete()
        demo_attrs = (
            ('系统性能', (('50001', 'CPU负载'), ('50002', '内存负载'))),
            ('网络流量', (('50003', '上海机房'), ('50004', '北京机房'))),
            ('用户登陆', (('50005', '登陆时长'),)),
            ('中间件连接', (('50007', 'Redis连接数'),
                            ('50008', 'Kafka吞吐量'))),
        )
        for view_name, attrs in demo_attrs:
            view_set = ViewSet.objects.get(view_name=view_name)
            for attr_id, attr_name in attrs:
                Attr.objects.create(attr_id=attr_id,
                                    attr_name=attr_name,
                                    view_set=view_set)
        self.stdout.write('Attr数据表删除并重建完成。')

    # Insert the simulated anomalies.
    def add_anomaly(self):
        """Delete all anomalies and insert one per row from get_anomaly()."""
        Anomaly.objects.all().delete()
        data_a, data_b, data_c = get_anomaly()
        user = User.objects.get(username=user_name)
        # Load the metrics once instead of running a random-ordered query
        # for every inserted row.
        attrs = list(Attr.objects.all())
        for a, b, c in zip(data_a, data_b, data_c):
            Anomaly.objects.create(
                attr=self._pick_attr(attrs),
                anomaly_time=timezone.now() + self._random_hours(),
                data_a=a,
                data_b=b,
                data_c=c,
                create_user=user,
            )
        self.stdout.write('Anomaly数据表删除并重建完成。')

    # Insert the simulated sample library.
    def add_sample_set(self):
        """Delete all samples and insert one per row from get_sample_set()."""
        SampleSet.objects.all().delete()
        data_a, data_b, data_c = get_sample_set()
        user = User.objects.get(username=user_name)
        # Load the metrics once instead of running a random-ordered query
        # for every inserted row.
        attrs = list(Attr.objects.all())
        for a, b, c in zip(data_a, data_b, data_c):
            SampleSet.objects.create(
                attr=self._pick_attr(attrs),
                train_or_test=random.choice(['train', 'test']),
                positive_or_negative=random.choice(['positive', 'negative']),
                window=180,
                anomaly_time=timezone.now() + self._random_hours(),
                data_a=a,
                data_b=b,
                data_c=c,
                create_user=user,
            )
        self.stdout.write('SampleSet数据表删除并重建完成。')

    # Insert two training tasks; they are not linked to any samples and
    # exist only so the table has records.
    def add_train_task(self):
        """Delete all training tasks and insert two fixed demo records."""
        TrainTask.objects.all().delete()
        user = User.objects.get(username=user_name)
        # First task: started 1-10 hours ago, ends now.
        TrainTask.objects.create(
            task_id='1535790960079',
            sample_num=90675,
            positive_sample_num=45228,
            negative_sample_num=45447,
            window=180,
            model_name='xgb_default_model',
            source='Metis',
            start_date=timezone.now() - self._random_hours(),
            end_date=timezone.now(),
            create_user=user,
        )
        # Second task: starts now, ends 1-10 hours from now.
        TrainTask.objects.create(
            task_id='1535790964836',
            sample_num=88675,
            positive_sample_num=44228,
            negative_sample_num=44447,
            window=180,
            model_name='xgb_2nd_model',
            source='Metis',
            start_date=timezone.now(),
            end_date=timezone.now() + self._random_hours(),
            create_user=user,
        )
        self.stdout.write('TrainTask数据表删除并重建完成。')
def str_to_datetime(time_str):
    """Parse ``time_str`` as an Asia/Shanghai timezone-aware datetime.

    Args:
        time_str: Timestamp in ``%Y-%m-%d %H:%M:%S`` format.

    Returns:
        An aware ``datetime`` in the Asia/Shanghai zone.

    Raises:
        ValueError: if ``time_str`` does not match the expected format.
    """
    naive = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
    # A pytz zone must be attached with localize(). Passing it to
    # replace(tzinfo=...) attaches the zone's local-mean-time offset
    # instead of the real UTC offset for that date.
    return pytz.timezone('Asia/Shanghai').localize(naive)
导入数据时用到的utils.py文件(针对Metis原始源码里的那两个SQL文件设计)
# Extract data_a, data_b and data_c from the anomaly SQL dump.
def get_anomaly(sql_path='/MetisBackend/MetisModels/management/commands/anomaly.sql'):
    """Read the three data series of every INSERT row in the anomaly dump.

    Each line starting with ``INSERT INTO`` is split on whitespace; tokens
    13, 12 and 11 are taken as data_a, data_b and data_c respectively, with
    trailing commas and single quotes removed.
    NOTE(review): the token positions assume the layout of the original
    dump file; confirm against the file if the dump is regenerated.

    Args:
        sql_path: Path of the SQL dump. Defaults to the location the
            import command has always used.

    Returns:
        A ``(data_a, data_b, data_c)`` tuple of equally long string lists.

    Raises:
        OSError: if the dump cannot be opened.
    """
    def clean(token):
        # Drop the trailing column separator, then the SQL string quotes.
        return token.rstrip(",").replace("'", "")

    data_a, data_b, data_c = [], [], []
    with open(sql_path, 'r', encoding='utf-8') as sql_file:
        # Iterate the file lazily instead of loading every line at once.
        for line in sql_file:
            if not line.startswith('INSERT INTO'):
                continue
            tokens = line.split()
            # Skip rows too short to hold all three series rather than
            # failing with an IndexError.
            if len(tokens) <= 13:
                continue
            data_a.append(clean(tokens[13]))
            data_b.append(clean(tokens[12]))
            data_c.append(clean(tokens[11]))
    return data_a, data_b, data_c
def get_sample_set(sql_path='/MetisBackend/MetisModels/management/commands/sample_set.sql'):
    """Read the three data series of every INSERT row in the sample dump.

    Each line starting with ``INSERT INTO`` is split on whitespace; tokens
    18, 17 and 16 are taken as data_a, data_b and data_c respectively, with
    trailing commas and single quotes removed.
    NOTE(review): the token positions assume the layout of the original
    dump file; confirm against the file if the dump is regenerated.

    Args:
        sql_path: Path of the SQL dump. Defaults to the location the
            import command has always used.

    Returns:
        A ``(data_a, data_b, data_c)`` tuple of equally long string lists.

    Raises:
        OSError: if the dump cannot be opened.
    """
    def clean(token):
        # Drop the trailing column separator, then the SQL string quotes.
        return token.rstrip(",").replace("'", "")

    data_a, data_b, data_c = [], [], []
    with open(sql_path, 'r', encoding='utf-8') as sql_file:
        # Iterate the file lazily instead of loading every line at once.
        for line in sql_file:
            if not line.startswith('INSERT INTO'):
                continue
            tokens = line.split()
            # Skip rows too short to hold all three series rather than
            # failing with an IndexError.
            if len(tokens) <= 18:
                continue
            data_a.append(clean(tokens[18]))
            data_b.append(clean(tokens[17]))
            data_c.append(clean(tokens[16]))
    return data_a, data_b, data_c
# Manual check: parse the sample-set dump when this file is run directly.
if __name__ == '__main__':
    # get_anomaly()
    get_sample_set()
运行导入数据的命令
python3 manage.py import_db all
输出
用户Chen_Gang重建完成。
ViewSet数据表删除并重建完成。
Attr数据表删除并重建完成。
Anomaly数据表删除并重建完成。
SampleSet数据表删除并重建完成。
TrainTask数据表删除并重建完成。
模拟数据导入完成