数据分析

可视化看板搭建项目总结

2020-11-20  本文已影响0人  分类讨论

本文主要针对Adventure案例业务仪表盘搭建的总结,记录了整个项目整个项目的完整过程。通过从数据库获取数据,利用Numpy,Pandas分析工具对数据进行处理后转存到数据库。最后通过PowerBI实现可视化。
成果展示
分析成果链接:Adventure报表

Home Trend City

一、项目目标

通过对Adventure业务销量,销售额等指标的多维拆分展示,实现对数据的日常监控,帮助业务在数据异常是能够第一时间发现问题并改善。

二、思维框架

思维框架

三、分析过程

准备工作:

# 导入包
import pandas as pd
import pymysql
import random
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()
import datetime

1. 阶段指标情况

# 导入数据
adventure_ods = create_engine('mysql+pymysql://root:1234@localhost:3306/adventure_ods?charset=gbk')
ods_sales_orders = pd.read_sql_query("select * from ods_sales_orders ", con=adventure_ods)
dim_date_df = "select create_date, is_current_year, is_last_year, is_yesterday, is_today, is_current_month, is_current_quarter, is_21_day from dim_date_df"
dim_date_df = pd.read_sql_query(dim_date_df, con=adventure_ods)
ods_sales_orders.head()
dim_date_df.head()
ods_sales_orders dim_date_df
# 统计每日销售额,订单量,客户数,客单价
sum_amount_order = ods_sales_orders.groupby('create_date').agg({'unit_price':'sum','customer_key':pd.Series.nunique,'sales_order_key':pd.Series.nunique}).reset_index()
sum_amount_order = sum_amount_customer.rename(columns = {'unit_price':'sum_amount','customer_key':'sum_customer','sales_order_key':'sum_order'})
sum_amount_customer['amount_div_order'] = sum_amount_customer['sum_amount'] / sum_amount_customer['sum_order']
sum_amount_order.head()
sum_amount_order
# 为销量与销售额指定KPI。这里采用了随机生成的方式
sum_amount_goal_list = []
sum_order_goal_list = []
create_date_list = list(sum_amount_order['create_date'])
# 对每天的销量与销售额 * (0.85, 1.1)作为每天的KPI
for i in create_date_list:
    a = random.uniform(0.85,1.1)
    b = random.uniform(0.85,1.1)
    amount_goal = list(sum_amount_order[sum_amount_order['create_date'] == i]['sum_amount'])[0] * a
    order_goal = list(sum_amount_order[sum_amount_order['create_date'] == i]['sum_order'])[0] * b
    sum_amount_goal_list.append(amount_goal)
    sum_order_goal_list.append(order_goal)
# 将生成的KPI列表连接到表中
sum_amount_order_goal = pd.concat([sum_amount_order,pd.DataFrame({'sum_amount_goal':sum_amount_goal_list,'sum_order_goal':sum_order_goal_list})],axis = 1)
sum_amount_order_goal.head()
sum_amount_order_goal
# 将每日的业务情况与日期维度表进行连接
dw_order_by_day = pd.merge(sum_amount_order_goal,dim_date_df,on = 'create_date',how = 'inner')
dw_order_by_day['create_date'] = dw_order_by_day['create_date'].astype("str")
dw_order_by_day.head()
dw_order_by_day
# 销售额环比计算
dw_order_by_day['amount_diff'] = dw_order_by_day['sum_amount'].pct_change().fillna(0)
# 结果导入数据库,供后续使用
adventure_dw = create_engine('mysql+pymysql://root:1234@localhost:3306/adventure_dw?charset=gbk')
dw_order_by_day.to_sql('dw_order_by_day' ,con = adventure_dw, if_exists = 'replace', index = False)

2. 业务指标同比情况

# 导入第一步生成的表
adventure_dw = create_engine('mysql+pymysql://root:1234@localhost:3306/adventure_dw?charset=gbk')
dw_order_by_day = pd.read_sql_query('select * from dw_order_by_day', con = adventure_dw)
dw_order_by_day.head()
dw_order_by_day
# 今日及去年同期销售金额
# 求今天的销售金额
today_amount = dw_order_by_day[dw_order_by_day['is_today'] == 1]['sum_amount'].sum()
# 求去年同期的日期
before_year_today = list(dw_order_by_day[dw_order_by_day['is_today'] == 1]['create_date'] + datetime.timedelta(days=-365))
# 通过去年同期的日期获取去年同期的销售金额
before_year_today_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_today)]['sum_amount'].sum()

# 昨日及去年同期销售金额
yesterday_amount = dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]['sum_amount'].sum()
before_year_yesterday = list(dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]['create_date'] + datetime.timedelta(days=-365))
before_year_yesterday_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_yesterday)]['sum_amount'].sum()

# 本月及去年同期销售金额
month_amount = dw_order_by_day[dw_order_by_day['is_current_month'] == 1]['sum_amount'].sum()
before_year_month = list(dw_order_by_day[dw_order_by_day['is_current_month'] == 1]['create_date'] + datetime.timedelta(days = -365))
before_year_month_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_month)]['sum_amount'].sum()

# 本季度及去年同期销售金额
quarter_amount = dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]['sum_amount'].sum()
before_year_quarter = list(dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]['create_date'] + datetime.timedelta(days = -365))
before_year_quarter_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_quarter)]['sum_amount'].sum()

# 本年度及去年同期销售金额
year_amount = dw_order_by_day[dw_order_by_day['is_current_year'] == 1]['sum_amount'].sum()
before_year_year = list(dw_order_by_day[dw_order_by_day['is_current_year'] == 1]['create_date'] + datetime.timedelta(days = -365))
before_year_year_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_year)]['sum_amount'].sum()
# 同销售金额,只是更新为对销量的统计
# 当天
today_order = dw_order_by_day[dw_order_by_day['is_today'] == 1]['sum_order'].sum()
before_year_today_order = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_today)]['sum_order'].sum()

# 昨天
yesterday_order = dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]['sum_order'].sum()
before_year_yesterday_order = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_yesterday)]['sum_order'].sum()

# 本月
month_order = dw_order_by_day[dw_order_by_day['is_current_month'] == 1]['sum_order'].sum()
before_year_month_order = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_month)]['sum_order'].sum()

# 本季度
quarter_order = dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]['sum_order'].sum()
before_year_quarter_order = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_quarter)]['sum_order'].sum()

# 本年
year_order = dw_order_by_day[dw_order_by_day['is_current_year'] == 1]['sum_order'].sum()
before_year_year_order = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_year)]['sum_order'].sum()
amount_dict = {'today_diff': [today_amount/before_year_today_amount - 1,today_order/before_year_today_order - 1,(today_amount/today_order)/(before_year_today_amount/before_year_today_order) - 1],
               'yesterday_diff': [yesterday_amount/before_year_yesterday_amount - 1,yesterday_order/before_year_yesterday_order - 1,(yesterday_amount/yesterday_order)/(before_year_yesterday_amount/before_year_yesterday_order) - 1],
               'month_diff': [month_amount/before_year_month_amount - 1,month_order/before_year_month_order - 1,(month_amount/month_order)/(before_year_month_amount/before_year_month_order) - 1],
               'quarter_diff': [quarter_amount/before_year_quarter_amount - 1,quarter_order/before_year_quarter_order - 1,(quarter_amount/quarter_order)/(before_year_quarter_amount/before_year_quarter_order) - 1],
               'year_diff': [year_amount/before_year_year_amount - 1,year_order/before_year_year_order - 1,(year_amount/year_order)/(before_year_year_amount/before_year_year_order) - 1],
               'flag': ['amount', 'order', 'avg']}
dw_amount_diff = pd.DataFrame(amount_dict)
dw_amount_diff
dw_amount_diff
dw_amount_diff.to_sql('dw_amount_diff', con=adventure_dw, if_exists='replace', index=False)

3. 区域指标情况

adventure_ods = create_engine('mysql+pymysql://root:1234@localhost:3306/adventure_ods?charset=gbk')
#  获取订单明细表
ods_sales_orders = "select sales_order_key, create_date,customer_key, english_product_name, cpzl_zw, cplb_zw, unit_price from ods_sales_orders where create_date in (select create_date from dim_date_df order by create_date desc)"
ods_sales_orders = pd.read_sql_query(ods_sales_orders,con=adventure_ods)
# 获取用户信息表
ods_customer = "select customer_key,chinese_territory,chinese_province,chinese_cityfrom ods_customer"
ods_customer = pd.read_sql_query(ods_customer, con=adventure_ods)
# 读取日期维度表
dim_date_df = "select create_date,is_current_year,is_last_year,is_yesterday,is_today,is_current_month,is_current_quarter,is_21_day from dim_date_df"
dim_date_df = pd.read_sql_query(dim_date_df, con=adventure_ods)
#查看数据
ods_sales_orders.head()
ods_customer.head()
dim_date_df.head()
ods_sales_orders ods_customer dim_date_df
# 为了表连接匹配,格式转化
ods_sales_orders['customer_key'] = ods_sales_orders['customer_key'].astype("int")
# 将订单明细表与客户信息表关联
sales_customer_order = pd.merge(ods_sales_orders,ods_customer,on = 'customer_key',how = 'left')
sales_customer_order = sales_customer_order[['sales_order_key','create_date','customer_key','english_product_name','cpzl_zw','cplb_zw','unit_price','chinese_territory','chinese_province','chinese_city']]
sum_customer_order = sales_customer_order.groupby(['create_date','english_product_name','cpzl_zw','cplb_zw','chinese_territory','chinese_province','chinese_city'],as_index = False).agg({'sales_order_key':pd.Series.nunique,'customer_key':pd.Series.nunique,'unit_price':'sum'}).rename(columns = {'sales_order_key':'order_num','customer_key':'customer_num','unit_price':'sum_amount','english_product_name':'product_name'})
# 格式转化为字符串
sum_customer_order['create_date'] = sum_customer_order['create_date'].apply(lambda x:x.strftime('%Y-%m-%d'))
# 关联日期维度表
dw_customer_order = pd.merge(sum_customer_order,dim_date_df,on = 'create_date', how = 'inner')
dw_customer_order.to_sql('dw_customer_order', con=adventure_dw,if_exists='replace', index=False)

4. 报表制作

选择合适的可视化工具,从多个维度展示销售情况。

Home Trend City
上一篇 下一篇

猜你喜欢

热点阅读