数据分析

python执行mysql 计算复购率+pyechart+Exc

2019-10-26  本文已影响0人  DeepWindLee

现有某超市的订单数据,内容如下:

demo库订单表.png

先求每个会员在每个月的订单数

sql = ''' -- 
select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
from OrderList 
where MemberID IS NOT NULL 
group by MemberID ,YearMonth
order by YearMonth
;
''' 
df = pd.read_sql_query(sql, engine)
df.head(8)
每个会员每个月的订单数.png

给上表新增一列,判断其是否在本月多次购买

select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,
case when temp.每个会员的订单数>1 then 1 else null 
end as 是否在本月多次购买 
from
(
select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
from OrderList 
where MemberID IS NOT NULL 
group by MemberID ,YearMonth
order by YearMonth
) temp
;
新增一列.png

再对上表进行分组统计多少会员是复购会员

sql = ''' -- 使用 count(temp_out.是否在本月多次购买) 也可以
select temp_out.YearMonth ,count(temp_out.MemberID) AS 本月购物的会员数, sum(temp_out.是否在本月多次购买) as 复购会员人数,
sum(temp_out.是否在本月多次购买)/count(temp_out.MemberID) as 占比
from (
    select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,
    case when temp.每个会员的订单数>1 then 1 else null 
    end as 是否在本月多次购买 
    from
        (
        select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
        from OrderList 
        where MemberID IS NOT NULL 
        group by MemberID ,YearMonth
        order by YearMonth
        ) temp

    ) temp_out
group by temp_out.YearMonth
;
''' 
df = pd.read_sql_query(sql, engine)
df
统计复购人数.png
上面的方法嵌套了三层子表:orderlist本身, temp , temp_out,实际上可以不用产生 是否在本月多次购买的子表,即上表的temp表,可以使用 sum(case when 每个会员的订单数 >1 else 0 end )统计,这样就少产生了一个子表。
sql = ''' 
select temp.YearMonth ,count(temp.MemberID) AS 本月购物的会员数,
sum(case when temp.每个会员的订单数 >1 then 1 else 0 end) as 复购会员人数,
sum(case when temp.每个会员的订单数 >1 then 1 else 0 end)/count(temp.MemberID) as 占比
from
    (
    select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
    from OrderList 
    where MemberID IS NOT NULL 
    group by MemberID ,YearMonth
    order by YearMonth
    ) temp

group by temp.YearMonth
;
''' 
df = pd.read_sql_query(sql, engine)
df.tail(24).to_csv('每月的复购率.csv',encoding='utf_8_sig')
df.head(8)
sumcase统计复购率.png

使用pyecharts 0.5画图

#0.5版本
import pyecharts
from pyecharts import Overlap, Bar, Line, Grid, EffectScatter
grid = Grid()
v1 = list( df.tail(24)['本月购物的会员数'].values)        
v2 =  list(df.tail(24)['复购会员人数'].values)           
v3 = list( df.tail(24)['占比'].values*100) #
my_attr = list(df.tail(24)['YearMonth'].values) # ["{}号".format(i) for i in range(1, len(v1)+1)]  #attr =
bar = Bar(title="DeepWind超市(南沙区)", title_pos="20%")
bar.add("会员人数", my_attr, v1)
bar.add("复购人数",my_attr,v2,yaxis_formatter=" 人",
    yaxis_max=5200,
    legend_pos="25%",
    legend_orient="horizontal",
    legend_top="15%",
)
overlap = Overlap(width=1200, height=600)
overlap.add(bar)
line = Line()
line.add("复购人数占比", my_attr, v3, yaxis_formatter=" %",yaxis_max=100)
es = EffectScatter()
#overlap = Overlap(width=1200, height=600)
overlap.add(line, is_add_yaxis=True, yaxis_index=1)
#es.add("", my_attr, v3, effect_scale=8,is_add_yaxis=True, yaxis_index=1,yaxis_max=4000)
#overlap.add(es)
grid.add(overlap, grid_right="20%")
grid.render()
overlap.render()
#bar
grid
pyechart画复购率.png

使用pyecharts1.6 画图

import pyecharts.options as opts
from pyecharts.charts import Bar, Line
from pyecharts.globals import ThemeType
v1 = list( df.tail(24)['本月购物的会员数'].values)        
v2 =  list(df.tail(24)['复购会员人数'].values)           
v3 = list( df.tail(24)['占比'].values*100) #
x_data= list(df.tail(24)['YearMonth'].values) 

v1 = [int(each) for each in v1]
v2 = [int(each) for each in v2]
v3 = [int(each) for each in v3]
x_data= [str(each) for each in x_data]
bar = (
    Bar(init_opts=opts.InitOpts(width="800px", height="400px",theme=ThemeType.DARK))
    .add_xaxis(xaxis_data=x_data)
    .add_yaxis(
        series_name="会员人数",
        yaxis_data= v1,
        label_opts=opts.LabelOpts(is_show=False),
        
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        ),
    )
    .add_yaxis(
        series_name="复购人数",
        yaxis_data= v2,
        label_opts=opts.LabelOpts(is_show=False),
        
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        ),
    )
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="占比",
            type_="value",
            min_=50,
            max_=100,
            interval=10,
            axislabel_opts=opts.LabelOpts(formatter="{value} %"),
        )
    )
    .set_global_opts(
        tooltip_opts=opts.TooltipOpts(
            is_show=True, trigger="axis", axis_pointer_type="cross"
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="人数",
            type_="value",
            min_=0,
            max_=5500,
            interval=500,
            axislabel_opts=opts.LabelOpts(formatter="{value}人"),
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
    )
)

line = (
    Line()
    .add_xaxis(xaxis_data=x_data)
    .add_yaxis(
        series_name="占比",
        yaxis_index=1,
        y_axis=v3,
        label_opts=opts.LabelOpts(is_show=False),
        
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        ),
    )
)
bar.overlap(line).render("复购人数.html")
bar.overlap(line).render_notebook()#render("mixed_bar_and_line.html")
复购1.6.png

使用Excel画双Y轴图

excel画图11.png

此时只有一个Y轴,需要对占比重新生成一个列。

设置数据系列格式22.jpg
EXCEL 次坐标.png

得到下图


y轴1.png
更改次y轴.png 更改y轴为线型.png

最终得到


最终结果.png

使用Tableau画双轴图

占比y轴.png

将得到


占比y轴2.png
上一篇 下一篇

猜你喜欢

热点阅读