python执行mysql 计算复购率+pyechart+Exc
2019-10-26 本文已影响0人
DeepWindLee
现有某超市的订单数据,内容如下:

先求每个会员在每个月的订单数
sql = ''' --
select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
;
'''
df = pd.read_sql_query(sql, engine)
df.head(8)

给上表新增一列,判断其是否在本月多次购买
select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,
case when temp.每个会员的订单数>1 then 1 else null
end as 是否在本月多次购买
from
(
select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
) temp
;

再对上表进行分组统计多少会员是复购会员
sql = ''' -- 使用 count(temp_out.是否在本月多次购买) 也可以
select temp_out.YearMonth ,count(temp_out.MemberID) AS 本月购物的会员数, sum(temp_out.是否在本月多次购买) as 复购会员人数,
sum(temp_out.是否在本月多次购买)/count(temp_out.MemberID) as 占比
from (
select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,
case when temp.每个会员的订单数>1 then 1 else null
end as 是否在本月多次购买
from
(
select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
) temp
) temp_out
group by temp_out.YearMonth
;
'''
df = pd.read_sql_query(sql, engine)
df

上面的方法嵌套了三层子表:orderlist本身, temp , temp_out,实际上可以不用产生 是否在本月多次购买的子表,即上表的temp表,可以使用 sum(case when 每个会员的订单数 >1 else 0 end )统计,这样就少产生了一个子表。
sql = '''
select temp.YearMonth ,count(temp.MemberID) AS 本月购物的会员数,
sum(case when temp.每个会员的订单数 >1 then 1 else 0 end) as 复购会员人数,
sum(case when temp.每个会员的订单数 >1 then 1 else 0 end)/count(temp.MemberID) as 占比
from
(
select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
) temp
group by temp.YearMonth
;
'''
df = pd.read_sql_query(sql, engine)
df.tail(24).to_csv('每月的复购率.csv',encoding='utf_8_sig')
df.head(8)

使用pyecharts 0.5画图
#0.5版本
import pyecharts
from pyecharts import Overlap, Bar, Line, Grid, EffectScatter
grid = Grid()
v1 = list( df.tail(24)['本月购物的会员数'].values)
v2 = list(df.tail(24)['复购会员人数'].values)
v3 = list( df.tail(24)['占比'].values*100) #
my_attr = list(df.tail(24)['YearMonth'].values) # ["{}号".format(i) for i in range(1, len(v1)+1)] #attr =
bar = Bar(title="DeepWind超市(南沙区)", title_pos="20%")
bar.add("会员人数", my_attr, v1)
bar.add("复购人数",my_attr,v2,yaxis_formatter=" 人",
yaxis_max=5200,
legend_pos="25%",
legend_orient="horizontal",
legend_top="15%",
)
overlap = Overlap(width=1200, height=600)
overlap.add(bar)
line = Line()
line.add("复购人数占比", my_attr, v3, yaxis_formatter=" %",yaxis_max=100)
es = EffectScatter()
#overlap = Overlap(width=1200, height=600)
overlap.add(line, is_add_yaxis=True, yaxis_index=1)
#es.add("", my_attr, v3, effect_scale=8,is_add_yaxis=True, yaxis_index=1,yaxis_max=4000)
#overlap.add(es)
grid.add(overlap, grid_right="20%")
grid.render()
overlap.render()
#bar
grid

使用pyecharts1.6 画图
import pyecharts.options as opts
from pyecharts.charts import Bar, Line
from pyecharts.globals import ThemeType
v1 = list( df.tail(24)['本月购物的会员数'].values)
v2 = list(df.tail(24)['复购会员人数'].values)
v3 = list( df.tail(24)['占比'].values*100) #
x_data= list(df.tail(24)['YearMonth'].values)
v1 = [int(each) for each in v1]
v2 = [int(each) for each in v2]
v3 = [int(each) for each in v3]
x_data= [str(each) for each in x_data]
bar = (
Bar(init_opts=opts.InitOpts(width="800px", height="400px",theme=ThemeType.DARK))
.add_xaxis(xaxis_data=x_data)
.add_yaxis(
series_name="会员人数",
yaxis_data= v1,
label_opts=opts.LabelOpts(is_show=False),
markpoint_opts=opts.MarkPointOpts(
data=[
opts.MarkPointItem(type_="max", name="最大值"),
opts.MarkPointItem(type_="min", name="最小值"),
]
),
)
.add_yaxis(
series_name="复购人数",
yaxis_data= v2,
label_opts=opts.LabelOpts(is_show=False),
markpoint_opts=opts.MarkPointOpts(
data=[
opts.MarkPointItem(type_="max", name="最大值"),
opts.MarkPointItem(type_="min", name="最小值"),
]
),
)
.extend_axis(
yaxis=opts.AxisOpts(
name="占比",
type_="value",
min_=50,
max_=100,
interval=10,
axislabel_opts=opts.LabelOpts(formatter="{value} %"),
)
)
.set_global_opts(
tooltip_opts=opts.TooltipOpts(
is_show=True, trigger="axis", axis_pointer_type="cross"
),
xaxis_opts=opts.AxisOpts(
type_="category",
axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
),
yaxis_opts=opts.AxisOpts(
name="人数",
type_="value",
min_=0,
max_=5500,
interval=500,
axislabel_opts=opts.LabelOpts(formatter="{value}人"),
axistick_opts=opts.AxisTickOpts(is_show=True),
splitline_opts=opts.SplitLineOpts(is_show=True),
),
)
)
line = (
Line()
.add_xaxis(xaxis_data=x_data)
.add_yaxis(
series_name="占比",
yaxis_index=1,
y_axis=v3,
label_opts=opts.LabelOpts(is_show=False),
markpoint_opts=opts.MarkPointOpts(
data=[
opts.MarkPointItem(type_="max", name="最大值"),
opts.MarkPointItem(type_="min", name="最小值"),
]
),
)
)
bar.overlap(line).render("复购人数.html")
bar.overlap(line).render_notebook()#render("mixed_bar_and_line.html")

使用Excel画双Y轴图

此时只有一个Y轴,需要对占比重新生成一个列。


得到下图



最终得到

使用Tableau画双轴图

将得到
