数据分析

北京五城区美食数据分析

2020-02-15  本文已影响0人  yuys

看了一些数据分析的案例,自己上手完成了这个对北京五个主城区餐饮类的数据分析,主要从店铺的类型,评分和热门评论等信息提炼分析了北京各类餐饮的分布及特点。也顺便当作吃货们寻找北京美食的一份小指南吧。

数据来源于美团·美食

1. 获取数据与数据清洗
2. 数据的描述性分析
from pyecharts import Bar,Grid

name1 = data1['name'].iloc[0:10]
per_ca = data1['per_capita'].iloc[0:10] # 人均
com_counts1 = data1['comments_counts'].iloc[0:10] # 评论数

bar = Bar("餐饮人均消费前十")
bar.add("人均",name1,per_ca,xaxis_rotate=20,mark_line=['average'],mark_point=['max'])
bar.add("评论数",name1,com_counts1,xaxis_rotate=20,mark_line=['average'],mark_point=['max'])

grid = Grid()
grid.add(bar,grid_bottom='22%',grid_left='20%')

和木The Home(前门店) 人均 1184 元

# sql选取中位数
set @ROW_NUMBER:=0;
set @median_group:='';

select median_group, avg(per_capita) as median
from
 (select 
   @ROW_NUMBER:=
      case when @median_group = type then @ROW_NUMBER + 1 else 1
      end as count_of_group,
   @median_group:= type as median_group,
   name,
   per_capita,
   type,
   (select count(*)
    from beijing_all
    where a.type = type) as totle_of_type
  from
  (select name,per_capita,type
  from beijing_all
  order by type,per_capita) as a) as b
where count_of_group between totle_of_type*0.5 and totle_of_type*0.5 + 1
group by median_group;

'''
pyecharts可直接绘制箱线图,但需要用prepare_data()将原数据进行转换。
'''
sql_shop = "select type, count(*) counts from 
          (select * from beijing_all where (locate('菜',type)>0 or locate('餐',type)>0) and type not in 
          (select type from beijing_all where locate('小吃快餐',type)>0 or locate('创意菜',type)>0 or locate('自助餐',type)>0 
          or locate('特色菜',type)>0)) as a group by type order by counts desc"

data_shop = pd.read_sql(sql_shop,conn)

from pyecharts import Pie

pie_name = data_shop['type'].tolist()
pie_values = data_shop['counts'].values.tolist()

pie = Pie("各大菜系店铺数")
pie.add("",pie_name,pie_values,radius=[35,70],
          center=[35,50],rosetype='radius',
          is_label_show=True,legend_orient='vertical',
          legend_pos="right")
from pyecharts import WordCloud

word = data_foreignfood['comment1']
word_values = data_foreignfood['count3']
wordcloud = WordCloud()
wordcloud.add("",word,word_values,shape='triangle')
select 
    type,name,score,per_capita,comments_counts,address,(tran_per + tran_com + tran_sco) as sum
from 
   (select type,name,score,per_capita,comments_counts,address,
          (1 - (per_capita - min_per)/(max_per - min_per))*10 as tran_per,
          (comments_counts - min_com)/(max_com - min_com)*10 as tran_com,
          (score - min_sco)/(max_sco - min_sco)*10 as tran_sco
    from 
    beijing_all,
    (select 
        max(ifnull(per_capita,0)) max_per,
        min(ifnull(per_capita,0)) min_per,
        max(ifnull(comments_counts,0)) max_com,
        min(ifnull(comments_counts,0)) min_com,
        max(ifnull(score,0)) max_sco,
        min(ifnull(score,0)) min_sco
    from beijing_all) as trans) as total
where tran_sco >= 7
order by sum desc;
高性价比店铺(部分)
# 热门店铺的热评
select comment1,ifnull(count1,0)+ifnull(count2,0) count3
from 
(select comment1,count(comment1) count1 from beijing_all_good_all_comments group by comment1) a1
left join
(select comment2,count(comment2) count2 from beijing_all_good_all_comments group by comment2) a2
on comment1 = comment2
union
select comment2,ifnull(count1,0)+ifnull(count2,0) count3
from 
(select comment1,count(comment1) count1 from beijing_all_good_all_comments group by comment1) a1
right join
(select comment2,count(comment2) count2 from beijing_all_good_all_comments group by comment2) a2
on comment1 = comment2
order by count3 desc;

最后放上本项目中其他提数的sql语句


参考:

https://www.jianshu.com/p/8855dae7de2c

上一篇 下一篇

猜你喜欢

热点阅读