10. 日月光华 Python数据分析-Pandas-字符串方法
2023-07-09 本文已影响0人
薛东弗斯
import pandas as pd
import numpy as np
# Demo frame: two columns of comma-separated strings; the last row of k2 is
# missing (NaN).
data = pd.DataFrame(
    {
        "k1": [
            "beijing,haidian,chushou",
            "beijing,chaoyang,chushou",
            "beijing,fengtai,chuzu",
        ],
        "k2": [
            "beijing,xicheng,chuzu",
            "beijing,shijingshan,chushou",
            np.nan,
        ],
    }
)
data
# k1 k2
# 0 beijing,haidian,chushou beijing,xicheng,chuzu
# 1 beijing,chaoyang,chushou beijing,shijingshan,chushou
# 2 beijing,fengtai,chuzu NaN

# Series.str.replace swaps a substring inside every value; Series.replace
# without the .str accessor only matches whole values.
data["k1"] = data["k1"].str.replace("beijing", "shanghai")
data
# k1 k2
# 0 shanghai,haidian,chushou beijing,xicheng,chuzu
# 1 shanghai,chaoyang,chushou beijing,shijingshan,chushou
# 2 shanghai,fengtai,chuzu NaN

# Element-wise substring test; yields a boolean Series.
data["k1"].str.contains("chushou")
# 0 True
# 1 True
# 2 False
# Name: k1, dtype: bool

# Use that boolean Series as a row filter.
data.loc[data["k1"].str.contains("chushou")]
# k1 k2
# 0 shanghai,haidian,chushou beijing,xicheng,chuzu
# 1 shanghai,chaoyang,chushou beijing,shijingshan,chushou

# Other accessor methods are invoked the same way, e.g. data["k1"].str.strip().

# Break every value into a list of its comma-separated parts.
data["k1"].str.split(",")
# 0 [shanghai, haidian, chushou]
# 1 [shanghai, chaoyang, chushou]
# 2 [shanghai, fengtai, chuzu]
# Name: k1, dtype: object

# First five characters of every value (same as data["k1"].str[:5]).
data["k1"].str.slice(stop=5)
# 0 shang
# 1 shang
# 2 shang
# Name: k1, dtype: object

# The same prefix obtained by applying a plain Python slice to each element.
data["k1"].apply(lambda text: text[:5])
# 0 shang
# 1 shang
# 2 shang
# Name: k1, dtype: object
函数映射来转换数据
用于 Series 的 map 方法接受一个函数,或是一个包含映射关系的字典。如果数据中有些值是大写、有些是小写,应先用 str.lower 把所有的值变为小写再进行映射;本例中的区域名已经全部是小写,因此可以直接映射:
# Lookup table: district name -> the city it is mapped to.
quyu_to_chengshi = dict(
    chaoyang="beijing",
    huangpu="shanghai",
    jingan="shanghai",
    haidian="beijing",
    fengtai="beijing",
    tongzhou="beijing",
)

# Series of district names to translate (rebinds `data`).
data = pd.Series(
    [
        "chaoyang",
        "jingan",
        "huangpu",
        "jingan",
        "fengtai",
        "jingan",
        "tongzhou",
        "haidian",
        "jingan",
    ]
)
data
# 0 chaoyang
# 1 jingan
# 2 huangpu
# 3 jingan
# 4 fengtai
# 5 jingan
# 6 tongzhou
# 7 haidian
# 8 jingan
# dtype: object

# Map district to city by calling the dict's lookup once per element.
data.apply(quyu_to_chengshi.get)
# 0 beijing
# 1 shanghai
# 2 shanghai
# 3 shanghai
# 4 beijing
# 5 shanghai
# 6 beijing
# 7 beijing
# 8 shanghai
# dtype: object

# Series.map takes the dict itself and performs the same translation.
data.map(quyu_to_chengshi)
# 0 beijing
# 1 shanghai
# 2 shanghai
# 3 shanghai
# 4 beijing
# 5 shanghai
# 6 beijing
# 7 beijing
# 8 shanghai
# dtype: object