10. 日月光华 Python数据分析-Pandas-字符串方法

2023-07-09  本文已影响0人  薛东弗斯
import pandas as pd
import numpy as np

# Demo frame: every cell is a string of three comma-separated fields.
# Column k2 contains a missing value (NaN) in its last row.
data = pd.DataFrame({
    'k1': [
        'beijing,haidian,chushou',
        'beijing,chaoyang,chushou',
        'beijing,fengtai,chuzu',
    ],
    'k2': [
        'beijing,xicheng,chuzu',
        'beijing,shijingshan,chushou',
        np.nan,
    ],
})

data
#                          k1                           k2
# 0   beijing,haidian,chushou        beijing,xicheng,chuzu
# 1  beijing,chaoyang,chushou  beijing,shijingshan,chushou
# 2     beijing,fengtai,chuzu                          NaN

# Series.str.replace substitutes a substring inside every value, whereas
# plain Series.replace('beijing', ...) only replaces values that are equal
# to 'beijing' as a whole.
# regex=False states the literal match explicitly: the default of `regex`
# changed from True to False in pandas 2.0, so relying on it is
# version-dependent.
data['k1'] = data['k1'].str.replace('beijing', 'shanghai', regex=False)
data
#                           k1                           k2
# 0   shanghai,haidian,chushou        beijing,xicheng,chuzu
# 1  shanghai,chaoyang,chushou  beijing,shijingshan,chushou
# 2     shanghai,fengtai,chuzu                          NaN

# Substring test per row. na=False maps missing values to False so the
# result is always a pure boolean mask (without it, a NaN in the column
# propagates into the mask and boolean indexing fails).
data['k1'].str.contains('chushou', regex=False, na=False)
# 0     True
# 1     True
# 2    False
# Name: k1, dtype: bool

# Filter rows by string content using the boolean mask.
data[data['k1'].str.contains('chushou', regex=False, na=False)]
#                           k1                           k2
# 0   shanghai,haidian,chushou        beijing,xicheng,chuzu
# 1  shanghai,chaoyang,chushou  beijing,shijingshan,chushou

# data.k1.str.strip()  # would trim leading/trailing whitespace; not needed here

# Split every value on commas; each row becomes a list of fields.
data['k1'].str.split(',')
# 0     [shanghai, haidian, chushou]
# 1    [shanghai, chaoyang, chushou]
# 2       [shanghai, fengtai, chuzu]
# Name: k1, dtype: object

# Slicing through the .str accessor takes the first five characters of
# every value and passes missing values through as NaN.
data['k1'].str[:5]
# 0    shang
# 1    shang
# 2    shang
# Name: k1, dtype: object

# Same result via apply on this column. Unlike .str[:5], the lambda would
# raise TypeError on a NaN entry (a float cannot be sliced), so prefer the
# .str accessor for columns that may contain missing values.
data['k1'].apply(lambda x: x[:5])
# 0    shang
# 1    shang
# 2    shang
# Name: k1, dtype: object

函数映射来转换数据

用于 Series 的 map 方法接受一个函数,或是一个包含映射关系的字典。如果数据中有些值是大写、有些是小写,可以先用 str.lower 把所有的值变为小写再做映射;本例中的区域名全部是小写,因此可以直接映射:

# Lookup table: district name -> city it belongs to.
quyu_to_chengshi = {
    'chaoyang': 'beijing',
    'huangpu': 'shanghai',
    'jingan': 'shanghai',
    'haidian': 'beijing',
    'fengtai': 'beijing',
    'tongzhou': 'beijing',
}

# Series of district names to be translated into cities.
data = pd.Series([
    'chaoyang',
    'jingan',
    'huangpu',
    'jingan',
    'fengtai',
    'jingan',
    'tongzhou',
    'haidian',
    'jingan',
])
data
# 0    chaoyang
# 1      jingan
# 2     huangpu
# 3      jingan
# 4     fengtai
# 5      jingan
# 6    tongzhou
# 7     haidian
# 8      jingan
# dtype: object

# Map each district to its city by looking it up element-wise with dict.get.
data.apply(quyu_to_chengshi.get)
# 0     beijing
# 1    shanghai
# 2    shanghai
# 3    shanghai
# 4     beijing
# 5    shanghai
# 6     beijing
# 7     beijing
# 8    shanghai
# dtype: object

# Series.map accepts the dict directly and performs the same lookup.
data.map(quyu_to_chengshi)
# 0     beijing
# 1    shanghai
# 2    shanghai
# 3    shanghai
# 4     beijing
# 5    shanghai
# 6     beijing
# 7     beijing
# 8    shanghai
# dtype: object
上一篇下一篇

猜你喜欢

热点阅读