Pandas data cleaning demo

2019-02-16  本文已影响0人  geeker_leon
image
# coding:utf-8
"""Pandas data-cleaning demo.

Builds a small DataFrame of body measurements, then walks through three
common cleaning steps, printing the frame after each one:

1. drop fully duplicated rows,
2. fill a missing value with the column mean,
3. derive a new column rounded to two decimal places.
"""

from decimal import Decimal

import pandas as pd

# Quantization template: two digits after the decimal point.
TWO_PLACES = Decimal('0.00')

# Columns: name, height, bust, waist, hip.  The last two rows are exact
# duplicates and one waist value is missing, so there is something to clean.
df = pd.DataFrame({
    '姓名': ['小乔', '貂蝉', '虞姬', '甄姬', '大乔', '大乔'],
    '身高': [158, 165, 167, 164, 163, 163],
    '胸围': [80, 85, 88, 86, 83, 83],
    '腰围': [65, 70, None, 72, 68, 68],
    '臀围': [83, 90, 92, 88, 88, 88],
})
print(df)

# Remove duplicated rows (the first occurrence is kept).
df.drop_duplicates(inplace=True)
print(df)

# Fill the missing waist value with the mean of the remaining values.
# Assign the result back to the column: calling fillna(inplace=True) on
# df['腰围'] is chained assignment, which operates on a temporary object and
# does not update df under pandas Copy-on-Write.
df['腰围'] = df['腰围'].fillna(df['腰围'].mean())
print(df)

# Add a new column: (bust + waist + hip) / height * 100, stored as a Decimal
# quantized to two places.  Going through str() avoids carrying the float's
# binary representation error into the Decimal.
measurement_ratio = (df['胸围'] + df['腰围'] + df['臀围']) / df['身高'] * 100
df['三围指数'] = measurement_ratio.apply(
    lambda x: Decimal(str(x)).quantize(TWO_PLACES))
print(df)
上一篇 | 下一篇

猜你喜欢

热点阅读