Pandas data cleaning demo
2019-02-16 本文已影响0人
geeker_leon
image
# coding:utf-8
# Small pandas data-cleaning walkthrough: build a sample frame, drop the
# duplicated row, fill the missing value with the column mean, then derive a
# new column from the cleaned measurements.
from decimal import Decimal

import pandas as pd

# Sample data: the last two rows are identical and one waist value is missing.
df = pd.DataFrame({
    '姓名': ['小乔', '貂蝉', '虞姬', '甄姬', '大乔', '大乔'],
    '身高': [158, 165, 167, 164, 163, 163],
    '胸围': [80, 85, 88, 86, 83, 83],
    '腰围': [65, 70, None, 72, 68, 68],
    '臀围': [83, 90, 92, 88, 88, 88],
})
print(df)

# Remove duplicated data (keeps the first occurrence of each repeated row).
df = df.drop_duplicates()
print(df)

# Fill None data with the mean of the remaining values in the column.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: the in-place form operates on an intermediate object and
# does not reliably update the parent frame on recent pandas versions.
df['腰围'] = df['腰围'].fillna(df['腰围'].mean())
print(df)

# Add new column: (bust + waist + hip) / height * 100, fixed to two decimal
# places. Going through str() avoids carrying binary float noise into Decimal.
TWO_PLACES = Decimal('0.00')
measurement_sum = df['胸围'] + df['腰围'] + df['臀围']
df['三围指数'] = (measurement_sum / df['身高'] * 100).apply(
    lambda x: Decimal(str(x)).quantize(TWO_PLACES))
print(df)