pandas总结(一)——Series的使用
2017-06-06 本文已影响65人
SnailTyan
文章作者:Tyan
博客:noahsnail.com | CSDN | 简书
# pandas是一个用来进行数据分析的基于numpy的库
import pandas as pd
import numpy as np
# A Series is a one-dimensional data structure.
# Build a Series from a plain list. No index is passed here, so pandas
# supplies its default index.
series1 = pd.Series([3, 5, 'test', -5, 0.3])
# print() with a single argument gives the same output on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print(series1)
0 3
1 5
2 test
3 -5
4 0.3
dtype: object
# Build a Series from a list together with an explicit index: the labels in
# `index` are paired positionally with the values.
series2 = pd.Series([3, 5, 'test', -5, 0.3], index=['A', 'B', 'C', 'D', 'E'])
# Single-argument print() works identically on Python 2 and Python 3.
print(series2)
A 3
B 5
C test
D -5
E 0.3
dtype: object
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
# NOTE(review): 'Tecent' is presumably a typo for 'Tencent'; it is kept as-is
# because the later snippets look this exact label up.
companies = {'Baidu': 400, 'Alibaba': 500, 'Tecent': 600, 'Jingdong': 300}
series3 = pd.Series(companies)
# Single-argument print() works identically on Python 2 and Python 3.
print(series3)
Alibaba 500
Baidu 400
Jingdong 300
Tecent 600
dtype: int64
# Selecting data from a Series
# Select a single element by its index label.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print(series3['Baidu'])
# Select several elements at once by passing a list of labels.
print(series3[['Baidu', 'Tecent']])
400
Baidu 400
Tecent 600
dtype: int64
# Select data by condition: keep only the elements whose value is below 500.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print(series3[series3 < 500])
Baidu 400
Jingdong 300
dtype: int64
# How conditional selection works: the comparison is evaluated element-wise
# and yields a boolean Series; indexing with that boolean Series keeps the
# elements whose flag is True.
temp = series3 < 500
# Print the mask first, then the selection it produces. The mask is computed
# once and reused instead of evaluating the comparison twice.
print(temp)
print(series3[temp])
Alibaba False
Baidu True
Jingdong True
Tecent False
dtype: bool
Baidu 400
Jingdong 300
dtype: int64
# Assigning to a single Series element by label.
# The Python 2 statement `print a, b` writes the two items separated by one
# space. '%s %s' reproduces exactly that output while remaining valid on
# Python 3 (a plain print(a, b) would print a tuple on Python 2).
print('%s %s' % ('old value: ', series3['Baidu']))
series3['Baidu'] = 450
print('%s %s' % ('new value: ', series3['Baidu']))
old value: 400
new value: 450
# Assigning by condition: every element below 500 is overwritten with 500.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print('old series: ')
print(series3)
# The boolean mask on the left-hand side selects which elements are replaced;
# series3 is modified in place.
series3[series3 < 500] = 500
print('new series: ')
print(series3)
old series:
Alibaba 500
Baidu 400
Jingdong 300
Tecent 600
dtype: int64
new series:
Alibaba 500
Baidu 500
Jingdong 500
Tecent 600
dtype: int64
# Arithmetic on a Series is applied element-wise and keeps the index labels.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print('Division: ')
print(series3 / 2)
print('Square: ')
# Two equivalent ways to square every element: the ** operator and the
# numpy ufunc applied directly to the Series.
print(series3 ** 2)
print(np.square(series3))
Division:
Alibaba 250.0
Baidu 250.0
Jingdong 250.0
Tecent 300.0
dtype: float64
Square:
Alibaba 250000
Baidu 250000
Jingdong 250000
Tecent 360000
dtype: int64
Alibaba 250000
Baidu 250000
Jingdong 250000
Tecent 360000
dtype: int64
# Define a second Series: head-count per company. It contains one label
# ('Netease') more than the `companies` dict used for series3.
people = {'Baidu': 50000, 'Alibaba': 45000, 'Tecent': 60000, 'Jingdong': 80000, 'Netease': 30000}
series4 = pd.Series(people)
# Single-argument print() works identically on Python 2 and Python 3.
print(series4)
Alibaba 45000
Baidu 50000
Jingdong 80000
Netease 30000
Tecent 60000
dtype: int64
# Adding two Series aligns them by index label. series3 has no 'Netease'
# entry, so the result for that label is NaN.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print(series3 + series4)
Alibaba 45500.0
Baidu 50500.0
Jingdong 80500.0
Netease NaN
Tecent 60600.0
dtype: float64
# Check whether data for a label is missing: `in` on a Series tests whether
# the label is present in its index.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print('Netease' in series3)
print('Baidu' in series3)
False
True
# Find the elements that are null / non-null.
result = series3 + series4
# Boolean masks: notnull() flags the present values, isnull() the missing ones.
# (print() with one argument behaves the same on Python 2 and Python 3.)
print(result.notnull())
print(result.isnull())
# Index with a mask to keep only the missing entries ...
print(result[result.isnull()])
# ... or only the present ones. notnull() is the direct inverse of isnull(),
# so it replaces the indirect `result.isnull() != True` comparison.
print(result[result.notnull()])
Alibaba True
Baidu True
Jingdong True
Netease False
Tecent True
dtype: bool
Alibaba False
Baidu False
Jingdong False
Netease True
Tecent False
dtype: bool
Netease NaN
dtype: float64
Alibaba 45500.0
Baidu 50500.0
Jingdong 80500.0
Tecent 60600.0
dtype: float64