用Python做数据分析之DataFrame3——数据选取
2017-03-12 本文已影响0人
iCode_01
准备数据
import pandas as pd
import numpy as np
df =pd.read_excel('filePath/test.xlsx') #导入数据
print df
A B C
2017-01-01 0.743068 0.775753 0.586364
2017-01-02 0.726336 0.917315 0.770945
2017-01-03 0.448482 0.062748 0.792973
2017-01-04 0.481502 0.219382 0.835761
2017-01-05 0.475752 0.966919 0.491558
2017-01-06 0.885991 0.252072 0.913809
2017-01-07 0.076248 0.374731 0.595837
2017-01-08 0.395501 0.733482 0.228993
2017-01-09 0.390069 0.493331 0.069293
2017-01-10 0.679217 0.538165 0.376052
单列选取
#根据列名选取单列数据,并以Series的形式返回列数据
series_A =df['A']
print series_A
2017-01-01 0.743068
2017-01-02 0.726336
2017-01-03 0.448482
2017-01-04 0.481502
2017-01-05 0.475752
2017-01-06 0.885991
2017-01-07 0.076248
2017-01-08 0.395501
2017-01-09 0.390069
2017-01-10 0.679217
Freq: D, Name: A, dtype: float64
#根据列名选取单列数据,与上述方法的区别就是返回数据的类型是numpy.ndarray
array_A =df['A'].values
print array_A,type(array_A)
[ 0.74306764 0.72633612 0.44848208 0.48150193 0.47575176 0.88599117
0.07624825 0.39550126 0.39006884 0.67921732] <type 'numpy.ndarray'>
多列选取
#选取多列数据,返回的是DataFrame
print df[['A','B']]
A B
2017-01-01 0.743068 0.775753
2017-01-02 0.726336 0.917315
2017-01-03 0.448482 0.062748
2017-01-04 0.481502 0.219382
2017-01-05 0.475752 0.966919
2017-01-06 0.885991 0.252072
2017-01-07 0.076248 0.374731
2017-01-08 0.395501 0.733482
2017-01-09 0.390069 0.493331
2017-01-10 0.679217 0.538165
单行选取
#根据行索引(标签)获取单行数据,返回的是Series
#注:dates在前文中没有定义,这里指df的行索引,需先执行 dates = df.index
series_row = df.loc[dates[2]]
print series_row,type(series_row)
A 0.448482
B 0.062748
C 0.792973
Name: 2017-01-03 00:00:00, dtype: float64 <class 'pandas.core.series.Series'>
#根据行号(从0开始的位置)选取单行数据,返回的是Series
series_row = df.iloc[3] #返回第4行数据
print series_row,type(series_row)
A 0.481502
B 0.219382
C 0.835761
Name: 2017-01-04 00:00:00, dtype: float64 <class 'pandas.core.series.Series'>
#选取单行数据,返回是ndarray
array_row =df.iloc[3].values
print array_row,type(array_row)
[ 0.48150193 0.21938152 0.83576114] <type 'numpy.ndarray'>
单个元素选取
#通过行号、列号(均从0开始的位置)获取某个具体位置的数据,用iloc[i,j]或者iat[i,j]
iloc00 =df.iloc[0,0] #第1行第1列
print iloc00,type(iloc00)
iat11 =df.iat[1,1] #第2行第2列
print iat11,type(iat11)
0.74306764384 <type 'numpy.float64'>
0.917314866922 <type 'numpy.float64'>
#通过行索引(标签)与列名获取单个数据,用at(dates指df的行索引,即 dates = df.index)
print df.at[dates[2],'B']
0.0627479827402
#通过行索引切片与列名列表选取多行多列数据,用loc(标签切片包含结束标签),返回的是DataFrame
print(df.loc['20170101':'20170103',['A','B']])
A B
2017-01-01 0.743068 0.775753
2017-01-02 0.726336 0.917315
2017-01-03 0.448482 0.062748