6种从Pandas 数据帧(DataFrame)中获取列名的方法

2020-02-19  本文已影响0人  python测试开发

从CSV文件导入数据

>>> import pandas as pd
>>> df = pd.read_csv('UN98.csv', index_col=0)
>>> df.head()
                region   tfr  contraception  educationMale  ...  economicActivityMale  economicActivityFemale  illiteracyMale  illiteracyFemale
Afghanistan       Asia  6.90            NaN            NaN  ...                  87.5                     7.2          52.800             85.00
Albania         Europe  2.60            NaN            NaN  ...                   NaN                     NaN             NaN               NaN
Algeria         Africa  3.81           52.0           11.1  ...                  76.4                     7.8          26.100             51.00
American.Samoa    Asia   NaN            NaN            NaN  ...                  58.8                    42.4           0.264              0.36
Andorra         Europe   NaN            NaN            NaN  ...                   NaN                     NaN             NaN               NaN

[5 rows x 13 columns]

注意:index_col=0 表示用 CSV 文件的第一列作为行索引(index),因此第一列不会再作为普通数据列出现。示例文件 'UN98.csv' 可以在 QQ 群 630011153 或 144081101 中找到。

获取列名的方法

>>> df.columns
Index(['region', 'tfr', 'contraception', 'educationMale', 'educationFemale',
       'lifeMale', 'lifeFemale', 'infantMortality', 'GDPperCapita',
       'economicActivityMale', 'economicActivityFemale', 'illiteracyMale',
       'illiteracyFemale'],
      dtype='object')
>>> 'tfr' in df.columns
True
>>> df.keys()
Index(['region', 'tfr', 'contraception', 'educationMale', 'educationFemale',
       'lifeMale', 'lifeFemale', 'infantMortality', 'GDPperCapita',
       'economicActivityMale', 'economicActivityFemale', 'illiteracyMale',
       'illiteracyFemale'],
      dtype='object')
>>> for col_name in df.columns: 
...      print(col_name)
... 
region
tfr
contraception
educationMale
educationFemale
lifeMale
lifeFemale
infantMortality
GDPperCapita
economicActivityMale
economicActivityFemale
illiteracyMale
illiteracyFemale

>>> list(df.columns)
['region', 'tfr', 'contraception', 'educationMale', 'educationFemale', 'lifeMale', 'lifeFemale', 'infantMortality', 'GDPperCapita', 'economicActivityMale', 'economicActivityFemale', 'illiteracyMale', 'illiteracyFemale']

>>> df.columns.values.tolist()
['region', 'tfr', 'contraception', 'educationMale', 'educationFemale', 'lifeMale', 'lifeFemale', 'infantMortality', 'GDPperCapita', 'economicActivityMale', 'economicActivityFemale', 'illiteracyMale', 'illiteracyFemale']
>>> sorted(df)
['GDPperCapita', 'contraception', 'economicActivityFemale', 'economicActivityMale', 'educationFemale', 'educationMale', 'illiteracyFemale', 'illiteracyMale', 'infantMortality', 'lifeFemale', 'lifeMale', 'region', 'tfr']

参考资料

根据列名获取列值

>>> df['tfr'].values
array([6.9 , 2.6 , 3.81,  nan,  nan, 6.69,  nan, 2.62, 1.7 , 1.89, 1.42,
       2.3 , 1.95, 2.97, 3.14, 1.73, 1.4 , 1.62, 3.66, 5.83, 5.89, 4.36,
       1.4 , 4.45, 2.17, 2.7 , 1.45, 6.57, 6.28, 4.5 , 5.3 , 1.61, 3.56,
       4.95, 5.51, 2.44, 1.8 , 2.69, 5.51, 5.87, 3.5 , 2.95, 1.6 , 1.55,
       2.31, 1.4 , 6.24, 1.82, 5.39,  nan, 2.8 , 4.32, 3.1 , 3.4 , 3.09,
       5.51, 5.34, 1.3 , 7.  , 2.76, 1.83, 1.63,  nan, 2.85, 5.4 , 5.2 ,
       8.  , 1.9 , 1.3 , 5.28, 1.38,  nan, 2.1 , 3.04, 4.9 , 6.61, 5.42,
       2.32, 4.6 , 4.3 , 1.32, 1.4 , 2.19, 3.07, 2.63, 4.77, 5.25, 1.8 ,
       2.75, 1.19, 5.1 , 2.44, 1.48, 5.13, 2.3 , 4.85, 3.8 , 2.1 , 1.65,
       2.77, 3.21, 6.69, 1.4 , 2.75, 4.86, 6.33, 5.92, 1.45, 1.5 , 1.76,
       1.6 , 1.9 , 5.65, 6.69, 3.24, 6.8 , 6.6 , 2.1 , 4.49, 2.  , 5.03,
       2.28, 2.75, 5.6 , 1.8 ,  nan, 3.27, 3.1 , 6.06, 3.3 , 4.9 , 4.95,
       1.55, 2.1 , 2.53, 2.02, 3.85, 7.1 , 5.97, 5.11, 1.88, 7.2 , 5.02,
       3.  , 2.63, 4.65, 4.17, 2.98, 3.62, 1.65, 1.48, 2.1 , 3.77, 2.1 ,
       1.4 , 1.35, 6.  , 2.63, 3.82, 3.8 ,  nan,  nan, 5.9 , 5.62, 2.59,
       6.06, 1.79, 1.5 , 1.3 , 4.98, 7.  , 3.81, 1.22, 2.1 , 3.86, 4.61,
       2.39, 4.46, 1.8 , 1.46, 4.  , 3.93, 5.48, 1.74, 6.08, 4.02, 2.1 ,
       2.92, 2.5 , 3.58,  nan, 7.1 , 1.38, 3.46, 1.72, 1.96, 2.25, 3.48,
       4.36, 2.98, 2.97, 3.03, 3.98, 7.6 , 1.8 , 5.49, 4.68])
>>> list(df['tfr'].values)
[6.9, 2.6, 3.81, nan, nan, 6.69, nan, 2.62, 1.7, 1.89, 1.42, 2.3, 1.95, 2.97, 3.14, 1.73, 1.4, 1.62, 3.66, 5.83, 5.89, 4.36, 1.4, 4.45, 2.17, 2.7, 1.45, 6.57, 6.28, 4.5, 5.3, 1.61, 3.56, 4.95, 5.51, 2.44, 1.8, 2.69, 5.51, 5.87, 3.5, 2.95, 1.6, 1.55, 2.31, 1.4, 6.24, 1.82, 5.39, nan, 2.8, 4.32, 3.1, 3.4, 3.09, 5.51, 5.34, 1.3, 7.0, 2.76, 1.83, 1.63, nan, 2.85, 5.4, 5.2, 8.0, 1.9, 1.3, 5.28, 1.38, nan, 2.1, 3.04, 4.9, 6.61, 5.42, 2.32, 4.6, 4.3, 1.32, 1.4, 2.19, 3.07, 2.63, 4.77, 5.25, 1.8, 2.75, 1.19, 5.1, 2.44, 1.48, 5.13, 2.3, 4.85, 3.8, 2.1, 1.65, 2.77, 3.21, 6.69, 1.4, 2.75, 4.86, 6.33, 5.92, 1.45, 1.5, 1.76, 1.6, 1.9, 5.65, 6.69, 3.24, 6.8, 6.6, 2.1, 4.49, 2.0, 5.03, 2.28, 2.75, 5.6, 1.8, nan, 3.27, 3.1, 6.06, 3.3, 4.9, 4.95, 1.55, 2.1, 2.53, 2.02, 3.85, 7.1, 5.97, 5.11, 1.88, 7.2, 5.02, 3.0, 2.63, 4.65, 4.17, 2.98, 3.62, 1.65, 1.48, 2.1, 3.77, 2.1, 1.4, 1.35, 6.0, 2.63, 3.82, 3.8, nan, nan, 5.9, 5.62, 2.59, 6.06, 1.79, 1.5, 1.3, 4.98, 7.0, 3.81, 1.22, 2.1, 3.86, 4.61, 2.39, 4.46, 1.8, 1.46, 4.0, 3.93, 5.48, 1.74, 6.08, 4.02, 2.1, 2.92, 2.5, 3.58, nan, 7.1, 1.38, 3.46, 1.72, 1.96, 2.25, 3.48, 4.36, 2.98, 2.97, 3.03, 3.98, 7.6, 1.8, 5.49, 4.68]

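重命名列名

注意:rename 默认返回一个新的 DataFrame,原来的 df 不会被修改;如需保留结果,请重新赋值(例如 df = df.rename(columns={'tfr': 'TFR'}))或传入 inplace=True。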

>>> df.rename(columns={'tfr': 'TFR'})
                region   TFR  contraception  educationMale  ...  economicActivityMale  economicActivityFemale  illiteracyMale  illiteracyFemale
Afghanistan       Asia  6.90            NaN            NaN  ...                  87.5                     7.2          52.800            85.000
Albania         Europe  2.60            NaN            NaN  ...                   NaN                     NaN             NaN               NaN
Algeria         Africa  3.81           52.0           11.1  ...                  76.4                     7.8          26.100            51.000
American.Samoa    Asia   NaN            NaN            NaN  ...                  58.8                    42.4           0.264             0.360
Andorra         Europe   NaN            NaN            NaN  ...                   NaN                     NaN             NaN               NaN
...                ...   ...            ...            ...  ...                   ...                     ...             ...               ...
Western.Sahara  Africa  3.98            NaN            NaN  ...                   NaN                     NaN             NaN               NaN
Yemen             Asia  7.60            7.0            NaN  ...                  80.6                     1.9          32.406            69.552
Yugoslavia      Europe  1.80            NaN            NaN  ...                   NaN                     NaN           1.782             9.072
Zambia          Africa  5.49           25.0            7.9  ...                   NaN                     NaN          14.400            28.700
Zimbabwe        Africa  4.68           48.0            NaN  ...                  77.7                    46.7           9.600            20.100

[207 rows x 13 columns]

上一篇下一篇

猜你喜欢

热点阅读