Pandas简单运用

2020-10-17  本文已影响0人  自学java的菜鸟小赵

直接复制到编辑器里面学习就好了,哪个函数不清楚再去了解

例1

import pandas as pd
import numpy as np

# Example 1: building, inspecting, indexing and extending a DataFrame.

# 3x4 frame holding 0..11; rows are labelled '1'..'3', columns 'a'..'d'.
data = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("123"), columns=list("abcd"))
print(data.head(1))  # first row only
# NOTE: `data.info` is referenced, not called, so this prints the bound
# method's repr (which embeds the frame). Call `data.info()` instead to get
# the concise per-column summary.
print(data.info)
print(data.describe())  # per-column summary statistics
# `by` selects the sort column; ascending=False sorts in descending order.
print(data.sort_values(by='c', ascending=False))
print("*" * 100)
# Label-based lookup in a single .loc call (avoids chained indexing).
print(data.loc['1', 'a'])
# Position-based lookup: second row, third column -> 6. A single .iloc call is
# used because integer keys on a label-indexed Series are no longer treated as
# positions by recent pandas versions.
print(data.iloc[1, 2])
print("*" * 100)

# Insert a constant column 'e' at position 4.
data.insert(4, 'e', 1)
print(data)
# Read one row.
print(data[:1])
# Read several rows by position.
print('读取多行\n', data.iloc[[1, 2]])
# Read several columns.
print(data[['a', 'b']])
# Read column 'a' and convert it to an ndarray.
print(np.array(data.a))
# Everything after the first row, flattened to a one-dimensional array.
print(np.array(data[1:]).flatten())

# Append a row under the new label 4 (setting with enlargement).
data.loc[4] = [1, 2, 3, 4, 5]
print('增加行\n', data)
# Append a column; the list length must match the number of rows.
data['f'] = [1, 2, 3, 4]
print('增加列\n', data)
print("*" * 100)
# A column can be read directly by its label.
print(data['b'])
print("*" * 100)
print(data[1:]['c'])
print('data.index->******', list(data.index))
print("*" * 100)
# Records with differing keys: missing fields become NaN in the frame.
data2 = [{"name": 'zhangsan', 'age': 12}, {'age': 12, 'tel': 113}, {"name": 'zhangsan', 'age': 12, 'tel': 113}]
data2_dataframe = pd.DataFrame(data2)
print(data2_dataframe)

print("*" * 100)
mean = data2_dataframe['tel'].mean()
print(mean)
# Without a column selection, mean() is computed per column. It is restricted
# to numeric columns here because 'name' holds strings, which cannot be
# averaged; NaNs are then filled with the mean of their own column.
data2_dataframe = data2_dataframe.fillna(data2_dataframe.mean(numeric_only=True))
print(data2_dataframe)

print("*" * 100)
data3 = [[1, 2, 3], [4, 5, 6], [1, 2, 3], [2, 3, 4]]
data4 = np.array(data3)
print(type(data4))
# Flatten to one dimension, then de-duplicate via a set.
array = np.array(data3).flatten()
print(set(array))

输出

E:\Python\python.exe E:/ideaproject/pythonProject/matplotlib/day04/pandas03.py
   a  b  c  d
1  0  1  2  3
<bound method DataFrame.info of    a  b   c   d
1  0  1   2   3
2  4  5   6   7
3  8  9  10  11>
         a    b     c     d
count  3.0  3.0   3.0   3.0
mean   4.0  5.0   6.0   7.0
std    4.0  4.0   4.0   4.0
min    0.0  1.0   2.0   3.0
25%    2.0  3.0   4.0   5.0
50%    4.0  5.0   6.0   7.0
75%    6.0  7.0   8.0   9.0
max    8.0  9.0  10.0  11.0
   a  b   c   d
3  8  9  10  11
2  4  5   6   7
1  0  1   2   3
****************************************************************************************************
0
6
****************************************************************************************************
   a  b   c   d  e
1  0  1   2   3  1
2  4  5   6   7  1
3  8  9  10  11  1
   a  b  c  d  e
1  0  1  2  3  1
读取多行
    a  b   c   d  e
2  4  5   6   7  1
3  8  9  10  11  1
   a  b
1  0  1
2  4  5
3  8  9
[0 4 8]
[ 4  5  6  7  1  8  9 10 11  1]
增加行
    a  b   c   d  e
1  0  1   2   3  1
2  4  5   6   7  1
3  8  9  10  11  1
4  1  2   3   4  5
增加列
    a  b   c   d  e  f
1  0  1   2   3  1  1
2  4  5   6   7  1  2
3  8  9  10  11  1  3
4  1  2   3   4  5  4
****************************************************************************************************
1    1
2    5
3    9
4    2
Name: b, dtype: int64
****************************************************************************************************
2     6
3    10
4     3
Name: c, dtype: int64
data.index->****** ['1', '2', '3', 4]
****************************************************************************************************
       name  age    tel
0  zhangsan   12    NaN
1       NaN   12  113.0
2  zhangsan   12  113.0
****************************************************************************************************
113.0
       name  age    tel
0  zhangsan   12  113.0
1       NaN   12  113.0
2  zhangsan   12  113.0
****************************************************************************************************
<class 'numpy.ndarray'>
{1, 2, 3, 4, 5, 6}

Process finished with exit code 0

例2

import pandas as pd
import numpy as np

# Example 2: index-aligned joins between two frames.

# df1: 2x4 block of ones, rows A-B, columns a-d.
df1 = pd.DataFrame(
    data=np.ones(shape=(2, 4)),
    index=["A", "B"],
    columns=["a", "b", "c", "d"],
)
print('df1\n', df1)

# df2: 3x3 block of zeros, rows A-C, columns x-z.
df2 = pd.DataFrame(
    data=np.zeros(shape=(3, 3)),
    index=["A", "B", "C"],
    columns=["x", "y", "z"],
)
print('df2\n', df2)

# join() aligns on the index and keeps the caller's rows (left join),
# so the result has df1's two rows.
data1 = df1.join(df2, how="left")
print('data1\n', data1)

# With df2 as the caller all three rows are kept; row C has no partner
# in df1, so its a-d cells are NaN.
data2 = df2.join(df1, how="left")
print('data2\n', data2)

输出

df1
      a    b    c    d
A  1.0  1.0  1.0  1.0
B  1.0  1.0  1.0  1.0
df2
      x    y    z
A  0.0  0.0  0.0
B  0.0  0.0  0.0
C  0.0  0.0  0.0
data1
      a    b    c    d    x    y    z
A  1.0  1.0  1.0  1.0  0.0  0.0  0.0
B  1.0  1.0  1.0  1.0  0.0  0.0  0.0
data2
      x    y    z    a    b    c    d
A  0.0  0.0  0.0  1.0  1.0  1.0  1.0
B  0.0  0.0  0.0  1.0  1.0  1.0  1.0
C  0.0  0.0  0.0  NaN  NaN  NaN  NaN

例3

import numpy as np
import pandas as pd

# Example 3: column-based merges (inner / outer / left / right).

# df1: 2x4 block of ones, rows A-B, columns a-d.
df1 = pd.DataFrame(np.ones((2, 4)), index=['A', 'B'], columns=list('abcd'))
print('df1\n', df1)

# df2: 3x3 frame holding 0..8, columns f, a, x (default integer index).
df2 = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list("fax"))
print('df2\n', df2)

# Inner join on column 'a' (the default): only keys present in both frames.
merge = df1.merge(df2, on='a')
print('merge\n', merge)

# Assign through a single .loc call. The chained form df1.loc['A']['a'] = 100
# writes to an intermediate object and is not guaranteed to modify df1.
df1.loc['A', 'a'] = 100
merge2 = df1.merge(df2, on='a')
print('merge2\n', merge2)

# Outer join: union of the keys from both frames.
merge_outer = df1.merge(df2, on='a', how='outer')
print('merge_outer\n', merge_outer)
# Left join: every row of df1 is kept.
merge_left = df1.merge(df2, on='a', how='left')
print('merge_left\n', merge_left)
# Right join: every row of df2 is kept.
merge_right = df1.merge(df2, on='a', how='right')
print('merge_right\n', merge_right)

输出

df1
      a    b    c    d
A  1.0  1.0  1.0  1.0
B  1.0  1.0  1.0  1.0
df2
    f  a  x
0  0  1  2
1  3  4  5
2  6  7  8
merge
      a    b    c    d  f  x
0  1.0  1.0  1.0  1.0  0  2
1  1.0  1.0  1.0  1.0  0  2
merge2
      a    b    c    d  f  x
0  1.0  1.0  1.0  1.0  0  2
merge_outer
        a    b    c    d    f    x
0  100.0  1.0  1.0  1.0  NaN  NaN
1    1.0  1.0  1.0  1.0  0.0  2.0
2    4.0  NaN  NaN  NaN  3.0  5.0
3    7.0  NaN  NaN  NaN  6.0  8.0
merge_left
        a    b    c    d    f    x
0  100.0  1.0  1.0  1.0  NaN  NaN
1    1.0  1.0  1.0  1.0  0.0  2.0
merge_right
      a    b    c    d  f  x
0  1.0  1.0  1.0  1.0  0  2
1  4.0  NaN  NaN  NaN  3  5
2  7.0  NaN  NaN  NaN  6  8

Process finished with exit code 0

例4

import pandas as pd
import numpy as np

# Example 4: grouping and aggregation.

# 4x6 frame holding 0..23, rows a-d, columns q, w, e, r, t, y.
data1 = pd.DataFrame(
    np.arange(24).reshape((4, 6)),
    index=list("abcd"),
    columns=list("qwerty"),
)
print('data1\n', data1)

# Overwrite the second and third rows with 100 so that they share the
# same value in column 'r' and fall into one group.
data1.iloc[1:3] = 100
print(data1)

grouped = data1.groupby('r')

# Iterating a GroupBy yields (group key, sub-frame) pairs.
for key, members in grouped:
    print(key, "*" * 100, members, sep="\n")

print("*" * 100)

# Number of rows per distinct value of 'r'.
count = grouped['r'].count()
print('count\n', count)

输出

data1
     q   w   e   r   t   y
a   0   1   2   3   4   5
b   6   7   8   9  10  11
c  12  13  14  15  16  17
d  18  19  20  21  22  23
     q    w    e    r    t    y
a    0    1    2    3    4    5
b  100  100  100  100  100  100
c  100  100  100  100  100  100
d   18   19   20   21   22   23
3
****************************************************************************************************
   q  w  e  r  t  y
a  0  1  2  3  4  5
21
****************************************************************************************************
    q   w   e   r   t   y
d  18  19  20  21  22  23
100
****************************************************************************************************
     q    w    e    r    t    y
b  100  100  100  100  100  100
c  100  100  100  100  100  100
****************************************************************************************************
count
 r
3      1
21     1
100    2
Name: r, dtype: int64

例5

import pandas as pd
import numpy as np

# Example 5: grouping/aggregation practice with hierarchical indexes.

data = pd.DataFrame(
    {
        'a': range(0, 7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': list("hjklmno"),
    }
)
print("data****\n", data)

# Two-level index (c, d); drop=False keeps 'c' and 'd' as ordinary columns too.
data2 = data.set_index(keys=['c', 'd'], drop=False)
print('data2****\n', data2)

# Series of column 'a' indexed by (d, c).
data3 = data.set_index(keys=['d', 'c'])['a']
print('data3*****\n', data3)

# Swap the two index levels, giving (c, d).
data4 = data3.swaplevel(i=-2, j=-1)
print('data4*****\n', data4)

# Select outer level 'one' first, then inner label 'h': a single row as a Series.
data5 = data2.loc['one'].loc['h']
print('data5*****\n', data5)

输出

data****
    a  b    c  d
0  0  7  one  h
1  1  6  one  j
2  2  5  one  k
3  3  4  two  l
4  4  3  two  m
5  5  2  two  n
6  6  1  two  o
data2****
        a  b    c  d
c   d              
one h  0  7  one  h
    j  1  6  one  j
    k  2  5  one  k
two l  3  4  two  l
    m  4  3  two  m
    n  5  2  two  n
    o  6  1  two  o
data3*****
 d  c  
h  one    0
j  one    1
k  one    2
l  two    3
m  two    4
n  two    5
o  two    6
Name: a, dtype: int64
data4*****
 c    d
one  h    0
     j    1
     k    2
two  l    3
     m    4
     n    5
     o    6
Name: a, dtype: int64
data5*****
 a      0
b      7
c    one
d      h
Name: h, dtype: object

Process finished with exit code 0
上一篇下一篇

猜你喜欢

热点阅读