利用Python进行数据分析

布尔索引

2019-01-15  本文已影响7人  庵下桃花仙

布尔值数组的长度必须和它所索引的数组轴的长度一致。

In [1]: import numpy as np

In [2]: names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [3]: data = np.random.randn(7, 4)

In [4]: names
Out[4]: array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [5]: data
Out[5]:
array([[ 1.1110709 , -0.47574111, -0.32405598,  1.68851732],
       [ 0.18338712,  1.86099602,  0.16028324, -0.77338395],
       [ 1.30208873,  0.05613606,  0.09993033,  0.9296732 ],
       [-2.37345175,  0.7715262 ,  0.30284201, -0.11449909],
       [ 1.15309792,  0.57371153, -0.16297593, -0.922199  ],
       [ 0.30940118, -0.75740561, -0.30268499, -1.49159479],
       [ 1.30522225, -0.18000825,  0.40052368, -1.6624808 ]])

In [6]: names == 'Bob'
Out[6]: array([ True, False, False,  True, False, False, False])

In [7]: data[names == 'Bob']
Out[7]:
array([[ 1.1110709 , -0.47574111, -0.32405598,  1.68851732],
       [-2.37345175,  0.7715262 ,  0.30284201, -0.11449909]])

In [8]: data[names == 'Bob', 2:]
Out[8]:
array([[-0.32405598,  1.68851732],
       [ 0.30284201, -0.11449909]])

In [9]: data[names == 'Bob', 3]
Out[9]: array([ 1.68851732, -0.11449909])

In [10]: names != 'Bob'
Out[10]: array([False,  True,  True, False,  True,  True,  True])

In [11]: data[~(names == 'Bob')]
Out[11]:
array([[ 0.18338712,  1.86099602,  0.16028324, -0.77338395],
       [ 1.30208873,  0.05613606,  0.09993033,  0.9296732 ],
       [ 1.15309792,  0.57371153, -0.16297593, -0.922199  ],
       [ 0.30940118, -0.75740561, -0.30268499, -1.49159479],
       [ 1.30522225, -0.18000825,  0.40052368, -1.6624808 ]])

In [12]: cond = names == 'Bob'

In [13]: cond
Out[13]: array([ True, False, False,  True, False, False, False])

In [14]: data[~cond]
Out[14]:
array([[ 0.18338712,  1.86099602,  0.16028324, -0.77338395],
       [ 1.30208873,  0.05613606,  0.09993033,  0.9296732 ],
       [ 1.15309792,  0.57371153, -0.16297593, -0.922199  ],
       [ 0.30940118, -0.75740561, -0.30268499, -1.49159479],
       [ 1.30522225, -0.18000825,  0.40052368, -1.6624808 ]])

In [15]: mask = (names == 'Bob') | (names == 'Will')

In [16]: mask
Out[16]: array([ True, False,  True,  True,  True, False, False])

In [17]: data[mask]
Out[17]:
array([[ 1.1110709 , -0.47574111, -0.32405598,  1.68851732],
       [ 1.30208873,  0.05613606,  0.09993033,  0.9296732 ],
       [-2.37345175,  0.7715262 ,  0.30284201, -0.11449909],
       [ 1.15309792,  0.57371153, -0.16297593, -0.922199  ]])

使用布尔值索引选择数据时，总是生成数据的拷贝，即使返回的数组没有任何变化。Python 的关键字 and 和 or 对布尔值数组无效，组合多个布尔条件时应使用 &（与）和 |（或），并且每个比较条件都要用括号括起来。

In [18]: data[data < 0] = 0

In [19]: data
Out[19]:
array([[1.1110709 , 0.        , 0.        , 1.68851732],
       [0.18338712, 1.86099602, 0.16028324, 0.        ],
       [1.30208873, 0.05613606, 0.09993033, 0.9296732 ],
       [0.        , 0.7715262 , 0.30284201, 0.        ],
       [1.15309792, 0.57371153, 0.        , 0.        ],
       [0.30940118, 0.        , 0.        , 0.        ],
       [1.30522225, 0.        , 0.40052368, 0.        ]])

In [20]: data[names != 'Joe'] = 7

In [21]: data
Out[21]:
array([[7.        , 7.        , 7.        , 7.        ],
       [0.18338712, 1.86099602, 0.16028324, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.30940118, 0.        , 0.        , 0.        ],
       [1.30522225, 0.        , 0.40052368, 0.        ]])

上一篇 下一篇

猜你喜欢

热点阅读