pandas如何找到连续/不连续的0
2020-04-02 本文已影响0人
井底蛙蛙呱呱呱
import pandas as pd
# Sample data: twelve labelled rows with two 0/1 indicator columns.
# `names` is moved into the index so that DataFrame.apply only visits the
# numeric columns and results stay labelled A..L, which is how every output
# shown later in this article is displayed.
df = pd.DataFrame({
    'names': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L'],
    'col1': [0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0],
    'col2': [0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0],
}).set_index('names')
names col1 col2
A 0 0
B 1 0
C 0 0
D 1 0
E 1 1
F 1 0
G 0 1
H 0 0
I 0 1
J 1 0
K 0 0
L 0 0
def f(col, threshold=3):
    """Replace zeros that belong to short runs with ones.

    A "run" is a maximal stretch of consecutive equal values in ``col``.
    Every 0 that sits in a run shorter than ``threshold`` becomes 1; zeros in
    runs of at least ``threshold`` elements, and all non-zero values, are
    returned unchanged.

    Args:
        col: pandas Series to process (for example one column handed over
            by ``DataFrame.apply``).
        threshold: Minimum run length for a run of zeros to be kept as-is.

    Returns:
        A new Series with the same index as ``col``. ``col`` itself is not
        modified.
    """
    # A new run starts wherever a value differs from its predecessor; the
    # running total of those change points gives every run its own id.
    run_id = (col != col.shift()).cumsum()
    # Broadcast each run's element count back onto every member of the run.
    run_length = col.groupby(run_id).transform('count')
    # Only zeros are candidates; short runs of other values are left alone.
    short_zero_run = run_length.lt(threshold) & col.eq(0)
    # Series.mask builds a new object, so the caller's Series (and any
    # DataFrame it came from) is never written to as a side effect.
    return col.mask(short_zero_run, 1)
In [79]: df.apply(f, threshold=3)
Out[79]:
col1 col2
names
A 1 0
B 1 0
C 1 0
D 1 0
E 1 1
F 1 1
G 0 1
H 0 1
I 0 1
J 1 0
K 1 0
L 1 0
Step by step: how the mask inside `f` is built, using `col2` as the example
In [84]: col = df['col2']
In [85]: col
Out[85]:
names
A 0
B 0
C 0
D 0
E 1
F 0
G 1
H 0
I 1
J 0
K 0
L 0
Name: col2, dtype: int64
In [86]: (col != col.shift()).cumsum()
Out[86]:
names
A 1
B 1
C 1
D 1
E 2
F 3
G 4
H 5
I 6
J 7
K 7
L 7
Name: col2, dtype: int32
In [87]: col.groupby((col != col.shift()).cumsum()).transform('count')
Out[87]:
names
A 4
B 4
C 4
D 4
E 1
F 1
G 1
H 1
I 1
J 3
K 3
L 3
Name: col2, dtype: int64
In [88]: col.groupby((col != col.shift()).cumsum()).transform('count').lt(3)
Out[88]:
names
A False
B False
C False
D False
E True
F True
G True
H True
I True
J False
K False
L False
Name: col2, dtype: bool
In [89]: col.groupby((col != col.shift()).cumsum()).transform('count').lt(3) & col.eq(0)
Out[89]:
names
A False
B False
C False
D False
E False
F True
G False
H True
I False
J False
K False
L False
Name: col2, dtype: bool