Pandas实现Excel一行变多行

2023-03-14  本文已影响0人  Viterbi

Pandas实现Excel一行变多行

import pandas as pd
import numpy as np

df = pd.DataFrame({
    "id": np.arange(10),
    "name": ["a","b"]*5,
    "a": [f"a{i}" for i in range(10)],
    "b": [f"b{i}" for i in range(10)],
    "aa": [f"aa{i}" for i in range(10)],
    "bb": [f"bb{i}" for i in range(10)],
    "aaa": [f"aaa{i}" for i in range(10)],
    "bbb": [f"bbb{i}" for i in range(10)]
})

# 1. 这是原始的数据
df
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
id name a b aa bb aaa bbb
0 0 a a0 b0 aa0 bb0 aaa0 bbb0
1 1 b a1 b1 aa1 bb1 aaa1 bbb1
2 2 a a2 b2 aa2 bb2 aaa2 bbb2
3 3 b a3 b3 aa3 bb3 aaa3 bbb3
4 4 a a4 b4 aa4 bb4 aaa4 bbb4
5 5 b a5 b5 aa5 bb5 aaa5 bbb5
6 6 a a6 b6 aa6 bb6 aaa6 bbb6
7 7 b a7 b7 aa7 bb7 aaa7 bbb7
8 8 a a8 b8 aa8 bb8 aaa8 bbb8
9 9 b a9 b9 aa9 bb9 aaa9 bbb9
# 2. 把要拆分的列,merge到一列
def merge_cols(x):
    y = x.values
    result = []
    for idx in range(0, len(y), 2):
        result.append(f"{y[idx]},{y[idx+1]}")
    return "#".join(result)
        
df["merge"] = df.loc[:, "a":"bbb"].apply(merge_cols, axis=1)
df
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
id name a b aa bb aaa bbb merge
0 0 a a0 b0 aa0 bb0 aaa0 bbb0 a0,b0#aa0,bb0#aaa0,bbb0
1 1 b a1 b1 aa1 bb1 aaa1 bbb1 a1,b1#aa1,bb1#aaa1,bbb1
2 2 a a2 b2 aa2 bb2 aaa2 bbb2 a2,b2#aa2,bb2#aaa2,bbb2
3 3 b a3 b3 aa3 bb3 aaa3 bbb3 a3,b3#aa3,bb3#aaa3,bbb3
4 4 a a4 b4 aa4 bb4 aaa4 bbb4 a4,b4#aa4,bb4#aaa4,bbb4
5 5 b a5 b5 aa5 bb5 aaa5 bbb5 a5,b5#aa5,bb5#aaa5,bbb5
6 6 a a6 b6 aa6 bb6 aaa6 bbb6 a6,b6#aa6,bb6#aaa6,bbb6
7 7 b a7 b7 aa7 bb7 aaa7 bbb7 a7,b7#aa7,bb7#aaa7,bbb7
8 8 a a8 b8 aa8 bb8 aaa8 bbb8 a8,b8#aa8,bb8#aaa8,bbb8
9 9 b a9 b9 aa9 bb9 aaa9 bbb9 a9,b9#aa9,bb9#aaa9,bbb9
# 3. 把不用的列删除掉
drop_names = list(df.loc[:, "a":"bbb"].columns.values)
df.drop(drop_names, axis=1, inplace=True)
# 进行merge列拆分,然后explode
df["merge"] = df["merge"].str.split("#")
df.explode("merge")
df
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
id name merge
0 0 a [a0,b0, aa0,bb0, aaa0,bbb0]
1 1 b [a1,b1, aa1,bb1, aaa1,bbb1]
2 2 a [a2,b2, aa2,bb2, aaa2,bbb2]
3 3 b [a3,b3, aa3,bb3, aaa3,bbb3]
4 4 a [a4,b4, aa4,bb4, aaa4,bbb4]
5 5 b [a5,b5, aa5,bb5, aaa5,bbb5]
6 6 a [a6,b6, aa6,bb6, aaa6,bbb6]
7 7 b [a7,b7, aa7,bb7, aaa7,bbb7]
8 8 a [a8,b8, aa8,bb8, aaa8,bbb8]
9 9 b [a9,b9, aa9,bb9, aaa9,bbb9]
# 4. 将数据explode还原成多列
df_explode = df.explode("merge")
df_explode["a"]=df_explode["merge"].str.split(",").str[0]
df_explode["b"]=df_explode["merge"].str.split(",").str[1]
df_explode.drop("merge", axis=1)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
id name a b
0 0 a a0 b0
0 0 a aa0 bb0
0 0 a aaa0 bbb0
1 1 b a1 b1
1 1 b aa1 bb1
1 1 b aaa1 bbb1
2 2 a a2 b2
2 2 a aa2 bb2
2 2 a aaa2 bbb2
3 3 b a3 b3
3 3 b aa3 bb3
3 3 b aaa3 bbb3
4 4 a a4 b4
4 4 a aa4 bb4
4 4 a aaa4 bbb4
5 5 b a5 b5
5 5 b aa5 bb5
5 5 b aaa5 bbb5
6 6 a a6 b6
6 6 a aa6 bb6
6 6 a aaa6 bbb6
7 7 b a7 b7
7 7 b aa7 bb7
7 7 b aaa7 bbb7
8 8 a a8 b8
8 8 a aa8 bb8
8 8 a aaa8 bbb8
9 9 b a9 b9
9 9 b aa9 bb9
9 9 b aaa9 bbb9

本文使用 文章同步助手 同步

上一篇下一篇

猜你喜欢

热点阅读