polars
2023-08-24 本文已影响0人
hehehehe
窗口函数
In [67]: df.select(pl.col('names','id','distance').sort_by("distance", descending=False).over('id'))
Out[67]:
shape: (5, 3)
┌───────┬─────┬──────────┐
│ names ┆ id  ┆ distance │
│ ---   ┆ --- ┆ ---      │
│ str   ┆ str ┆ f64      │
╞═══════╪═════╪══════════╡
│ ham   ┆ A   ┆ 0.074971 │
│ foo   ┆ A   ┆ 0.664746 │
│ cc    ┆ B   ┆ 0.151077 │
│ egg   ┆ C   ┆ 0.355057 │
│ spam  ┆ B   ┆ 0.308573 │
└───────┴─────┴──────────┘
with_columns 保留原有列
In [167]: out = filtered.with_columns(
...: pl.col(["Name", "Speed"]).sort_by("Speed", descending=False).over(
...: "Type 1",
...: mapping_strategy = 'group_to_rows'),
...: pl.col('Name').cumcount().sort_by("Speed", descending=False).over(
...: "Type 1", mapping_strategy = 'group_to_rows')
...: .alias('rn')).sort('Type 1',descending=False)
...: print(out)
shape: (7, 4)
┌─────────────────────┬────────┬───────┬─────┐
│ Name                ┆ Type 1 ┆ Speed ┆ rn  │
│ ---                 ┆ ---    ┆ ---   ┆ --- │
│ str                 ┆ str    ┆ i64   ┆ u32 │
╞═════════════════════╪════════╪═══════╪═════╡
│ Exeggcute           ┆ Grass  ┆ 40    ┆ 0   │
│ Exeggutor           ┆ Grass  ┆ 55    ┆ 1   │
│ Jynx                ┆ Ice    ┆ 95    ┆ 0   │
│ Slowpoke            ┆ Water  ┆ 15    ┆ 0   │
│ Slowbro             ┆ Water  ┆ 30    ┆ 1   │
│ SlowbroMega Slowbro ┆ Water  ┆ 30    ┆ 2   │
│ Starmie             ┆ Water  ┆ 115   ┆ 3   │
└─────────────────────┴────────┴───────┴─────┘
select
In [199]: out = filtered.select(
...: pl.col(["Name",'Type 1','Speed']).sort_by("Speed", descending=True
...: ).over("Type 1",
...: mapping_strategy = 'group_to_rows'),
...: pl.col('Name').cumcount().sort_by("Speed", descending=False).over(
...: "Type 1", mapping_strategy = 'group_to_rows')
...: .alias('rn')).sort('Type 1',descending=False)
...: print(out)
shape: (7, 4)
┌─────────────────────┬────────┬───────┬─────┐
│ Name                ┆ Type 1 ┆ Speed ┆ rn  │
│ ---                 ┆ ---    ┆ ---   ┆ --- │
│ str                 ┆ str    ┆ i64   ┆ u32 │
╞═════════════════════╪════════╪═══════╪═════╡
│ Exeggutor           ┆ Grass  ┆ 55    ┆ 0   │
│ Exeggcute           ┆ Grass  ┆ 40    ┆ 1   │
│ Jynx                ┆ Ice    ┆ 95    ┆ 0   │
│ Starmie             ┆ Water  ┆ 115   ┆ 0   │
│ Slowbro             ┆ Water  ┆ 30    ┆ 1   │
│ SlowbroMega Slowbro ┆ Water  ┆ 30    ┆ 2   │
│ Slowpoke            ┆ Water  ┆ 15    ┆ 3   │
└─────────────────────┴────────┴───────┴─────┘
groupby
In [84]: df.groupby('id').agg(pl.col('names').sort_by('distance',descending=False),
...: pl.col('distance').sort_by('distance',descending=False))
Out[84]:
shape: (3, 3)
┌─────┬────────────────┬──────────────────────┐
│ id  ┆ names          ┆ distance             │
│ --- ┆ ---            ┆ ---                  │
│ str ┆ list[str]      ┆ list[f64]            │
╞═════╪════════════════╪══════════════════════╡
│ B   ┆ ["cc", "spam"] ┆ [0.151077, 0.308573] │
│ C   ┆ ["egg"]        ┆ [0.355057]           │
│ A   ┆ ["ham", "foo"] ┆ [0.074971, 0.664746] │
└─────┴────────────────┴──────────────────────┘
In [77]: df.sort("distance",descending=False).groupby("id").agg(pl.col('names')
...: ,pl.col('distance'))
Out[77]:
shape: (3, 3)
┌─────┬────────────────┬──────────────────────┐
│ id  ┆ names          ┆ distance             │
│ --- ┆ ---            ┆ ---                  │
│ str ┆ list[str]      ┆ list[f64]            │
╞═════╪════════════════╪══════════════════════╡
│ B   ┆ ["cc", "spam"] ┆ [0.151077, 0.308573] │
│ A   ┆ ["ham", "foo"] ┆ [0.074971, 0.664746] │
│ C   ┆ ["egg"]        ┆ [0.355057]           │
└─────┴────────────────┴──────────────────────┘