polars

2023-08-24  本文已影响0人  hehehehe

窗口函数(`over`)

`sort_by(...).over('id')` 会在每个 `id` 分组内部按 `distance` 升序排序,结果的行数与原表相同。

In [67]: df.select(pl.col('names','id','distance').sort_by("distance", descendin
    ...: g=False).over('id'))
Out[67]: 
shape: (5, 3)
┌───────┬─────┬──────────┐
│ names ┆ id  ┆ distance │
│ ---   ┆ --- ┆ ---      │
│ str   ┆ str ┆ f64      │
╞═══════╪═════╪══════════╡
│ ham   ┆ A   ┆ 0.074971 │
│ foo   ┆ A   ┆ 0.664746 │
│ cc    ┆ B   ┆ 0.151077 │
│ egg   ┆ C   ┆ 0.355057 │
│ spam  ┆ B   ┆ 0.308573 │
└───────┴─────┴──────────┘

with_columns:保留原有列(未在表达式中选中的列,如 `Type 1`,仍会出现在结果中)

In [167]: out = filtered.with_columns(
     ...:     pl.col(["Name", "Speed"]).sort_by("Speed", descending=False).over(
     ...: "Type 1",
     ...:    mapping_strategy = 'group_to_rows'),
     ...:    pl.col('Name').cumcount().sort_by("Speed", descending=False).over("
     ...: Type 1",   mapping_strategy = 'group_to_rows')
     ...: .alias('rn')).sort('Type 1',descending=False)
     ...: print(out)
shape: (7, 4)
┌─────────────────────┬────────┬───────┬─────┐
│ Name                ┆ Type 1 ┆ Speed ┆ rn  │
│ ---                 ┆ ---    ┆ ---   ┆ --- │
│ str                 ┆ str    ┆ i64   ┆ u32 │
╞═════════════════════╪════════╪═══════╪═════╡
│ Exeggcute           ┆ Grass  ┆ 40    ┆ 0   │
│ Exeggutor           ┆ Grass  ┆ 55    ┆ 1   │
│ Jynx                ┆ Ice    ┆ 95    ┆ 0   │
│ Slowpoke            ┆ Water  ┆ 15    ┆ 0   │
│ Slowbro             ┆ Water  ┆ 30    ┆ 1   │
│ SlowbroMega Slowbro ┆ Water  ┆ 30    ┆ 2   │
│ Starmie             ┆ Water  ┆ 115   ┆ 3   │
└─────────────────────┴────────┴───────┴─────┘

select:只返回表达式中选中的列(因此这里需要显式选中 `Type 1`)

In [199]: out = filtered.select(
     ...:     pl.col(["Name",'Type 1','Speed']).sort_by("Speed", descending=True
     ...: ).over("Type 1",
     ...:    mapping_strategy = 'group_to_rows'),
     ...:    pl.col('Name').cumcount().sort_by("Speed", descending=False).over("
     ...: Type 1",   mapping_strategy = 'group_to_rows')
     ...: .alias('rn')).sort('Type 1',descending=False)
     ...: print(out)
shape: (7, 4)
┌─────────────────────┬────────┬───────┬─────┐
│ Name                ┆ Type 1 ┆ Speed ┆ rn  │
│ ---                 ┆ ---    ┆ ---   ┆ --- │
│ str                 ┆ str    ┆ i64   ┆ u32 │
╞═════════════════════╪════════╪═══════╪═════╡
│ Exeggutor           ┆ Grass  ┆ 55    ┆ 0   │
│ Exeggcute           ┆ Grass  ┆ 40    ┆ 1   │
│ Jynx                ┆ Ice    ┆ 95    ┆ 0   │
│ Starmie             ┆ Water  ┆ 115   ┆ 0   │
│ Slowbro             ┆ Water  ┆ 30    ┆ 1   │
│ SlowbroMega Slowbro ┆ Water  ┆ 30    ┆ 2   │
│ Slowpoke            ┆ Water  ┆ 15    ┆ 3   │
└─────────────────────┴────────┴───────┴─────┘

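groupby(新版 Polars 中已更名为 `group_by`)

`agg` 会把每个分组的值聚合成列表;默认情况下各分组在输出中的顺序不固定(对比下面两次输出),如需保持顺序可传入 `maintain_order=True`。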

In [84]: df.groupby('id').agg(pl.col('names').sort_by('distance',descending=Fals
    ...: e),pl.col('distance').sort_by('distance',descending=False))
Out[84]: 
shape: (3, 3)
┌─────┬────────────────┬──────────────────────┐
│ id  ┆ names          ┆ distance             │
│ --- ┆ ---            ┆ ---                  │
│ str ┆ list[str]      ┆ list[f64]            │
╞═════╪════════════════╪══════════════════════╡
│ B   ┆ ["cc", "spam"] ┆ [0.151077, 0.308573] │
│ C   ┆ ["egg"]        ┆ [0.355057]           │
│ A   ┆ ["ham", "foo"] ┆ [0.074971, 0.664746] │
└─────┴────────────────┴──────────────────────┘

也可以先对整张表排序,再分组聚合,各分组内元素的顺序与上面一致:

In [77]:  df.sort("distance",descending=False).groupby("id").agg(pl.col('names')
    ...: ,pl.col('distance'))
Out[77]: 
shape: (3, 3)
┌─────┬────────────────┬──────────────────────┐
│ id  ┆ names          ┆ distance             │
│ --- ┆ ---            ┆ ---                  │
│ str ┆ list[str]      ┆ list[f64]            │
╞═════╪════════════════╪══════════════════════╡
│ B   ┆ ["cc", "spam"] ┆ [0.151077, 0.308573] │
│ A   ┆ ["ham", "foo"] ┆ [0.074971, 0.664746] │
│ C   ┆ ["egg"]        ┆ [0.355057]           │
└─────┴────────────────┴──────────────────────┘
上一篇下一篇

猜你喜欢

热点阅读