用 tidyverse 的方式玩转 Python 数据处理

import numpy as np
import pandas as pd
from plydata import define, query, if_else, ply

# NOTE: query is the equivalent of dplyr's filter, but the expressions
#       are written in (slightly different) Python syntax

df = pd.DataFrame({
    'x': [0, 1, 2, 3],
    'y': ['zero', 'one', 'two', 'three']})

df >> define(z='x')
   x      y  z
0  0   zero  0
1  1    one  1
2  2    two  2
3  3  three  3

df >> define(z=if_else('x > 1', 1, 0))
   x      y  z
0  0   zero  0
1  1    one  0
2  2    two  1
3  3  three  1

# You can pass the dataframe as the first argument
query(df, 'x > 1')  # same as `df >> query('x > 1')`
   x      y
2  2    two
3  3  three

# You can use the ply function instead of the >> operator
ply(df,
    define(z=if_else('x > 1', 1, 0)),
    query('z == 1')
)
   x      y  z
2  2    two  1
3  3  three  1

from plotnine import ggplot, aes, geom_line

df = pd.DataFrame({'x': np.linspace(0, 2*np.pi, 500)})

(df
 >> define(y='np.sin(x)')
 >> define(sign=if_else('y >= 0', '"positive"', '"negative"'))
 >> (ggplot(aes('x', 'y'))
     + geom_line(aes(color='sign'), size=1.5))
)


machow/siuba: Python library for using dplyr like syntax with pandas and SQL (https://github.com/machow/siuba)

from siuba import group_by, summarize, _
from siuba.data import mtcars

(mtcars
  >> group_by(_.cyl)
  >> summarize(avg_hp = _.hp.mean())
  )


python 学习之 python 里也能用 dplyr?

Python and Tidyverse

import pandas
from dplython import (DplyFrame, X, diamonds, select, sift, sample_n,
    sample_frac, head, arrange, mutate, group_by, summarize, DelayFunction) 

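# NOTE(review): the definition of diamondsSmall was lost in extraction; the
# select below is reconstructed from the column order of the printed output.
diamondsSmall = diamonds >> select(X.carat, X.cut, X.price, X.color,
                                   X.clarity, X.depth, X.table)
print(diamondsSmall >> head(4))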

##    carat      cut  price color clarity  depth  table
## 0   0.23    Ideal    326     E     SI2   61.5   55.0
## 1   0.21  Premium    326     E     SI1   59.8   61.0
## 2   0.23     Good    327     E     VS1   56.9   65.0
## 3   0.29  Premium    334     I     VS2   62.4   58.0

