特征工程

时间序列挖掘分析:tsfresh特征中文(一)

2019-03-05  本文已影响13人  司马山哥

tsfresh是一个用于时间序列特征挖掘的Python模块(官网:https://tsfresh.readthedocs.io/en/latest/index.html ,可通过pip install tsfresh安装)。提取出的特征既可以用来描述时间序列,也可以作为依据对时间序列进行聚类;此外,它们还可以用于构建在时间序列上执行分类/回归任务的模型。通常,这些特征能为理解时间序列及其动态特性提供新的见解。该项目总共涉及64个特征,前32个特征由我翻译和调试,后32个特征由我的同事托马斯翻译。我们在GitHub上存放了项目资料和介绍:https://github.com/SimaShanhe/tsfresh-feature-translation/

abs_energy(x)

E=\sum_{i=1}^n x_i^2

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# abs_energy: sum of the squared values, E = sum(x_i^2).
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.abs_energy(ts)

absolute_sum_of_changes(x)

\sum_{i=1}^{n-1} |x_{i+1}-x_i|

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# absolute_sum_of_changes: sum of |x_{i+1} - x_i| over consecutive points.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.absolute_sum_of_changes(ts)

agg_autocorrelation(x, param)

R(l)=\frac{1}{(n-l)\sigma^2}\sum_{i=1}^{n-l}(x_i-\mu)(x_{i+l}-\mu)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: aggregate the autocorrelations R(l) with 'mean', maxlag 2.
param = [
    {'f_agg': 'mean', 'maxlag': 2},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.agg_autocorrelation(ts, param)

agg_linear_trend(x, param)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: 'pvalue' attribute, chunks of length 2 aggregated with 'mean'.
param = [
    {'f_agg': 'mean', 'attr': 'pvalue', 'chunk_len': 2},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.agg_linear_trend(ts, param)
# Show the returned object itself, followed by its materialized contents.
contents = list(ae)
print(ae, contents)

——————————————————————————————————————————————————————————

approximate_entropy(x, m, r)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# approximate_entropy(x, m, r) evaluated with m=10 and r=0.1.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.approximate_entropy(ts, m=10, r=0.1)

——————————————————————————————————————————————————————————

ar_coefficient(x, param)

X_t=\psi_0+\sum_{i=1}^k \psi_i X_{t-i} + \varepsilon_t

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: coefficient index 0 of an AR model with k=10 lagged terms.
param = [
    {'coeff': 0, 'k': 10},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.ar_coefficient(ts, param)

augmented_dickey_fuller(x, param)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: the 'pvalue' attribute of the test result.
param = [
    {'attr': 'pvalue'},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.augmented_dickey_fuller(ts, param)

autocorrelation(x, lag)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# autocorrelation(x, lag) evaluated at lag 2.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.autocorrelation(ts, lag=2)

binned_entropy(x, max_bins)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# binned_entropy(x, max_bins) evaluated with max_bins=10.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.binned_entropy(ts, max_bins=10)

c3(x, lag)

\frac{1}{n-2lag}\sum_{i=1}^{n-2lag}x_{i+2lag}\cdot x_{i+lag}\cdot x_i
等同于计算
\mathbb{E}[L^2(X)L(X)X]
其中L为时滞算子。

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# c3(x, lag) evaluated with lag=2.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.c3(ts, lag=2)

change_quantiles(x, ql, qh, isabs, f_agg)

#!/usr/bin/python3
import numpy as np
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# change_quantiles(x, ql, qh, isabs, f_agg): corridor between the 0.05 and
# 0.95 quantiles, signed changes (isabs=False), aggregated with 'mean'.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.change_quantiles(
    ts,
    ql=0.05,
    qh=0.95,
    isabs=False,
    f_agg='mean',
)

cid_ce(x, normalize)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# cid_ce(x, normalize) evaluated with normalize=True.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.cid_ce(ts, normalize=True)

count_above_mean(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the count_above_mean calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.count_above_mean(ts)

count_below_mean(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the count_below_mean calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.count_below_mean(ts)

cwt_coefficients(x, param)

\frac{2}{\sqrt{3a}\pi^{\frac{1}{4}}}(1-\frac{x^2}{a^2})\exp(-\frac{x^2}{2a^2})
其中,a是小波变换函数中的宽度参数。

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: wavelet widths (2, 2, 2), coefficient index 2, width w=2.
param = [
    {'widths': (2, 2, 2), 'coeff': 2, 'w': 2},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.cwt_coefficients(ts, param)
# Materialize the returned iterable so its contents are printed.
contents = list(ae)
print(contents)

energy_ratio_by_chunks(x, param)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: 10 segments, focusing on segment index 5.
param = [
    {'num_segments': 10, 'segment_focus': 5},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.energy_ratio_by_chunks(ts, param)

fft_aggregated(x, param)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: the 'skew' aggregation type.
param = [
    {'aggtype': 'skew'},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.fft_aggregated(ts, param)
# Materialize the returned iterable so its contents are printed.
contents = list(ae)
print(contents)

fft_coefficient(x, param)

A_k=\sum_{m=0}^{n-1}a_m\exp{(-2\pi i\frac{mk}{n})}

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: the 'angle' attribute of Fourier coefficient A_2.
param = [
    {'coeff': 2, 'attr': 'angle'},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.fft_coefficient(ts, param)
# Materialize the returned iterable so its contents are printed.
contents = list(ae)
print(contents)

first_location_of_maximum(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the first_location_of_maximum calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.first_location_of_maximum(ts)

first_location_of_minimum(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the first_location_of_minimum calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.first_location_of_minimum(ts)

friedrich_coefficients(x, param)

<font face="黑体" color=red size=5>调用接口未成功</font>

\dot{x}(t)=h(x(t))+\mathcal{N}(0,R)

#!/usr/bin/python3
import tsfresh as tsf
import pandas as pd

ts = pd.Series(x)  # x is assumed to have been obtained already
# `param` must be defined before the call; without it the snippet stops with
# a NameError. Per the tsfresh documentation each dict carries three keys:
#   'm'     - order of the polynomial fitted to the deterministic dynamics h
#   'r'     - number of quantiles used for averaging
#   'coeff' - index of the polynomial coefficient to return
# m=3 and r=30 are the values used in tsfresh's default feature settings.
param = [{'coeff': 0, 'm': 3, 'r': 30}]
ae = tsf.feature_extraction.feature_calculators.friedrich_coefficients(ts, param)

has_duplicate(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the has_duplicate calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.has_duplicate(ts)

has_duplicate_max(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the has_duplicate_max calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.has_duplicate_max(ts)

has_duplicate_min(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the has_duplicate_min calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.has_duplicate_min(ts)

index_mass_quantile(x, param)

#!/usr/bin/python3
import tsfresh as tsf
import pandas as pd

ts = pd.Series(x)  # x is assumed to have been obtained already
# q is a fraction of the series' total mass and must lie in [0, 1]. The value
# 0.5 asks for the relative index at which half of the mass lies to the left
# (the mass centre). A value such as 50 can never be reached by the
# normalised cumulative mass, so it yields a meaningless result.
param = [{'q': 0.5}]
ae = tsf.feature_extraction.feature_calculators.index_mass_quantile(ts, param)

kurtosis(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the kurtosis calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.kurtosis(ts)

large_standard_deviation(x, r)

std(x)>r*(max(x)-min(x))

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Checks std(x) > r * (max(x) - min(x)) with r = 0.2.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.large_standard_deviation(ts, r=0.2)

last_location_of_maximum(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the last_location_of_maximum calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.last_location_of_maximum(ts)

last_location_of_minimum(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the last_location_of_minimum calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.last_location_of_minimum(ts)

length(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the length calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.length(ts)

linear_trend(x, param)

<font face="黑体" color=red size=5>调用接口未成功</font>

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# One request: the 'pvalue' attribute of the fitted linear trend.
param = [
    {'attr': 'pvalue'},
]
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.linear_trend(ts, param)

longest_strike_above_mean(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the longest_strike_above_mean calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.longest_strike_above_mean(ts)

longest_strike_below_mean(x)

#!/usr/bin/python3
import pandas as pd
import tsfresh as tsf

# The raw data x is assumed to have been obtained already.
ts = pd.Series(x)
# Evaluate the longest_strike_below_mean calculator on the series.
calculators = tsf.feature_extraction.feature_calculators
ae = calculators.longest_strike_below_mean(ts)
上一篇 下一篇

猜你喜欢

热点阅读