week1

2020-09-20  本文已影响0人  SunJi_

1. Matplotlib

import matplotlib.pyplot as plt

# Template snippet: a, b, c, x, y, y1 and y2 are placeholders for real values.
plt.figure(figsize=(a, b))
plt.subplot(2, 2, 1)
# Hollow scatter markers: no face fill, only a coloured edge.
plt.scatter(x, y, marker='o', facecolors='none', edgecolors='b', label='xxx')
# Line plot.
plt.plot(x, y, color='b', label='xxx')
# Shade the region between the two curves y1 and y2 (passing y1 twice would
# shade a zero-height band, i.e. nothing).
plt.fill_between(x, y1, y2, facecolor='pink', label='')

plt.legend(bbox_to_anchor=(a, b), loc=c)
plt.show()

2. LinearRegression

class PolynomialFeature(object):
    """
    Polynomial feature expansion.

    Maps every input row to all monomials of its components whose total
    degree is at most ``degree``, starting with the constant term 1.

    Example
    =======
    x =
    [[a, b],
    [c, d]]

    y = PolynomialFeature(degree=2).transform(x)
    y =
    [[1, a, b, a^2, a * b, b^2],
    [1, c, d, c^2, c * d, d^2]]
    """

    def __init__(self, degree=2):
        """
        Construct the polynomial feature transformer.

        Parameters
        ----------
        degree : int
            maximum total degree of the generated monomials
        """
        assert isinstance(degree, int)
        self.degree = degree

    def transform(self, x):
        """
        Expand the input into polynomial features.

        Parameters
        ----------
        x : (sample_size, n) or (sample_size,) array_like
            input samples; a 1-D input is treated as ``sample_size`` samples
            with a single component each

        Returns
        -------
        output : (sample_size, C(n + degree, degree)) ndarray
            polynomial features; for every k = 0..degree there is one column
            per multiset of k input components (combinations with
            replacement), ordered by increasing k
        """
        # Accept lists/tuples as well as ndarrays.
        x = np.asarray(x)
        if x.ndim == 1:
            x = x[:, None]
        # Iterate over input components (columns) rather than samples.
        columns = x.transpose()
        # Degree-0 term: a column of ones.
        features = [np.ones(len(x))]
        for degree in range(1, self.degree + 1):
            for items in itertools.combinations_with_replacement(columns, degree):
                # Element-wise product of the chosen components = one monomial.
                features.append(functools.reduce(np.multiply, items))
        return np.asarray(features).transpose()
    
class Regression:
    """
    Common parent type shared by the regression models.
    """
    
class LinearRegression(Regression):
    """
    Least-squares linear model.

    Predictions are ``y = X @ w``; targets are modelled as
    ``t ~ N(t | X @ w, var)``.
    """

    def fit(self, X:np.ndarray, t:np.ndarray):
        """
        Fit the weights by least squares.

        Stores the weight vector in ``self.w`` and the mean squared
        training residual in ``self.var``.

        Parameters
        ----------
        X : (N, D) np.ndarray
            training inputs (design matrix)
        t : (N,) np.ndarray
            training targets
        """
        # Least-squares solution via the pseudo-inverse of the design matrix.
        weights = np.linalg.pinv(X) @ t
        self.w = weights
        # Noise variance estimate: mean squared residual on the training set.
        residual = X @ weights - t
        self.var = np.mean(np.square(residual))

    def predict(self, X:np.ndarray, return_std:bool=False):
        """
        Predict targets for the given inputs.

        Parameters
        ----------
        X : (N, D) np.ndarray
            inputs to predict for
        return_std : bool, optional
            if True, also return the standard deviation of each prediction

        Returns
        -------
        y : (N,) np.ndarray
            predicted value for each sample
        y_std : (N,) np.ndarray
            standard deviation of each prediction (the same value for every
            sample); only returned when ``return_std`` is True
        """
        y = X @ self.w
        if not return_std:
            return y
        # Broadcast the single fitted standard deviation to the shape of y.
        y_std = np.zeros_like(y) + np.sqrt(self.var)
        return y, y_std
  1. 先创建数据(x_train、y_train、x_test、y_test)
# Inputs: 100 evenly spaced points on [0, 1]; train and test share the same grid.
x_test = x_train = np.linspace(0, 1, 100)
# Noisy training targets: sin(2*pi*x) plus Gaussian noise with standard deviation 0.25.
y_train = np.sin(2 * np.pi * x_train) + np.random.normal(scale=0.25, size=x_train.shape)
# Noise-free test targets.
y_test = np.sin(2 * np.pi * x_test)
  1. 利用PolynomialFeature类对数据进行多项式特征变换
# Expand the raw inputs into polynomial features. n (the polynomial degree) is a
# placeholder that must be set beforehand. The test inputs get the same expansion
# because the prediction step consumes x_test_poly.
x_train_poly = PolynomialFeature(degree=n).transform(x_train)
x_test_poly = PolynomialFeature(degree=n).transform(x_test)
  1. 创建一个LinearRegression实例,利用训练数据拟合得到权重w
# Fit the least-squares weights on the polynomial-expanded training data.
lr = LinearRegression()
lr.fit(x_train_poly,y_train)
  1. 利用训练好的参数,对测试数据进行预测
# Predict on the test set. x_test_poly must be the test inputs expanded with the
# same PolynomialFeature degree as the training data.
y_pred = lr.predict(x_test_poly)

3. Conclusion

当训练样本较少时,高阶多项式模型容易出现过拟合。增加样本数量或加入正则化项,可以有效缓解这个问题。

上一篇 下一篇

猜你喜欢

热点阅读