机器学习与数据挖掘

林轩田机器学习基石课程 - Pocket PLA算法 Python

2019-02-09  本文已影响54人  Spareribs

作业1:

Q1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of $w_{\text{pocket}}$ using the test set.
Please repeat your experiment for 2000 times, each with a different random seed.
What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.

# calculate error count
def calError(self, X, Y, W):
    """Count the samples whose predicted label differs from ``Y``.

    A sample is predicted as -1 when its score ``np.dot(X, W)`` is strictly
    negative and as +1 otherwise (a score of exactly 0 counts as +1).

    Args:
        X: Sample matrix, one row per sample.
        Y: Labels to compare against, one entry per row of ``X``.
        W: Weight vector applied to every row of ``X``.

    Returns:
        Number of entries where the predicted label is not equal to ``Y``.
    """
    is_negative = np.dot(X, W) < 0
    # Start from all +1 in Y's own dtype, then flip the negative scores.
    predicted = np.ones_like(Y)
    predicted[is_negative] = -1
    return np.sum(predicted != Y)

def pocket_pla_1(self, X_train, Y_train, X_test, Y_test, *,
                 n_runs=2000, n_updates=50):
    """Return the average test error rate of the pocket PLA weights.

    Every run shuffles the training set with its own seed (0 .. n_runs - 1),
    then runs PLA from zero weights while cycling through the shuffled
    samples.  The PLA vector is corrected on *every* misclassified sample;
    separately, the "pocket" remembers the vector with the fewest training
    mistakes seen so far.  A run ends after ``n_updates`` corrections, or
    earlier when a full pass over the data finds no mistake.

    Args:
        X_train: Training samples, one row per sample.
        Y_train: 1-D array of training labels (+1 / -1).
        X_test: Test samples, one row per sample.
        Y_test: 1-D array of test labels (+1 / -1).
        n_runs: Number of independent runs; run ``k`` uses seed ``k``.
        n_updates: Maximum number of PLA corrections per run.

    Returns:
        Mean over all runs of the pocket weights' error rate on the test set.
    """
    n_samples = X_train.shape[0]
    error_rates = []  # test error rate of each run

    for seed in range(n_runs):
        # A private generator keeps NumPy's global random state untouched,
        # and indexing the original arrays keeps each run's order a function
        # of its own seed only.
        order = np.random.RandomState(seed).permutation(n_samples)
        X, Y = X_train[order], Y_train[order]

        w = np.zeros(X_train.shape[1])  # current PLA vector
        pocket_w = w  # best vector so far
        pocket_err = self.calError(X, Y, pocket_w)

        updates = 0  # corrections performed in this run
        streak = 0  # consecutive correctly classified samples
        position = 0
        # streak == n_samples means a whole pass was mistake-free: converged.
        while updates < n_updates and streak < n_samples:
            x, y = X[position], Y[position]
            if np.dot(x, w) * y <= 0:  # misclassified (or on the boundary)
                w = w + y * x  # PLA always moves, better or not
                updates += 1
                streak = 0
                err = self.calError(X, Y, w)
                if err < pocket_err:  # only the pocket is selective
                    pocket_w, pocket_err = w, err
            else:
                streak += 1
            position = (position + 1) % n_samples

        # Same sign convention as calError: a score of exactly 0 is +1.
        predicted = np.where(np.dot(X_test, pocket_w) < 0, -1, 1)
        error_rates.append(np.mean(predicted != Y_test))

    return np.mean(error_rates)

作业2

Q2. Modify your algorithm to return $w_{50}$ (the PLA vector after 50 updates) instead of $w_{\text{pocket}}$ (the pocket vector) after 50 updates. Run the modified algorithm on D, and verify the performance using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a histogram to show error rate versus frequency. Compare your result to the previous problem and briefly discuss your findings.

def pocket_pla_2(self, X_train, Y_train, X_test, Y_test, *,
                 n_runs=2000, n_updates=50):
    """Return the average test error rate of the plain PLA weights.

    Every run shuffles the training set with its own seed (0 .. n_runs - 1),
    then runs PLA from zero weights while cycling through the shuffled
    samples, correcting the weights on every misclassified sample.  A run
    ends after ``n_updates`` corrections, or earlier when a full pass over
    the data finds no mistake.  The final PLA vector (not a pocket vector)
    is evaluated on the test set.

    Args:
        X_train: Training samples, one row per sample.
        Y_train: 1-D array of training labels (+1 / -1).
        X_test: Test samples, one row per sample.
        Y_test: 1-D array of test labels (+1 / -1).
        n_runs: Number of independent runs; run ``k`` uses seed ``k``.
        n_updates: Maximum number of PLA corrections per run.

    Returns:
        Mean over all runs of the final weights' error rate on the test set.
    """
    n_samples = X_train.shape[0]
    error_rates = []  # test error rate of each run

    for seed in range(n_runs):
        # A private generator keeps NumPy's global random state untouched,
        # and indexing the original arrays keeps each run's order a function
        # of its own seed only.
        order = np.random.RandomState(seed).permutation(n_samples)
        X, Y = X_train[order], Y_train[order]

        w = np.zeros(X_train.shape[1])
        updates = 0  # corrections performed in this run
        streak = 0  # consecutive correctly classified samples
        position = 0
        # streak == n_samples means a whole pass was mistake-free: converged.
        while updates < n_updates and streak < n_samples:
            x, y = X[position], Y[position]
            if np.dot(x, w) * y <= 0:  # misclassified (or on the boundary)
                w = w + y * x
                updates += 1
                streak = 0
            else:
                streak += 1
            position = (position + 1) % n_samples

        # A score of exactly 0 is labelled +1 so every prediction is a
        # valid label instead of an always-wrong 0.
        predicted = np.where(np.dot(X_test, w) < 0, -1, 1)
        error_rates.append(np.mean(predicted != Y_test))

    return np.mean(error_rates)

作业3

Q3. Modify your algorithm in Problem 1 to run for 100 updates instead of 50, and verify the performance of $w_{\text{pocket}}$ using the test set.
Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a histogram to show error rate versus frequency. Compare your result to Problem 18 (Q1 above) and briefly discuss your findings.

def pocket_pla_3(self, X_train, Y_train, X_test, Y_test, *,
                 n_runs=2000, n_updates=100):
    """Return the average test error rate of the pocket PLA weights.

    Identical in procedure to the 50-update pocket experiment, but with a
    default budget of 100 corrections per run.  Every run shuffles the
    training set with its own seed (0 .. n_runs - 1), then runs PLA from
    zero weights while cycling through the shuffled samples.  The PLA vector
    is corrected on *every* misclassified sample; separately, the "pocket"
    remembers the vector with the fewest training mistakes seen so far.  A
    run ends after ``n_updates`` corrections, or earlier when a full pass
    over the data finds no mistake.

    Args:
        X_train: Training samples, one row per sample.
        Y_train: 1-D array of training labels (+1 / -1).
        X_test: Test samples, one row per sample.
        Y_test: 1-D array of test labels (+1 / -1).
        n_runs: Number of independent runs; run ``k`` uses seed ``k``.
        n_updates: Maximum number of PLA corrections per run.

    Returns:
        Mean over all runs of the pocket weights' error rate on the test set.
    """
    n_samples = X_train.shape[0]
    error_rates = []  # test error rate of each run

    for seed in range(n_runs):
        # A private generator keeps NumPy's global random state untouched,
        # and indexing the original arrays keeps each run's order a function
        # of its own seed only.
        order = np.random.RandomState(seed).permutation(n_samples)
        X, Y = X_train[order], Y_train[order]

        w = np.zeros(X_train.shape[1])  # current PLA vector
        pocket_w = w  # best vector so far
        pocket_err = self.calError(X, Y, pocket_w)

        updates = 0  # corrections performed in this run
        streak = 0  # consecutive correctly classified samples
        position = 0
        # streak == n_samples means a whole pass was mistake-free: converged.
        while updates < n_updates and streak < n_samples:
            x, y = X[position], Y[position]
            if np.dot(x, w) * y <= 0:  # misclassified (or on the boundary)
                w = w + y * x  # PLA always moves, better or not
                updates += 1
                streak = 0
                err = self.calError(X, Y, w)
                if err < pocket_err:  # only the pocket is selective
                    pocket_w, pocket_err = w, err
            else:
                streak += 1
            position = (position + 1) % n_samples

        # Same sign convention as calError: a score of exactly 0 is +1.
        predicted = np.where(np.dot(X_test, pocket_w) < 0, -1, 1)
        error_rates.append(np.mean(predicted != Y_test))

    return np.mean(error_rates)
上一篇下一篇

猜你喜欢

热点阅读