林轩田机器学习基石课程 - Pocket PLA算法 Python实现
作业1:
Q1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of w_pocket using the test set.
Please repeat your experiment for 2000 times, each with a different random seed.
What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.
# Count how many samples in (X, Y) the weight vector W misclassifies.
def calError(self, X, Y, W):
    """Return the number of misclassified samples under weights W.

    A sample's score is the dot product of its feature row with W;
    a non-negative score is predicted as +1, a negative score as -1,
    and the prediction is compared against the true label in Y.
    """
    scores = np.dot(X, W)
    predictions = np.where(scores < 0, -1, 1)  # sign with 0 -> +1
    return np.sum(predictions != Y)
# Q1: pocket PLA with 50 update attempts, averaged over 2000 random trials.
def pocket_pla_1(self, X_train, Y_train, X_test, Y_test):
    """Run the pocket PLA for 50 update attempts per trial, 2000 trials,
    and return the mean test-set error rate of the pocket weights.

    Parameters
    ----------
    X_train, Y_train : training features (bias column included — assumed,
        TODO confirm against the data loader) and their +/-1 labels.
        The training set must have at least 50 rows, since the update
        loop indexes the first 50 shuffled samples directly.
    X_test, Y_test : held-out features and labels for evaluation.

    Returns
    -------
    float : average fraction of misclassified test samples over all trials.
    """
    n_trials = 2000   # number of independent repetitions
    n_updates = 50    # update attempts per trial
    errors = []       # per-trial test error rates

    for trial in range(n_trials):  # 'trial' avoids shadowing builtin iter()
        np.random.seed(trial)  # different, reproducible seed per trial
        order = np.random.permutation(X_train.shape[0])
        X_train = X_train[order]  # shuffle samples; labels stay aligned
        Y_train = Y_train[order]

        W = np.zeros(X_train.shape[1])  # pocket weights
        min_err = self.calError(X_train, Y_train, W)  # best error seen so far
        for i in range(n_updates):
            # score * label <= 0 marks sample i as misclassified.
            if np.dot(X_train[i, :], W) * Y_train[i] <= 0:
                candidate = W + Y_train[i] * X_train[i, :]  # PLA correction
                cand_err = self.calError(X_train, Y_train, candidate)
                # Keep the candidate only if it beats the pocket weights.
                if cand_err < min_err:
                    W = candidate
                    min_err = cand_err

        # Evaluate the pocket weights on the test set via calError so a
        # zero score predicts +1; the original mapped only >0 and <0 and
        # left zero scores as the raw value 0, which always counted as
        # an error and disagreed with calError's convention.
        errors.append(self.calError(X_test, Y_test, W) / len(Y_test))

    return np.mean(errors)
作业2
Q2. Modify your algorithm to return w_50 (the PLA vector after 50 updates) instead of w (the pocket vector) after 50 updates. Run the modified algorithm on D, and verify the performance using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a histogram to show error rate versus frequency. Compare your result to the previous problem and briefly discuss your findings.
# Q2: plain PLA — keep w_50 (weights after 50 update attempts), not the pocket.
def pocket_pla_2(self, X_train, Y_train, X_test, Y_test):
    """Run plain PLA for 50 update attempts per trial, 2000 trials, and
    return the mean test-set error rate of the final weights w_50.

    Unlike the pocket variant, every correction is kept unconditionally.

    Parameters
    ----------
    X_train, Y_train : training features (bias column included — assumed,
        TODO confirm against the data loader) and their +/-1 labels.
        The training set must have at least 50 rows, since the update
        loop indexes the first 50 shuffled samples directly.
    X_test, Y_test : held-out features and labels for evaluation.

    Returns
    -------
    float : average fraction of misclassified test samples over all trials.
    """
    n_trials = 2000   # number of independent repetitions
    n_updates = 50    # update attempts per trial
    errors = []       # per-trial test error rates

    for trial in range(n_trials):  # 'trial' avoids shadowing builtin iter()
        np.random.seed(trial)  # different, reproducible seed per trial
        order = np.random.permutation(X_train.shape[0])
        X_train = X_train[order]  # shuffle samples; labels stay aligned
        Y_train = Y_train[order]

        W = np.zeros(X_train.shape[1])
        for i in range(n_updates):
            # score * label <= 0 marks sample i as misclassified.
            if np.dot(X_train[i, :], W) * Y_train[i] <= 0:
                W = W + Y_train[i] * X_train[i, :]  # always keep the update

        # Predict with sign(score), mapping a zero score to +1 so every
        # prediction is a valid label; the original left zero scores as
        # the raw value 0, which always counted as an error.
        Y_pred = np.where(np.dot(X_test, W) < 0, -1, 1)
        errors.append(np.mean(Y_pred != Y_test))

    return np.mean(errors)
作业3
Q3. Modify your algorithm in Problem 1 to run for 100 updates instead of 50, and verify the performance of w_pocket using the test set.
Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a histogram to show error rate versus frequency. Compare your result to Problem 1 and briefly discuss your findings.
# Q3: pocket PLA with 100 update attempts, averaged over 2000 random trials.
def pocket_pla_3(self, X_train, Y_train, X_test, Y_test):
    """Run the pocket PLA for 100 update attempts per trial, 2000 trials,
    and return the mean test-set error rate of the pocket weights.

    Identical to pocket_pla_1 except the update budget is 100.

    Parameters
    ----------
    X_train, Y_train : training features (bias column included — assumed,
        TODO confirm against the data loader) and their +/-1 labels.
        The training set must have at least 100 rows, since the update
        loop indexes the first 100 shuffled samples directly.
    X_test, Y_test : held-out features and labels for evaluation.

    Returns
    -------
    float : average fraction of misclassified test samples over all trials.
    """
    n_trials = 2000   # number of independent repetitions
    n_updates = 100   # update attempts per trial (doubled vs. Q1)
    errors = []       # per-trial test error rates

    for trial in range(n_trials):  # 'trial' avoids shadowing builtin iter()
        np.random.seed(trial)  # different, reproducible seed per trial
        order = np.random.permutation(X_train.shape[0])
        X_train = X_train[order]  # shuffle samples; labels stay aligned
        Y_train = Y_train[order]

        W = np.zeros(X_train.shape[1])  # pocket weights
        min_err = self.calError(X_train, Y_train, W)  # best error seen so far
        for i in range(n_updates):
            # score * label <= 0 marks sample i as misclassified.
            if np.dot(X_train[i, :], W) * Y_train[i] <= 0:
                candidate = W + Y_train[i] * X_train[i, :]  # PLA correction
                cand_err = self.calError(X_train, Y_train, candidate)
                # Keep the candidate only if it beats the pocket weights.
                if cand_err < min_err:
                    W = candidate
                    min_err = cand_err

        # Evaluate the pocket weights on the test set via calError so a
        # zero score predicts +1; the original mapped only >0 and <0 and
        # left zero scores as the raw value 0, which always counted as
        # an error and disagreed with calError's convention.
        errors.append(self.calError(X_test, Y_test, W) / len(Y_test))

    return np.mean(errors)