统计学习方法之感知机Perceptron
2021-02-01 本文已影响0人
周恩国的学习笔记
1. 感知机模型详解
感知机由 Rosenblatt 于 1957 年提出,模型较为简单,是神经网络(NN)和支持向量机(SVM)的基础模型。
结构如下图
perceptron.jpg
定义:
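给定训练集合 $T=\{(x_1,y_1),(x_2,y_2),\dots,(x_N,y_N)\}$,其中 $x_i\in\mathbb{R}^n$,$y_i\in\{+1,-1\}$,感知机模型为 $f(x)=\operatorname{sign}(w\cdot x+b)$,其中 $w\in\mathbb{R}^n$ 为权值向量,$b\in\mathbb{R}$ 为偏置。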
2.原始学习方法
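一个常见的想法是给定误分类点集合 $M$,最小化损失函数 $L(w,b)=-\sum_{x_i\in M} y_i\,(w\cdot x_i+b)$。采用随机梯度下降:每次选取一个误分类点 $(x_i,y_i)$(即满足 $y_i(w\cdot x_i+b)\le 0$),按 $w\leftarrow w+\eta\,y_i x_i$,$b\leftarrow b+\eta\,y_i$ 更新,其中 $\eta\in(0,1]$ 为学习率。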
3.学习方法的对偶形式
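对偶形式的思想在于,将 $w$ 和 $b$ 表示为训练样本的线性组合:$w=\sum_{j=1}^{N}\alpha_j y_j x_j$,$b=\sum_{j=1}^{N}\alpha_j y_j$,其中 $\alpha_j=n_j\eta$,$n_j$ 为第 $j$ 个样本被误分类的次数。误分类条件为 $y_i\bigl(\sum_{j=1}^{N}\alpha_j y_j\,x_j\cdot x_i+b\bigr)\le 0$,此时更新 $\alpha_i\leftarrow\alpha_i+\eta$,$b\leftarrow b+\eta\,y_i$。样本间的内积 $x_j\cdot x_i$ 可以预先计算并存入 Gram 矩阵。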
4.代码实现
抽象类 classifier.py
感知机模型 perceptron.py
测试 test_perceptron.py
#classifier.py
class Classifier(metaclass=ABCMeta):
    """Abstract interface shared by every classifier.

    This class only declares the contract and cannot be instantiated.
    Use a concrete subclass that implements ``fit`` and ``predict``.
    """

    @abstractmethod
    def fit(self, X, y):
        """Train the classifier on samples ``X`` with labels ``y``.

        Parameters
        ----------
        X : array_like or sparse matrix, shape = [n_samples, n_features]
            The training samples. Any conversion of the input is up to
            the concrete subclass.
        y : array_like, length = n_samples
            The label of each training sample.

        Returns
        -------
        None
        """
        raise NotImplementedError

    @abstractmethod
    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array_like or sparse matrix, shape = [n_samples, n_features]
            The samples to classify. Any conversion of the input is up to
            the concrete subclass.

        Returns
        -------
        Predicted labels, array_like, length = n_samples
        """
        raise NotImplementedError
#perceptron.py
import numpy as np
from numpy import shape
from base import Classifier
from utils import accuracy_score
from utils import sign
class Perceptron(Classifier):
    """Binary perceptron trained with the dual-form algorithm.

    The model keeps one coefficient ``alpha[j]`` per training sample and a
    scalar bias ``b``. The decision value of a sample ``x`` is
    ``sum_j(alpha[j] * y[j] * dot(X[j], x)) + b``; the predicted label is
    that value passed through the ``sign`` helper imported from ``utils``.

    Parameters
    ----------
    max_iterations : int
        Maximum number of passes over the training set; must be > 0.
    esplion : float
        Must satisfy ``0 < esplion < 1``. Stored on the instance; the
        training loop in this class does not read it.
    learning_rate : float
        Amount added to ``alpha[i]`` for each mistake on sample ``i``;
        must satisfy ``0 < learning_rate <= 1``.
    threshold : float
        Training stops early once ``score`` on the training data is
        strictly greater than this value.
    """

    def __init__(self, max_iterations=100, esplion=1e-3, learning_rate=0.1, threshold=0.9):
        # NOTE: asserts are stripped under ``python -O``; they are kept so
        # callers still see AssertionError for bad hyper-parameters.
        assert max_iterations > 0
        assert 1 > esplion > 0
        assert 0 < learning_rate <= 1
        self.max_iterations = max_iterations
        self.esplion = esplion
        self.learning_rate = learning_rate
        self.threshold = threshold

    def fit(self, X, y):
        """Fit ``alpha`` and ``b`` on the training data.

        Parameters
        ----------
        X : array_like, shape = [n_samples, n_features]
            Training samples; converted with ``np.asarray``.
        y : array_like, length = n_samples
            Training labels; converted with ``np.asarray``. The update rule
            assumes labels are +1 / -1 (as in the accompanying test) --
            TODO confirm no other encoding is expected.

        Returns
        -------
        None
        """
        # Accept plain lists as well as arrays; an ndarray is kept as-is.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        self._check_params()
        n_samples, _ = shape(self.X)
        # Gram matrix: gram_matrix[j, i] == dot(X[j], X[i]).
        gram_matrix = np.dot(self.X, self.X.T)
        self.alpha = np.zeros(n_samples)
        self.b = 0
        for _ in range(self.max_iterations):
            n_updates = 0
            for ind in range(n_samples):
                # Dual-form decision value of sample ``ind``. The bias must
                # be part of the check so training and prediction use the
                # same decision function.
                decision = np.dot(self.alpha * self.y, gram_matrix[:, ind]) + self.b
                if self.y[ind] * decision <= 0:
                    # Misclassified (or on the boundary): strengthen it.
                    self.alpha[ind] = self.alpha[ind] + self.learning_rate
                    self.b = self.b + self.y[ind] * self.learning_rate
                    n_updates += 1
            # A pass without updates means later passes cannot change the
            # model, so stop; otherwise stop once accuracy is good enough.
            if n_updates == 0 or self.score(self.X, self.y) > self.threshold:
                break

    def score(self, X, y):
        """Return ``accuracy_score(y, self.predict(X))``."""
        return accuracy_score(y, self.predict(X))

    def _check_params(self):
        """Assert that there is exactly one label per training sample."""
        assert shape(self.X)[0] == len(self.y)

    def _weights(self):
        """Return the primal weight vector ``sum_j(alpha[j] * y[j] * X[j])``."""
        return np.dot(self.alpha * self.y, self.X)

    def _predict_sample(self, sample):
        """Return the predicted label of one sample."""
        return sign(np.dot(self._weights(), sample) + self.b)

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : iterable of array_like, each of length n_features
            Samples to classify.

        Returns
        -------
        numpy.ndarray holding one ``sign`` result per sample.
        """
        # The weight vector is the same for all samples; compute it once.
        weights = self._weights()
        return np.array([sign(np.dot(weights, sample) + self.b) for sample in X])
#test_perceptron.py
import numpy as np
from linear import Perceptron
class TestPerceptron(object):
    """Smoke test for ``Perceptron`` on a three-point data set."""

    def test_perceptron(self):
        """Fit on three 2-D points and require training accuracy above 0.9."""
        samples = np.array([[3, 3], [4, 3], [1, 1]])
        labels = np.array([1, 1, -1])
        model = Perceptron(learning_rate=1)
        model.fit(samples, labels)
        assert model.score(samples, labels) > 0.9
5.FAQ
- Q1 感知机和NN以及SVM的区别与联系?
A: