Python10

2019-11-01  本文已影响0人  C_Z_Q_

jupyter notebook 操作

import numpy as np 
#reshape
X = np.arange(15).reshape(5,3)
X
    array([[ 0,  1,  2],
           [ 3,  4,  5],
           [ 6,  7,  8],
           [ 9, 10, 11],
           [12, 13, 14]])
x = np.arange(10)
x
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[2]
2
X[1,2]
5
X[1]
array([3, 4, 5])
X[3]
array([ 9, 10, 11])
X[2,2]
8
x[5:]
array([5, 6, 7, 8, 9])
#行切片,列切片
X[2:4,1:]
array([[ 7,  8],
       [10, 11]])
X[2:,:2]
array([[ 6,  7],
       [ 9, 10],
       [12, 13]])
x.reshape(5,-1)
array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])
#numpy's universal function
X
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])
X + 1
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15]])
X * 2
array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22],
       [24, 26, 28]])
np.sin(X)
array([[ 0.        ,  0.84147098,  0.90929743],
       [ 0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825],
       [ 0.41211849, -0.54402111, -0.99999021],
       [-0.53657292,  0.42016704,  0.99060736]])
x = np.arange(16)
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
#随机打乱
np.random.shuffle(x)
x
array([ 2, 10,  4, 13,  7, 11,  8, 12,  5,  0,  3,  6, 14,  9, 15,  1])
#argsort   元素从小到大排序后所对应的原索引
np.argsort(x)
array([ 9, 15,  0, 10,  2,  8, 11,  4,  6, 13,  1,  5,  7,  3, 12, 14],
          dtype=int64)
names = np.array(['Bob','Mor','Will','Joe','Mor','Will','Will'])
names
array(['Bob', 'Mor', 'Will', 'Joe', 'Mor', 'Will', 'Will'], dtype='<U4')
#使用np.random 模块的randn   生成一些正态分布的随机数据
data = np.random.randn(7,4)
data
array([[ 0.30878037, -0.4096957 , -0.83750594, -0.97213605],
       [-0.14167727, -0.04756769,  1.97948161, -1.702129  ],
       [-1.16628504, -0.4982823 ,  1.53364224, -1.15853535],
       [-0.11616092,  1.07999647, -0.35885284, -0.84570262],
       [-0.12763289, -1.29314938, -0.80078425, -0.08737958],
       [ 1.45718967, -1.14880644,  0.09459774, -0.25857695],
       [-0.2095555 , -0.53830113,  0.63900549,  0.05845763]])
#假设  每个名字对应data数据的一行
#布尔型索引可以应用于数据的筛选
data[names=='Joe']
array([[-0.11616092,  1.07999647, -0.35885284, -0.84570262]])
#布尔型索引应用修改值
#选取所有Mor的行,并且全部值   赋值为0
data[names=='Mor'] = 0
data
array([[ 0.30878037, -0.4096957 , -0.83750594, -0.97213605],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-1.16628504, -0.4982823 ,  1.53364224, -1.15853535],
       [-0.11616092,  1.07999647, -0.35885284, -0.84570262],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.45718967, -1.14880644,  0.09459774, -0.25857695],
       [-0.2095555 , -0.53830113,  0.63900549,  0.05845763]])
data[names=='Will',2:]=0
data
array([[ 0.30878037, -0.4096957 , -0.83750594, -0.97213605],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-1.16628504, -0.4982823 ,  0.        ,  0.        ],
       [-0.11616092,  1.07999647, -0.35885284, -0.84570262],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.45718967, -1.14880644,  0.        ,  0.        ],
       [-0.2095555 , -0.53830113,  0.        ,  0.        ]])


2.鸢尾花散点图

#import warnings
#warnings.filterwarnings('ignore')
from sklearn import datasets
from matplotlib import pyplot as plt
iris = datasets.load_iris()
iris.keys
 <function Bunch.keys>
#iris['DESCR']
#print(iris.DESCR)
X= iris.data
X.ndim
2
X.shape
(150, 4)
X.size
600
iris.feature_names
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']
y= iris.target
y
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
iris.target_names
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

#绘制萼片维度

X = X[:,:2]
X[y==0,:0]
array([], shape=(50, 0), dtype=float64)

#绘制散点图

plt.scatter(X[y==0,0],X[y==0,1],color = 'r') #setosa
plt.scatter(X[y==1,0],X[y==1,1],color = 'b') #versicolor
plt.scatter(X[y==2,0],X[y==2,1],color = 'g') #virginica
plt.show()
效果图
y= iris.target
y
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
#绘制花瓣维度
X = iris.data[:,2:]
X[y==0,0]
array([1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4,
       1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1. , 1.7, 1.9, 1.6,
       1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.5, 1.3,
       1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4])
plt.scatter(X[y==0,0],X[y==0,1],color = 'r') #setosa
plt.scatter(X[y==1,0],X[y==1,1],color = 'b') #versicolor
plt.scatter(X[y==2,0],X[y==2,1],color = 'g') #virginica
plt.show()

3.机器学习初识

1). 监督学习(supervised learning),无监督学习(unsupervised learning),半监督学习(Semi-Supervised Learning),强化学习(Reinforcement Learning)

2). 监督学习(supervised learning)和无监督学习(unsupervised learning)的判断:
是否有监督(supervised),就看输入数据是否有标签(label)。输入数据有标签,则为有监督学习,没标签则为无监督学习。
3). 监督学习:回归(Regression,连续)、分类(Classification,离散)
无监督学习:聚类(clustering)

分类算法KNN:
K近邻算法,即K-Nearest Neighbor algorithm,简称KNN算法。
可认为是:找出与待预测样本距离最近的K个邻居,由这K个邻居中占多数的类别决定预测结果。
实例:肿瘤良,恶性判断(手动实现)

from matplotlib import pyplot as plt
import numpy as np 
# Toy tumour data set: ten samples, two numeric features per sample.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343808831, 3.368360954],
    [3.582294042, 4.679179110],
    [2.280362439, 2.866990263],
    [7.423436942, 4.696522875],
    [5.745051997, 3.533989803],
    [9.172168622, 2.511101045],
    [7.792783481, 3.424088941],
    [7.939820817, 0.791637231],
]
# One class label per row above: the first five samples are class 0,
# the last five are class 1.
raw_data_y = [0] * 5 + [1] * 5

# NumPy arrays built from the raw lists, so boolean masks such as
# X_train[y_train == 0] can be used later on.
X_train, y_train = np.array(raw_data_X), np.array(raw_data_y)

预测

# Suppose a new sample x arrives and we want to decide whether it is malignant or benign.
x = np.array([8.093607318, 3.365731514])
plt.scatter(X_train[y_train==0,0],X_train[y_train==0,1],color = 'r') # training samples labelled 0
plt.scatter(X_train[y_train==1,0],X_train[y_train==1,1],color = 'b') # training samples labelled 1
plt.scatter(x[0],x[1],color = 'g') # the new sample to be classified
plt.show()

通过knn算法来预测

from math import sqrt
# Euclidean distance from x to every one of the ten training points; the
# k closest points are selected afterwards.  An explicit for-loop that
# appends each distance to a list produces the same result.
distances = [sqrt(((point - x) ** 2).sum()) for point in X_train]
distances
 [4.812566907609877,
  5.229270827235305,
  6.749798999160064,
  4.6986266144110695,
  5.83460014556857,
  1.4900114024329525,
  2.354574897431513,
  1.3761132675144652,
  0.3064319992975,
  2.5786840957478887]

nearst = np.argsort(distances)
nearst
array([8, 7, 5, 6, 9, 3, 0, 1, 4, 2], dtype=int64)

# Assume we choose K = 6.
k = 6
# Labels of the k nearest training samples.  The slice uses k itself
# (not a hard-coded 6) so that changing k above actually takes effect.
top_k_y = [y_train[i] for i in nearst[:k]]
top_k_y
[1, 1, 1, 1, 1, 0]

# 数据统计量大的话使用的统计办法

from collections import Counter
votes = Counter(top_k_y)
votes
Counter({1: 5, 0: 1})

# 返回数量前 i 的数据信息
votes.most_common(1)
[(1, 5)]
predict_y = votes.most_common(1)[0][0]
predict_y
1

x患者是恶性肿瘤的可能性大



上一篇 下一篇

猜你喜欢

热点阅读