9.数据降维--NMF非负矩阵分解
2020-04-06 本文已影响0人
羽天驿
一、原理
image.png
二、代码实例
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import NMF
from sklearn import datasets
d:\python3.7.4\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
return f(*args, **kwds)
# Iris features: sepal length/width and petal length/width.
# They are physical measurements, so they are never negative.
# return_X_y is passed by keyword: the positional form load_iris(True)
# is deprecated and rejected by recent scikit-learn releases.
X, y = datasets.load_iris(return_X_y=True)
# Reduce the four features to two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
X_pca[:5]
array([[-2.68412563, 0.31939725],
[-2.71414169, -0.17700123],
[-2.88899057, -0.14494943],
[-2.74534286, -0.31829898],
[-2.72871654, 0.32675451]])
# Number of components kept by the fitted PCA (2, matching n_components=2).
pca.n_components_
2
# Supervised reduction of the iris features to two discriminant axes:
# fit on (X, y) first, then project X with the fitted model.
lda = LinearDiscriminantAnalysis(n_components=2)
X_lda = lda.fit(X, y).transform(X)
display(X_lda.shape, X_lda[:5])
(150, 2)
array([[ 8.06179978, 0.30042062],
[ 7.12868772, -0.78666043],
[ 7.48982797, -0.26538449],
[ 6.81320057, -0.67063107],
[ 8.13230933, 0.51446253]])
# Multiply the (150, 2) LDA projection by all but the last row of lda.coef_,
# which yields a 4-column array containing negative values.
# NOTE(review): this is not an inverse transform back to the original
# features; presumably it only illustrates that negatives appear - confirm.
X_lda.dot(lda.coef_[:-1,])
array([[ 50.44831456, 96.55009091, -135.20800789, -166.5239578 ],
[ 46.22047489, 89.97986155, -124.49966339, -150.47244786],
[ 47.70281105, 92.08273143, -128.1719621 , -156.37689851],
[ 44.05058567, 85.64231344, -118.60844912, -143.56441268],
[ 50.56582449, 96.46937191, -135.39782337, -167.33292323],
[ 46.39774739, 87.09981993, -123.65670774, -155.49320907],
[ 45.00108204, 85.99909832, -120.55719337, -148.71668303],
[ 48.04340548, 92.37398079, -128.93716264, -157.99799177],
[ 42.98271638, 84.08301682, -115.94476184, -139.37204002],
[ 47.82018397, 93.28524316, -128.88690512, -155.41700082],
[ 52.03622528, 99.10564815, -139.26587617, -172.43157086],
[ 45.75600633, 88.11715168, -122.85613289, -150.28099117],
[ 47.90990747, 93.63774879, -129.20139536, -155.46407438],
[ 49.0516492 , 95.4493693 , -132.10849352, -159.74743177],
[ 59.77099396, 112.63023913, -159.47259291, -199.72470935],
[ 53.64026108, 99.19492566, -142.34456872, -181.83299901],
[ 51.38701256, 96.15705143, -136.82737868, -172.63928826],
[ 48.23884055, 91.89594305, -129.11209491, -159.81709087],
[ 49.52979433, 93.82720803, -132.35116725, -164.82159137],
[ 48.90417834, 92.37826005, -130.57141666, -163.10318856],
[ 47.62893861, 91.83040753, -127.92855121, -156.28591379],
[ 46.05715296, 86.80857057, -122.89150719, -153.8721158 ],
[ 53.47492395, 101.54156092, -142.99177028, -177.61804081],
[ 38.80284092, 73.96364046, -103.87427276, -128.49538597],
[ 42.01405745, 81.32422806, -112.97812968, -137.42143178],
[ 44.24588373, 86.44750642, -119.30850893, -143.61464866],
[ 42.37714116, 80.8013772 , -113.45266896, -140.29773812],
[ 49.7210397 , 95.28204366, -133.30952116, -163.9526784 ],
[ 50.33080463, 96.63080991, -135.0181924 , -165.71499237],
[ 43.96086217, 85.28980781, -118.29395889, -143.51733911],
[ 43.84335224, 85.3705268 , -118.1041434 , -142.70837369],
[ 45.70462316, 87.05072756, -122.32206073, -151.44521952],
[ 55.75582189, 105.42944126, -148.90941311, -185.8047803 ],
[ 56.99133982, 106.94392477, -151.87270739, -191.0543602 ],
[ 45.61070996, 88.6310953 , -122.79099215, -148.71013389],
[ 50.51025163, 97.33582116, -135.64717286, -165.80913951],
[ 53.77579657, 102.79944127, -144.07739966, -177.6714392 ],
[ 52.25525708, 100.12725915, -140.09955534, -172.32454975],
[ 44.86758404, 87.26286632, -120.82142608, -146.18276565],
[ 48.56344691, 93.37024141, -130.33134365, -159.71323217],
[ 48.96611541, 93.1639903 , -131.01058164, -162.38837027],
[ 38.71529191, 77.19618771, -105.03171864, -123.52169832],
[ 46.14268676, 89.09394957, -123.98941907, -151.2311773 ],
[ 38.59574449, 72.4086231 , -102.8448395 , -129.40821009],
[ 41.70543916, 78.6668807 , -111.30483274, -139.25024244],
[ 43.49095945, 84.32945307, -117.0095694 , -142.05034053],
[ 49.86633606, 94.76810004, -133.3746619 , -165.52353569],
[ 45.93545333, 88.82216293, -123.48511335, -150.37513831],
[ 51.51618385, 98.10938753, -137.87169516, -170.71633046],
[ 48.65317041, 93.72274704, -130.64583389, -159.76030574],
[ -9.25867819, -17.83951631, 24.86353344, 30.3966485 ],
[ -12.09376819, -23.94261215, 32.739197 , 38.82191824],
[ -15.12037758, -28.93408216, 40.52295939, 49.91600125],
[ -11.85657383, -20.51899727, 30.88762991, 42.13060147],
[ -15.3712485 , -28.8728259 , 40.97366971, 51.49002095],
[ -13.86531562, -25.27030727, 36.6426241 , 47.51169051],
[ -16.68036486, -33.20609475, 45.23062996, 53.29285934],
[ 1.02036505, 4.23157933, -3.66769361, -0.22840132],
[ -9.79470768, -17.65272793, 25.80366625, 33.83684086],
[ -11.82865039, -22.23545262, 31.5374322 , 39.59984649],
[ -3.50443156, -2.9629423 , 7.85944684, 16.72622119],
[ -12.22712919, -23.96207488, 33.00009183, 39.58697251],
[ -3.265496 , -2.49079221, 7.21298243, 15.9580045 ],
[ -15.85170514, -29.55248677, 42.16315201, 53.4064296 ],
[ -2.52195905, -4.97225541, 6.81879898, 8.12404691],
[ -7.71440496, -14.95091617, 20.75206975, 25.20701614],
[ -17.52920235, -33.74378036, 47.06063807, 57.59225311],
[ -2.36513834, -2.1699132 , 5.37402971, 11.05397596],
[ -19.50936468, -35.09054963, 51.36752397, 67.49445732],
[ -4.39516535, -6.1190498 , 10.84096222, 17.66669575],
[ -25.06434626, -49.67928245, 67.87584419, 80.37828043],
[ -5.54856844, -9.96372542, 14.60256138, 19.21812992],
[ -22.06593433, -40.40489563, 58.39202443, 75.35267863],
[ -12.07030847, -21.15973267, 31.55532256, 42.51690157],
[ -7.09284167, -12.85232555, 18.71402506, 24.40776227],
[ -8.87199775, -16.86271842, 23.73024726, 29.44646237],
[ -14.09628279, -25.75851192, 37.28054918, 48.21047241],
[ -22.46427612, -43.41474862, 60.3798116 , 73.57094153],
[ -16.086588 , -30.6742795 , 43.06791053, 53.25549733],
[ 3.96116662, 9.5041318 , -11.40379921, -10.42569659],
[ -4.30544185, -5.76654418, 10.52647199, 17.61962219],
[ -0.84865155, 1.15191156, 1.13789128, 6.62623546],
[ -4.28945378, -6.94959317, 10.98052019, 15.89467022],
[ -27.05506249, -50.74535785, 72.08782291, 90.7298947 ],
[ -18.56928521, -35.73630161, 49.84900009, 61.02273392],
[ -15.1083052 , -30.75071924, 41.24384102, 47.34133512],
[ -13.66582785, -26.39798765, 36.72598594, 44.77344246],
[ -12.68550756, -21.60614379, 32.90485278, 45.55475744],
[ -8.12098915, -15.37825315, 21.69814117, 27.03244007],
[ -10.5814711 , -18.68791402, 27.71963692, 37.08218981],
[ -12.72371089, -22.17545603, 33.2103984 , 44.99719624],
[ -13.96683748, -26.37263727, 37.28648778, 46.59570397],
[ -6.17432144, -10.12944267, 15.85718442, 22.70539585],
[ 0.90285511, 4.31229832, -3.47787812, 0.58056411],
[ -11.28095953, -20.3891859 , 29.74279839, 38.89157735],
[ -6.63879 , -11.99215254, 17.50071492, 22.89685253],
[ -9.48581538, -17.56184203, 25.18062439, 32.12792529],
[ -8.13292452, -14.84484679, 21.50238708, 27.83824308],
[ 4.23047413, 9.27841795, -11.82214236, -12.33578041],
[ -8.87605045, -16.21307578, 23.47195315, 30.36561132],
[ -52.78074278, -104.52942784, 142.89852732, 169.37941906],
[ -34.72356739, -66.70032267, 93.16392386, 114.28097629],
[ -40.44785309, -78.42506844, 108.82074909, 132.11612582],
[ -34.87536496, -66.55532783, 93.39245153, 115.38209468],
[ -44.53025939, -86.79247215, 119.98908039, 144.82791536],
[ -46.57885999, -89.29392045, 124.8985182 , 153.54556239],
[ -28.7761972 , -54.60360855, 76.93171357, 95.63369892],
[ -38.40616472, -72.44287673, 102.49931559, 128.23532924],
[ -37.84003728, -70.76106759, 100.73704895, 127.19099598],
[ -47.43503212, -95.08076526, 128.89157649, 150.65615775],
[ -30.10499459, -59.80293807, 81.58058729, 96.36013126],
[ -34.0979514 , -65.2513747 , 91.38417328, 112.56257347],
[ -37.01871221, -72.35661881, 99.83262117, 120.11576783],
[ -37.48086927, -71.91750653, 100.5293431 , 123.46497548],
[ -45.1333861 , -89.05552035, 122.05949224, 145.2911051 ],
[ -39.74809064, -79.29025801, 107.8478427 , 126.76901205],
[ -31.95041445, -61.38295709, 85.72742528, 105.14088825],
[ -44.41519796, -87.89178256, 120.22094594, 142.63006209],
[ -56.76992091, -108.06104566, 151.91015234, 188.20043855],
[ -26.78602899, -48.40461024, 70.61922468, 92.35753711],
[ -42.13714867, -83.3661864 , 114.0476086 , 135.33888923],
[ -34.84094031, -67.9028344 , 93.87886689, 113.3210786 ],
[ -46.37176356, -87.73890309, 123.86908494, 154.45838651],
[ -27.41925365, -52.53625596, 73.51177036, 90.42486776],
[ -38.12073213, -75.1348703 , 103.06014817, 122.83143035],
[ -33.27160318, -63.89958886, 89.2635039 , 109.51839277],
[ -26.05442742, -50.35266709, 70.02928714, 85.32938253],
[ -26.54668242, -51.78215233, 71.54814289, 86.28273108],
[ -41.62129694, -80.43705241, 111.87000594, 136.31166089],
[ -27.6331253 , -51.89376064, 73.65433547, 92.58003098],
[ -38.23851608, -72.48768985, 102.19970858, 127.178122 ],
[ -35.2142182 , -69.79804198, 95.36275476, 112.92628804],
[ -43.83077096, -85.09120027, 117.96591892, 143.01852782],
[ -22.64791282, -42.18688651, 60.22537041, 76.35310074],
[ -28.99020585, -52.6778825 , 76.54915113, 99.55772526],
[ -44.24118513, -86.28441618, 119.23282446, 143.81145686],
[ -44.94445224, -89.90250687, 122.04794691, 143.00226711],
[ -31.83290452, -61.46367609, 85.53760979, 104.33192283],
[ -25.81940755, -50.51410508, 69.64965617, 83.71145168],
[ -34.61380312, -68.18050869, 93.56177593, 111.58980181],
[ -44.77694061, -88.66408925, 121.22321235, 143.71392299],
[ -35.29080228, -70.69588079, 95.87559867, 112.14397627],
[ -34.72356739, -66.70032267, 93.16392386, 114.28097629],
[ -45.15182268, -88.89106277, 122.02712508, 145.62716922],
[ -46.95862819, -93.75146174, 127.44380007, 149.65889806],
[ -38.21575279, -75.86825153, 103.54062493, 122.3851827 ],
[ -32.15114668, -61.28579494, 86.06834407, 106.46666614],
[ -32.6274136 , -63.89832919, 88.04124802, 105.69506272],
[ -40.76038707, -81.71600388, 110.76087947, 129.43760099],
[ -30.08139786, -58.30328932, 80.92184039, 98.28625148]])
属性由4个变成了2个,但变换之后发现存在负数,而属性是长度和宽度,本不应出现负数。
我们希望既能进行降维,又可以理解降维后的结果,于是NMF出现了!
# Non-negative matrix factorization: X is factorized into the matrix
# product of W and H, so that W.dot(H) approximates X.
nmf = NMF(n_components=2)
# W is the reduced (150, 2) representation of the samples.
W = nmf.fit_transform(X)
display(W.shape,W[:5])
(150, 2)
array([[0.10666948, 1.32441671],
[0.13650159, 1.1703995 ],
[0.10290364, 1.21005821],
[0.14288053, 1.12082689],
[0.0985986 , 1.33211468]])
# H is the components matrix learned by the fitted NMF model.
H = nmf.components_
# First five original samples, for comparison with W.dot(H)[:5].
X[:5]
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2]])
# Reconstruct the first five samples from the two factors; the values are
# close to the original X[:5].
W.dot(H)[:5]
array([[5.09591336, 3.50618857, 1.40120511, 0.20158305],
[4.74639643, 3.19626659, 1.4607764 , 0.25795951],
[4.68723639, 3.21605128, 1.30890071, 0.1944664 ],
[4.61535011, 3.0890499 , 1.46297066, 0.27001439],
[5.07551302, 3.50644084, 1.36356276, 0.18633077]])
# Diabetes dataset.
# return_X_y is passed by keyword: the positional form load_diabetes(True)
# is deprecated and rejected by recent scikit-learn releases.
X, y = datasets.load_diabetes(return_X_y=True)
# Reduce the features to five principal components.
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X)
X_pca[:5]
array([[ 0.02793062, -0.09260116, 0.02802696, -0.00393895, -0.01220663],
[-0.13468605, 0.06526341, 0.00132778, -0.02235559, -0.00681271],
[ 0.01294474, -0.07776417, 0.0351635 , -0.03764663, -0.05535734],
[ 0.00234544, 0.01818194, -0.0957504 , 0.06531834, 0.01215425],
[-0.03598069, 0.03862136, -0.00272351, -0.00654115, -0.00634255]])
是否可以使用LDA进行降维???
# Diabetes dataset.
# return_X_y is passed by keyword: the positional form load_diabetes(True)
# is deprecated and rejected by recent scikit-learn releases.
X, y = datasets.load_diabetes(return_X_y=True)
# NOTE(review): y appears to be a regression target; LDA expects class
# labels, so it presumably treats every distinct y value as its own class.
# Confirm whether this reduction is meaningful before relying on it.
lda = LinearDiscriminantAnalysis(n_components=5)
X_lda = lda.fit_transform(X, y)
X_lda[:5]
array([[ 1.66315808e+00, -1.36707353e+00, -7.15149583e-01,
1.26506299e-03, -3.00979387e-01],
[-2.31201866e+00, 7.24031633e-01, -1.02431701e+00,
-1.08829255e+00, -1.71829187e+00],
[ 7.50934957e-01, -1.95233657e+00, -4.14891935e-01,
-6.97181752e-01, -9.40486840e-01],
[ 4.25155569e-01, 9.24825714e-01, 1.28107318e+00,
3.99298577e-01, 1.28859141e+00],
[-7.60851671e-01, 1.54348805e-01, -1.05722164e+00,
-1.43505916e+00, 3.79962339e-01]])
是否可以使用NMF进行降维???
# Try NMF on the diabetes features.
nmf = NMF(n_components=5)
# This call raises ValueError (see the traceback below): X contains negative
# values, which NMF rejects.
nmf.fit_transform(X)
-------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-26-a144852d1706> in <module>
1 nmf = NMF(n_components=5)
2
----> 3 nmf.fit_transform(X)
d:\python3.7.4\lib\site-packages\sklearn\decomposition\_nmf.py in fit_transform(self, X, y, W, H)
1285 l1_ratio=self.l1_ratio, regularization='both',
1286 random_state=self.random_state, verbose=self.verbose,
-> 1287 shuffle=self.shuffle)
1288
1289 self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,
d:\python3.7.4\lib\site-packages\sklearn\decomposition\_nmf.py in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha, l1_ratio, regularization, random_state, verbose, shuffle)
1007
1008 X = check_array(X, accept_sparse=('csr', 'csc'), dtype=float)
-> 1009 check_non_negative(X, "NMF (input X)")
1010 beta_loss = _check_string_param(solver, regularization, beta_loss, init)
1011
d:\python3.7.4\lib\site-packages\sklearn\utils\validation.py in check_non_negative(X, whom)
977
978 if X_min < 0:
--> 979 raise ValueError("Negative values in data passed to %s" % whom)
980
981
ValueError: Negative values in data passed to NMF (input X)