9.数据降维--NMF非负矩阵分解

2020-04-06  本文已影响0人  羽天驿

一、原理

image.png

二、代码实例

import numpy as np

from sklearn.decomposition import PCA

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.decomposition import NMF

from sklearn import datasets
d:\python3.7.4\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
  return f(*args, **kwds)
# Iris features: sepal length/width and petal length/width.
# These are physical measurements, so they can never be negative.
# return_X_y is passed by keyword: recent scikit-learn releases made it
# keyword-only, so the positional form load_iris(True) raises TypeError there.
X, y = datasets.load_iris(return_X_y=True)
# Project the 4 original features onto 2 principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Peek at the first five projected samples.
X_pca[:5]
array([[-2.68412563,  0.31939725],
       [-2.71414169, -0.17700123],
       [-2.88899057, -0.14494943],
       [-2.74534286, -0.31829898],
       [-2.72871654,  0.32675451]])
# Number of components held by the fitted PCA (matches the n_components=2 requested).
pca.n_components_
2
# Supervised reduction: LDA is fitted with the labels y and asked for 2 components.
lda = LinearDiscriminantAnalysis(n_components=2)
X_lda = lda.fit_transform(X,y)
# Show the shape of the projection and its first five rows.
display(X_lda.shape,X_lda[:5])
(150, 2)



array([[ 8.06179978,  0.30042062],
       [ 7.12868772, -0.78666043],
       [ 7.48982797, -0.26538449],
       [ 6.81320057, -0.67063107],
       [ 8.13230933,  0.51446253]])
# Multiply the 2-column LDA projection by all but the last row of lda.coef_,
# which yields one 4-column row per sample.
# NOTE(review): this looks like an attempt to map the projection back onto the
# 4 original features; it is presumably not a true inverse transform — confirm.
X_lda.dot(lda.coef_[:-1,])
array([[  50.44831456,   96.55009091, -135.20800789, -166.5239578 ],
       [  46.22047489,   89.97986155, -124.49966339, -150.47244786],
       [  47.70281105,   92.08273143, -128.1719621 , -156.37689851],
       [  44.05058567,   85.64231344, -118.60844912, -143.56441268],
       [  50.56582449,   96.46937191, -135.39782337, -167.33292323],
       [  46.39774739,   87.09981993, -123.65670774, -155.49320907],
       [  45.00108204,   85.99909832, -120.55719337, -148.71668303],
       [  48.04340548,   92.37398079, -128.93716264, -157.99799177],
       [  42.98271638,   84.08301682, -115.94476184, -139.37204002],
       [  47.82018397,   93.28524316, -128.88690512, -155.41700082],
       [  52.03622528,   99.10564815, -139.26587617, -172.43157086],
       [  45.75600633,   88.11715168, -122.85613289, -150.28099117],
       [  47.90990747,   93.63774879, -129.20139536, -155.46407438],
       [  49.0516492 ,   95.4493693 , -132.10849352, -159.74743177],
       [  59.77099396,  112.63023913, -159.47259291, -199.72470935],
       [  53.64026108,   99.19492566, -142.34456872, -181.83299901],
       [  51.38701256,   96.15705143, -136.82737868, -172.63928826],
       [  48.23884055,   91.89594305, -129.11209491, -159.81709087],
       [  49.52979433,   93.82720803, -132.35116725, -164.82159137],
       [  48.90417834,   92.37826005, -130.57141666, -163.10318856],
       [  47.62893861,   91.83040753, -127.92855121, -156.28591379],
       [  46.05715296,   86.80857057, -122.89150719, -153.8721158 ],
       [  53.47492395,  101.54156092, -142.99177028, -177.61804081],
       [  38.80284092,   73.96364046, -103.87427276, -128.49538597],
       [  42.01405745,   81.32422806, -112.97812968, -137.42143178],
       [  44.24588373,   86.44750642, -119.30850893, -143.61464866],
       [  42.37714116,   80.8013772 , -113.45266896, -140.29773812],
       [  49.7210397 ,   95.28204366, -133.30952116, -163.9526784 ],
       [  50.33080463,   96.63080991, -135.0181924 , -165.71499237],
       [  43.96086217,   85.28980781, -118.29395889, -143.51733911],
       [  43.84335224,   85.3705268 , -118.1041434 , -142.70837369],
       [  45.70462316,   87.05072756, -122.32206073, -151.44521952],
       [  55.75582189,  105.42944126, -148.90941311, -185.8047803 ],
       [  56.99133982,  106.94392477, -151.87270739, -191.0543602 ],
       [  45.61070996,   88.6310953 , -122.79099215, -148.71013389],
       [  50.51025163,   97.33582116, -135.64717286, -165.80913951],
       [  53.77579657,  102.79944127, -144.07739966, -177.6714392 ],
       [  52.25525708,  100.12725915, -140.09955534, -172.32454975],
       [  44.86758404,   87.26286632, -120.82142608, -146.18276565],
       [  48.56344691,   93.37024141, -130.33134365, -159.71323217],
       [  48.96611541,   93.1639903 , -131.01058164, -162.38837027],
       [  38.71529191,   77.19618771, -105.03171864, -123.52169832],
       [  46.14268676,   89.09394957, -123.98941907, -151.2311773 ],
       [  38.59574449,   72.4086231 , -102.8448395 , -129.40821009],
       [  41.70543916,   78.6668807 , -111.30483274, -139.25024244],
       [  43.49095945,   84.32945307, -117.0095694 , -142.05034053],
       [  49.86633606,   94.76810004, -133.3746619 , -165.52353569],
       [  45.93545333,   88.82216293, -123.48511335, -150.37513831],
       [  51.51618385,   98.10938753, -137.87169516, -170.71633046],
       [  48.65317041,   93.72274704, -130.64583389, -159.76030574],
       [  -9.25867819,  -17.83951631,   24.86353344,   30.3966485 ],
       [ -12.09376819,  -23.94261215,   32.739197  ,   38.82191824],
       [ -15.12037758,  -28.93408216,   40.52295939,   49.91600125],
       [ -11.85657383,  -20.51899727,   30.88762991,   42.13060147],
       [ -15.3712485 ,  -28.8728259 ,   40.97366971,   51.49002095],
       [ -13.86531562,  -25.27030727,   36.6426241 ,   47.51169051],
       [ -16.68036486,  -33.20609475,   45.23062996,   53.29285934],
       [   1.02036505,    4.23157933,   -3.66769361,   -0.22840132],
       [  -9.79470768,  -17.65272793,   25.80366625,   33.83684086],
       [ -11.82865039,  -22.23545262,   31.5374322 ,   39.59984649],
       [  -3.50443156,   -2.9629423 ,    7.85944684,   16.72622119],
       [ -12.22712919,  -23.96207488,   33.00009183,   39.58697251],
       [  -3.265496  ,   -2.49079221,    7.21298243,   15.9580045 ],
       [ -15.85170514,  -29.55248677,   42.16315201,   53.4064296 ],
       [  -2.52195905,   -4.97225541,    6.81879898,    8.12404691],
       [  -7.71440496,  -14.95091617,   20.75206975,   25.20701614],
       [ -17.52920235,  -33.74378036,   47.06063807,   57.59225311],
       [  -2.36513834,   -2.1699132 ,    5.37402971,   11.05397596],
       [ -19.50936468,  -35.09054963,   51.36752397,   67.49445732],
       [  -4.39516535,   -6.1190498 ,   10.84096222,   17.66669575],
       [ -25.06434626,  -49.67928245,   67.87584419,   80.37828043],
       [  -5.54856844,   -9.96372542,   14.60256138,   19.21812992],
       [ -22.06593433,  -40.40489563,   58.39202443,   75.35267863],
       [ -12.07030847,  -21.15973267,   31.55532256,   42.51690157],
       [  -7.09284167,  -12.85232555,   18.71402506,   24.40776227],
       [  -8.87199775,  -16.86271842,   23.73024726,   29.44646237],
       [ -14.09628279,  -25.75851192,   37.28054918,   48.21047241],
       [ -22.46427612,  -43.41474862,   60.3798116 ,   73.57094153],
       [ -16.086588  ,  -30.6742795 ,   43.06791053,   53.25549733],
       [   3.96116662,    9.5041318 ,  -11.40379921,  -10.42569659],
       [  -4.30544185,   -5.76654418,   10.52647199,   17.61962219],
       [  -0.84865155,    1.15191156,    1.13789128,    6.62623546],
       [  -4.28945378,   -6.94959317,   10.98052019,   15.89467022],
       [ -27.05506249,  -50.74535785,   72.08782291,   90.7298947 ],
       [ -18.56928521,  -35.73630161,   49.84900009,   61.02273392],
       [ -15.1083052 ,  -30.75071924,   41.24384102,   47.34133512],
       [ -13.66582785,  -26.39798765,   36.72598594,   44.77344246],
       [ -12.68550756,  -21.60614379,   32.90485278,   45.55475744],
       [  -8.12098915,  -15.37825315,   21.69814117,   27.03244007],
       [ -10.5814711 ,  -18.68791402,   27.71963692,   37.08218981],
       [ -12.72371089,  -22.17545603,   33.2103984 ,   44.99719624],
       [ -13.96683748,  -26.37263727,   37.28648778,   46.59570397],
       [  -6.17432144,  -10.12944267,   15.85718442,   22.70539585],
       [   0.90285511,    4.31229832,   -3.47787812,    0.58056411],
       [ -11.28095953,  -20.3891859 ,   29.74279839,   38.89157735],
       [  -6.63879   ,  -11.99215254,   17.50071492,   22.89685253],
       [  -9.48581538,  -17.56184203,   25.18062439,   32.12792529],
       [  -8.13292452,  -14.84484679,   21.50238708,   27.83824308],
       [   4.23047413,    9.27841795,  -11.82214236,  -12.33578041],
       [  -8.87605045,  -16.21307578,   23.47195315,   30.36561132],
       [ -52.78074278, -104.52942784,  142.89852732,  169.37941906],
       [ -34.72356739,  -66.70032267,   93.16392386,  114.28097629],
       [ -40.44785309,  -78.42506844,  108.82074909,  132.11612582],
       [ -34.87536496,  -66.55532783,   93.39245153,  115.38209468],
       [ -44.53025939,  -86.79247215,  119.98908039,  144.82791536],
       [ -46.57885999,  -89.29392045,  124.8985182 ,  153.54556239],
       [ -28.7761972 ,  -54.60360855,   76.93171357,   95.63369892],
       [ -38.40616472,  -72.44287673,  102.49931559,  128.23532924],
       [ -37.84003728,  -70.76106759,  100.73704895,  127.19099598],
       [ -47.43503212,  -95.08076526,  128.89157649,  150.65615775],
       [ -30.10499459,  -59.80293807,   81.58058729,   96.36013126],
       [ -34.0979514 ,  -65.2513747 ,   91.38417328,  112.56257347],
       [ -37.01871221,  -72.35661881,   99.83262117,  120.11576783],
       [ -37.48086927,  -71.91750653,  100.5293431 ,  123.46497548],
       [ -45.1333861 ,  -89.05552035,  122.05949224,  145.2911051 ],
       [ -39.74809064,  -79.29025801,  107.8478427 ,  126.76901205],
       [ -31.95041445,  -61.38295709,   85.72742528,  105.14088825],
       [ -44.41519796,  -87.89178256,  120.22094594,  142.63006209],
       [ -56.76992091, -108.06104566,  151.91015234,  188.20043855],
       [ -26.78602899,  -48.40461024,   70.61922468,   92.35753711],
       [ -42.13714867,  -83.3661864 ,  114.0476086 ,  135.33888923],
       [ -34.84094031,  -67.9028344 ,   93.87886689,  113.3210786 ],
       [ -46.37176356,  -87.73890309,  123.86908494,  154.45838651],
       [ -27.41925365,  -52.53625596,   73.51177036,   90.42486776],
       [ -38.12073213,  -75.1348703 ,  103.06014817,  122.83143035],
       [ -33.27160318,  -63.89958886,   89.2635039 ,  109.51839277],
       [ -26.05442742,  -50.35266709,   70.02928714,   85.32938253],
       [ -26.54668242,  -51.78215233,   71.54814289,   86.28273108],
       [ -41.62129694,  -80.43705241,  111.87000594,  136.31166089],
       [ -27.6331253 ,  -51.89376064,   73.65433547,   92.58003098],
       [ -38.23851608,  -72.48768985,  102.19970858,  127.178122  ],
       [ -35.2142182 ,  -69.79804198,   95.36275476,  112.92628804],
       [ -43.83077096,  -85.09120027,  117.96591892,  143.01852782],
       [ -22.64791282,  -42.18688651,   60.22537041,   76.35310074],
       [ -28.99020585,  -52.6778825 ,   76.54915113,   99.55772526],
       [ -44.24118513,  -86.28441618,  119.23282446,  143.81145686],
       [ -44.94445224,  -89.90250687,  122.04794691,  143.00226711],
       [ -31.83290452,  -61.46367609,   85.53760979,  104.33192283],
       [ -25.81940755,  -50.51410508,   69.64965617,   83.71145168],
       [ -34.61380312,  -68.18050869,   93.56177593,  111.58980181],
       [ -44.77694061,  -88.66408925,  121.22321235,  143.71392299],
       [ -35.29080228,  -70.69588079,   95.87559867,  112.14397627],
       [ -34.72356739,  -66.70032267,   93.16392386,  114.28097629],
       [ -45.15182268,  -88.89106277,  122.02712508,  145.62716922],
       [ -46.95862819,  -93.75146174,  127.44380007,  149.65889806],
       [ -38.21575279,  -75.86825153,  103.54062493,  122.3851827 ],
       [ -32.15114668,  -61.28579494,   86.06834407,  106.46666614],
       [ -32.6274136 ,  -63.89832919,   88.04124802,  105.69506272],
       [ -40.76038707,  -81.71600388,  110.76087947,  129.43760099],
       [ -30.08139786,  -58.30328932,   80.92184039,   98.28625148]])

属性由4个降到了2个;但变换之后的数据中出现了负数,而原始属性是长度和宽度,不可能为负,因此降维后的结果难以解释。

我们希望既能进行降维,结果又便于理解(保持非负),于是就有了NMF!

X ≈ WH(其中 W 和 H 的所有元素均非负)

# Non-negative matrix factorization: X is factorized into the matrix product of W and H.
nmf = NMF(n_components=2)
# W is the 2-component representation of the samples.
W = nmf.fit_transform(X)
# Show the shape of W and its first five rows.
display(W.shape,W[:5])
(150, 2)



array([[0.10666948, 1.32441671],
       [0.13650159, 1.1703995 ],
       [0.10290364, 1.21005821],
       [0.14288053, 1.12082689],
       [0.0985986 , 1.33211468]])
# H is the components matrix learned by the fitted NMF model.
H = nmf.components_
# First five original samples, for comparison with the reconstruction W.dot(H).
X[:5]
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])
# Reconstruct the first five samples from the two factors; the values come out
# close to, but not exactly equal to, the original rows of X.
W.dot(H)[:5]
array([[5.09591336, 3.50618857, 1.40120511, 0.20158305],
       [4.74639643, 3.19626659, 1.4607764 , 0.25795951],
       [4.68723639, 3.21605128, 1.30890071, 0.1944664 ],
       [4.61535011, 3.0890499 , 1.46297066, 0.27001439],
       [5.07551302, 3.50644084, 1.36356276, 0.18633077]])
# Diabetes dataset.
# return_X_y is passed by keyword: recent scikit-learn releases made it
# keyword-only, so the positional form load_diabetes(True) raises TypeError there.
X, y = datasets.load_diabetes(return_X_y=True)
# Reduce the features to 5 principal components.
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X)
# Peek at the first five projected samples.
X_pca[:5]
array([[ 0.02793062, -0.09260116,  0.02802696, -0.00393895, -0.01220663],
       [-0.13468605,  0.06526341,  0.00132778, -0.02235559, -0.00681271],
       [ 0.01294474, -0.07776417,  0.0351635 , -0.03764663, -0.05535734],
       [ 0.00234544,  0.01818194, -0.0957504 ,  0.06531834,  0.01215425],
       [-0.03598069,  0.03862136, -0.00272351, -0.00654115, -0.00634255]])

是否可以使用LDA进行降维???

# Diabetes dataset.
# return_X_y is passed by keyword: recent scikit-learn releases made it
# keyword-only, so the positional form load_diabetes(True) raises TypeError there.
X, y = datasets.load_diabetes(return_X_y=True)
# NOTE(review): y here is presumably a continuous regression target, so LDA
# would treat every distinct value as its own class — confirm this is intended.
lda = LinearDiscriminantAnalysis(n_components=5)
X_lda = lda.fit_transform(X, y)
# Peek at the first five projected samples.
X_lda[:5]
array([[ 1.66315808e+00, -1.36707353e+00, -7.15149583e-01,
         1.26506299e-03, -3.00979387e-01],
       [-2.31201866e+00,  7.24031633e-01, -1.02431701e+00,
        -1.08829255e+00, -1.71829187e+00],
       [ 7.50934957e-01, -1.95233657e+00, -4.14891935e-01,
        -6.97181752e-01, -9.40486840e-01],
       [ 4.25155569e-01,  9.24825714e-01,  1.28107318e+00,
         3.99298577e-01,  1.28859141e+00],
       [-7.60851671e-01,  1.54348805e-01, -1.05722164e+00,
        -1.43505916e+00,  3.79962339e-01]])

是否可以使用NMF进行降维???

# Expected to fail: NMF rejects input that contains negative values, and this X
# has some, so fit_transform raises ValueError.
nmf = NMF(n_components=5)

nmf.fit_transform(X)
-------------------------------------------------------

ValueError            Traceback (most recent call last)

<ipython-input-26-a144852d1706> in <module>
      1 nmf = NMF(n_components=5)
      2 
----> 3 nmf.fit_transform(X)


d:\python3.7.4\lib\site-packages\sklearn\decomposition\_nmf.py in fit_transform(self, X, y, W, H)
   1285             l1_ratio=self.l1_ratio, regularization='both',
   1286             random_state=self.random_state, verbose=self.verbose,
-> 1287             shuffle=self.shuffle)
   1288 
   1289         self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,


d:\python3.7.4\lib\site-packages\sklearn\decomposition\_nmf.py in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha, l1_ratio, regularization, random_state, verbose, shuffle)
   1007 
   1008     X = check_array(X, accept_sparse=('csr', 'csc'), dtype=float)
-> 1009     check_non_negative(X, "NMF (input X)")
   1010     beta_loss = _check_string_param(solver, regularization, beta_loss, init)
   1011 


d:\python3.7.4\lib\site-packages\sklearn\utils\validation.py in check_non_negative(X, whom)
    977 
    978     if X_min < 0:
--> 979         raise ValueError("Negative values in data passed to %s" % whom)
    980 
    981 


ValueError: Negative values in data passed to NMF (input X)
上一篇下一篇

猜你喜欢

热点阅读