BP神经网络公式推导

2018-10-20  本文已影响0人  雪地团子
BP神经网络.PNG

正向传播

第1层
公式1
\begin{bmatrix} W_{n_1,n_0}^1&B_{n_1}^1\\ 0&1\\ \end{bmatrix} {\cdot} \begin{bmatrix} H_{n_0}^0\\ 1 \end{bmatrix} = \begin{bmatrix} I_{n_1}^1\\ 1 \end{bmatrix}
公式2
\begin{bmatrix} {w_{1,1}^1}&{w_{1,2}^1}&{\cdots}&{w_{1,n_0}^1}&{b_1^1}\\ {w_{2,1}^1}&{w_{2,2}^1}&{\cdots}&{w_{2,n_0}^1}&{b_2^1}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_1,1}^1}&{w_{n_1,2}^1}&{\cdots}&{w_{n_1,n_0}^1}&{b_{n_1}^1}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^0}\\{h_2^0}\\{\vdots}\\{h_{n_0}^0}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^{1}}\\{i_{2}^{1}}\\{\vdots}\\{i_{n_{1}}^{1}}\\1\\ \end{bmatrix}
公式3
S(I_{n_1}^1) = H_{n_1}^1
公式4
S( \begin{bmatrix} {i_{1}^{1}}\\{i_{2}^{1}}\\{\vdots}\\{i_{n_{1}}^{1}}\\ \end{bmatrix} ) = \begin{bmatrix} {h_1^1}\\{h_2^1}\\{\vdots}\\{h_{n_1}^1}\\ \end{bmatrix}
第2层
公式5
\begin{bmatrix} W_{n_2,n_1}^2&B_{n_2}^2\\ 0&1\\ \end{bmatrix} {\cdot} \begin{bmatrix} H_{n_1}^1\\ 1 \end{bmatrix} = \begin{bmatrix} I_{n_2}^2\\ 1 \end{bmatrix}
公式6
\begin{bmatrix} {w_{1,1}^2}&{w_{1,2}^2}&{\cdots}&{w_{1,n_1}^2}&{b_1^2}\\ {w_{2,1}^2}&{w_{2,2}^2}&{\cdots}&{w_{2,n_1}^2}&{b_2^2}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_2,1}^2}&{w_{n_2,2}^2}&{\cdots}&{w_{n_2,n_1}^2}&{b_{n_2}^2}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^1}\\{h_2^1}\\{\vdots}\\{h_{n_1}^1}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^2}\\{i_2^2}\\{\vdots}\\{i_{n_2}^2}\\1\\ \end{bmatrix}
公式7
S(I_{n_2}^2) = H_{n_2}^2
公式8
S( \begin{bmatrix} {i_{1}^2}\\{i_{2}^2}\\{\vdots}\\{i_{n_2}^2}\\ \end{bmatrix} ) = \begin{bmatrix} {h_1^2}\\{h_2^2}\\{\vdots}\\{h_{n_2}^2}\\ \end{bmatrix}
第m层
公式9
\begin{bmatrix} W_{n_m,n_{m-1}}^m&B_{n_m}^m\\ 0&1\\ \end{bmatrix} {\cdot} \begin{bmatrix} H_{n_{m-1}}^{m-1} \\ 1 \end{bmatrix} = \begin{bmatrix} I_{n_m}^m\\ 1 \end{bmatrix}
公式10
\begin{bmatrix} {w_{1,1}^m}&{w_{1,2}^m}&{\cdots}&{w_{1,n_{m-1}}^m}&{b_1^m}\\ {w_{2,1}^m}&{w_{2,2}^m}&{\cdots}&{w_{2,n_{m-1}}^m}&{b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_m,1}^m}&{w_{n_m,2}^m}&{\cdots}&{w_{n_m,n_{m-1}}^m}&{b_{n_m}^m}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^{m-1}}\\{h_2^{m-1}}\\{\vdots}\\{h_{n_{m-1}}^{m-1}}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^m}\\{i_2^m}\\{\vdots}\\{i_{n_m}^m}\\1\\ \end{bmatrix}
公式11
S(I_{n_m}^m) = H_{n_m}^m
公式12
S( \begin{bmatrix} {i_{1}^m}\\{i_{2}^m}\\{\vdots}\\{i_{n_m}^m}\\ \end{bmatrix} ) = \begin{bmatrix} {h_1^m}\\{h_2^m}\\{\vdots}\\{h_{n_m}^m}\\ \end{bmatrix}

反向传播

期望输出

公式13
Y_{n_m} = \begin{bmatrix} {y_1}\\{y_2}\\{\vdots}\\{y_{n_m}}\\ \end{bmatrix}

误差

公式14
E_{total} = {\frac{1}{2}(h_1^m-y_1)^2}+{\frac{1}{2}(h_2^m-y_2)^2}+{\dots}+{\frac{1}{2}(h_{n_m}^m-y_{n_m})^2}
公式15
\frac{\partial E_{total}}{\partial H_{n_m}^m} = \begin{bmatrix} {h_1^m-y_1}\\{h_2^m-y_2}\\{\vdots}\\{h_{n_m}^m-y_{n_m}}\\ \end{bmatrix}
公式16(激活函数 S 取 Sigmoid 函数,因此 \frac{\partial h_k^m}{\partial i_k^m} = h_k^m(1-h_k^m))
\frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} = \begin{bmatrix} {\frac{\partial h_1^m}{\partial i_1^m}}&0&0&0\\ 0&\frac{\partial h_2^m}{\partial i_2^m}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{\frac{\partial h_{n_m}^m}{\partial i_{n_m}^m}}\\ \end{bmatrix} = \begin{bmatrix} {h_1^m(1-h_1^m)}&0&0&0\\ 0&{h_2^m(1-h_2^m)}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_m}^m(1-h_{n_m}^m)}\\ \end{bmatrix}

权重偏导

公式17
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial w_{1,1}^m}&\frac{\partial E_{total}}{\partial w_{1,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{1,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_1^m}\\ \frac{\partial E_{total}}{\partial w_{2,1}^m}&\frac{\partial E_{total}}{\partial w_{2,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{2,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial w_{n_m,1}^m}&\frac{\partial E_{total}}{\partial w_{n_m,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{n_m,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_{n_m}^m}\\ \end{bmatrix}
公式18
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,1}^m}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial b_1^m}\\ \frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,1}^m}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,1}^m}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial b_{n_m}^m}\\ \end{bmatrix}
公式19
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_1^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_1^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_1^m}1\\ \frac{\partial E_{total}}{\partial i_2^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_2^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_2^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_2^m}1\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_{n_m}^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_{n_m}^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_{n_m}^m}1\\ \end{bmatrix}
公式20
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}\\ \frac{\partial E_{total}}{\partial i_2^m}\\ {\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}\\ \end{bmatrix} \begin{bmatrix} {h_1^{m-1}}&{h_2^{m-1}}&{\dots}&{h_{n_{m-1}}^{m-1}}&1\\ \end{bmatrix}
公式21
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \frac{\partial E_{total}}{\partial I_{n_m}^m} \begin{bmatrix} [H_{n_{m-1}}^{m-1}]^T&1\\ \end{bmatrix} = \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} \frac{\partial E_{total}}{\partial H_{n_m}^m} \begin{bmatrix} [H_{n_{m-1}}^{m-1}]^T&1\\ \end{bmatrix}
公式22
\frac{\partial I_{n_m}^m}{\partial H_{n_{m-1}}^{m-1}} = \begin{bmatrix} {\frac{\partial i_1^m}{\partial h_1^{m-1}}}&\frac{\partial i_2^m}{\partial h_1^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_1^{m-1}}}\\ {\frac{\partial i_1^m}{\partial h_2^{m-1}}}&\frac{\partial i_2^m}{\partial h_2^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_2^{m-1}}}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ {\frac{\partial i_1^m}{\partial h_{n_{m-1}}^{m-1}}}&\frac{\partial i_2^m}{\partial h_{n_{m-1}}^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_{n_{m-1}}^{m-1}}}\\ \end{bmatrix}_{n_{m-1},n_m} = \begin{bmatrix} {w_{1,1}^m}&{w_{1,2}^m}&{\cdots}&{w_{1,n_{m-1}}^m}\\ {w_{2,1}^m}&{w_{2,2}^m}&{\cdots}&{w_{2,n_{m-1}}^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ {w_{n_m,1}^m}&{w_{n_m,2}^m}&{\cdots}&{w_{n_m,n_{m-1}}^m}\\ \end{bmatrix}^T =[W_{n_m,n_{m-1}}^m]^T
公式23
\frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}} = \frac{\partial I_{n_m}^m}{\partial H_{n_{m-1}}^{m-1}} \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} \frac{\partial E_{total}}{\partial H_{n_m}^m}


公式24
\frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} = \begin{bmatrix} {h_1^{m-1}(1-h_1^{m-1})}&0&0&0\\ 0&{h_2^{m-1}(1-h_2^{m-1})}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_{m-1}}^{m-1}(1-h_{n_{m-1}}^{m-1})}\\ \end{bmatrix}
公式25
\begin{bmatrix} \Delta W_{n_{m-1},n_{m-2}}^{m-1}&\Delta B_{n_{m-1}}^{m-1} \end{bmatrix} = \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}} \begin{bmatrix} [H_{n_{m-2}}^{m-2}]^T&1\\ \end{bmatrix}
公式26
\frac{\partial I_{n_{m-1}}^{m-1}}{\partial H_{n_{m-2}}^{m-2}} =[W_{n_{m-1},n_{m-2}}^{m-1}]^T
公式27
\frac{\partial E_{total}}{\partial H_{n_{m-2}}^{m-2}} = \frac{\partial I_{n_{m-1}}^{m-1}}{\partial H_{n_{m-2}}^{m-2}} \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}}


公式28
\frac{\partial H_{n_{m-2}}^{m-2}}{\partial I_{n_{m-2}}^{m-2}} = \begin{bmatrix} {\frac{\partial h_1^{m-2}}{\partial i_1^{m-2}}}&0&0&0\\ 0&\frac{\partial h_2^{m-2}}{\partial i_2^{m-2}}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{\frac{\partial h_{n_{m-2}}^{m-2}}{\partial i_{n_{m-2}}^{m-2}}}\\ \end{bmatrix} = \begin{bmatrix} {h_1^{m-2}(1-h_1^{m-2})}&0&0&0\\ 0&{h_2^{m-2}(1-h_2^{m-2})}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_{m-2}}^{m-2}(1-h_{n_{m-2}}^{m-2})}\\ \end{bmatrix}
公式29
\begin{bmatrix} \Delta W_{n_{m-2},n_{m-3}}^{m-2}&\Delta B_{n_{m-2}}^{m-2} \end{bmatrix} = \frac{\partial H_{n_{m-2}}^{m-2}}{\partial I_{n_{m-2}}^{m-2}} \frac{\partial E_{total}}{\partial H_{n_{m-2}}^{m-2}} \begin{bmatrix} [H_{n_{m-3}}^{m-3}]^T&1\\ \end{bmatrix}


上一篇 下一篇

猜你喜欢

热点阅读