1.1数据操作

2021-04-24 本文已影响0人纵春水东流

1.数据生成或加载：数据生成与数据转化
1.1数据与格式

文本：txt -> 列表： ['第一句','第二句话']-> 列表：[[1,3,2],[1,4,2,5]]->填充成数组:[[1,3,2,0],[1,4,2,5]]
图片：读取数组
音频：一般44100hz采样，每秒44100个数字 -> 向量
表格 csv：-> 数组
表格 excel:->数组
tab 文本  -> 数组

文件 ->函数读取
数据库 ->数据库接口

1.2使用的包 numpy 或 pandas

tab 文本：np.loadtxt() 
表格：pd.read_csv()    pd.read_excel()
声音文件：sampling_freq,audio =wavfile.read('input_read.wav')#from scipy.io import wavfile
图片：img = Image.open('file_name')#from PIL import Image

1.3 numpy
1.3.1numpy数据类型与形状

数据类型:int,float,uint,complex,str
  
x = np.arange(12).reshape(3,4)#转化3行4列
x = np.arange(12).reshape(2,3,4)#第一个轴2个元素，第二个轴3个元素，第三个轴4个元素
x.shape#查看数组形状

1.3.2numpy 索引

x = np.arange(12)
x[0], x[-1]#精确索引
x[0:1], x[0:], x[:1]#切片索引，保留维度
x = np.arange(24).reshape(2,3,4); x[:,1:1,:-1]#可以在不同的轴分别进行索引
x[x>4]#布尔索引

1.3.3numpy 数组运算与广播

numpy数组在+-*/运算前会对两个数据进行广播，使得对应轴的元素相同
[3,3]+[1,] -> [3,3]+[3,3] #将1在0轴重复三次后，再沿着1轴重复三次完成广播
[3,3]+[2,] -> #出错：末轴长度2与3不相等，且不为1，无法广播
[3,3]+[3,]  ->  [3,3]+[3,3] #[3,]按末轴对齐视为[1,3]：0轴重复3次，1轴已满，完成广播
[3,3]+[3,1] -> [3,3]+[3,3] #0轴已满，1轴重复3次完成广播
[3,3]+[1,3] -> [3,3]+[3,3]#0轴重复3次，1轴已满，完成广播
[1,2]+[2,1]->[2,2]+[2,2] 左边0轴重复2次，右边1轴重复2次，完成广播
np.exp(x) #指数运算
np.log(x) #对数运算
np.sum(x,axis=0)#求和运算,也可以传入元组(如axis=(0,1))对某几个轴进行求和运算
np.average....

1.3.4数组增删改

#删除
x = np.ones((3,4));x=x[1:,:]
#增加
np.vstack((x,x))#按0轴合并数据，即列数不变，行数变
np.hstack((x,x))#按1轴合并数据，即行数不变，列数变
np.concatenate((x,x),axis=0)#按0轴合并数据,0轴变大，1轴不变
#改
x[:,0] = np.ones(3)#更新索引对应的值

1.4 pandas
1.4.1 pandas数据类型

一维：Series，带标签的一维同构数组
二维：DataFrame：带标签，大小可变，二维异构表格
(1)Series是标量的容器
s = pd.Series([1, 3, 5, np.nan, 6, 8])
(2)DataFrame是Series的容器
用Series字典对象生成DataFrame
df2 = pd.DataFrame({'A': 1.,
                     'B': pd.Timestamp('20130102'),
                     'C': pd.Series(1, index=list(range(4)), dtype='float32')})

1.4.2 pandas索引

1.4.2.1Series索引
1.4.2.2DataFrame索引
df.iloc#类似numpy的索引
df.loc#可以列名进行索引
df.at
df.iat
df[df.A>0]#布尔索引

1.4.3缺失值处理

缺失值处理
df.dropna(how='any')
df.fillna(value=5)
pd.isna(df)
运算
df.mean(1)
df.apply(np.cumsum)
df.apply(lambda x: x.max()-x.min())
直方图
s.value_counts()
合并
pd.concat(pieces)
pd.merge(left,right,on='key')
追加
df.append(s,ignore_index=True)#pandas 2.0起已移除，可改用pd.concat([df,s.to_frame().T],ignore_index=True)
分组
df.groupby('A').sum()
df.groupby(['A','B']).sum()
数据透视表
pd.pivot_table(df,values='D',index=['A','B'],columns=['C'])

1.5 tensorflow张量

张量：n维数组，跟numpy差不多，包括标量、矢量和矩阵等特殊张量
张量分类：常量、变量和占位符
tf.constant(1,name='a')#标量常量
tf.constant([[4,5],[6,7]])#矩阵常量
tf.Variable([1,2],name='name')#矢量变量

#数据生成
tf.zeros([2,3],tf.int32),tf.zeros_like()
tf.linspace(2.,10,1);tf.range()
tf.random_normal([2,3],mean=2.,stddev=4,seed=12)#正态分布
tf.random_uniform([2,3],maxval=4,seed=12)#均匀分布
tf.random_crop(tf_random,[2,4],seed=12)#随机裁剪成[2,4]
tf.random_shuffle(tf_random)#随机打乱数据
tf.set_random_seed(12)#
#变量初始化
tf.Variable(weights.initialized_value(),name='w2')

数据形状
x.shape;tf.reshape(x,(3,4))
#数据整合
tf.concat([X,Y],axis=0)
tf
运算：

1.6 pytorch 张量

#张量，跟numpy差不多
torch.tensor([1,2,3])#支持浮点数、整数
#张量生成
torch.rand(2,3)#[2,3]随机数
torch.randn()#均值为0，方差为1
torch.ones();torch.zeros();torch.eye()
torch.cat((x,x),dim=0)
#运算
torch.mm(A,B)
torch.

1.7 mxnet张量 #跟numpy差不多

1.8将加载的数据转化成张量

#tensorflow
x,y = tf.constant(x),tf.constant(y)
#torch
x,y = torch.tensor(x),torch.tensor(y)
#mxnet
x,y = np.array(x),np.array(y)

1.9 张量运算

项目		mxnet	torch	tensorflow
n阶，张量	0阶，标量	np.array(1)	torch.tensor(1)	tf.constant(1)
	1阶，向量	np.arange(4)	torch.arange(4)	tf.range(4)
	2阶，矩阵	np.array([[1,2],[3,4]])	torch.tensor([[1,2],[3,4]])	tf.constant([[1,2],[3,4]])
	轴形状	x.shape	x.shape	x.shape
	轴数量	len(x.shape)	len(x.shape)	len(x.shape)
运算	元素运算	+-*/	+-*/	+-*/
	Hadamard	*	*	*
	转置	x.T	x.T	x.T
	求和	x.sum(axis=0)	x.sum(axis=0)	tf.reduce_sum(x,axis=0)
	向量点积	np.dot(x,y)	torch.dot(x,y)	tf.tensordot(x,y,axes=1)
	矩阵向量积	np.dot(A,x)	torch.mv(A,x)	tf.linalg.matvec(A,x)
	矩阵乘积	np.dot(A,B)	torch.mm(A,B)	tf.matmul(A,B)
范数	L1	np.abs(u).sum()	torch.abs(u).sum()	tf.reduce_sum(tf.abs(u))
	L2	np.linalg.norm(u)	torch.norm(u)	tf.norm(u)
	Lp(Frobenius)

1.10 微积分

#1.1标量自动微分
#-------------mxnet
x = np.arange(4.0)
x.attach_grad()
with autograd.record(): 
    y = x.sum()
    y.backward()
x.grad  
#-------------torch
x = torch.arange(4.0)
x.requires_grad_(True)
y = 2 * torch.dot(x, x)
y.backward()
x.grad
#-------------tensorflow
x = tf.range(4, dtype=tf.float32)
x = tf.Variable(x)
with tf.GradientTape() as t:
    y = 2 * tf.tensordot(x, x, axes=1) 
t.gradient(y, x)

#1.2向量自动微分
#-------------mxnet
x = np.arange(4.0)
with autograd.record():
    y = x * x  # `y` is a vector
y.backward()
x.grad  # Equals to y = sum(x * x)
#-------------torch
x.grad.zero_()
y = x * x
# y.backward(torch.ones(len(x))) equivalent to the below
y.sum().backward()
x.grad
#-------------tensorflow
with tf.GradientTape() as t:
    y = x * x
t.gradient(y, x)  # Same as `y = tf.reduce_sum(x * x)`

#1.3分离微分计算
#-------------mxnet
with autograd.record():
    y = x * x
    u = y.detach()
    z = u * x
z.backward()
x.grad == u
#-------------torch
x.grad.zero_()
y = x * x
u = y.detach()
z = u * x

z.sum().backward()
x.grad == u
#-------------tensorflow
# Set `persistent=True` to run `t.gradient` more than once
with tf.GradientTape(persistent=True) as t:
    y = x * x
    u = tf.stop_gradient(y)
    z = u * x

x_grad = t.gradient(z, x)
x_grad == u

1.11文档

print(dir(tf.random))
help(tf.ones)

1.1数据操作

猜你喜欢

热点阅读