Disclaimer
This post is my personal study notes; please excuse any rough formatting. The content reflects my own understanding, and corrections are welcome if you spot any mistakes.
21. Pooling Layers
1. Maximum Pooling and Average Pooling
Like the cross-correlation operator, the pooling window starts at the upper-left corner of the input tensor and slides across it from left to right and from top to bottom. At each position the pooling window reaches, it computes the maximum or the average of the input subtensor covered by the window; which of the two is computed depends on whether a maximum pooling layer or an average pooling layer is used.
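As a quick illustration (a minimal sketch of my own using PyTorch's built-in nn.MaxPool2d and nn.AvgPool2d; the manual implementation follows in the code demonstration below):

import torch
from torch import nn

# 3x3 input; the nn pooling layers expect a 4D (batch, channel, height, width) tensor
X = torch.arange(9, dtype=torch.float32).reshape(1, 1, 3, 3)
print(nn.MaxPool2d(kernel_size=2, stride=1)(X))  # [[4., 5.], [7., 8.]]
print(nn.AvgPool2d(kernel_size=2, stride=1)(X))  # [[2., 3.], [5., 6.]]

Note that nn.MaxPool2d defaults its stride to the window size, so stride=1 is passed explicitly here to evaluate every overlapping window.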
2. Differences between torch.stack and torch.cat
- Dimension creation:
torch.stack creates a new dimension and stacks the sequence of input tensors along it, so the result has one more dimension than each input tensor.
torch.cat does not introduce a new dimension; it concatenates the input tensors along an existing dimension.
- Concatenation behavior:
torch.stack stacks the input tensors element by element along the specified new dimension, which requires all input tensors to have exactly the same shape.
torch.cat joins the input tensors along the chosen dimension and is more flexible: the input shapes only need to agree in every dimension except the concatenation dimension, while the size along the concatenation dimension itself may differ. The short sketch below illustrates the shape difference.
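A minimal sketch of my own showing the resulting shapes (the tensors a, b, c are arbitrary examples):

import torch

a = torch.zeros(2, 3)
b = torch.ones(2, 3)

# stack adds a new dimension: two (2, 3) tensors become one (2, 2, 3) tensor
print(torch.stack((a, b), dim=0).shape)   # torch.Size([2, 2, 3])

# cat joins along an existing dimension: (2, 3) and (2, 3) become (4, 3)
print(torch.cat((a, b), dim=0).shape)     # torch.Size([4, 3])

# cat only needs the non-concatenated dimensions to match
c = torch.ones(5, 3)
print(torch.cat((a, c), dim=0).shape)     # torch.Size([7, 3])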
3. Code Demonstration
import torch
from torch import nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def pool2d(X, pool_size, mode='max'):
    """Manual 2D pooling: slide a (p_h, p_w) window over X and take the max or the mean."""
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)).to(device)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max().to(device)
            elif mode == 'avg':
                Y[i, j] = X[i: i + p_h, j: j + p_w].mean().to(device)
    return Y

X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
print("pool_size = (2, 2), max : ", pool2d(X, (2, 2)))
print("pool_size = (2, 2), avg : ", pool2d(X, (2, 2), 'avg'))

X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4)).to(device)
print(f'X : {X}')

# the name pool2d is reused below for built-in nn.MaxPool2d layers
pool2d = nn.MaxPool2d(3)
print("pool = (3, 3) : ", pool2d(X))

pool2d = nn.MaxPool2d(3, padding=1, stride=2)
print("pool = (3, 3), padding = 1, stride = 2 : ", pool2d(X))

pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))
print("pool = (2, 3), stride = (2, 3), padding = (0, 1) : ", pool2d(X))

##### Multiple channels #####
X = torch.cat((X, X + 1), 1)
print(f'X : {X}')

pool2d = nn.MaxPool2d(3, padding=1, stride=2)
print("pool = (3, 3), padding = 1, stride = 2 : ", pool2d(X))
22. LeNet
import torch
import torchvision
import time
from torch import nn
from IPython import display
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
from torchvision import transforms
from torch.utils import data

mydevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def accuracy(y_hat, y):  # helper that counts correct predictions
    """Count the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y  # bool tensor: True where the prediction matches the label
    return float(cmp.type(y.dtype).sum())

def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy of a model on a dataset using a GPU."""
    if isinstance(net, nn.Module):
        net.eval()  # set evaluation mode
        if not device:
            device = next(iter(net.parameters())).device
    # number of correct predictions, total number of predictions
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                # required for BERT fine-tuning (introduced later)
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    axes.set_xlabel(xlabel), axes.set_ylabel(ylabel)
    axes.set_xscale(xscale), axes.set_yscale(yscale)
    axes.set_xlim(xlim), axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()

class Accumulator:  # utility class that accumulates sums over several variables
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

class Animator:  # utility class that plots data in animation
    """Plot data in animation."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # incrementally plot multiple lines
        if legend is None:
            legend = []
        backend_inline.set_matplotlib_formats('svg')
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # use a lambda to capture the axis arguments
        self.config_axes = lambda: set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # add multiple data points into the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        # the following two lines make the live plot update in PyCharm
        plt.draw()
        plt.pause(interval=0.001)
        display.clear_output(wait=True)
        plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

class Timer:
    def __init__(self):
        self.times = []
        self.start()

    def start(self):
        self.tik = time.time()

    def stop(self):
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def sum(self):
        """Return the sum of time."""
        return sum(self.times)

def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=False)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=False)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4),
            data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4))

net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))

""" Try different activation functions and replace the average pooling layers with max pooling """
net_demo = nn.Sequential(
    nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 32, kernel_size=5), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(32 * 5 * 5, 128), nn.Tanh(),
    nn.Linear(128, 84), nn.Tanh(),
    nn.Linear(84, 10))

X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape: \t\t', X.shape)

batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

def train(net, train_iter, test_iter, num_epochs, lr, device):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    # print the GPU name when available, otherwise the device itself
    print('training on', torch.cuda.get_device_name(device) if device.type == 'cuda' else device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs],
                        legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = Timer(), len(train_iter)
    for epoch in range(num_epochs):
        # sum of training loss, sum of training accuracy, number of examples
        metric = Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    plt.title(f'loss {train_l:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}\n'
              f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on {str(device)}')
    plt.show()

lr, num_epochs = 0.9, 10
train(net, train_iter, test_iter, num_epochs, lr, mydevice)

def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        try:
            numpy = lambda x, *args, **kwargs: x.detach().numpy(*args, **kwargs)
            img = numpy(img)
        except:
            pass
        ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
    if titles:
        plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
        plt.title(titles)
    plt.show()
    return axes

def show_activate(net, train_iter, num_epochs, lr, device):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', torch.cuda.get_device_name(device) if device.type == 'cuda' else device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    Animator(xlabel='epoch', xlim=[1, num_epochs], legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        net.train()
        for i, (X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
    # visualize feature maps of the last batch after the first and second activation layers
    with torch.no_grad():
        x_first_Sigmoid_layer = net[0:2](X)[0:9, 1, :, :]
        show_images(x_first_Sigmoid_layer.reshape(9, 28, 28).cpu().detach(), 1, 9,
                    titles=f'First {net[1]}')
        x_second_Sigmoid_layer = net[0:5](X)[0:9, 1, :, :]
        show_images(x_second_Sigmoid_layer.reshape(9, 10, 10).cpu().detach(), 1, 9,
                    titles=f'Second {net[4]}')

""" Feel free to try more combinations """
show_activate(net_demo, train_iter, num_epochs, lr, mydevice)

# alternative configurations to swap in:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))

net_demo = nn.Sequential(
    nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 32, kernel_size=5), nn.Sigmoid(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(32 * 5 * 5, 128), nn.Sigmoid(),
    nn.Linear(128, 84), nn.Sigmoid(),
    nn.Linear(84, 10))

net_demo = nn.Sequential(
    nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 32, kernel_size=5), nn.Sigmoid(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(32 * 5 * 5, 128), nn.Sigmoid(),
    nn.Linear(128, 84), nn.Sigmoid(),
    nn.Linear(84, 10))
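One detail worth checking when swapping in these variants (a note of my own, not from the original): the in_features of the first nn.Linear must equal the flattened output size of the convolutional part. A minimal sketch, assuming the 28x28 Fashion-MNIST input used above:

import torch
from torch import nn

# convolutional part of net_demo only, ending at Flatten
conv_part = nn.Sequential(
    nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 32, kernel_size=5), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten())
X = torch.rand(1, 1, 28, 28)
print(conv_part(X).shape)  # torch.Size([1, 800]), i.e. 32 * 5 * 5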
Part of the material in this post is based on the Bilibili channel 跟李沐学AI and Baidu Baike.