A Complete Neural Network Training Workflow
1. Import the common libraries and functions
import os
import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.optim as optim
from matplotlib import pyplot as plt
from model.lenet import LeNet
from tools.my_dataset import RMBDataset
2. Define the hyperparameters (including the labels)
def set_seed(seed=1):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

set_seed()  # set the random seed for reproducibility

rmb_label = {"1": 0, "100": 1}

# hyperparameters
MAX_EPOCH = 10
BATCH_SIZE = 16
LR = 0.01
log_interval = 10
val_interval = 1
3. Data processing
3.1 Data split: locate (or create) the data directories
split_dir = os.path.join("..", "..", "data", "rmb_split")
train_dir = os.path.join(split_dir, "train")
valid_dir = os.path.join(split_dir, "valid")
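The split itself (creating rmb_split/train and rmb_split/valid out of the raw per-class image folders) is not shown in this note. Below is a minimal sketch of what such a split script might look like; the raw directory name RMB_data, the 8:1:1 train/valid/test ratio, and the makedir helper are assumptions for illustration, not part of the original code.

import os
import random
import shutil

def makedir(new_dir):
    # create the target directory if it does not exist (hypothetical helper)
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)

# assumed layout: ../../data/RMB_data/<class>/*.jpg -> ../../data/rmb_split/{train,valid,test}/<class>/
dataset_dir = os.path.join("..", "..", "data", "RMB_data")
split_dir = os.path.join("..", "..", "data", "rmb_split")
train_pct, valid_pct = 0.8, 0.1          # assumed 8:1:1 split

random.seed(1)
for root, dirs, _ in os.walk(dataset_dir):
    for sub_dir in dirs:                 # one sub-directory per class ("1", "100")
        imgs = [f for f in os.listdir(os.path.join(root, sub_dir)) if f.endswith('.jpg')]
        random.shuffle(imgs)
        n = len(imgs)
        train_point = int(n * train_pct)
        valid_point = int(n * (train_pct + valid_pct))
        for i, img in enumerate(imgs):
            if i < train_point:
                out_dir = os.path.join(split_dir, "train", sub_dir)
            elif i < valid_point:
                out_dir = os.path.join(split_dir, "valid", sub_dir)
            else:
                out_dir = os.path.join(split_dir, "test", sub_dir)
            makedir(out_dir)
            shutil.copy(os.path.join(root, sub_dir, img), os.path.join(out_dir, img))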
3.2 Define the data transforms
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

valid_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])
3.3 Build the dataset (using a custom Dataset class)
Pass the data directory and the transforms into the custom Dataset class to obtain a dataset instance.
train_data = RMBDataset(data_dir=train_dir, transform=train_transform)

# RMBDataset is a class that mainly does two things.
# First, it walks the data directory to collect the information of every image,
# i.e. each image's path and its label:
@staticmethod
def get_img_info(data_dir):
    data_info = list()
    for root, dirs, _ in os.walk(data_dir):
        # iterate over the class sub-directories
        for sub_dir in dirs:
            img_names = os.listdir(os.path.join(root, sub_dir))
            img_names = list(filter(lambda x: x.endswith('.jpg'), img_names))
            # iterate over the images
            for i in range(len(img_names)):
                img_name = img_names[i]
                path_img = os.path.join(root, sub_dir, img_name)
                label = rmb_label[sub_dir]
                data_info.append((path_img, int(label)))
    return data_info

# Second, it applies the transforms to each image as it is fetched:
def __getitem__(self, index):
    path_img, label = self.data_info[index]
    img = Image.open(path_img).convert('RGB')   # PIL image, 0~255
    if self.transform is not None:
        img = self.transform(img)               # apply the transforms here (convert to tensor, etc.)
    return img, label
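The snippets above come from tools/my_dataset.py and omit the constructor, __len__, and the PIL import. Since the full module is not reproduced in this note, the following is an illustrative reconstruction of what the complete class might look like, consistent with the methods shown above:

import os
from PIL import Image
from torch.utils.data import Dataset

rmb_label = {"1": 0, "100": 1}

class RMBDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        # collect (image path, label) pairs once, up front
        self.data_info = self.get_img_info(data_dir)
        self.transform = transform

    def __getitem__(self, index):
        path_img, label = self.data_info[index]
        img = Image.open(path_img).convert('RGB')    # PIL image, 0~255
        if self.transform is not None:
            img = self.transform(img)                # resize, crop, ToTensor, normalize, ...
        return img, label

    def __len__(self):
        return len(self.data_info)

    @staticmethod
    def get_img_info(data_dir):
        data_info = list()
        for root, dirs, _ in os.walk(data_dir):
            for sub_dir in dirs:                     # one folder per class
                img_names = [f for f in os.listdir(os.path.join(root, sub_dir)) if f.endswith('.jpg')]
                for img_name in img_names:
                    path_img = os.path.join(root, sub_dir, img_name)
                    data_info.append((path_img, int(rmb_label[sub_dir])))
        return data_info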
3.4 Build the DataLoader (used to iterate over the images during training)
Pass the dataset instance into DataLoader and set hyperparameters such as batch_size; the validation loader is built the same way, as sketched below.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
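The training loop in step 7 also iterates over a valid_loader, which never appears in the snippets above. It is presumably constructed in the same way from valid_dir and valid_transform; a minimal sketch:

valid_data = RMBDataset(data_dir=valid_dir, transform=valid_transform)
valid_loader = DataLoader(dataset=valid_data, batch_size=BATCH_SIZE)   # no shuffling needed for validation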
4. Build the model
This takes two steps. First, instantiate the network:
net = LeNet(classes=2)
Second, initialize the weights:
net.initialize_weights()
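The model itself lives in model.lenet and is not reproduced in this note. For reference, a minimal sketch of a LeNet that matches the usage here (32×32 RGB input, a classes argument, and an initialize_weights method); the exact layer sizes and initialization scheme are assumptions:

import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self, classes):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)          # 3x32x32 -> 6x28x28
        self.conv2 = nn.Conv2d(6, 16, 5)         # 6x14x14 -> 16x10x10
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, classes)

    def forward(self, x):
        out = F.max_pool2d(F.relu(self.conv1(x)), 2)
        out = F.max_pool2d(F.relu(self.conv2(out)), 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)                      # raw logits; CrossEntropyLoss applies softmax internally
        return out

    def initialize_weights(self):
        # assumed init scheme: Xavier for conv layers, small Gaussian for linear layers
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight.data, 0, 0.1)
                m.bias.data.zero_()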
5. Set up the loss function
criterion = nn.CrossEntropyLoss()
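nn.CrossEntropyLoss expects raw logits of shape (N, num_classes) and integer class indices of shape (N,); it applies log-softmax internally, so the network must not end with a softmax. A tiny standalone check (the values are made up for illustration):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])   # batch of 2, 2 classes, raw scores
targets = torch.tensor([0, 1])                     # integer class labels
print(criterion(logits, targets))                  # mean cross-entropy over the batch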
6. Set up the optimizer
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9)                      # choose the optimizer
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)   # set the learning-rate decay policy
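With step_size=10 and gamma=0.1, StepLR multiplies the learning rate by 0.1 every 10 epochs, so with MAX_EPOCH = 10 the rate effectively stays at 0.01 for this whole run. A quick standalone check of the schedule (assuming a PyTorch version recent enough to have get_last_lr):

import torch
import torch.optim as optim

params = [torch.zeros(1, requires_grad=True)]      # dummy parameter, just to build an optimizer
optimizer = optim.SGD(params, lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(30):
    optimizer.step()                 # one epoch of training would normally happen here
    scheduler.step()                 # lr: 0.01 for epochs 0-9, 0.001 for 10-19, 0.0001 for 20-29
    if (epoch + 1) % 10 == 0:
        print(epoch, scheduler.get_last_lr())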
7. Training
7.1 Define lists to store the training curves
train_curve = list()
valid_curve = list()
7.2 The training loop
# 1. outer loop over epochs
for epoch in range(MAX_EPOCH):

    # 2. running statistics for this epoch
    loss_mean = 0.
    correct = 0.
    total = 0.

    # 3. switch the network to training mode
    net.train()

    # 4. iterate over the batches produced by the DataLoader
    for i, data in enumerate(train_loader):

        # 5. forward pass: unpack inputs and labels, feed the inputs to the net
        inputs, labels = data
        outputs = net(inputs)

        # 6. backward pass: zero the gradients, compute the loss (outputs vs. labels), back-propagate
        optimizer.zero_grad()
        loss = criterion(outputs, labels)
        loss.backward()

        # 7. update the weights
        optimizer.step()

        # 8. track classification accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).squeeze().sum().numpy()

        # 9. log training information
        loss_mean += loss.item()
        train_curve.append(loss.item())
        if (i+1) % log_interval == 0:
            loss_mean = loss_mean / log_interval
            print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, i+1, len(train_loader), loss_mean, correct / total))
            loss_mean = 0.

    # 10. update the learning rate once per epoch
    scheduler.step()

    # 11. validate the model: same steps as training, only the images come from the valid directory
    if (epoch+1) % val_interval == 0:

        correct_val = 0.
        total_val = 0.
        loss_val = 0.
        net.eval()
        with torch.no_grad():
            for j, data in enumerate(valid_loader):
                inputs, labels = data
                outputs = net(inputs)
                loss = criterion(outputs, labels)

                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).squeeze().sum().numpy()

                loss_val += loss.item()

            loss_val_epoch = loss_val / len(valid_loader)
            valid_curve.append(loss_val_epoch)   # record the mean loss over the whole validation epoch
            print("Valid:\t Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, j+1, len(valid_loader), loss_val_epoch, correct_val / total_val))
7.3 Visualize the training process
train_x = range(len(train_curve))
train_y = train_curve

train_iters = len(train_loader)
valid_x = np.arange(1, len(valid_curve)+1) * train_iters * val_interval   # valid_curve is recorded per epoch, so convert its x-axis to iterations
valid_y = valid_curve

plt.plot(train_x, train_y, label='Train')
plt.plot(valid_x, valid_y, label='Valid')

plt.legend(loc='upper right')
plt.ylabel('loss value')
plt.xlabel('Iteration')
plt.show()
8. Inference (simple version)
Simply reuse the trained network: feed in the test images and read off the predictions.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
test_dir = os.path.join(BASE_DIR, "test_data")

test_data = RMBDataset(data_dir=test_dir, transform=valid_transform)
valid_loader = DataLoader(dataset=test_data, batch_size=1)

for i, data in enumerate(valid_loader):
    # forward
    inputs, labels = data
    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)

    rmb = 1 if predicted.numpy()[0] == 0 else 100
    print("The model predicts a {} yuan note".format(rmb))
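One caveat: the inference loop above leaves the network in whatever mode it ended training in and still builds the autograd graph for every forward pass. For inference it is safer to switch to eval mode and disable gradient tracking; a small variant of the same loop, assuming the model and loader defined above:

net.eval()                                      # fix BatchNorm/Dropout behavior (this LeNet has neither, but it is a good habit)
with torch.no_grad():                           # no autograd graph is built during inference
    for i, data in enumerate(valid_loader):
        inputs, labels = data
        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)
        rmb = 1 if predicted.item() == 0 else 100
        print("The model predicts a {} yuan note".format(rmb))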