vs2017 VGG16处理cifar-10数据集的PyTorch实现
2019-01-23 14:30
886 查看
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_36556893/article/details/86608963
这是针对于博客vs2017安装和使用教程(详细)的VGG16-CIFAR10项目新建示例
目录
一、说明
1.网络框架搭建教程请参看博主博客:PyTorch 入门实战(四)——利用Torch.nn构建卷积神经网络
2.这里主要展示博主的代码和运行结果,希望可以帮助到正在学习PyTorch的人们
二、代码
1.nn_module_sample.py:里面是VGG-16(带有BatchNorm层)的网络,注意classifier分类器部分(全连接部分)的输入大小由输入图像尺寸经卷积和池化后的特征图大小决定(32×32输入经5次池化后为1×1×512,故第一层全连接输入为512),与batch大小无关
import torch.nn as nn


class VGG16(nn.Module):
    """VGG-16 with BatchNorm layers for CIFAR-10 (32x32 RGB images).

    The five 2x2 max-pools reduce a 32x32 input to a 1x1x512 feature map,
    so the first classifier Linear layer takes 512 input features. This
    size is determined by the input image size, not by the batch size.
    """

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # conv block 1: 3 -> 64 channels, 32x32 -> 16x16
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv block 2: 64 -> 128 channels, 16x16 -> 8x8
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv block 3: 128 -> 256 channels, 8x8 -> 4x4
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv block 4: 256 -> 512 channels, 4x4 -> 2x2
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv block 5: 512 -> 512 channels, 2x2 -> 1x1
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 1x1 average pool with stride 1 is a no-op on the 1x1 map;
            # kept to match the original architecture definition.
            nn.AvgPool2d(kernel_size=1, stride=1),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        # self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        """Run the conv features, flatten, then classify.

        x: (batch, 3, 32, 32) image tensor -> (batch, num_classes) logits.
        """
        out = self.features(x)
        out = out.view(out.size(0), -1)  # flatten to (batch, 512)
        out = self.classifier(out)
        return out


class testNet(nn.Module):
    """Small example network demonstrating three ways to define layers."""

    def __init__(self, num_classes=10):
        super(testNet, self).__init__()
        # Style 1: individual layers as attributes
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.BN1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32 -> 16
        # Style 2: Sequential built incrementally with add_module
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(64, 64, kernel_size=3, padding=1))
        layer2.add_module('BN2', nn.BatchNorm2d(64))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(kernel_size=2, stride=2))  # 16 -> 8
        self.layer2 = layer2
        # Style 3: inline Sequential (no pooling, spatial size stays 8x8)
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )
        self.classifier = nn.Sequential(
            # BUGFIX: layer3 outputs 128 channels on an 8x8 map (for 32x32
            # input), so the flattened feature vector has 128*8*8 elements,
            # not 128 — the original nn.Linear(128, 256) crashed in forward().
            nn.Linear(128 * 8 * 8, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Custom forward pass: conv1 block, layer2, layer3, classifier."""
        out = self.conv1(x)
        out = self.BN1(out)
        out = self.relu1(out)
        out = self.pool1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)  # flatten to (batch, 128*8*8)
        out = self.classifier(out)
        return out


if __name__ == '__main__':
    import torch

    # Use the GPU when one is available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    net = VGG16().to(device)
    print(net)
2.train.py:包含参数设定、图像预处理、数据集读取、网络创建、损失和优化、训练和测试部分
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os
import argparse
from tensorboardX import SummaryWriter
from nn_module_sample import VGG16

# --- Hyper-parameters --------------------------------------------------------
# BUGFIX: without type=..., values passed on the command line arrive as
# strings and break optim.SGD(lr=...) and range(...) below.
parser = argparse.ArgumentParser(description='cifar10')
parser.add_argument('--lr', default=1e-2, type=float, help='learning rate')
parser.add_argument('--epoch', default=15, type=int, help='time for ergodic')
parser.add_argument('--pre_epoch', default=0, type=int, help='begin epoch')
parser.add_argument('--outf', default='./model/',
                    help='folder to output images and model checkpoints')
# NOTE(review): any value given on the CLI for --pre_model is a non-empty
# string and therefore truthy; toggle it in code (or switch to
# action='store_true') to actually disable resuming.
parser.add_argument('--pre_model', default=True, help='use pre-model')
args = parser.parse_args()

# Use the GPU when one is available
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# --- Data preprocessing and augmentation -------------------------------------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # pad by 4, then random 32x32 crop
    transforms.RandomHorizontalFlip(),     # flip with probability 0.5
    transforms.ToTensor(),
    # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                         shuffle=False, num_workers=0)

# CIFAR-10 class labels
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# --- Model, loss, optimizer --------------------------------------------------
net = VGG16().to(device)
criterion = nn.CrossEntropyLoss()  # cross entropy for multi-class problems
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                      weight_decay=5e-4)  # momentum SGD + L2 (weight decay)

# --- Resume from checkpoint --------------------------------------------------
if args.pre_model:
    print("Resume from checkpoint...")
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found'
    state = torch.load('./checkpoint/ckpt.t7')
    net.load_state_dict(state['state_dict'])
    best_test_acc = state['acc']
    pre_epoch = state['epoch']
else:
    best_test_acc = 0          # best test accuracy seen so far
    pre_epoch = args.pre_epoch

# --- Training -----------------------------------------------------------------
if __name__ == "__main__":
    writer = SummaryWriter(log_dir='./log')
    print("Start Training, VGG-16...")
    with open("acc.txt", "w") as acc_f, open("log.txt", "w") as log_f:
        for epoch in range(pre_epoch, args.epoch):
            print('\nEpoch: %d' % (epoch + 1))
            net.train()
            sum_loss = 0.0   # running loss over the epoch
            accuracy = 0.0   # running number of correct predictions
            total = 0.0      # running number of samples
            length = len(trainloader)  # batches per epoch (loop-invariant)
            for i, data in enumerate(trainloader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                # Gradients accumulate across backward() calls, so clear
                # them before each batch.
                optimizer.zero_grad()
                # (deprecated torch.autograd.Variable wrapper removed:
                # tensors carry autograd state directly)
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                sum_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)  # argmax per row
                total += labels.size(0)
                # BUGFIX: .item() keeps the count a Python number; summing
                # integer tensors truncates on division with older torch.
                accuracy += predicted.eq(labels.data).cpu().sum().item()
                print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                      % (epoch + 1, (i + 1 + epoch * length),
                         sum_loss / (i + 1), 100. * accuracy / total))
                log_f.write('[epoch:%d, iter:%d] |Loss: %.03f | Acc: %.3f%% '
                            % (epoch + 1, (i + 1 + epoch * length),
                               sum_loss / (i + 1), 100. * accuracy / total))
                log_f.write('\n')
                log_f.flush()
            writer.add_scalar('loss/train', sum_loss / (i + 1), epoch)
            writer.add_scalar('accuracy/train', 100. * accuracy / total, epoch)

            # --- Evaluate on the test set after every epoch ------------------
            print("Waiting for test...")
            # eval() hoisted out of the test loop; disables dropout and uses
            # running BatchNorm statistics.
            net.eval()
            # no_grad() disables gradient tracking for the whole evaluation.
            with torch.no_grad():
                accuracy = 0
                total = 0
                for data in testloader:
                    images, labels = data
                    images, labels = images.to(device), labels.to(device)
                    outputs = net(images)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    accuracy += (predicted == labels).sum().item()
                print('测试准确率为: %.3f%%' % (100. * accuracy / total))
                acc = 100. * accuracy / total
                writer.add_scalar('accuracy/test', acc, epoch)
                print('Saving model...')
                # BUGFIX: %03d zero-pads the epoch; the original %3d put
                # literal spaces into the file name.
                torch.save(net.state_dict(),
                           '%s/net_%03d.pth' % (args.outf, epoch + 1))
                acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                acc_f.write('\n')
                acc_f.flush()
                # Track and checkpoint the best test accuracy so far
                if acc > best_test_acc:
                    print('Saving Best Model...')
                    state = {
                        'state_dict': net.state_dict(),
                        'acc': acc,
                        'epoch': epoch + 1,
                    }
                    if not os.path.isdir('checkpoint'):
                        os.mkdir('checkpoint')
                    torch.save(state, './checkpoint/ckpt.t7')
                    best_test_acc = acc
                    writer.add_scalar('best_accuracy/test', best_test_acc, epoch)
        print("Training Finished, Total Epoch = %d" % epoch)
        writer.close()
三、结果
1.打开cmd或者是Anaconda Prompt输入指令,找到你的log目录
[code]tensorboard --logdir 你的文件夹目录/log
例如博主的是这样的
然后打开最后一行的网址http://DESKTOP-xxxxxx:6006(这里每个电脑是不一样的),例如博主的是这样的
最终训练准确率在89%左右,测试准确率在87%左右~
2.在训练过程中还会生成data、model和checkpoint文件夹
四、注意事项
1.代码里参数设置部分pre_model是用来继续训练的,读取的是上一次epoch存储的checkpoint,设置为True即可继续训练,否则从头开始训练
2.代码里参数设置部分lr学习率如果在训练过程中准确率变化缓慢可以适当减小
3.注意如果没有gpu则需要在代码里注释掉这个部分
# Select the compute device: prefer CUDA when a GPU is present
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
并且所有的xx.to(device)都需要删除;
或者不注释掉上面的gpu使用,在每一个xx.to(device)之前加一句话:
[code]if use_cuda:
例如:
# Model definition: build VGG16, moving it to the GPU when one is available
net = VGG16().to(device) if use_cuda else VGG16()
返回至原博客:vs2017安装和使用教程(详细)
相关文章推荐
- TensorFlow深度学习进阶教程:TensorFlow实现CIFAR-10数据集测试的卷积神经网络
- 基于pytorch和Fashion Mnist数据集建立简单的CNN网络来实现图片分类
- Tensorflow深度学习之二十一:LeNet的实现(CIFAR-10数据集)
- python实现CIFAR-10数据集可视化
- [keras实战] 小型CNN实现Cifar-10数据集84%准确率
- Tensorflow学习笔记:CNN篇(3)——CIFAR-10数据集的CNN实现
- Tensorflow深度学习之二十二:AlexNet的实现(CIFAR-10数据集)
- Alexnet网络模型在cifar-10数据集上的实现(基于tensorflow-gpu)
- Pytorch实现CIFAR-10分类
- Keras基于Cifar-10数据集的CNN实现
- flume源码学习10-HDFSEventSink目录设置功能实现 推荐
- 15. 使用KNN实现DBRHD数据集的手写识别
- Java实现LSTM和GRU做分类(以IRIS数据集为例)
- js实现随机生成1到10的整数
- mpi学习日志(10):mpi4py实现简单并行矩阵乘法
- 报表性能优化方案之多数据集实现层式报表
- TF:TF分类问题之MNIST手写50000数据集实现87.4%准确率识别:SGD法+softmax法+cross_entropy法—Jason niu
- 基于JavaScript的公式解释器 - 10 【位操作符的实现】
- PyTorch上搭建简单神经网络实现回归和分类的示例
- 自己实现的62进制转10进制互转