
vs2017: A PyTorch implementation of VGG16 on the CIFAR-10 dataset

2019-01-23 14:30
Copyright notice: this is an original article by the blogger and may not be reproduced without the blogger's permission. https://blog.csdn.net/qq_36556893/article/details/86608963

This is the VGG16-CIFAR10 project example that accompanies the blog post "vs2017 installation and usage tutorial (detailed)".

Contents

I. Overview

II. Code

III. Results

IV. Notes

I. Overview

1. For a tutorial on building the network itself, see the blogger's post: PyTorch Hands-On Introduction (4): Building a Convolutional Neural Network with torch.nn.

2. This post mainly shows the blogger's code and the results of running it, in the hope that it helps people who are learning PyTorch.

II. Code

1. nn_module_sample.py contains the VGG-16 network (with BatchNorm layers). Note that the input size of the classifier (the fully connected part) is fixed by the spatial size of the feature maps rather than by the batch size: with 32×32 CIFAR-10 inputs the features flatten to 512 values per sample, matching nn.Linear(512, 4096) (see the sketch after the code block).

[code]import torch.nn as nn

class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            # 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            # 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 5
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            # 6
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            # 7
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 8
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            # 9
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            # 10
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 11
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            # 12
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            # 13
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.AvgPool2d(kernel_size=1, stride=1),
        )
        self.classifier = nn.Sequential(
            # 14
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 15
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 16
            nn.Linear(4096, num_classes),
        )
        # self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)  # flatten to (batch_size, 512)
        out = self.classifier(out)
        return out

class testNet(nn.Module):
    def __init__(self, num_classes=10):
        super(testNet, self).__init__()
        # Define your own network
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.BN1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(64, 64, kernel_size=3, padding=1))
        layer2.add_module('BN2', nn.BatchNorm2d(64))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = layer2

        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )
        self.classifier = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Dropout(),

            nn.Linear(256, 256),
            nn.ReLU(True),
            nn.Dropout(),

            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        # Define your own forward pass
        out = self.conv1(x)
        out = self.BN1(out)
        out = self.relu1(out)
        out = self.pool1(out)

        out = self.layer2(out)
        out = self.layer3(out)

        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

if __name__ == '__main__':
    import torch
    # Use the GPU if available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    net = VGG16().to(device)
    print(net)
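To double-check the flattened feature size that feeds the classifier, a minimal sketch (assuming a 32×32 CIFAR-10 input; the dummy batch below is only for illustration) is:

[code]import torch
from nn_module_sample import VGG16

net = VGG16()
dummy = torch.randn(4, 3, 32, 32)           # a fake batch of four 32x32 RGB images
features = net.features(dummy)               # shape (4, 512, 1, 1) after the five max-pool stages
flat = features.view(features.size(0), -1)   # shape (4, 512), which matches nn.Linear(512, 4096)
print(features.shape, flat.shape)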

2. train.py contains the argument settings, image preprocessing, dataset loading, network creation, loss and optimizer, and the training and test loops.

[code]import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import os
import argparse

from tensorboardX import SummaryWriter

from nn_module_sample import VGG16
from torch.autograd import Variable

# Argument settings
parser = argparse.ArgumentParser(description='cifar10')
parser.add_argument('--lr', default=1e-2, type=float, help='learning rate')
#parser.add_argument('--batch_size', default=50, type=int, help='batch size')
parser.add_argument('--epoch', default=15, type=int, help='number of training epochs')
parser.add_argument('--pre_epoch', default=0, type=int, help='epoch to begin from')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')  # output path for saved results
parser.add_argument('--pre_model', default=True, help='use pre-model')  # resume training from a saved checkpoint
args = parser.parse_args()

# Use the GPU if available
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Data preprocessing
# Image preprocessing and augmentation
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # pad the borders with zeros first, then randomly crop back to 32*32
    transforms.RandomHorizontalFlip(),      # flip the image horizontally with probability 0.5
    transforms.ToTensor(),
    #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=0)
# CIFAR-10 class labels
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model definition: VGG16
net = VGG16().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # cross-entropy loss, commonly used for multi-class classification
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)  # mini-batch momentum SGD with L2 regularization (weight decay)

# Resume from a previously saved model
if args.pre_model:
    print("Resume from checkpoint...")
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found'
    state = torch.load('./checkpoint/ckpt.t7')
    net.load_state_dict(state['state_dict'])
    best_test_acc = state['acc']
    pre_epoch = state['epoch']
else:
    # Initialize the best test accuracy
    best_test_acc = 0
    pre_epoch = args.pre_epoch

# Training
if __name__ == "__main__":

    writer = SummaryWriter(log_dir='./log')
    print("Start Training, VGG-16...")
    with open("acc.txt", "w") as acc_f:
        with open("log.txt", "w") as log_f:
            for epoch in range(pre_epoch, args.epoch):
                print('\nEpoch: %d' % (epoch + 1))
                # Switch to training mode
                net.train()
                print(net)
                # Running loss
                sum_loss = 0.0
                # Running accuracy
                accuracy = 0.0
                total = 0.0

                for i, data in enumerate(trainloader):
                    # Prepare the data
                    length = len(trainloader)  # number of batches per epoch
                    inputs, labels = data      # unpack the batch
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()  # zero the gradients (they accumulate across backward calls otherwise)
                    inputs, labels = Variable(inputs), Variable(labels)  # Variable is legacy; kept from the original code
                    # forward + backward + optimize
                    outputs = net(inputs)              # forward pass to get predictions
                    loss = criterion(outputs, labels)  # compute the loss
                    loss.backward()                    # backward pass to compute gradients
                    optimizer.step()                   # update the parameters

                    # Print the running loss and accuracy for every batch
                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)  # index of the maximum value in each row, i.e. the predicted class
                    total += labels.size(0)
                    accuracy += predicted.eq(labels.data).cpu().sum()  # compare predictions with ground truth, move to cpu and sum

                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * accuracy / total))

                    # Write to the log file
                    log_f.write('[epoch:%d, iter:%d] |Loss: %.03f | Acc: %.3f%% '
                                % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * accuracy / total))
                    log_f.write('\n')
                    log_f.flush()

                # Write to tensorboard
                writer.add_scalar('loss/train', sum_loss / (i + 1), epoch)
                writer.add_scalar('accuracy/train', 100. * accuracy / total, epoch)
                # Evaluate the test accuracy after every training epoch
                print("Waiting for test...")
                # Disable gradient tracking inside this context: every result has requires_grad=False,
                # even if the input was created with requires_grad=True
                with torch.no_grad():
                    accuracy = 0
                    total = 0
                    for data in testloader:
                        # Switch to evaluation mode
                        net.eval()

                        images, labels = data
                        images, labels = images.to(device), labels.to(device)

                        outputs = net(images)

                        _, predicted = torch.max(outputs.data, 1)  # index of the maximum value in each row (the highest-scoring class)
                        total += labels.size(0)
                        accuracy += (predicted == labels).sum()

                    # Print the test accuracy
                    print('Test accuracy: %.3f%%' % (100. * accuracy / total))
                    acc = 100. * accuracy / total

                    # Write to tensorboard
                    writer.add_scalar('accuracy/test', acc, epoch)

                    # Write the test result to file
                    print('Saving model...')
                    torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                    acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                    acc_f.write('\n')
                    acc_f.flush()

                    # Keep track of the best test accuracy
                    if acc > best_test_acc:
                        print('Saving Best Model...')
                        # Checkpoint state
                        state = {
                            'state_dict': net.state_dict(),
                            'acc': acc,
                            'epoch': epoch + 1,
                        }
                        # Create the checkpoint folder if it does not exist
                        if not os.path.isdir('checkpoint'):
                            os.mkdir('checkpoint')
                        #best_acc_f = open("best_acc.txt", "w")
                        #best_acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                        #best_acc_f.close()
                        torch.save(state, './checkpoint/ckpt.t7')
                        best_test_acc = acc
                        # Write to tensorboard
                        writer.add_scalar('best_accuracy/test', best_test_acc, epoch)

    # Training finished
    print("Training Finished, Total Epoch = %d" % (epoch + 1))
    writer.close()

III. Results

1. Open cmd or the Anaconda Prompt and run the following command, pointing it at your log directory:

[code]tensorboard --logdir your_folder_path/log

For example, the blogger's output looks like this:

(screenshot)

Then open the URL printed on the last line, http://DESKTOP-xxxxxx:6006 (this address differs from machine to machine); the blogger's TensorBoard curves show the following results.

The final training accuracy is around 89%, and the test accuracy is around 87%.

2. During training, the data, model, and checkpoint folders are also created:

(screenshot)

IV. Notes

1. In the argument settings, pre_model is used to resume training: it loads the checkpoint stored by the previous run, so set it to True to continue training from that checkpoint, and to False to train from scratch.
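The checkpoint saved by train.py keeps only the model weights, the accuracy, and the epoch. If you also want the SGD momentum to survive a restart, a minimal sketch along the same lines is shown below (adding the 'optimizer' key is my assumption, not part of the original code; net and optimizer are the objects defined in train.py):

[code]# Saving: store the optimizer state alongside the model (hypothetical extra 'optimizer' key)
state = {
    'state_dict': net.state_dict(),
    'optimizer': optimizer.state_dict(),
    'acc': acc,
    'epoch': epoch + 1,
}
torch.save(state, './checkpoint/ckpt.t7')

# Resuming: restore both the model and the optimizer before continuing training
state = torch.load('./checkpoint/ckpt.t7')
net.load_state_dict(state['state_dict'])
optimizer.load_state_dict(state['optimizer'])
best_test_acc = state['acc']
pre_epoch = state['epoch']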

2. In the argument settings, the learning rate lr can be reduced somewhat if the accuracy improves only slowly during training; see the sketch below.
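One way to automate this is a learning-rate scheduler; a minimal sketch using ReduceLROnPlateau is shown below (the scheduler and its factor/patience values are my choice and are not part of the original train.py; optimizer and acc are the objects from train.py):

[code]from torch.optim import lr_scheduler

# Halve the learning rate when the monitored metric stops improving for 2 epochs
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

# Inside the epoch loop, after the test accuracy `acc` has been computed:
scheduler.step(acc)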

3. Note that if you do not have a GPU, you can comment out this part of the code:

[code]# Use the GPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

and delete every xx.to(device) call;

or, alternatively, keep the GPU code above and add the following guard before each xx.to(device) call:

[code]if use_cuda:

For example:

[code]# Model definition: VGG16
if use_cuda:
    net = VGG16().to(device)
else:
    net = VGG16()

Back to the original post: vs2017 installation and usage tutorial (detailed)
