Cifar10 上的 ResNet18#
本文主要介绍 ResNet18 在 cifar10 数据集上的实验及其性能。
导入一些必要包:
import logging
import torch
from torch import nn
from torchvision.models import resnet18, ResNet18_Weights
from torch_book.vision.classifier import Classifier, evaluate_accuracy
from torch_book.datasets.cifar10 import Cifar10
import os

torch.cuda.empty_cache()  # release cached GPU memory

# logging.basicConfig opens the log file right away and raises
# FileNotFoundError when the parent directory is missing, so create it first.
LOG_FILE = 'logs/debug-graph.log'
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
    filename=LOG_FILE,
    filemode="w",  # truncate the log file on every run
    format='%(asctime)s|%(levelname)s|%(name)s->%(funcName)s@%(message)s',
    level=logging.INFO,
)
不推荐将 torchvision 上的 ResNet18 直接用于 cifar10,需要做以下改动:
备注
将 conv1 替换为 \(3 \times 3\)、步幅为 1 的卷积,以支持 \(32 \times 32\) 的输入;同时移除第一个 maxpool,避免图片信息过早丢失;最后将 fc 替换为 10 类输出的全连接层。
# Start from torchvision's ResNet18 with its default weights.
model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Replace the stem with a 3x3, stride-1, padding-1 convolution that keeps the
# original channel counts, so a 32x32 input keeps its spatial size here.
model.conv1 = nn.Conv2d(
    in_channels=model.conv1.in_channels,
    out_channels=model.conv1.out_channels,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
)

# Turn the first max-pool into a no-op.
model.maxpool = nn.Identity()

# New classification head with 10 outputs.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10)
加载数据集:
# CIFAR-10 data wrapper: batch size 128, num_workers=8, cutout=None
# (presumably meaning no Cutout augmentation -- confirm in torch_book).
data = Cifar10(batch_size=128, num_workers=8, cutout=None)

# Training loader first, then the validation loader used as the test set.
train_iter, test_iter = data.train_loader(), data.val_loader()
Files already downloaded and verified
Files already downloaded and verified
打印每层输出尺寸:
# Grab one batch from the training loader; the labels are not needed.
xs, _ = next(iter(train_iter))

# Push the batch through the top-level children in registration order and
# print the output shape after each one. This is only a shape probe, so
# gradient tracking is disabled to avoid recording an autograd graph.
with torch.no_grad():
    for name, m in model.named_children():
        xs = m(xs)
        print(name, tuple(xs.shape))
        if name == "avgpool":
            # fc expects a 2-D input, so flatten everything after the batch
            # dimension before the next child (fc) runs.
            xs = torch.flatten(xs, 1)
conv1 (128, 64, 32, 32)
bn1 (128, 64, 32, 32)
relu (128, 64, 32, 32)
maxpool (128, 64, 32, 32)
layer1 (128, 64, 32, 32)
layer2 (128, 128, 16, 16)
layer3 (128, 256, 8, 8)
layer4 (128, 512, 4, 4)
avgpool (128, 512, 1, 1)
fc (128, 10)
做一些准备工作:
# Bundle the model and both loaders into the training helper on GPU 0.
device = torch.device("cuda:0")
classifier = Classifier(model, train_iter, test_iter, device=device)

# Optimizer hyper-parameters.
optimizer_config = dict(
    lr=0.0142857,
    momentum=0.857142,
    weight_decay=0.000857142,
)
classifier.prepare_optimizer(**optimizer_config)

# Learning-rate schedule: lr_period=2, lr_decay=0.857142 (presumably the rate
# is multiplied by lr_decay every lr_period epochs -- confirm in torch_book).
classifier.prepare_scheduler(lr_period=2, lr_decay=0.857142)
训练模型:
# Reset TorchDynamo state before training starts.
torch._dynamo.reset()

num_epochs = 60
classifier.prepare_animator(num_epochs)
classifier.fit(num_epochs)

# Accuracy of the trained model on the classifier's test loader.
eval_device = torch.device("cuda:0")
test_acc = evaluate_accuracy(
    classifier.mod,
    classifier.test_iter,
    device=eval_device,
)
test_acc  # bare expression: presumably kept so the notebook displays the value
0.9529
保存模型(也可以直接下载训练权重):
import os

# torch.save does not create missing directories, so make sure the target
# folder exists before writing the trained weights.
weights_path = 'params/resnet18_cifar10_relu.h5'
os.makedirs(os.path.dirname(weights_path), exist_ok=True)

# NOTE: despite the .h5 suffix, this file is written by torch.save and holds
# the model's state_dict; load it with torch.load / load_state_dict.
torch.save(classifier.mod.state_dict(), weights_path)
Cutout Cifar10 训练#
import logging
import torch
from torch import nn
from torchvision.models import resnet18, ResNet18_Weights
from torch_book.vision.classifier import Classifier, evaluate_accuracy
from torch_book.datasets.cifar10 import Cifar10
torch.cuda.empty_cache() # 清空 GPU 缓存
from torch_book.transforms.cutout import Cutout
import os

# logging.basicConfig opens the log file right away and raises
# FileNotFoundError when the parent directory is missing, so create it first.
LOG_FILE = 'logs/debug-graph.log'
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
    filename=LOG_FILE,
    filemode="a",  # append to the existing log instead of truncating it
    format='%(asctime)s|%(levelname)s|%(name)s->%(funcName)s@%(message)s',
    level=logging.INFO,
)
num_epochs = 100

# ResNet18 adapted to 32x32 inputs: 3x3 stride-1 stem, first max-pool turned
# into a no-op, and a classification head with 10 outputs.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.conv1 = nn.Conv2d(
    in_channels=model.conv1.in_channels,
    out_channels=model.conv1.out_channels,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
)
model.maxpool = nn.Identity()
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10)

# CIFAR-10 loaders with batch size 32 and a Cutout(1, 16) transform
# (presumably one 16-pixel hole per image -- confirm against torch_book).
data = Cifar10(batch_size=32, cutout=Cutout(1, 16), num_workers=8)
train_iter, test_iter = data.train_loader(), data.val_loader()

# Train on GPU 0.
device = torch.device("cuda:0")
classifier = Classifier(model, train_iter, test_iter, device=device)
classifier.prepare_optimizer(
    lr=0.0142857,
    momentum=0.857142,
    weight_decay=0.000857142,
)
classifier.prepare_scheduler(lr_period=2, lr_decay=0.857142)
classifier.prepare_animator(num_epochs)
classifier.fit(num_epochs)

# Accuracy of the trained model on the classifier's test loader.
test_acc = evaluate_accuracy(classifier.mod, classifier.test_iter, device=device)
test_acc  # bare expression: presumably kept so the notebook displays the value
0.9601
import os

# torch.save does not create missing directories, so make sure the target
# folder exists before writing the trained weights.
weights_path = 'params/resnet18_cifar10_relu_cutout.h5'
os.makedirs(os.path.dirname(weights_path), exist_ok=True)

# NOTE: despite the .h5 suffix, this file is written by torch.save and holds
# the model's state_dict; load it with torch.load / load_state_dict.
torch.save(classifier.mod.state_dict(), weights_path)
性能度量#
from torch.profiler import profile, record_function, ProfilerActivity

# Random float32 batch of five 3x32x32 inputs, allocated directly on the GPU.
inputs = torch.randn(5, 3, 32, 32, dtype=torch.float32, device="cuda")

# Profile the forward pass as inference: eval mode and no autograd recording.
# (Assumes classifier.mod is an nn.Module -- it exposes state_dict() above.)
classifier.mod.eval()
with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
) as prof:
    # Label the forward pass so it appears as "model_inference" in the table.
    with record_function("model_inference"), torch.no_grad():
        classifier.mod(inputs)

# Ten most expensive entries, ordered by total CUDA time.
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------
Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------
model_inference 3.55% 2.243ms 99.77% 63.060ms 63.060ms 0.000us 0.00% 1.294ms 1.294ms 1
aten::conv2d 0.13% 84.000us 90.74% 57.355ms 2.868ms 0.000us 0.00% 1.202ms 60.100us 20
aten::convolution 0.49% 309.000us 90.61% 57.271ms 2.864ms 0.000us 0.00% 1.202ms 60.100us 20
aten::_convolution 0.24% 153.000us 90.12% 56.962ms 2.848ms 0.000us 0.00% 1.202ms 60.100us 20
aten::cudnn_convolution 78.44% 49.575ms 89.88% 56.809ms 2.840ms 643.000us 87.72% 1.202ms 60.100us 20
cudaFuncGetAttributes 5.84% 3.691ms 5.84% 3.691ms 3.226us 557.000us 75.99% 557.000us 0.487us 1144
void cutlass_cudnn_infer::Kernel<cutlass_tensorop_s1... 0.00% 0.000us 0.00% 0.000us 0.000us 192.000us 26.19% 192.000us 32.000us 6
void cudnn::ops::nchwToNhwcKernel<float, float, floa... 0.00% 0.000us 0.00% 0.000us 0.000us 155.000us 21.15% 155.000us 4.844us 32
sm86_xmma_fprop_implicit_gemm_indexed_tf32f32_tf32f3... 0.00% 0.000us 0.00% 0.000us 0.000us 116.000us 15.83% 116.000us 29.000us 4
void cutlass_cudnn_infer::Kernel<cutlass_tensorop_s1... 0.00% 0.000us 0.00% 0.000us 0.000us 89.000us 12.14% 89.000us 22.250us 4
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------
Self CPU time total: 63.205ms
Self CUDA time total: 733.000us
STAGE:2023-11-22 11:06:07 3079502:3079502 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2023-11-22 11:06:07 3079502:3079502 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2023-11-22 11:06:07 3079502:3079502 ActivityProfilerController.cpp:322] Completed Stage: Post Processing