Deep Learning Chapter 5: ResNet

The model code is located at: E:\python文件\deep-learning-for-image-processing-master\pytorch_classification\Test5_resnet

I. Model Introduction

Key features:

  • An extremely deep network structure (can exceed 1,000 layers)
  • Introduces the residual module
  • Uses Batch Normalization to speed up training (dropout is no longer used)

1. The Residual Module

(Left: the residual block used in the 18- and 34-layer networks; right: the bottleneck residual block used in the 50-, 101- and 152-layer networks)

  • (figure: pAZFLVO.png)

2. Solid-Line vs. Dashed-Line Residual Structures

From the conv3_x stage onward, the first residual block of each stage must use the dashed-line structure, because that block performs downsampling and so its shortcut needs a projection (in the 50/101/152-layer networks the first block of conv2_x also needs one, to expand the channel count). A minimal sketch of this shortcut branch is shown below.
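
As a minimal sketch (the channel/stride values are example numbers, not taken from the text above), the dashed shortcut is just a 1x1 convolution plus batch normalization that reshapes the identity branch so it can be added to the main branch:

import torch
import torch.nn as nn

# Dashed (projection) shortcut, e.g. the first block of conv3_x in ResNet-34:
# 64 channels at 56x56 are mapped to 128 channels at 28x28 to match the main branch.
shortcut = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(128))
x = torch.randn(1, 64, 56, 56)
print(shortcut(x).shape)  # torch.Size([1, 128, 28, 28])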

3. Batch Normalization

  • Usually inserted after a convolutional or fully connected layer

  • The goal of Batch Normalization is to make the feature maps of a batch follow a distribution with mean 0 and variance 1 (see the small demonstration after this list)
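
A self-contained demonstration (example sizes only, not from the original code) showing that nn.BatchNorm2d normalizes each channel of a batch to roughly zero mean and unit variance before the learnable scale and shift have been trained:

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(num_features=16)      # one mean/variance pair per channel
x = torch.randn(8, 16, 32, 32) * 5 + 3    # a batch whose features have mean ~3 and std ~5
y = bn(x)                                 # training mode: normalize with batch statistics
# per-channel mean and variance over (batch, height, width) are now roughly 0 and 1
print(y.mean(dim=(0, 2, 3)).abs().max().item())
print(y.var(dim=(0, 2, 3), unbiased=False).mean().item())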

4. Model Parameters

  • (figure: pAZVWO1.png)

II. Dataset: Flower Classification

  • Same dataset as in the previous two chapters

III. Building the Model

1. Defining the residual block for the 18-layer and 34-layer networks

  • In the figure, the left structure is used in the conv2_x stage, the right structure in conv3_x and later stages

    • (figure: pAZ0SEV.png)
  • Code

import torch
import torch.nn as nn


class BasicBlock(nn.Module):  # residual block used by the 18-layer and 34-layer networks
    expansion = 1  # ratio of output channels to input channels within the block

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)  # batch normalization layer
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample  # downsampling branch for the shortcut

    def forward(self, x):
        identity = x  # keep the original input as the shortcut

        if self.downsample is not None:  # dashed-line block: downsample the original input
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity  # final output = main branch + shortcut
        out = self.relu(out)
        return out
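
For illustration (an assumed quick check, not part of the original model.py), a solid-line BasicBlock keeps the spatial size and channel count unchanged, so the input can be added directly as the identity:

block = BasicBlock(in_channel=64, out_channel=64)   # solid-line block: no downsample branch
x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 64, 56, 56]) - same shape as the input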

2. Defining the residual block for the 50-, 101- and 152-layer networks

  • In the figure, the left structure is used in the conv2_x stage, the right structure in conv3_x and later stages
    • (figure: pAZEI2j.png)
  • Code
class Bottleneck(nn.Module):  # residual block used by the 50-, 101- and 152-layer networks
    expansion = 4  # the last 1x1 conv expands the channels by this factor

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super(Bottleneck, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)  # squeeze channels
        self.bn1 = nn.BatchNorm2d(out_channel)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,
                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels
        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:  # dashed-line block: downsample the original input
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identity
        out = self.relu(out)

        return out
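
For illustration (an assumed quick check, not part of the original model.py), the first bottleneck block of conv2_x in ResNet-50 needs a dashed shortcut because its output has expansion = 4 times the channels of its input:

shortcut = nn.Sequential(
    nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),
    nn.BatchNorm2d(256))
block = Bottleneck(in_channel=64, out_channel=64, stride=1, downsample=shortcut)
x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 256, 56, 56]) - channels expanded by 4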

3. Building the full ResNet

class ResNet(nn.Module):  # the full network

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super(ResNet, self).__init__()
        self.include_top = include_top
        self.in_channel = 64

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # blocks_num is the number of residual blocks in each stage;
        # layer1 (conv2_x) does not downsample at its first block
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)  # from here on, the first block of each stage downsamples
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # adaptive average pooling: height and width are reduced to 1
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():  # initialize the convolutional layers
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):  # build the residual blocks of one stage
        downsample = None
        # decide whether the first block of this stage needs a projection shortcut
        if stride != 1 or self.in_channel != channel * block.expansion:
            # the shortcut is a 1x1 conv (to change the channel count and/or downsample) followed by
            # batch normalization, so that it can be added to the block's output (the key residual operation)
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []  # holds the residual blocks of this stage
        # the first block may carry the downsample branch; it maps self.in_channel input channels to channel output channels
        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))
        self.in_channel = channel * block.expansion  # update the channel count for the following blocks

        for _ in range(1, block_num):  # append the remaining (solid-line) blocks
            layers.append(block(self.in_channel, channel))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x

4. Defining the different ResNet variants

def resnet34(num_classes=1000, include_top=True):  # the model used for training in this chapter
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
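
A quick shape check (illustrative only, not part of the original file) confirms the classification head produces one logit per class:

model = resnet34(num_classes=5)        # 5 flower classes
dummy = torch.randn(1, 3, 224, 224)    # one 224x224 RGB image
print(model(dummy).shape)              # torch.Size([1, 5])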

IV. Model Training

1. Transfer learning: downloading and loading the pre-trained weights

import os
import torch
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # use the GPU if available

net = resnet34()
# download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
model_weight_path = "./resnet34-pre.pth"  # pre-trained weight file
assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))  # load the weights into the model

net.to(device)
in_channel = net.fc.in_features  # input dimension of the original fully connected layer (the number of features after the last conv stage)
# replace the original 1000-class head with a new nn.Linear(in_channel, 5):
# same input dimension, but 5 outputs to match the flower-classification task
net.fc = nn.Linear(in_channel, 5).to(device)
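
If you only want to train the new classification head, the backbone can optionally be frozen first; this is a hedged variant shown for illustration and is not done in the original training script:

# Optional: freeze everything except the newly created fully connected layer (not in the original script)
for name, param in net.named_parameters():
    if not name.startswith("fc."):
        param.requires_grad = False
# the optimizer in the next section only receives parameters with requires_grad=True,
# so the frozen backbone layers are excluded automatically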

2. Training the model

# assumes train_loader, validate_loader and val_num were created earlier in train.py
import sys
import torch.optim as optim
from tqdm import tqdm

loss_function = nn.CrossEntropyLoss()
# keep only the parameters that require gradients (any frozen parameters are skipped);
# BatchNorm running mean/variance are buffers, not parameters, so net.parameters() never returns them
params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.Adam(params, lr=0.0001)

epochs = 3
best_acc = 0.0
save_path = 'resnet34-pre-again.pth'
train_steps = len(train_loader)
for epoch in range(epochs):
    # train
    net.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)  # progress bar
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        logits = net(images.to(device))
        loss = loss_function(logits, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)

    # validate
    net.eval()  # in eval mode, BN layers use their running statistics instead of the batch statistics
    acc = 0.0  # accumulate the number of correct predictions in this epoch
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))
            # loss = loss_function(outputs, test_labels)
            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

            val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                       epochs)

    val_accurate = acc / val_num
    print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, running_loss / train_steps, val_accurate))

    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)

print('Finished Training')

V. Test Results

  • Same as the results in the previous chapter