


  1. 课程地址

  2. 目标

  • 掌握深度学习常用模型基础知识
  • 熟练掌握一种国产开源深度学习框架
  • 具备独立完成相关深度学习任务的能力
  • 能用所学为AI加一份年味










{'loss': [6.4980035], 'acc': 0.8485721442885772}


import paddle
import numpy as np

# Notebook-style echo of the installed PaddlePaddle version.
paddle.__version__
# Configure GPU.
# NOTE(review): the device-selection call is missing from this extract
# (presumably paddle.set_device('gpu')) — confirm against the original notebook.

② 数据准备


import paddle.vision.transforms as T

# Mean and std of the CIFAR-100 dataset, one value per RGB channel.
CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

# Training pipeline: upscale, crop to the network input size, then augment.
train_transfrom = T.Compose([
    T.Resize((256, 256)),
    T.CenterCrop((224, 224)),
    T.RandomHorizontalFlip(0.5),   # random horizontal flip
    T.RandomRotation(degrees=15),  # angle drawn from (-degrees, +degrees)
    T.ToTensor(),                  # format conversion and scaling, HWC => CHW
    # Use the statistics defined in this cell; the collapsed original referred
    # to CIFAR100_MEAN / CIFAR100_STD, which are not defined at this point.
    T.Normalize(mean=CIFAR100_TRAIN_MEAN, std=CIFAR100_TRAIN_STD),  # normalization
])

# Evaluation pipeline: deterministic resize only, no augmentation.
eval_transfrom = T.Compose([
    T.Resize(224),
    T.ToTensor(),                  # format conversion and scaling, HWC => CHW
    T.Normalize(mean=CIFAR100_TRAIN_MEAN, std=CIFAR100_TRAIN_STD),  # normalization
])


将形状为 (H x W x C)的输入数据 PIL.Image 或 numpy.ndarray 转换为 (C x H x W)。 如果想保持形状不变,可以将参数 data_format 设置为 ‘HWC’。

同时,如果输入的 PIL.Image 的 mode 是 (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) 其中一种,或者输入的 numpy.ndarray 数据类型是 'uint8',那么会将输入数据从 (0-255) 的范围缩放到 (0-1) 的范围。其他的情况,则保持输入不变。

import paddle.vision.transforms as T

# NOTE(review): the dataset constructors and the iterables in the comprehension
# were stripped by extraction; Cifar100 + DataLoader is a reconstruction that
# matches the 4-D reduction over axes [0, 2, 3] below — confirm against the
# original notebook.

# Training dataset
train_dataset = paddle.vision.datasets.Cifar100(mode='train', transform=T.ToTensor())
# Validation dataset
eval_dataset = paddle.vision.datasets.Cifar100(mode='test', transform=T.ToTensor())
# Merge both splits into one tensor. Batching only reduces the number of
# tensors handed to concat; the concatenated result does not depend on it.
dataset = paddle.concat(
    [d[0] for d in paddle.io.DataLoader(train_dataset, batch_size=512)]
    + [d[0] for d in paddle.io.DataLoader(eval_dataset, batch_size=512)]
)
# Compute per-channel mean and standard deviation of the data
print(f'mean:{dataset.mean(axis=[0, 2, 3]).numpy()} \n std:{dataset.std(axis=[0, 2, 3]).numpy()}')
mean:[0.5073715 0.4867007 0.441096 ] std:[0.26750046 0.25658613 0.27630225]

由于要调用resnet101的预训练模型,这里把CIFAR的 $32\times 32$ 的图像resize为 $224\times 224$ 的图像,保持特征尺寸和感受野的一致。

import paddle.vision.transforms as T

# Alternative statistics, left disabled:
# CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
# CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

# Per-channel mean and std used for normalization.
CIFAR100_MEAN = [0.5073715, 0.4867007, 0.441096]
CIFAR100_STD = [0.26750046, 0.25658613, 0.27630225]

# Another disabled alternative (presumably the ImageNet statistics — confirm):
# mean=[0.485, 0.456, 0.406]
# std=[0.229, 0.224, 0.225]

# Training pipeline: upscale, crop to 224x224, then augment.
train_transfrom = T.Compose([
    T.Resize((256, 256)),
    T.CenterCrop((224, 224)),
    T.RandomHorizontalFlip(0.5),   # random horizontal flip
    T.RandomRotation(degrees=15),  # angle drawn from (-degrees, +degrees)
    T.ToTensor(),                  # format conversion and scaling, HWC => CHW
    T.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),  # normalization
])

# Evaluation pipeline: deterministic resize only, no augmentation.
eval_transfrom = T.Compose([
    T.Resize(224),
    T.ToTensor(),                  # format conversion and scaling, HWC => CHW
    T.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),  # normalization
])

# NOTE(review): the dataset class name was stripped by extraction;
# paddle.vision.datasets.Cifar100 is a reconstruction — confirm.
# Training dataset
train_dataset = paddle.vision.datasets.Cifar100(mode='train', transform=train_transfrom)
# Validation dataset
eval_dataset = paddle.vision.datasets.Cifar100(mode='test', transform=eval_transfrom)

print(f'训练集大小: {len(train_dataset)}, 测试集大小: {len(eval_dataset)}')
print("train data shape:", train_dataset[0][0].shape)
print("eval data shape:", eval_dataset[0][0].shape)
# train_dataset[3][0]
训练集大小: 50000, 测试集大小: 10000
train data shape: [3, 224, 224]
eval data shape: [3, 224, 224]
import matplotlib.pyplot as plt

# Draw ten random sample indices from the training set and display each sample.
idx = np.random.randint(0, 50000, size=10)
for i in idx:
    # Fetch the sample once: the training transform is random, so indexing the
    # dataset twice could pair the title with a differently augmented image.
    image, label = train_dataset[i]
    img = image.numpy().transpose([1, 2, 0])  # CHW -> HWC for imshow
    plt.imshow(img)
    plt.title(label)
    # Render each image as its own figure instead of drawing over the last one.
    plt.show()
3.1 模型开发

# ResNet-101 with pretrained weights and a 100-class head. The pretrained
# fc.weight / fc.bias have 1000 outputs, so loading skips them (see the
# warnings printed below) and the classifier layer starts untrained.
# NOTE(review): the callee was stripped by extraction;
# paddle.vision.models.resnet101(num_classes=100, ...) is a reconstruction — confirm.
network = paddle.vision.models.resnet101(num_classes=100, pretrained=True)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/ UserWarning: Skip loading for fc.weight. fc.weight receives a shape [2048, 1000], but the expected shape is [2048, 100].warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/ UserWarning: Skip loading for fc.bias. fc.bias receives a shape [1000], but the expected shape is [100].warnings.warn(("Skip loading for {}. ".format(key) + str(err)))

3.2 模型可视化

# Wrap the network in the high-level Model API and print a per-layer summary
# for an input of shape (batch, 3, 224, 224).
model = paddle.Model(network)
model.summary((-1, 3, 224, 224))
-------------------------------------------------------------------------------Layer (type)         Input Shape          Output Shape         Param #
===============================================================================Conv2D-105       [[1, 3, 224, 224]]   [1, 64, 112, 112]        9,408     BatchNorm2D-105   [[1, 64, 112, 112]]   [1, 64, 112, 112]         256      ReLU-35       [[1, 64, 112, 112]]   [1, 64, 112, 112]          0       MaxPool2D-2     [[1, 64, 112, 112]]    [1, 64, 56, 56]           0       Conv2D-107       [[1, 64, 56, 56]]     [1, 64, 56, 56]         4,096     BatchNorm2D-107    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      ReLU-36        [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       Conv2D-108       [[1, 64, 56, 56]]     [1, 64, 56, 56]        36,864     BatchNorm2D-108    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      Conv2D-109       [[1, 64, 56, 56]]     [1, 256, 56, 56]       16,384     BatchNorm2D-109    [[1, 256, 56, 56]]    [1, 256, 56, 56]        1,024     Conv2D-106       [[1, 64, 56, 56]]     [1, 256, 56, 56]       16,384     BatchNorm2D-106    [[1, 256, 56, 56]]    [1, 256, 56, 56]        1,024
BottleneckBlock-34   [[1, 64, 56, 56]]     [1, 256, 56, 56]          0       Conv2D-110       [[1, 256, 56, 56]]    [1, 64, 56, 56]        16,384     BatchNorm2D-110    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      ReLU-37        [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       Conv2D-111       [[1, 64, 56, 56]]     [1, 64, 56, 56]        36,864     BatchNorm2D-111    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      Conv2D-112       [[1, 64, 56, 56]]     [1, 256, 56, 56]       16,384     BatchNorm2D-112    [[1, 256, 56, 56]]    [1, 256, 56, 56]        1,024
BottleneckBlock-35   [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       Conv2D-113       [[1, 256, 56, 56]]    [1, 64, 56, 56]        16,384     BatchNorm2D-113    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      ReLU-38        [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       Conv2D-114       [[1, 64, 56, 56]]     [1, 64, 56, 56]        36,864     BatchNorm2D-114    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      Conv2D-115       [[1, 64, 56, 56]]     [1, 256, 56, 56]       16,384     BatchNorm2D-115    [[1, 256, 56, 56]]    [1, 256, 56, 56]        1,024
BottleneckBlock-36   [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       Conv2D-117       [[1, 256, 56, 56]]    [1, 128, 56, 56]       32,768     BatchNorm2D-117    [[1, 128, 56, 56]]    [1, 128, 56, 56]         512      ReLU-39        [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-118       [[1, 128, 56, 56]]    [1, 128, 28, 28]       147,456    BatchNorm2D-118    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      Conv2D-119       [[1, 128, 28, 28]]    [1, 512, 28, 28]       65,536     BatchNorm2D-119    [[1, 512, 28, 28]]    [1, 512, 28, 28]        2,048     Conv2D-116       [[1, 256, 56, 56]]    [1, 512, 28, 28]       131,072    BatchNorm2D-116    [[1, 512, 28, 28]]    [1, 512, 28, 28]        2,048
BottleneckBlock-37   [[1, 256, 56, 56]]    [1, 512, 28, 28]          0       Conv2D-120       [[1, 512, 28, 28]]    [1, 128, 28, 28]       65,536     BatchNorm2D-120    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      ReLU-40        [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-121       [[1, 128, 28, 28]]    [1, 128, 28, 28]       147,456    BatchNorm2D-121    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      Conv2D-122       [[1, 128, 28, 28]]    [1, 512, 28, 28]       65,536     BatchNorm2D-122    [[1, 512, 28, 28]]    [1, 512, 28, 28]        2,048
BottleneckBlock-38   [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-123       [[1, 512, 28, 28]]    [1, 128, 28, 28]       65,536     BatchNorm2D-123    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      ReLU-41        [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-124       [[1, 128, 28, 28]]    [1, 128, 28, 28]       147,456    BatchNorm2D-124    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      Conv2D-125       [[1, 128, 28, 28]]    [1, 512, 28, 28]       65,536     BatchNorm2D-125    [[1, 512, 28, 28]]    [1, 512, 28, 28]        2,048
BottleneckBlock-39   [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-126       [[1, 512, 28, 28]]    [1, 128, 28, 28]       65,536     BatchNorm2D-126    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      ReLU-42        [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-127       [[1, 128, 28, 28]]    [1, 128, 28, 28]       147,456    BatchNorm2D-127    [[1, 128, 28, 28]]    [1, 128, 28, 28]         512      Conv2D-128       [[1, 128, 28, 28]]    [1, 512, 28, 28]       65,536     BatchNorm2D-128    [[1, 512, 28, 28]]    [1, 512, 28, 28]        2,048
BottleneckBlock-40   [[1, 512, 28, 28]]    [1, 512, 28, 28]          0       Conv2D-130       [[1, 512, 28, 28]]    [1, 256, 28, 28]       131,072    BatchNorm2D-130    [[1, 256, 28, 28]]    [1, 256, 28, 28]        1,024     ReLU-43       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-131       [[1, 256, 28, 28]]    [1, 256, 14, 14]       589,824    BatchNorm2D-131    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-132       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-132   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096     Conv2D-129       [[1, 512, 28, 28]]   [1, 1024, 14, 14]       524,288    BatchNorm2D-129   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-41   [[1, 512, 28, 28]]   [1, 1024, 14, 14]          0       Conv2D-133      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-133    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-44       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-134       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-134    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-135       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-135   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-42  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-136      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-136    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-45       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-137       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-137    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-138       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-138   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-43  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-139      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-139    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-46       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-140       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-140    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-141       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-141   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-44  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-142      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-142    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-47       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-143       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-143    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-144       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-144   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-45  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-145      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-145    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-48       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-146       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-146    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-147       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-147   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-46  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-148      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-148    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-49       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-149       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-149    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-150       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-150   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-47  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-151      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-151    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-50       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-152       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-152    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-153       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-153   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-48  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-154      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-154    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-51       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-155       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-155    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-156       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-156   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-49  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-157      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-157    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-52       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-158       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-158    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-159       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-159   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-50  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-160      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-160    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-53       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-161       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-161    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-162       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-162   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-51  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-163      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-163    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-54       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-164       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-164    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-165       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-165   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-52  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-166      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-166    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-55       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-167       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-167    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-168       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-168   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-53  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-169      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-169    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-56       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-170       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-170    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-171       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-171   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-54  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-172      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-172    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-57       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-173       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-173    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-174       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-174   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-55  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-175      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-175    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-58       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-176       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-176    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-177       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-177   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-56  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-178      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-178    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-59       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-179       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-179    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-180       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-180   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-57  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-181      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-181    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-60       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-182       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-182    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-183       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-183   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-58  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-184      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-184    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-61       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-185       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-185    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-186       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-186   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-59  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-187      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-187    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-62       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-188       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-188    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-189       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-189   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-60  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-190      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-190    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-63       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-191       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-191    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-192       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-192   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-61  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-193      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-193    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-64       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-194       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-194    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-195       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-195   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-62  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-196      [[1, 1024, 14, 14]]    [1, 256, 14, 14]       262,144    BatchNorm2D-196    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     ReLU-65       [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-197       [[1, 256, 14, 14]]    [1, 256, 14, 14]       589,824    BatchNorm2D-197    [[1, 256, 14, 14]]    [1, 256, 14, 14]        1,024     Conv2D-198       [[1, 256, 14, 14]]   [1, 1024, 14, 14]       262,144    BatchNorm2D-198   [[1, 1024, 14, 14]]   [1, 1024, 14, 14]        4,096
BottleneckBlock-63  [[1, 1024, 14, 14]]   [1, 1024, 14, 14]          0       Conv2D-200      [[1, 1024, 14, 14]]    [1, 512, 14, 14]       524,288    BatchNorm2D-200    [[1, 512, 14, 14]]    [1, 512, 14, 14]        2,048     ReLU-66        [[1, 2048, 7, 7]]     [1, 2048, 7, 7]           0       Conv2D-201       [[1, 512, 14, 14]]     [1, 512, 7, 7]       2,359,296   BatchNorm2D-201     [[1, 512, 7, 7]]      [1, 512, 7, 7]         2,048     Conv2D-202        [[1, 512, 7, 7]]     [1, 2048, 7, 7]       1,048,576   BatchNorm2D-202    [[1, 2048, 7, 7]]     [1, 2048, 7, 7]         8,192     Conv2D-199      [[1, 1024, 14, 14]]    [1, 2048, 7, 7]       2,097,152   BatchNorm2D-199    [[1, 2048, 7, 7]]     [1, 2048, 7, 7]         8,192
BottleneckBlock-64  [[1, 1024, 14, 14]]    [1, 2048, 7, 7]           0       Conv2D-203       [[1, 2048, 7, 7]]      [1, 512, 7, 7]       1,048,576   BatchNorm2D-203     [[1, 512, 7, 7]]      [1, 512, 7, 7]         2,048     ReLU-67        [[1, 2048, 7, 7]]     [1, 2048, 7, 7]           0       Conv2D-204        [[1, 512, 7, 7]]      [1, 512, 7, 7]       2,359,296   BatchNorm2D-204     [[1, 512, 7, 7]]      [1, 512, 7, 7]         2,048     Conv2D-205        [[1, 512, 7, 7]]     [1, 2048, 7, 7]       1,048,576   BatchNorm2D-205    [[1, 2048, 7, 7]]     [1, 2048, 7, 7]         8,192
BottleneckBlock-65   [[1, 2048, 7, 7]]     [1, 2048, 7, 7]           0       Conv2D-206       [[1, 2048, 7, 7]]      [1, 512, 7, 7]       1,048,576   BatchNorm2D-206     [[1, 512, 7, 7]]      [1, 512, 7, 7]         2,048     ReLU-68        [[1, 2048, 7, 7]]     [1, 2048, 7, 7]           0       Conv2D-207        [[1, 512, 7, 7]]      [1, 512, 7, 7]       2,359,296   BatchNorm2D-207     [[1, 512, 7, 7]]      [1, 512, 7, 7]         2,048     Conv2D-208        [[1, 512, 7, 7]]     [1, 2048, 7, 7]       1,048,576   BatchNorm2D-208    [[1, 2048, 7, 7]]     [1, 2048, 7, 7]         8,192
BottleneckBlock-66   [[1, 2048, 7, 7]]     [1, 2048, 7, 7]           0
AdaptiveAvgPool2D-2  [[1, 2048, 7, 7]]     [1, 2048, 1, 1]           0       Linear-2           [[1, 2048]]            [1, 100]           204,900
Total params: 42,810,404
Trainable params: 42,599,716
Non-trainable params: 210,688
Input size (MB): 0.57
Forward/backward pass size (MB): 391.63
Params size (MB): 163.31
Estimated Total Size (MB): 555.52
-------------------------------------------------------------------------------{'total_params': 42810404, 'trainable_params': 42599716}

④ 模型训练和调优

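class paddle.optimizer.lr.PiecewiseDecay(boundaries, values, last_epoch=-1, verbose=False)

该接口提供分段设置学习率的策略:到达 boundaries 中的每个边界后,学习率依次取 values 中的下一个值。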


class paddle.optimizer.lr.LinearWarmup(learning_rate, warmup_steps, start_lr, end_lr, last_epoch=-1, verbose=False)

该接口提供一种学习率优化策略-线性学习率热身(warm up)对学习率进行初步调整。在正常调整学习率之前,先逐步增大学习率。

class paddle.callbacks.EarlyStopping(monitor='loss', mode='auto', patience=0, verbose=1, min_delta=0, baseline=None, save_best_model=True)

在模型评估阶段,模型效果如果没有提升,EarlyStopping 会让模型提前停止训练。

  • monitor (str,可选) - 监控量。该量作为模型是否停止学习的监控指标。默认值:‘loss’。

  • mode (str,可选) - 可以是 'auto'、'min' 或者 'max'。在 min 模式下,模型会在监控量的值不再减少时停止训练;max 模式下,模型会在监控量的值不再增加时停止训练;auto 模式下,实际的模式会从 monitor 推断出来。如果 monitor 中有 'acc',将会认为是 max 模式,其它情况下,都会被推断为 min 模式。默认值:'auto'。

  • patience (int,可选) - 多少个epoch模型效果未提升会使模型提前停止训练。默认值:0。

  • verbose (int,可选) - 可以是0或者1。0代表不打印模型提前停止训练的日志,1代表打印日志。默认值:1。

  • min_delta (int|float,可选) - 监控量最小改变值。当evaluation的监控变量改变值小于min_delta ,就认为模型没有变化。默认值:0。

  • baseline (int|float,可选) - 监控量的基线。如果模型在训练 patience 个epoch后效果对比基线没有提升,将会停止训练。如果是None,代表没有基线。默认值:None。

  • save_best_model (bool,可选) - 是否保存效果最好的模型(监控量的值最优)。文件会保存在 fit 中传入的参数 save_dir 下,前缀名为best_model,默认值: True。

def make_optimizer(parameters=None, momentum=0.9, weight_decay=5e-4, boundaries=None, values=None):
    """Build a Momentum optimizer driven by a piecewise-constant learning rate.

    Args:
        parameters: Parameters to optimize, e.g. ``model.parameters()``.
        momentum: Momentum factor passed to ``paddle.optimizer.Momentum``.
        weight_decay: Weight-decay coefficient passed to the optimizer.
        boundaries: Scheduler step indices at which the learning rate changes.
        values: Learning rate used in each interval; the call below supplies
            one more entry than ``boundaries``.

    Returns:
        The configured ``paddle.optimizer.Momentum`` instance.

    Raises:
        ValueError: If ``boundaries`` or ``values`` is not provided.
    """
    if boundaries is None or values is None:
        raise ValueError('make_optimizer requires both boundaries and values')

    # NOTE(review): the scheduler class name was stripped by extraction;
    # PiecewiseDecay matches the (boundaries, values) arguments — confirm.
    learning_rate = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=boundaries, values=values, verbose=False)
    # Optional linear warm-up wrapper, left disabled (warmup_steps and base_lr
    # would have to be supplied by the caller):
    # learning_rate = paddle.optimizer.lr.LinearWarmup(
    #     learning_rate=learning_rate,
    #     warmup_steps=warmup_steps,
    #     start_lr=base_lr / 5.,
    #     end_lr=base_lr,
    #     verbose=False)
    optimizer = paddle.optimizer.Momentum(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        momentum=momentum,
        parameters=parameters)
    # Alternative optimizer, left disabled:
    # optimizer = paddle.optimizer.AdamW(
    #     learning_rate=learning_rate,
    #     weight_decay=weight_decay,
    #     parameters=parameters)
    return optimizer


base_lr = 5e-4
# NOTE(review): the high-level API's default LR-scheduler callback appears to
# advance the schedule once per training batch, in which case these boundaries
# are reached within the first dozen batches rather than at epochs 5 and 10.
# Confirm, and add paddle.callbacks.LRScheduler(by_step=False, by_epoch=True)
# to the callbacks if epoch boundaries are intended.
boundaries = [5, 10]
optimizer = make_optimizer(
    boundaries=boundaries,
    values=[base_lr, base_lr * 0.2, base_lr * 0.1],
    parameters=model.parameters(),
)

model.prepare(
    # optimizer=paddle.optimizer.Adam(learning_rate=5e-4, weight_decay=paddle.regularizer.L2Decay(5e-4), parameters=model.parameters()),
    optimizer=optimizer,
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(topk=(1, 5)),
)

# callbacks
visualdl = paddle.callbacks.VisualDL('./visualdl/resnet101')
# Stop when top-1 accuracy stops improving. The monitor must be one of the
# logged metric names (acc_top1 / acc_top5): with the previous value 'acc' the
# callback only warned "Monitor of EarlyStopping should be loss or metric name."
# and never took effect.
earlystop = paddle.callbacks.EarlyStopping(
    'acc_top1',
    mode='max',
    patience=5,
    verbose=1,
    min_delta=0,
    baseline=None,
    save_best_model=True,
)

# NOTE(review): "model.fit(train_dataset" and the closing parenthesis were
# stripped by extraction and are reconstructed here — confirm.
model.fit(
    train_dataset,
    eval_dataset,
    epochs=20,
    batch_size=128,
    save_freq=2,
    save_dir='checkpoint/resnet101',
    callbacks=[visualdl, earlystop],
    verbose=1,
)
The loss value printed in the log is the current step, and the metric is the average value of previous step.
Epoch 1/20
step 391/391 [==============================] - loss: 1.6645 - acc_top1: 0.5995 - acc_top5: 0.8853 - 889ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/0
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.5161 - acc_top1: 0.6217 - acc_top5: 0.8958 - 421ms/step
Eval samples: 10000
Epoch 2/20
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/hapi/ UserWarning: Monitor of EarlyStopping should be loss or metric name.
  'Monitor of EarlyStopping should be loss or metric name.')
step 391/391 [==============================] - loss: 1.6678 - acc_top1: 0.6264 - acc_top5: 0.8994 - 891ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.3930 - acc_top1: 0.6398 - acc_top5: 0.9048 - 421ms/step
Eval samples: 10000
Epoch 3/20
step 391/391 [==============================] - loss: 1.4620 - acc_top1: 0.6483 - acc_top5: 0.9111 - 890ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/2
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.2873 - acc_top1: 0.6574 - acc_top5: 0.9170 - 419ms/step
Eval samples: 10000
Epoch 4/20
step 391/391 [==============================] - loss: 1.2918 - acc_top1: 0.6655 - acc_top5: 0.9203 - 888ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.2550 - acc_top1: 0.6713 - acc_top5: 0.9235 - 422ms/step
Eval samples: 10000
Epoch 5/20
step 391/391 [==============================] - loss: 1.2747 - acc_top1: 0.6815 - acc_top5: 0.9260 - 890ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/4
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.1627 - acc_top1: 0.6817 - acc_top5: 0.9289 - 432ms/step
Eval samples: 10000
Epoch 6/20
step 391/391 [==============================] - loss: 1.2274 - acc_top1: 0.6934 - acc_top5: 0.9329 - 896ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.1079 - acc_top1: 0.6979 - acc_top5: 0.9341 - 429ms/step
Eval samples: 10000
Epoch 7/20
step 391/391 [==============================] - loss: 1.0835 - acc_top1: 0.7030 - acc_top5: 0.9362 - 895ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/6
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.0728 - acc_top1: 0.7092 - acc_top5: 0.9389 - 427ms/step
Eval samples: 10000
Epoch 8/20
step 391/391 [==============================] - loss: 1.0596 - acc_top1: 0.7117 - acc_top5: 0.9420 - 895ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.0304 - acc_top1: 0.7185 - acc_top5: 0.9434 - 428ms/step
Eval samples: 10000
Epoch 9/20
step 391/391 [==============================] - loss: 1.0755 - acc_top1: 0.7246 - acc_top5: 0.9442 - 895ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/8
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.0199 - acc_top1: 0.7282 - acc_top5: 0.9452 - 424ms/step
Eval samples: 10000
Epoch 10/20
step 391/391 [==============================] - loss: 1.1637 - acc_top1: 0.7312 - acc_top5: 0.9478 - 893ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 1.0062 - acc_top1: 0.7315 - acc_top5: 0.9464 - 428ms/step
Eval samples: 10000
Epoch 11/20
step 391/391 [==============================] - loss: 1.0794 - acc_top1: 0.7399 - acc_top5: 0.9518 - 894ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/10
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9966 - acc_top1: 0.7390 - acc_top5: 0.9493 - 426ms/step
Eval samples: 10000
Epoch 12/20
step 391/391 [==============================] - loss: 0.8478 - acc_top1: 0.7469 - acc_top5: 0.9539 - 894ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9767 - acc_top1: 0.7424 - acc_top5: 0.9512 - 425ms/step
Eval samples: 10000
Epoch 13/20
step 391/391 [==============================] - loss: 0.9958 - acc_top1: 0.7526 - acc_top5: 0.9555 - 890ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/12
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9697 - acc_top1: 0.7490 - acc_top5: 0.9539 - 426ms/step
Eval samples: 10000
Epoch 14/20
step 391/391 [==============================] - loss: 0.7780 - acc_top1: 0.7584 - acc_top5: 0.9579 - 894ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9391 - acc_top1: 0.7539 - acc_top5: 0.9564 - 423ms/step
Eval samples: 10000
Epoch 15/20
step 391/391 [==============================] - loss: 1.0491 - acc_top1: 0.7658 - acc_top5: 0.9614 - 891ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/14
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9187 - acc_top1: 0.7583 - acc_top5: 0.9571 - 420ms/step
Eval samples: 10000
Epoch 16/20
step 391/391 [==============================] - loss: 0.8458 - acc_top1: 0.7696 - acc_top5: 0.9617 - 890ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9076 - acc_top1: 0.7623 - acc_top5: 0.9589 - 422ms/step
Eval samples: 10000
Epoch 17/20
step 391/391 [==============================] - loss: 1.0365 - acc_top1: 0.7758 - acc_top5: 0.9628 - 892ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/16
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8588 - acc_top1: 0.7638 - acc_top5: 0.9598 - 424ms/step
Eval samples: 10000
Epoch 18/20
step 391/391 [==============================] - loss: 0.9865 - acc_top1: 0.7814 - acc_top5: 0.9650 - 890ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8834 - acc_top1: 0.7701 - acc_top5: 0.9618 - 429ms/step
Eval samples: 10000
Epoch 19/20
step 391/391 [==============================] - loss: 0.6649 - acc_top1: 0.7834 - acc_top5: 0.9670 - 893ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/18
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9119 - acc_top1: 0.7721 - acc_top5: 0.9620 - 427ms/step
Eval samples: 10000
Epoch 20/20
step 391/391 [==============================] - loss: 0.6904 - acc_top1: 0.7891 - acc_top5: 0.9680 - 898ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8655 - acc_top1: 0.7740 - acc_top5: 0.9628 - 430ms/step
Eval samples: 10000
save checkpoint at /home/aistudio/checkpoint/resnet101/final



# Load the checkpoint saved at ./checkpoint/resnet101/14 and continue fine-tuning from it.
model.load('./checkpoint/resnet101/14')


def make_optimizer(parameters=None, momentum=0.9, weight_decay=5e-4, boundaries=None, values=None):
    """Build an Adam optimizer driven by a piecewise-constant learning rate.

    Args:
        parameters: Parameters to optimize; forwarded to the optimizer.
        momentum: Only referenced by the commented-out Momentum alternative
            below; kept in the signature so existing calls keep working.
        weight_decay: Weight-decay value forwarded to the optimizer.
        boundaries: Scheduler positions at which the learning rate switches
            to the next entry of ``values``.
        values: Learning-rate values used between the boundaries.

    Returns:
        The configured ``paddle.optimizer.Adam`` instance.
    """
    learning_rate = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=boundaries,
        values=values,
        verbose=False)
    # Alternative (disabled): wrap the schedule in a linear warm-up.
    # learning_rate = paddle.optimizer.lr.LinearWarmup(
    #     learning_rate=learning_rate,
    #     warmup_steps=warmup_steps,
    #     start_lr=base_lr / 5.,
    #     end_lr=base_lr,
    #     verbose=False)
    # Alternative (disabled): SGD with momentum instead of Adam.
    # optimizer = paddle.optimizer.Momentum(
    #     learning_rate=learning_rate,
    #     weight_decay=weight_decay,
    #     momentum=momentum,
    #     parameters=parameters)
    optimizer = paddle.optimizer.Adam(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        parameters=parameters)
    return optimizer


base_lr = 5e-5
boundaries = [5]
# The learning rate starts at base_lr and drops to 0.2 * base_lr after the boundary.
optimizer = make_optimizer(
    boundaries=boundaries,
    values=[base_lr, base_lr*0.2],
    parameters=model.parameters())

model.prepare(
    optimizer=optimizer,
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(topk=(1, 5))
)

# callbacks
visualdl = paddle.callbacks.VisualDL('./visualdl/resnet101/14')
earlystop = paddle.callbacks.EarlyStopping(  # stop once 'acc' no longer improves
    'acc',
    mode='max',
    patience=4,
    verbose=1,
    min_delta=0,
    baseline=None,
    save_best_model=True)

# NOTE: only the VisualDL callback is passed to fit(); `earlystop` is created
# above but is not part of the callbacks list in this run.
model.fit(
    train_dataset,
    eval_dataset,
    epochs=10,
    batch_size=128,
    save_freq=2,
    save_dir='checkpoint/resnet101/14',
    callbacks=[visualdl],
    verbose=1)
The loss value printed in the log is the current step, and the metric is the average value of previous step.
Epoch 1/10
step 391/391 [==============================] - loss: 0.7212 - acc_top1: 0.7760 - acc_top5: 0.9648 - 892ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/14/0
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.9302 - acc_top1: 0.7996 - acc_top5: 0.9680 - 421ms/step
Eval samples: 10000
Epoch 2/10
step 391/391 [==============================] - loss: 0.5295 - acc_top1: 0.8301 - acc_top5: 0.9776 - 893ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8164 - acc_top1: 0.8103 - acc_top5: 0.9732 - 416ms/step
Eval samples: 10000
Epoch 3/10
step 391/391 [==============================] - loss: 0.4092 - acc_top1: 0.8622 - acc_top5: 0.9853 - 895ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/14/2
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8366 - acc_top1: 0.8297 - acc_top5: 0.9747 - 416ms/step
Eval samples: 10000
Epoch 4/10
step 391/391 [==============================] - loss: 0.3479 - acc_top1: 0.8860 - acc_top5: 0.9896 - 899ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.7397 - acc_top1: 0.8325 - acc_top5: 0.9757 - 428ms/step
Eval samples: 10000
Epoch 5/10
step 391/391 [==============================] - loss: 0.3405 - acc_top1: 0.9086 - acc_top5: 0.9925 - 906ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/14/4
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8492 - acc_top1: 0.8373 - acc_top5: 0.9780 - 430ms/step
Eval samples: 10000
Epoch 6/10
step 391/391 [==============================] - loss: 0.1852 - acc_top1: 0.9242 - acc_top5: 0.9948 - 902ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.7244 - acc_top1: 0.8436 - acc_top5: 0.9761 - 422ms/step
Eval samples: 10000
Epoch 7/10
step 391/391 [==============================] - loss: 0.3084 - acc_top1: 0.9387 - acc_top5: 0.9969 - 895ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/14/6
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8724 - acc_top1: 0.8458 - acc_top5: 0.9767 - 419ms/step
Eval samples: 10000
Epoch 8/10
step 391/391 [==============================] - loss: 0.1378 - acc_top1: 0.9529 - acc_top5: 0.9979 - 893ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.8844 - acc_top1: 0.8443 - acc_top5: 0.9765 - 419ms/step
Eval samples: 10000
Epoch 9/10
step 391/391 [==============================] - loss: 0.1667 - acc_top1: 0.9623 - acc_top5: 0.9981 - 894ms/step
save checkpoint at /home/aistudio/checkpoint/resnet101/14/8
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.7474 - acc_top1: 0.8490 - acc_top5: 0.9775 - 416ms/step
Eval samples: 10000
Epoch 10/10
step 391/391 [==============================] - loss: 0.1841 - acc_top1: 0.9697 - acc_top5: 0.9989 - 895ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.7419 - acc_top1: 0.8471 - acc_top5: 0.9758 - 428ms/step
Eval samples: 10000
save checkpoint at /home/aistudio/checkpoint/resnet101/14/final

VisualDL

model.save('./finetuning/resnet101/model', training=True)


Eval samples: 10000
{'loss': [1.4640276], 'acc_top1': 0.6361581096849475, 'acc_top5': 0.8786464410735122}
# Evaluate the fine-tuned model on the evaluation dataset and print the returned metrics.
result = model.evaluate(eval_dataset, batch_size=128, verbose=1)
print(result)
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 79/79 [==============================] - loss: 0.7419 - acc_top1: 0.8468 - acc_top5: 0.9757 - 417ms/step
Eval samples: 10000
{'loss': [0.74186254], 'acc_top1': 0.8467935528120714, 'acc_top5': 0.9757373113854595}



dataset network params top1 err top5 err epoch(lr = 0.1) epoch(lr = 0.02) epoch(lr = 0.004) epoch(lr = 0.0008) total epoch
cifar100 mobilenet 3.3M 34.02 10.56 60 60 40 40 200
cifar100 mobilenetv2 2.36M 31.92 09.02 60 60 40 40 200
cifar100 squeezenet 0.78M 30.59 8.36 60 60 40 40 200
cifar100 shufflenet 1.0M 29.94 8.35 60 60 40 40 200
cifar100 shufflenetv2 1.3M 30.49 8.49 60 60 40 40 200
cifar100 vgg11_bn 28.5M 31.36 11.85 60 60 40 40 200
cifar100 vgg13_bn 28.7M 28.00 9.71 60 60 40 40 200
cifar100 vgg16_bn 34.0M 27.07 8.84 60 60 40 40 200
cifar100 vgg19_bn 39.0M 27.77 8.84 60 60 40 40 200
cifar100 resnet18 11.2M 24.39 6.95 60 60 40 40 200
cifar100 resnet34 21.3M 23.24 6.63 60 60 40 40 200
cifar100 resnet50 23.7M 22.61 6.04 60 60 40 40 200
cifar100 resnet101 42.7M 22.22 5.61 60 60 40 40 200
cifar100 resnet152 58.3M 22.31 5.81 60 60 40 40 200
cifar100 preactresnet18 11.3M 27.08 8.53 60 60 40 40 200
cifar100 preactresnet34 21.5M 24.79 7.68 60 60 40 40 200
cifar100 preactresnet50 23.9M 25.73 8.15 60 60 40 40 200
cifar100 preactresnet101 42.9M 24.84 7.83 60 60 40 40 200
cifar100 preactresnet152 58.6M 22.71 6.62 60 60 40 40 200
cifar100 resnext50 14.8M 22.23 6.00 60 60 40 40 200
cifar100 resnext101 25.3M 22.22 5.99 60 60 40 40 200
cifar100 resnext152 33.3M 22.40 5.58 60 60 40 40 200
cifar100 attention59 55.7M 33.75 12.90 60 60 40 40 200
cifar100 attention92 102.5M 36.52 11.47 60 60 40 40 200
cifar100 densenet121 7.0M 22.99 6.45 60 60 40 40 200
cifar100 densenet161 26M 21.56 6.04 60 60 60 40 200
cifar100 densenet201 18M 21.46 5.9 60 60 40 40 200
cifar100 googlenet 6.2M 21.97 5.94 60 60 40 40 200
cifar100 inceptionv3 22.3M 22.81 6.39 60 60 40 40 200
cifar100 inceptionv4 41.3M 24.14 6.90 60 60 40 40 200
cifar100 inceptionresnetv2 65.4M 27.51 9.11 60 60 40 40 200
cifar100 xception 21.0M 25.07 7.32 60 60 40 40 200
cifar100 seresnet18 11.4M 23.56 6.68 60 60 40 40 200
cifar100 seresnet34 21.6M 22.07 6.12 60 60 40 40 200
cifar100 seresnet50 26.5M 21.42 5.58 60 60 40 40 200
cifar100 seresnet101 47.7M 20.98 5.41 60 60 40 40 200
cifar100 seresnet152 66.2M 20.66 5.19 60 60 40 40 200
cifar100 nasnet 5.2M 22.71 5.91 60 60 40 40 200
cifar100 wideresnet-40-10 55.9M 21.25 5.77 60 60 40 40 200
cifar100 stochasticdepth18 11.22M 31.40 8.84 60 60 40 40 200
cifar100 stochasticdepth34 21.36M 27.72 7.32 60 60 40 40 200
cifar100 stochasticdepth50 23.71M 23.35 5.76 60 60 40 40 200
cifar100 stochasticdepth101 42.69M 21.28 5.39 60 60 40 40 200


