lab5
chenjh919
1 year, 10 days ago
| 0 | { | |
| 1 | "cells": [ | |
| 2 | { | |
| 3 | "cell_type": "code", | |
| 4 | "execution_count": 1, | |
| 5 | "id": "aa1c822b", | |
| 6 | "metadata": {}, | |
| 7 | "outputs": [], | |
| 8 | "source": [ | |
| 9 | "import torch\n", | |
| 10 | "import torch.nn as nn\n", | |
| 11 | "import torch.optim as optim\n", | |
| 12 | "import os\n", | |
| 13 | "\n", | |
| 14 | "# 导入项目中的模块\n", | |
| 15 | "from models import SimpleMLP, DeepMLP, ResidualMLP, SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet\n", | |
| 16 | "from utils import (\n", | |
| 17 | " load_cifar10, \n", | |
| 18 | " set_seed, \n", | |
| 19 | " train_model, \n", | |
| 20 | " evaluate_model, \n", | |
| 21 | " plot_training_history,\n", | |
| 22 | " visualize_model_predictions,\n", | |
| 23 | " visualize_conv_filters,\n", | |
| 24 | " model_complexity\n", | |
| 25 | ")" | |
| 26 | ] | |
| 27 | }, | |
| 28 | { | |
| 29 | "cell_type": "code", | |
| 30 | "execution_count": 3, | |
| 31 | "id": "dd3b8edc", | |
| 32 | "metadata": { | |
| 33 | "inputHidden": false | |
| 34 | }, | |
| 35 | "outputs": [ | |
| 36 | { | |
| 37 | "ename": "AttributeError", | |
| 38 | "evalue": "module 'os' has no attribute 'expanduser'", | |
| 39 | "output_type": "error", | |
| 40 | "traceback": [ | |
| 41 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| 42 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
| 43 | "\u001b[0;32m/tmp/ipykernel_246/1765368111.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mset_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpanduser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"~/work/Jianhai/lab5\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;31m# 检查是否有可用的GPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| 44 | "\u001b[0;31mAttributeError\u001b[0m: module 'os' has no attribute 'expanduser'" | |
| 45 | ] | |
| 46 | } | |
| 47 | ], | |
| 48 | "source": [ | |
| 49 | "# 设置参数\n", | |
| 50 | "model_type = 'simple_mlp' # 可选: 'simple_mlp', 'deep_mlp', 'residual_mlp', 'simple_cnn', 'medium_cnn', 'vgg_style', 'resnet'\n", | |
| 51 | "epochs = 20\n", | |
| 52 | "learning_rate = 0.001\n", | |
| 53 | "batch_size = 128\n", | |
| 54 | "use_data_augmentation = True # CNN通常受益于数据增强\n", | |
| 55 | "save_directory = './ck'\n", | |
| 56 | "visualize_filters = True # 是否可视化卷积核(仅对CNN有效)\n", | |
| 57 | "visualize_predictions = True # 是否可视化预测结果\n", | |
| 58 | "\n", | |
| 59 | "# 设置随机种子\n", | |
| 60 | "set_seed()\n", | |
| 61 | "\n", | |
| 62 | "os.chdir(os.path.expanduser(\"~/work/Jianhai/lab5\"))\n", | |
| 63 | "\n", | |
| 64 | "# 检查是否有可用的GPU\n", | |
| 65 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n", | |
| 66 | "print(f\"使用设备: {device}\")\n", | |
| 67 | "\n", | |
| 68 | "# 加载数据\n", | |
| 69 | "train_loader, valid_loader, test_loader, classes = load_cifar10(\n", | |
| 70 | " use_augmentation=use_data_augmentation, \n", | |
| 71 | " batch_size=batch_size\n", | |
| 72 | ")\n", | |
| 73 | "\n", | |
| 74 | "# 初始化选择的模型\n", | |
| 75 | "if model_type == 'simple_mlp':\n", | |
| 76 | " model = SimpleMLP()\n", | |
| 77 | " model_name = \"SimpleMLP\"\n", | |
| 78 | "elif model_type == 'deep_mlp':\n", | |
| 79 | " model = DeepMLP(dropout_rate=0.5, use_bn=True, use_dropout=True)\n", | |
| 80 | " model_name = \"DeepMLP\"\n", | |
| 81 | "elif model_type == 'residual_mlp':\n", | |
| 82 | " model = ResidualMLP(activation='relu')\n", | |
| 83 | " model_name = \"ResidualMLP\"\n", | |
| 84 | "elif model_type == 'simple_cnn':\n", | |
| 85 | " model = SimpleCNN()\n", | |
| 86 | " model_name = \"SimpleCNN\"\n", | |
| 87 | "elif model_type == 'medium_cnn':\n", | |
| 88 | " model = MediumCNN(use_bn=True)\n", | |
| 89 | " model_name = \"MediumCNN\"\n", | |
| 90 | "elif model_type == 'vgg_style':\n", | |
| 91 | " model = VGGStyleNet()\n", | |
| 92 | " model_name = \"VGGStyleNet\"\n", | |
| 93 | "else: # resnet\n", | |
| 94 | " model = SimpleResNet(num_blocks=[2, 2, 2])\n", | |
| 95 | " model_name = \"SimpleResNet\"\n", | |
| 96 | "\n", | |
| 97 | "print(f\"使用模型: {model_name}\")" | |
| 98 | ] | |
| 99 | }, | |
| 100 | { | |
| 101 | "cell_type": "code", | |
| 102 | "execution_count": null, | |
| 103 | "id": "1a5322fe", | |
| 104 | "metadata": {}, | |
| 105 | "outputs": [ | |
| 106 | { | |
| 107 | "name": "stdout", | |
| 108 | "output_type": "stream", | |
| 109 | "text": [ | |
| 110 | "\n", | |
| 111 | "分析模型复杂度:\n", | |
| 112 | "参数量: 1,578,506\n", | |
| 113 | "每批次(128个样本)推理时间: 8.18ms\n", | |
| 114 | "Epoch 1/20\n", | |
| 115 | "模型已保存到 ./ck/SimpleMLP_best.pth\n", | |
| 116 | "训练损失: 1.8831, 训练准确率: 0.3418\n", | |
| 117 | "验证损失: 1.7475, 验证准确率: 0.3796\n", | |
| 118 | "本轮用时: 48.95s\n", | |
| 119 | "--------------------------------------------------\n", | |
| 120 | "Epoch 2/20\n" | |
| 121 | ] | |
| 122 | } | |
| 123 | ], | |
| 124 | "source": [ | |
| 125 | "# 计算模型复杂度\n", | |
| 126 | "print(\"\\n分析模型复杂度:\")\n", | |
| 127 | "model_complexity(model, device=device)\n", | |
| 128 | "\n", | |
| 129 | "# 定义损失函数和优化器\n", | |
| 130 | "criterion = nn.CrossEntropyLoss()\n", | |
| 131 | "optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n", | |
| 132 | "\n", | |
| 133 | "# 可以添加学习率调度器\n", | |
| 134 | "scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)\n", | |
| 135 | "\n", | |
| 136 | "# 确保checkpoints目录存在\n", | |
| 137 | "os.makedirs(save_directory, exist_ok=True)\n", | |
| 138 | "\n", | |
| 139 | "# 训练模型\n", | |
| 140 | "trained_model, history = train_model(\n", | |
| 141 | " model, train_loader, valid_loader, criterion, optimizer, scheduler,\n", | |
| 142 | " num_epochs=epochs, device=device, save_dir=save_directory\n", | |
| 143 | ")\n", | |
| 144 | "\n", | |
| 145 | "# 绘制训练历史\n", | |
| 146 | "plot_training_history(history, title=f\"{model_name} Training History\")\n", | |
| 147 | "\n", | |
| 148 | "# 在测试集上评估模型\n", | |
| 149 | "print(\"\\n在测试集上评估模型:\")\n", | |
| 150 | "test_loss, test_acc = evaluate_model(trained_model, test_loader, criterion, device, classes)\n", | |
| 151 | "\n", | |
| 152 | "print(f\"{model_name} 最终测试准确率: {test_acc:.4f}\")\n", | |
| 153 | "\n", | |
| 154 | "# 如果是CNN模型并且需要可视化卷积核\n", | |
| 155 | "if visualize_filters and model_type in ['simple_cnn', 'medium_cnn', 'vgg_style', 'resnet']:\n", | |
| 156 | " print(\"\\n可视化卷积核:\")\n", | |
| 157 | " if model_type == 'simple_cnn':\n", | |
| 158 | " visualize_conv_filters(trained_model, 'conv1')\n", | |
| 159 | " elif model_type == 'medium_cnn':\n", | |
| 160 | " visualize_conv_filters(trained_model, 'conv1')\n", | |
| 161 | " elif model_type == 'vgg_style':\n", | |
| 162 | " visualize_conv_filters(trained_model, 'features.0')\n", | |
| 163 | " else: # resnet\n", | |
| 164 | " visualize_conv_filters(trained_model, 'conv1')\n", | |
| 165 | "\n", | |
| 166 | "# 如果需要可视化模型预测\n", | |
| 167 | "if visualize_predictions:\n", | |
| 168 | " print(\"\\n可视化模型预测:\")\n", | |
| 169 | " visualize_model_predictions(trained_model, test_loader, classes, device)\n", | |
| 170 | "\n", | |
| 171 | "print(f\"\\n{model_name}的训练和评估已完成!\")" | |
| 172 | ] | |
| 173 | }, | |
| 174 | { | |
| 175 | "cell_type": "code", | |
| 176 | "execution_count": null, | |
| 177 | "id": "3eaec7b4", | |
| 178 | "metadata": {}, | |
| 179 | "outputs": [], | |
| 180 | "source": [] | |
| 181 | }, | |
| 182 | { | |
| 183 | "cell_type": "code", | |
| 184 | "execution_count": null, | |
| 185 | "id": "6701954f", | |
| 186 | "metadata": {}, | |
| 187 | "outputs": [], | |
| 188 | "source": [] | |
| 189 | } | |
| 190 | ], | |
| 191 | "metadata": { | |
| 192 | "kernelspec": { | |
| 193 | "display_name": "Python 3", | |
| 194 | "language": "python", | |
| 195 | "name": "python3" | |
| 196 | }, | |
| 197 | "language_info": { | |
| 198 | "codemirror_mode": { | |
| 199 | "name": "ipython", | |
| 200 | "version": 3 | |
| 201 | }, | |
| 202 | "file_extension": ".py", | |
| 203 | "mimetype": "text/x-python", | |
| 204 | "name": "python", | |
| 205 | "nbconvert_exporter": "python", | |
| 206 | "pygments_lexer": "ipython3", | |
| 207 | "version": "3.7.5" | |
| 208 | } | |
| 209 | }, | |
| 210 | "nbformat": 4, | |
| 211 | "nbformat_minor": 5 | |
| 212 | } |
| 0 | # 深度学习模型实验指导:MLP与CNN模型对比分析 | |
| 1 | ||
| 2 | ## 实验概述 | |
| 3 | ||
| 4 | 本实验旨在通过对多层感知机(MLP)和卷积神经网络(CNN)的实现、训练和评估,帮助学生深入理解两种模型的结构特点、性能差异以及适用场景。学生将从基础模型开始,逐步探索更复杂的网络架构,最终通过对比分析,掌握深度学习模型设计与评估的关键技能。 | |
| 5 | ||
| 6 | ## 实验目的 | |
| 7 | ||
| 8 | 1. 掌握MLP和CNN的基本原理和实现方法 | |
| 9 | 2. 了解不同网络结构对模型性能的影响 | |
| 10 | 3. 学习深度学习模型训练、评估和可视化的方法 | |
| 11 | 4. 通过对比实验,理解不同模型在图像分类任务中的优缺点 | |
| 12 | 5. 培养深度学习模型调优和问题解决的能力 | |
| 13 | ||
| 14 | ## 实验准备 | |
| 15 | ||
| 16 | ### 环境要求 | |
| 17 | ||
| 18 | - Python 3.6+ | |
| 19 | - PyTorch 1.7+ | |
| 20 | - NumPy, Matplotlib | |
| 21 | - scikit-learn (用于评估) | |
| 22 | - 建议使用GPU环境(可选) | |
| 23 | ||
| 24 | 实验环境已经在mo平台中搭建好了,同学们无需自行配置 | |
| 25 | ||
| 26 | ### 实验数据集 | |
| 27 | ||
| 28 | 本实验使用CIFAR-10数据集,包含10个类别的彩色图像,每类6000张,共60000张32×32的图像。 | |
| 29 | ||
| 30 | ### 项目结构 | |
| 31 | ||
| 32 | ``` | |
| 33 | 项目根目录/ | |
| 34 | ├── models/ | |
| 35 | │ ├── __init__.py | |
| 36 | │ ├── mlp.py # MLP模型定义 | |
| 37 | │ └── cnn.py # CNN模型定义 | |
| 38 | ├── utils/ | |
| 39 | │ ├── __init__.py | |
| 40 | │ ├── data_loader.py # 数据加载函数 | |
| 41 | │ └── train_utils.py # 训练和评估函数 | |
| 42 | ├── train_all_notebook.py # 统一训练脚本 | |
| 43 | └── compare_models.py # 模型比较脚本 | |
| 44 | ``` | |
| 45 | ||
| 46 | ## 实验原理 | |
| 47 | ||
| 48 | ### 多层感知机(MLP) | |
| 49 | ||
| 50 | 多层感知机是一种前馈神经网络,由输入层、一个或多个隐藏层和输出层组成。MLP的主要特点是: | |
| 51 | ||
| 52 | 1. 每层神经元与下一层全连接 | |
| 53 | 2. 使用非线性激活函数(如ReLU、Sigmoid等) | |
| 54 | 3. 通过反向传播算法进行训练 | |
| 55 | ||
| 56 | **思考问题1**: MLP在处理图像数据时面临哪些挑战?请从数据结构、参数量和特征提取能力三个角度分析。 | |
| 57 | ||
| 58 | ||
| 59 | ### 卷积神经网络(CNN) | |
| 60 | ||
| 61 | 卷积神经网络是为处理具有网格状拓扑结构的数据而设计的神经网络,主要包含卷积层、池化层和全连接层。CNN的主要特点是: | |
| 62 | ||
| 63 | 1. 局部连接:每个神经元只与输入数据的一个局部区域连接 | |
| 64 | 2. 权重共享:同一特征图的所有神经元共享相同的权重 | |
| 65 | 3. 多层次特征提取:低层检测边缘等简单特征,高层组合这些特征形成更复杂的表示 | |
| 66 | ||
| 67 | **思考问题2**: CNN相比MLP在处理图像时具有哪些优势?解释卷积操作如何保留图像的空间信息。 | |
| 68 | ||
| 69 | ||
| 70 | ## 实验内容 | |
| 71 | ||
| 72 | ### 第一部分:基础MLP模型 | |
| 73 | ||
| 74 | #### 1.1 了解MLP模型结构 | |
| 75 | ||
| 76 | 查看`models/mlp.py`文件,理解三种MLP模型的结构: | |
| 77 | - `SimpleMLP`: 单隐层MLP | |
| 78 | - `DeepMLP`: 多隐层MLP,带有BatchNorm和Dropout | |
| 79 | - `ResidualMLP`: 带有残差连接的MLP | |
| 80 | ||
| 81 | **任务1**: 在下面的代码块中,实现一个具有两个隐藏层的MLP模型。第一隐藏层有128个神经元,第二隐藏层有64个神经元,输出层对应10个类别。使用ReLU激活函数,并添加BatchNorm和Dropout(0.3)。 | |
| 82 | ||
| 83 | ```python | |
| 84 | import torch.nn as nn | |
| 85 | ||
| 86 | class TwoLayerMLP(nn.Module): | |
| 87 | def __init__(self, input_dim=3*32*32): | |
| 88 | super(TwoLayerMLP, self).__init__() | |
| 89 | self.flatten = nn.Flatten() | |
| 90 | # 使用nn.Linear, nn.BatchNorm1d, nn.ReLU和nn.Dropout实现两个隐藏层 | |
| 91 | ||
| 92 | def forward(self, x): | |
| 93 | x = self.flatten(x) | |
| 94 | # 实现前向传播 | |
| 95 | return x | |
| 96 | ``` | |
| 97 | ||
| 98 | #### 1.2 训练和评估MLP模型 | |
| 99 | ||
| 100 | 1. 在 `train.ipynb` 中训练SimpleMLP模型,确保将`model_type`设置为`'simple_mlp'`。 | |
| 101 | ||
| 102 | 2. 观察训练过程中的损失和准确率变化,以及最终在测试集上的性能。 | |
| 103 | ||
| 104 | **分析问题1**: 训练过程中,损失和准确率曲线表现如何?是否出现过拟合或欠拟合?简要分析可能的原因。 | |
| 105 | ||
| 106 | ||
| 107 | 3. 修改参数尝试训练DeepMLP模型,将`model_type`设置为`'deep_mlp'`。 | |
| 108 | ||
| 109 | **分析问题2**: 对比SimpleMLP和DeepMLP的性能,增加网络深度对性能有何影响? | |
| 110 | ||
| 111 | ||
| 112 | ### 第二部分:基础CNN模型 | |
| 113 | ||
| 114 | #### 2.1 了解CNN模型结构 | |
| 115 | ||
| 116 | 查看`models/cnn.py`文件,理解不同CNN模型的结构: | |
| 117 | - `SimpleCNN`: 简单的CNN,包含两个卷积层 | |
| 118 | - `MediumCNN`: 中等复杂度的CNN,带有BatchNorm和Dropout | |
| 119 | - `VGGStyleNet`: VGG风格的CNN,使用连续的3x3卷积 | |
| 120 | - `SimpleResNet`: 简化的ResNet,包含残差连接 | |
| 121 | ||
| 122 | **任务2**: 修改下面的`SimpleCNN`代码,添加一个额外的卷积层和BatchNorm。新的卷积层应该在第二个池化层之后,卷积核数量为64,卷积核大小为3x3。 | |
| 123 | ||
| 124 | ```python | |
| 125 | class EnhancedCNN(nn.Module): | |
| 126 | def __init__(self): | |
| 127 | super(EnhancedCNN, self).__init__() | |
| 128 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) | |
| 129 | self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1) | |
| 130 | # 在这里添加一个新的卷积层、BatchNorm和相应的池化层 | |
| 131 | self.pool = nn.MaxPool2d(2, 2) | |
| 132 | self.flatten = nn.Flatten() | |
| 133 | # 修改全连接层以适应新的特征图尺寸 | |
| 134 | self.relu = nn.ReLU() | |
| 135 | def forward(self, x): | |
| 136 | # 实现包含新卷积层的前向传播 | |
| 137 | return x | |
| 138 | ``` | |
| 139 | ||
| 140 | #### 2.2 训练和评估CNN模型 | |
| 141 | ||
| 142 | 1. 在 `train.ipynb` 中训练SimpleCNN模型,确保将`model_type`设置为`'simple_cnn'`,并将`use_data_augmentation`设置为`True`。 | |
| 143 | ||
| 144 | 2. 观察训练过程和卷积核可视化结果。 | |
| 145 | ||
| 146 | **分析问题3**: 卷积核可视化显示了什么模式?这些模式与图像中的哪些特征可能对应? | |
| 147 | ||
| 148 | ||
| 149 | 3. 继续训练MediumCNN模型,将`model_type`设置为`'medium_cnn'`。 | |
| 150 | ||
| 151 | **分析问题4**: CNN模型相比MLP在CIFAR-10上的性能有何不同?为什么会有这样的差异? | |
| 152 | ||
| 153 | ||
| 154 | ||
| 155 | ### 第三部分:高级CNN架构探索 | |
| 156 | ||
| 157 | #### 3.1 VGG风格和ResNet风格网络架构 | |
| 158 | ||
| 159 | 在本部分中,我们将探索两种影响深远的CNN架构:VGG和ResNet。通过理解这些经典架构的设计理念和特点,可以帮助我们设计更高效的神经网络。 | |
| 160 | ||
| 161 | ##### 3.1.1 VGG架构特点 | |
| 162 | VGG网络(由Visual Geometry Group开发)是一种非常简洁而有效的CNN架构,在2014年ImageNet挑战赛中取得了优异成绩。其主要特点包括: | |
| 163 | ||
| 164 | 1. **简单统一的设计**:使用小尺寸(3×3)卷积核和2×2最大池化层 | |
| 165 | 2. **深度堆叠**:通过堆叠多个相同配置的卷积层增加网络深度 | |
| 166 | 3. **结构规整**:遵循"卷积层组-池化层"的模式,随着网络深入,特征图尺寸减小而通道数增加 | |
| 167 | ||
| 168 | 在我们的实现中,`VGGStyleNet`采用了简化版的VGG设计理念,包含三个卷积块,每个块包含两个卷积层和一个池化层。 | |
| 169 | ||
| 170 | 1. 在 `train.ipynb` 中训练VGGStyleNet模型,确保将`model_type`设置为`'vgg_style'`,并将`use_data_augmentation`设置为`True`。 | |
| 171 | ||
| 172 | 2. 观察网络的训练过程和性能。特别注意其收敛速度和最终准确率。 | |
| 173 | ||
| 174 | ##### 3.1.2 ResNet架构及残差连接 | |
| 175 | ||
| 176 | ResNet(残差网络)由微软研究院的He等人在2015年提出,是解决"深度退化问题"的突破性架构。其核心创新是引入了残差连接(skip connection): | |
| 177 | ||
| 178 | 1. **残差连接**:通过快捷连接(shortcut connection)将输入直接加到输出上,形成恒等映射路径 | |
| 179 | 2. **残差学习**:网络不再直接学习输入到输出的目标映射H(x),而是学习残差F(x)=H(x)-x,残差块的输出为F(x)+x | |
| 180 | 3. **深度扩展**:残差连接有效缓解了梯度消失问题,使得训练非常深的网络成为可能 | |
| 181 | ||
| 182 | 在我们的实现中,`SimpleResNet`使用了基本的残差块,每个残差块包含两个3×3的卷积层和一个跳跃连接。 | |
| 183 | ||
| 184 | 1. 在 `train.ipynb` 中训练SimpleResNet模型,确保将`model_type`设置为`'resnet'`,并将`use_data_augmentation`设置为`True`。 | |
| 185 | ||
| 186 | 2. 观察网络的训练过程和性能,特别是深度对训练稳定性的影响。 | |
| 187 | ||
| 188 | ##### 3.1.3 Bottleneck结构 | |
| 189 | ||
| 190 | 在更深的ResNet变体中,常使用"瓶颈"(Bottleneck)结构来降低计算复杂度: | |
| 191 | ||
| 192 | - 使用1×1卷积降低通道数(降维) | |
| 193 | - 使用3×3卷积进行特征提取 | |
| 194 | - 再使用1×1卷积恢复通道数(升维) | |
| 195 | ||
| 196 | 这种设计大幅减少参数量和计算量,同时保持或提高性能。 | |
| 197 | ||
| 198 | **思考问题3**: 分析Bottleneck结构的优势。为什么1×1卷积在深度CNN中如此重要?它如何帮助控制网络的参数量和计算复杂度? | |
| 199 | ||
| 200 | ||
| 201 | **探索问题1**: 查看`models/cnn.py`中的`SimpleResNet`实现,分析残差连接是如何实现的。如果输入和输出通道数不匹配,代码是如何处理的? | |
| 202 | ||
| 203 | ||
| 204 | ||
| 205 | #### 3.2 模型复杂度分析 | |
| 206 | ||
| 207 | 不同CNN架构在性能和效率之间存在权衡。现在我们将通过分析不同模型的参数量和推理时间来理解这种权衡。 | |
| 208 | ||
| 209 | 1. 运行以下代码来分析各个模型的复杂度: | |
| 210 | ```python | |
| 211 | from models import SimpleMLP, DeepMLP, ResidualMLP, SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet | |
| 212 | from utils import model_complexity | |
| 213 | import torch | |
| 214 | ||
| 215 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') | |
| 216 | ||
| 217 | models = { | |
| 218 | 'SimpleMLP': SimpleMLP(), | |
| 219 | 'DeepMLP': DeepMLP(), | |
| 220 | 'SimpleCNN': SimpleCNN(), | |
| 221 | 'MediumCNN': MediumCNN(), | |
| 222 | 'VGGStyleNet': VGGStyleNet(), | |
| 223 | 'SimpleResNet': SimpleResNet() | |
| 224 | } | |
| 225 | ||
| 226 | results = {} | |
| 227 | for name, model in models.items(): | |
| 228 | print(f"\n分析{name}复杂度:") | |
| 229 | params, time = model_complexity(model, device=device) | |
| 230 | results[name] = {'params': params, 'time': time} | |
| 231 | ``` | |
| 232 | ||
| 233 | 2. 记录并比较各个模型的参数量和推理时间。 | |
| 234 | ||
| 235 | **分析问题5**: VGG风格和ResNet风格网络的性能比较。残差连接带来了哪些优势? | |
| 236 | ||
| 237 | **分析问题6**: 参数量和推理时间如何影响模型的实用性?如何在性能和效率之间找到平衡? | |
| 238 | ||
| 239 | ||
| 240 | #### 3.3 理解高级CNN设计理念 | |
| 241 | ||
| 242 | 随着深度学习的发展,CNN架构设计也变得更加精细和高效。以下是一些重要的设计理念: | |
| 243 | ||
| 244 | 1. **网络深度与宽度平衡**:更深的网络能学习更抽象的特征,但也更难训练;更宽的网络(更多通道)能捕获更多特征,但参数量增加 | |
| 245 | 2. **跳跃连接**:除了ResNet的残差连接,还有DenseNet的密集连接、U-Net的跨层连接等 | |
| 246 | 3. **特征增强**:注意力机制(如SENet的通道注意力)、特征融合等 | |
| 247 | 4. **高效卷积设计**:深度可分离卷积(MobileNet)、组卷积(ShuffleNet)等 | |
| 248 | ||
| 249 | **探索问题2**: 如果你要为移动设备设计一个CNN模型,应该考虑哪些因素来权衡性能和效率?请提出至少三条具体的设计原则。 | |
| 250 | ||
| 251 | ||
| 252 | ### 第四部分:模型比较与分析 | |
| 253 | ||
| 254 | 运行 `compare.ipynb` 来对比不同模型的性能: | |
| 255 | ||
| 256 | **综合分析**: 根据比较结果,分析不同类型模型(MLP和CNN)以及不同复杂度模型的性能差异。考虑以下几点: | |
| 257 | 1. 测试准确率 | |
| 258 | 2. 参数量 | |
| 259 | 3. 推理时间 | |
| 260 | 4. 训练收敛速度 | |
| 261 | 5. 过拟合/欠拟合情况 | |
| 262 | ||
| 263 | ||
| 264 | ## 创新探索任务(选做) | |
| 265 | ||
| 266 | 选择下列一项或多项任务完成: | |
| 267 | ||
| 268 | 1. **模型改进**:对任一模型进行修改和改进,提高其在CIFAR-10上的性能。 | |
| 269 | 2. **可视化分析**:设计更好的可视化方法来解释模型的决策过程。 | |
| 270 | 3. **迁移学习**:探索如何利用预训练模型提高CIFAR-10的分类性能。 | |
| 271 | 4. **对抗性样本**:生成对抗性样本,并研究不同模型对对抗性样本的鲁棒性。 | |
| 272 | 5. **自监督学习**:实现一个简单的自监督学习方法,并评估其效果。 | |
| 273 | ||
| 274 | ## 实验报告要求 | |
| 275 | ||
| 276 | 实验报告应包含以下内容: | |
| 277 | ||
| 278 | 1. 实验目的和背景介绍 | |
| 279 | 2. 实验原理简述 | |
| 280 | 3. 实验过程描述 | |
| 281 | 4. 实现的代码(关键部分,包含详细注释) | |
| 282 | 5. 实验结果和分析(包括填写的所有分析问题和任务) | |
| 283 | 6. 创新探索任务的设计、实现和结果(如果选做) | |
| 284 | 7. 结论和思考 | |
| 285 | 8. 参考文献 | |
| 286 | ||
| 287 | ## 评分标准 | |
| 288 | ||
| 289 | - 基础任务完成度:60% | |
| 290 | - 分析问题深度和准确性:35% | |
| 291 | - 创新探索任务:15% (bonus) | |
| 292 | - 报告质量和表达清晰度:5% | |
| 293 | ||
| 294 | ## 参考资料 | |
| 295 | ||
| 296 | 1. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436-444. | |
| 297 | 2. He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. CVPR. | |
| 298 | 3. Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556. | |
| 299 | 4. PyTorch文档:https://pytorch.org/docs/stable/index.html | |
| 300 | 5. CS231n: Convolutional Neural Networks for Visual Recognition:https://cs231n.github.io/⏎ |
| 0 | # 深度学习模型实验指导:MLP与CNN模型对比分析 | |
| 1 | ||
| 2 | ## 实验概述 | |
| 3 | ||
| 4 | 本实验旨在通过对多层感知机(MLP)和卷积神经网络(CNN)的实现、训练和评估,帮助学生深入理解两种模型的结构特点、性能差异以及适用场景。学生将从基础模型开始,逐步探索更复杂的网络架构,最终通过对比分析,掌握深度学习模型设计与评估的关键技能。 | |
| 5 | ||
| 6 | 本实验的代码已经可以稳定运行。作业内容包括补全两个模型定义代码(MLP与CNN)以及回答一系列问题。两个补全任务的代码仅需在实验报告中体现即可。 | |
| 7 | ||
| 8 | ||
| 9 | ||
| 10 | ## 实验目的 | |
| 11 | ||
| 12 | 1. 掌握MLP和CNN的基本原理和实现方法 | |
| 13 | 2. 了解不同网络结构对模型性能的影响 | |
| 14 | 3. 学习深度学习模型训练、评估和可视化的方法 | |
| 15 | 4. 通过对比实验,理解不同模型在图像分类任务中的优缺点 | |
| 16 | 5. 培养深度学习模型调优和问题解决的能力 | |
| 17 | ||
| 18 | ## 实验准备 | |
| 19 | ||
| 20 | ### 环境要求 | |
| 21 | ||
| 22 | - Python 3.6+ | |
| 23 | - PyTorch 1.7+ | |
| 24 | - NumPy, Matplotlib | |
| 25 | - scikit-learn (用于评估) | |
| 26 | - 建议使用GPU环境(可选) | |
| 27 | ||
| 28 | 实验环境已经在mo平台中搭建好了,同学们无需自行配置 | |
| 29 | ||
| 30 | ### 实验数据集 | |
| 31 | ||
| 32 | 本实验使用CIFAR-10数据集,包含10个类别的彩色图像,每类6000张,共60000张32×32的图像。 | |
| 33 | ||
| 34 | ### 项目结构 | |
| 35 | ||
| 36 | ``` | |
| 37 | 项目根目录/ | |
| 38 | ├── models/ | |
| 39 | │ ├── __init__.py | |
| 40 | │ ├── mlp.py # MLP模型定义 | |
| 41 | │ └── cnn.py # CNN模型定义 | |
| 42 | ├── utils/ | |
| 43 | │ ├── __init__.py | |
| 44 | │ ├── data_loader.py # 数据加载函数 | |
| 45 | │ └── train_utils.py # 训练和评估函数 | |
| 46 | ├── train_all_notebook.py # 统一训练脚本 | |
| 47 | └── compare_models.py # 模型比较脚本 | |
| 48 | ``` | |
| 49 | ||
| 50 | ## 实验原理 | |
| 51 | ||
| 52 | ### 多层感知机(MLP) | |
| 53 | ||
| 54 | 多层感知机是一种前馈神经网络,由输入层、一个或多个隐藏层和输出层组成。MLP的主要特点是: | |
| 55 | ||
| 56 | 1. 每层神经元与下一层全连接 | |
| 57 | 2. 使用非线性激活函数(如ReLU、Sigmoid等) | |
| 58 | 3. 通过反向传播算法进行训练 | |
| 59 | ||
| 60 | **思考问题1**: MLP在处理图像数据时面临哪些挑战?请从数据结构、参数量和特征提取能力三个角度分析。 | |
| 61 | ||
| 62 | ||
| 63 | ### 卷积神经网络(CNN) | |
| 64 | ||
| 65 | 卷积神经网络是为处理具有网格状拓扑结构的数据而设计的神经网络,主要包含卷积层、池化层和全连接层。CNN的主要特点是: | |
| 66 | ||
| 67 | 1. 局部连接:每个神经元只与输入数据的一个局部区域连接 | |
| 68 | 2. 权重共享:同一特征图的所有神经元共享相同的权重 | |
| 69 | 3. 多层次特征提取:低层检测边缘等简单特征,高层组合这些特征形成更复杂的表示 | |
| 70 | ||
| 71 | **思考问题2**: CNN相比MLP在处理图像时具有哪些优势?解释卷积操作如何保留图像的空间信息。 | |
| 72 | ||
| 73 | ||
| 74 | ## 实验内容 | |
| 75 | ||
| 76 | ### 第一部分:基础MLP模型 | |
| 77 | ||
| 78 | #### 1.1 了解MLP模型结构 | |
| 79 | ||
| 80 | 查看`models/mlp.py`文件,理解三种MLP模型的结构: | |
| 81 | - `SimpleMLP`: 单隐层MLP | |
| 82 | - `DeepMLP`: 多隐层MLP,带有BatchNorm和Dropout | |
| 83 | - `ResidualMLP`: 带有残差连接的MLP | |
| 84 | ||
| 85 | **任务1**: 在下面的代码块中,实现一个具有两个隐藏层的MLP模型。第一隐藏层有128个神经元,第二隐藏层有64个神经元,输出层对应10个类别。使用ReLU激活函数,并添加BatchNorm和Dropout(0.3)。 | |
| 86 | ||
| 87 | ```python | |
| 88 | import torch.nn as nn | |
| 89 | ||
| 90 | class TwoLayerMLP(nn.Module): | |
| 91 | def __init__(self, input_dim=3*32*32): | |
| 92 | super(TwoLayerMLP, self).__init__() | |
| 93 | self.flatten = nn.Flatten() | |
| 94 | # 使用nn.Linear, nn.BatchNorm1d, nn.ReLU和nn.Dropout实现两个隐藏层 | |
| 95 | ||
| 96 | def forward(self, x): | |
| 97 | x = self.flatten(x) | |
| 98 | # 实现前向传播 | |
| 99 | return x | |
| 100 | ``` | |
| 101 | ||
| 102 | #### 1.2 训练和评估MLP模型 | |
| 103 | ||
| 104 | 1. 在 `train.ipynb` 中训练SimpleMLP模型,确保将`model_type`设置为`'simple_mlp'`。 | |
| 105 | ||
| 106 | 2. 观察训练过程中的损失和准确率变化,以及最终在测试集上的性能。 | |
| 107 | ||
| 108 | **分析问题1**: 训练过程中,损失和准确率曲线表现如何?是否出现过拟合或欠拟合?简要分析可能的原因。 | |
| 109 | ||
| 110 | ||
| 111 | 3. 修改参数尝试训练DeepMLP模型,将`model_type`设置为`'deep_mlp'`。 | |
| 112 | ||
| 113 | **分析问题2**: 对比SimpleMLP和DeepMLP的性能,增加网络深度对性能有何影响? | |
| 114 | ||
| 115 | ||
| 116 | ### 第二部分:基础CNN模型 | |
| 117 | ||
| 118 | #### 2.1 了解CNN模型结构 | |
| 119 | ||
| 120 | 查看`models/cnn.py`文件,理解不同CNN模型的结构: | |
| 121 | - `SimpleCNN`: 简单的CNN,包含两个卷积层 | |
| 122 | - `MediumCNN`: 中等复杂度的CNN,带有BatchNorm和Dropout | |
| 123 | - `VGGStyleNet`: VGG风格的CNN,使用连续的3x3卷积 | |
| 124 | - `SimpleResNet`: 简化的ResNet,包含残差连接 | |
| 125 | ||
| 126 | **任务2**: 修改下面的`SimpleCNN`代码,添加一个额外的卷积层和BatchNorm。新的卷积层应该在第二个池化层之后,卷积核数量为64,卷积核大小为3x3。 | |
| 127 | ||
| 128 | ```python | |
| 129 | class EnhancedCNN(nn.Module): | |
| 130 | def __init__(self): | |
| 131 | super(EnhancedCNN, self).__init__() | |
| 132 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) | |
| 133 | self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1) | |
| 134 | # 在这里添加一个新的卷积层、BatchNorm和相应的池化层 | |
| 135 | self.pool = nn.MaxPool2d(2, 2) | |
| 136 | self.flatten = nn.Flatten() | |
| 137 | # 修改全连接层以适应新的特征图尺寸 | |
| 138 | self.relu = nn.ReLU() | |
| 139 | def forward(self, x): | |
| 140 | # 实现包含新卷积层的前向传播 | |
| 141 | return x | |
| 142 | ``` | |
| 143 | ||
| 144 | #### 2.2 训练和评估CNN模型 | |
| 145 | ||
| 146 | 1. 在 `train.ipynb` 中训练SimpleCNN模型,确保将`model_type`设置为`'simple_cnn'`,并将`use_data_augmentation`设置为`True`。 | |
| 147 | ||
| 148 | 2. 观察训练过程和卷积核可视化结果。 | |
| 149 | ||
| 150 | **分析问题3**: 卷积核可视化显示了什么模式?这些模式与图像中的哪些特征可能对应? | |
| 151 | ||
| 152 | ||
| 153 | 3. 继续训练MediumCNN模型,将`model_type`设置为`'medium_cnn'`。 | |
| 154 | ||
| 155 | **分析问题4**: CNN模型相比MLP在CIFAR-10上的性能有何不同?为什么会有这样的差异? | |
| 156 | ||
| 157 | ||
| 158 | ||
| 159 | ### 第三部分:高级CNN架构探索 | |
| 160 | ||
| 161 | #### 3.1 VGG风格和ResNet风格网络架构 | |
| 162 | ||
| 163 | 在本部分中,我们将探索两种影响深远的CNN架构:VGG和ResNet。通过理解这些经典架构的设计理念和特点,可以帮助我们设计更高效的神经网络。 | |
| 164 | ||
| 165 | ##### 3.1.1 VGG架构特点 | |
| 166 | VGG网络(由Visual Geometry Group开发)是一种非常简洁而有效的CNN架构,在2014年ImageNet挑战赛中取得了优异成绩。其主要特点包括: | |
| 167 | ||
| 168 | 1. **简单统一的设计**:使用小尺寸(3×3)卷积核和2×2最大池化层 | |
| 169 | 2. **深度堆叠**:通过堆叠多个相同配置的卷积层增加网络深度 | |
| 170 | 3. **结构规整**:遵循"卷积层组-池化层"的模式,随着网络深入,特征图尺寸减小而通道数增加 | |
| 171 | ||
| 172 | 在我们的实现中,`VGGStyleNet`采用了简化版的VGG设计理念,包含三个卷积块,每个块包含两个卷积层和一个池化层。 | |
| 173 | ||
| 174 | 1. 在 `train.ipynb` 中训练VGGStyleNet模型,确保将`model_type`设置为`'vgg_style'`,并将`use_data_augmentation`设置为`True`。 | |
| 175 | ||
| 176 | 2. 观察网络的训练过程和性能。特别注意其收敛速度和最终准确率。 | |
| 177 | ||
| 178 | ##### 3.1.2 ResNet架构及残差连接 | |
| 179 | ||
| 180 | ResNet(残差网络)由微软研究院的He等人在2015年提出,是解决"深度退化问题"的突破性架构。其核心创新是引入了残差连接(skip connection): | |
| 181 | ||
| 182 | 1. **残差连接**:通过快捷连接(shortcut connection)将输入直接加到输出上,形成恒等映射路径 | |
| 183 | 2. **残差学习**:网络不再直接学习输入到输出的目标映射H(x),而是学习残差F(x)=H(x)-x,残差块的输出为F(x)+x | |
| 184 | 3. **深度扩展**:残差连接有效缓解了梯度消失问题,使得训练非常深的网络成为可能 | |
| 185 | ||
| 186 | 在我们的实现中,`SimpleResNet`使用了基本的残差块,每个残差块包含两个3×3的卷积层和一个跳跃连接。 | |
| 187 | ||
| 188 | 1. 在 `train.ipynb` 中训练SimpleResNet模型,确保将`model_type`设置为`'resnet'`,并将`use_data_augmentation`设置为`True`。 | |
| 189 | ||
| 190 | 2. 观察网络的训练过程和性能,特别是深度对训练稳定性的影响。 | |
| 191 | ||
| 192 | ##### 3.1.3 Bottleneck结构 | |
| 193 | ||
| 194 | 在更深的ResNet变体中,常使用"瓶颈"(Bottleneck)结构来降低计算复杂度: | |
| 195 | ||
| 196 | - 使用1×1卷积降低通道数(降维) | |
| 197 | - 使用3×3卷积进行特征提取 | |
| 198 | - 再使用1×1卷积恢复通道数(升维) | |
| 199 | ||
| 200 | 这种设计大幅减少参数量和计算量,同时保持或提高性能。 | |
| 201 | ||
| 202 | **思考问题3**: 分析Bottleneck结构的优势。为什么1×1卷积在深度CNN中如此重要?它如何帮助控制网络的参数量和计算复杂度? | |
| 203 | ||
| 204 | ||
| 205 | **探索问题1**: 查看`models/cnn.py`中的`SimpleResNet`实现,分析残差连接是如何实现的。如果输入和输出通道数不匹配,代码是如何处理的? | |
| 206 | ||
| 207 | ||
| 208 | ||
| 209 | #### 3.2 模型复杂度分析 | |
| 210 | ||
| 211 | 不同CNN架构在性能和效率之间存在权衡。现在我们将通过分析不同模型的参数量和推理时间来理解这种权衡。 | |
| 212 | ||
| 213 | 1. 运行以下代码来分析各个模型的复杂度: | |
| 214 | ```python | |
| 215 | from models import SimpleMLP, DeepMLP, ResidualMLP, SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet | |
| 216 | from utils import model_complexity | |
| 217 | import torch | |
| 218 | ||
| 219 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') | |
| 220 | ||
| 221 | models = { | |
| 222 | 'SimpleMLP': SimpleMLP(), | |
| 223 | 'DeepMLP': DeepMLP(), | |
| 224 | 'SimpleCNN': SimpleCNN(), | |
| 225 | 'MediumCNN': MediumCNN(), | |
| 226 | 'VGGStyleNet': VGGStyleNet(), | |
| 227 | 'SimpleResNet': SimpleResNet() | |
| 228 | } | |
| 229 | ||
| 230 | results = {} | |
| 231 | for name, model in models.items(): | |
| 232 | print(f"\n分析{name}复杂度:") | |
| 233 | params, time = model_complexity(model, device=device) | |
| 234 | results[name] = {'params': params, 'time': time} | |
| 235 | ``` | |
| 236 | ||
| 237 | 2. 记录并比较各个模型的参数量和推理时间。 | |
| 238 | ||
| 239 | **分析问题5**: VGG风格和ResNet风格网络的性能比较。残差连接带来了哪些优势? | |
| 240 | ||
| 241 | **分析问题6**: 参数量和推理时间如何影响模型的实用性?如何在性能和效率之间找到平衡? | |
| 242 | ||
| 243 | ||
| 244 | #### 3.3 理解高级CNN设计理念 | |
| 245 | ||
| 246 | 随着深度学习的发展,CNN架构设计也变得更加精细和高效。以下是一些重要的设计理念: | |
| 247 | ||
| 248 | 1. **网络深度与宽度平衡**:更深的网络能学习更抽象的特征,但也更难训练;更宽的网络(更多通道)能捕获更多特征,但参数量增加 | |
| 249 | 2. **跳跃连接**:除了ResNet的残差连接,还有DenseNet的密集连接、U-Net的跨层连接等 | |
| 250 | 3. **特征增强**:注意力机制(如SENet的通道注意力)、特征融合等 | |
| 251 | 4. **高效卷积设计**:深度可分离卷积(MobileNet)、组卷积(ShuffleNet)等 | |
| 252 | ||
| 253 | **探索问题2**: 如果你要为移动设备设计一个CNN模型,应该考虑哪些因素来权衡性能和效率?请提出至少三条具体的设计原则。 | |
| 254 | ||
| 255 | ||
| 256 | ### 第四部分:模型比较与分析 | |
| 257 | ||
| 258 | 运行 `compare.py` 来对比不同模型的性能: | |
| 259 | ||
| 260 | **综合分析**: 根据比较结果,分析不同类型模型(MLP和CNN)以及不同复杂度模型的性能差异。考虑以下几点: | |
| 261 | 1. 测试准确率 | |
| 262 | 2. 参数量 | |
| 263 | 3. 推理时间 | |
| 264 | 4. 训练收敛速度 | |
| 265 | 5. 过拟合/欠拟合情况 | |
| 266 | ||
| 267 | ||
| 268 | ## 创新探索任务(选做) | |
| 269 | ||
| 270 | 选择下列一项或多项任务完成: | |
| 271 | ||
| 272 | 1. **模型改进**:对任一模型进行修改和改进,提高其在CIFAR-10上的性能。 | |
| 273 | 2. **可视化分析**:设计更好的可视化方法来解释模型的决策过程。 | |
| 274 | 3. **迁移学习**:探索如何利用预训练模型提高CIFAR-10的分类性能。 | |
| 275 | 4. **对抗性样本**:生成对抗性样本,并研究不同模型对对抗性样本的鲁棒性。 | |
| 276 | 5. **自监督学习**:实现一个简单的自监督学习方法,并评估其效果。 | |
| 277 | ||
| 278 | ## 实验报告要求 | |
| 279 | ||
| 280 | 实验报告应包含以下内容: | |
| 281 | ||
| 282 | 1. 实验目的和背景介绍 | |
| 283 | 2. 实验原理简述 | |
| 284 | 3. 实验过程描述 | |
| 285 | 4. 实现的代码(关键部分,包含详细注释) | |
| 286 | 5. 实验结果和分析(包括填写的所有分析问题和任务) | |
| 287 | 6. 创新探索任务的设计、实现和结果(如果选做) | |
| 288 | 7. 结论和思考 | |
| 289 | 8. 参考文献 | |
| 290 | ||
| 291 | ## 评分标准 | |
| 292 | ||
| 293 | - 基础任务完成度:60% | |
| 294 | - 分析问题深度和准确性:35% | |
| 295 | - 创新探索任务:15% (bonus) | |
| 296 | - 报告质量和表达清晰度:5% | |
| 297 | ||
| 298 | ## 参考资料 | |
| 299 | ||
| 300 | 1. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436-444. | |
| 301 | 2. He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. CVPR. | |
| 302 | 3. Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556. | |
| 303 | 4. PyTorch文档:https://pytorch.org/docs/stable/index.html | |
| 304 | 5. CS231n: Convolutional Neural Networks for Visual Recognition:https://cs231n.github.io/⏎ |
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
| 0 | import torch | |
| 1 | import torch.nn as nn | |
| 2 | import torch.optim as optim | |
| 3 | import matplotlib.pyplot as plt | |
| 4 | import numpy as np | |
| 5 | import time | |
| 6 | import os | |
| 7 | ||
| 8 | # 导入项目中的模块 | |
| 9 | from models import SimpleMLP, DeepMLP, ResidualMLP, SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet | |
| 10 | from utils import load_cifar10, set_seed | |
| 11 | ||
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    If ``optimizer`` is given, the model is switched to training mode and
    updated after every batch. Otherwise the model is switched to eval mode
    and the pass runs with gradient tracking disabled.
    """
    training = optimizer is not None
    if training:
        model.train()
    else:
        model.eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            # Weight the batch loss by the batch size so that a smaller final
            # batch does not skew the epoch average.
            # NOTE(review): assumes `criterion` returns a per-batch mean
            # (e.g. the default reduction of nn.CrossEntropyLoss) - confirm.
            loss_sum += loss.item() * batch_count
            _, predicted = torch.max(outputs, 1)
            seen += batch_count
            correct += (predicted == labels).sum().item()

    # Normalise by the number of samples actually processed. Unlike
    # len(loader.sampler), this stays correct when the loader drops the last
    # partial batch or is driven by a batch sampler.
    return loss_sum / seen, correct / seen


def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler=None,
                num_epochs=10, device=None, save_dir='./checkpoints'):
    """Train ``model`` and record per-epoch metrics.

    Args:
        model: Module to train; it is moved to ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        valid_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Optional LR scheduler, stepped once per epoch after
            validation.
        num_epochs: Number of epochs to run.
        device: Target device; defaults to ``cuda:0`` when available, else CPU.
        save_dir: Directory (created if missing) that receives
            ``<ModelClassName>_best.pth`` whenever validation accuracy improves.

    Returns:
        ``(model, history)``. ``model`` is the same object in its state after
        the last epoch (the best weights are only written to disk).
        ``history`` is a dict with the lists ``train_loss``, ``train_acc``,
        ``val_loss``, ``val_acc`` and ``epoch_times``, one entry per epoch.

    Raises:
        ZeroDivisionError: If a loader yields no samples.
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    start_time = time.time()
    model = model.to(device)

    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [],
        'epoch_times': []
    }

    best_val_acc = 0.0

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(save_dir, exist_ok=True)
    best_path = os.path.join(save_dir, f"{model.__class__.__name__}_best.pth")

    for epoch in range(num_epochs):
        epoch_start = time.time()
        print(f"Epoch {epoch+1}/{num_epochs}")

        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        val_loss, val_acc = _run_epoch(model, valid_loader, criterion, device)

        # Advance the learning-rate schedule once per epoch.
        if scheduler is not None:
            scheduler.step()

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        epoch_time = time.time() - epoch_start
        history['epoch_times'].append(epoch_time)

        # Keep only the weights with the best validation accuracy so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_path)

        print(f"训练损失: {train_loss:.4f}, 训练准确率: {train_acc:.4f}")
        print(f"验证损失: {val_loss:.4f}, 验证准确率: {val_acc:.4f}")
        print(f"本轮用时: {epoch_time:.2f}s")
        print("-" * 50)

    total_time = time.time() - start_time
    print(f"总训练时间: {total_time:.2f}s")

    return model, history
| 120 | ||
def evaluate_model(model, test_loader, criterion, device=None):
    """Evaluate a classifier on a held-out loader.

    Args:
        model: Module called as ``model(inputs)``; the predicted label is the
            argmax of its output over dim 1.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The per-batch
            value is weighted by the batch size, so this presumably expects a
            mean-reduced loss -- confirm if a different reduction is used.
        device: Device to run on. Defaults to ``cuda:0`` when CUDA is available,
            otherwise CPU.

    Returns:
        ``(test_loss, test_acc)``: average loss per evaluated sample and the
        fraction of correctly classified samples.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    model.eval()

    test_loss = 0.0
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Accumulate sample-weighted loss and hit counts
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

    # Normalise by the number of samples actually evaluated. Using
    # len(test_loader.dataset) is wrong whenever the loader only visits part of
    # the dataset (sampler / subset / drop_last) and needlessly requires the
    # loader to expose a ``dataset`` attribute.
    test_loss = test_loss / test_total
    test_acc = test_correct / test_total

    return test_loss, test_acc
| 152 | ||
def model_complexity(model, input_size=(3, 32, 32), batch_size=128, device=None,
                     warmup_runs=10, timed_runs=100):
    """Count trainable parameters and measure the average forward-pass time.

    Args:
        model: Module to analyse. It is moved to ``device`` and switched to
            eval mode (it is NOT switched back afterwards).
        input_size: Per-sample input shape; the timed input has shape
            ``(batch_size, *input_size)``.
        batch_size: Number of samples in the random timing batch.
        device: Device to run on. Defaults to ``cuda:0`` when CUDA is available,
            otherwise CPU.
        warmup_runs: Untimed forward passes executed before timing starts.
        timed_runs: Number of timed forward passes that are averaged.

    Returns:
        ``(num_params, inference_time)``: number of parameters with
        ``requires_grad=True`` and the mean seconds per forward pass of one batch.

    Raises:
        ValueError: If ``timed_runs`` is smaller than 1.
    """
    if timed_runs < 1:
        raise ValueError("timed_runs must be at least 1")

    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    device = torch.device(device)

    model = model.to(device)
    model.eval()

    # Trainable parameters only
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Random input, sampled on CPU and then moved (keeps CPU RNG usage unchanged)
    dummy_input = torch.randn(batch_size, *input_size).to(device)

    def _wait_for_device():
        # CUDA kernels are launched asynchronously; without a synchronize the
        # wall clock would only measure launch overhead, not execution time.
        if device.type == 'cuda':
            torch.cuda.synchronize(device)

    with torch.no_grad():
        # Warm-up
        for _ in range(warmup_runs):
            _ = model(dummy_input)
        _wait_for_device()

        # Timed section; perf_counter is monotonic and high resolution
        start_time = time.perf_counter()
        for _ in range(timed_runs):
            _ = model(dummy_input)
        _wait_for_device()
        end_time = time.perf_counter()

    inference_time = (end_time - start_time) / timed_runs

    return num_params, inference_time
| 182 | ||
def _draw_labeled_bars(ax, names, values, color, title, ylabel, label_offset, label_format):
    """Draw one bar chart on ``ax`` and write each bar's value just above it."""
    bars = ax.bar(names, values, color=color)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + label_offset,
                label_format.format(value), ha='center', va='bottom')


def compare_models(num_epochs=15, batch_size=128, save_dir='./checkpoints'):
    """Train every model variant under the same recipe and compare them.

    For each model this measures its complexity, trains it, evaluates it on the
    test loader, then saves and shows two figures: ``model_comparison.png``
    (test accuracy, parameter count, inference time) and
    ``training_curves_comparison.png`` (loss and validation-accuracy curves).

    Args:
        num_epochs: Training epochs per model; also the cosine schedule period.
        batch_size: Batch size passed to ``load_cifar10``.
        save_dir: Directory handed to ``train_model`` for checkpoints.

    Returns:
        Dict mapping model name to a dict with keys ``'history'``,
        ``'test_acc'``, ``'params'`` and ``'inf_time'``.
    """
    import os  # local import: this block cannot see whether the file imports os

    # Seed RNGs before any model is constructed
    set_seed()

    # Pick the GPU when available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    # Load data
    train_loader, valid_loader, test_loader, classes = load_cifar10(
        use_augmentation=True,
        batch_size=batch_size
    )

    # Models under comparison
    models = {
        'SimpleMLP': SimpleMLP(),
        'DeepMLP': DeepMLP(dropout_rate=0.5, use_bn=True, use_dropout=True),
        'ResidualMLP': ResidualMLP(activation='relu'),
        'SimpleCNN': SimpleCNN(),
        'MediumCNN': MediumCNN(use_bn=True),
        'VGGStyleNet': VGGStyleNet(),
        'SimpleResNet': SimpleResNet(num_blocks=[2, 2, 2])
    }

    # train_model saves the best weights under save_dir; make sure it exists
    # (a no-op when the directory is already there).
    os.makedirs(save_dir, exist_ok=True)

    results = {}

    # Train and evaluate each model
    for model_name, model in models.items():
        print(f"\n开始训练 {model_name}...")

        # Loss, optimizer and LR schedule (one cosine period over the whole run)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

        # Model complexity
        print(f"\n分析 {model_name} 复杂度...")
        num_params, inference_time = model_complexity(model, device=device)

        # Training
        _, history = train_model(
            model, train_loader, valid_loader, criterion, optimizer, scheduler,
            num_epochs=num_epochs, device=device, save_dir=save_dir
        )

        # Test-set evaluation of the model as it is after the last epoch
        test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)

        print(f"{model_name} 测试准确率: {test_acc:.4f}")

        results[model_name] = {
            'history': history,
            'test_acc': test_acc,
            'params': num_params,
            'inf_time': inference_time
        }

    # Collect per-model summary values
    model_names = list(results.keys())
    test_accs = [results[name]['test_acc'] for name in model_names]
    params = [results[name]['params'] / 1e6 for name in model_names]  # millions
    inf_times = [results[name]['inf_time'] * 1000 for name in model_names]  # milliseconds

    # Summary bar charts
    fig, axes = plt.subplots(3, 1, figsize=(15, 15))

    _draw_labeled_bars(axes[0], model_names, test_accs, 'skyblue',
                       'Model Test Accuracy Comparison', 'Accuracy',
                       0.01, '{:.4f}')
    axes[0].set_ylim(0, 1)

    _draw_labeled_bars(axes[1], model_names, params, 'lightgreen',
                       'Model Parameter Count Comparison (millions)', 'Parameters (M)',
                       0.1, '{:.2f}M')

    # The per-bar value is never bound to the name ``time`` here, so the time
    # module is not shadowed inside this function.
    _draw_labeled_bars(axes[2], model_names, inf_times, 'salmon',
                       'Model Inference Time Comparison (ms/batch)', 'Inference time (ms)',
                       0.1, '{:.2f}ms')

    plt.tight_layout()
    plt.savefig('model_comparison.png')
    plt.show()

    # Training-curve comparison
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))

    # Training vs. validation loss
    ax = axes[0]
    for name in model_names:
        ax.plot(results[name]['history']['train_loss'], label=f'{name} Training')
        ax.plot(results[name]['history']['val_loss'], '--', label=f'{name} Validation')
    ax.set_title('Training Loss Comparison')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()

    # Validation accuracy
    ax = axes[1]
    for name in model_names:
        ax.plot(results[name]['history']['val_acc'], label=name)
    ax.set_title('Validation Accuracy Comparison')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy')
    ax.legend()

    plt.tight_layout()
    plt.savefig('training_curves_comparison.png')
    plt.show()

    return results
| 318 | ||
# Script entry point: run the full model comparison only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    results = compare_models()
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
| 0 | <meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html"> |
Binary diff not shown
Binary diff not shown
| 0 | from .mlp import SimpleMLP, DeepMLP, ResidualMLP | |
| 1 | from .cnn import SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet⏎ |
| 0 | import torch | |
| 1 | import torch.nn as nn | |
| 2 | import torch.nn.functional as F | |
| 3 | ||
class SimpleCNN(nn.Module):
    """Small CNN: two conv+pool stages followed by two fully connected layers.

    ``fc1`` expects ``32 * 8 * 8`` features, i.e. 3-channel 32x32 inputs (each
    of the two 2x2 max-pools halves the spatial size).

    Args:
        num_classes: Number of output scores per sample (default 10).
    """
    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: If the flattened feature size does not match ``fc1``
                (e.g. the input is not 32x32).
        """
        x = self.pool(self.relu(self.conv1(x)))  # 16 channels, 16x16 for 32x32 input
        x = self.pool(self.relu(self.conv2(x)))  # 32 channels, 8x8 for 32x32 input
        # Flatten per sample. view(-1, 32*8*8) would silently fold surplus
        # spatial elements into the batch dimension for other input sizes.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x
| 22 | ||
| 23 | ||
class MediumCNN(nn.Module):
    """Mid-sized CNN: two double-conv stages with optional batch norm and dropout.

    Args:
        use_bn: When False every normalisation layer is replaced by ``nn.Identity``.
    """
    def __init__(self, use_bn=True):
        super().__init__()

        def norm(norm_cls, width):
            # Either a real normalisation layer or a pass-through placeholder
            return norm_cls(width) if use_bn else nn.Identity()

        # Stage 1: 3 -> 32 -> 32 channels, then 2x2 max-pool
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = norm(nn.BatchNorm2d, 32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.bn2 = norm(nn.BatchNorm2d, 32)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)

        # Stage 2: 32 -> 64 -> 64 channels, then 2x2 max-pool
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = norm(nn.BatchNorm2d, 64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.bn4 = norm(nn.BatchNorm2d, 64)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.dropout2 = nn.Dropout(0.25)

        # Classifier head; fc1 expects 64 * 8 * 8 flattened features
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.bn5 = norm(nn.BatchNorm1d, 512)
        self.dropout3 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 class scores for each sample in ``x``."""
        stages = (
            (((self.conv1, self.bn1), (self.conv2, self.bn2)), self.pool1, self.dropout1),
            (((self.conv3, self.bn3), (self.conv4, self.bn4)), self.pool2, self.dropout2),
        )
        for conv_norm_pairs, pool, drop in stages:
            for conv, bn in conv_norm_pairs:
                x = self.relu(bn(conv(x)))
            x = drop(pool(x))

        hidden = self.relu(self.bn5(self.fc1(self.flatten(x))))
        return self.fc2(self.dropout3(hidden))
| 65 | ||
| 66 | ||
class VGGStyleNet(nn.Module):
    """VGG-style CNN: three blocks of two 3x3 convolutions, each followed by pooling.

    ``features`` holds the convolutional blocks (64, 128, 256 channels) and
    ``classifier`` the fully connected head, which expects ``256 * 4 * 4``
    flattened features and produces 10 scores.
    """
    def __init__(self):
        super().__init__()

        # Each block: (conv3x3 -> BN -> ReLU) x 2 -> 2x2 max-pool -> dropout
        feature_layers = []
        in_channels = 3
        for width in (64, 128, 256):
            for _ in range(2):
                feature_layers.extend([
                    nn.Conv2d(in_channels, width, kernel_size=3, padding=1),
                    nn.BatchNorm2d(width),
                    nn.ReLU(inplace=True),
                ])
                in_channels = width
            feature_layers.extend([
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.25),
            ])
        self.features = nn.Sequential(*feature_layers)

        # Classifier head
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 10)
        )

        # Overwrite the default initialisation
        self._initialize_weights()

    def forward(self, x):
        """Return the 10 class scores for each sample in ``x``."""
        return self.classifier(self.features(x))

    def _initialize_weights(self):
        """Kaiming-normal (fan_out, relu) for conv/linear weights, zero biases,
        and unit-scale / zero-shift for 2-D batch norm layers."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue

            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(module.bias, 0)
| 135 | ||
| 136 | ||
class ResidualBlock(nn.Module):
    """Convolutional residual block: two 3x3 conv+BN layers plus a skip connection.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the first convolution (and of the projection shortcut).
    """
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # A 1x1 conv + BN projection is used only when the main path changes
        # the spatial size or channel count; otherwise the shortcut is an
        # empty Sequential, which passes its input through unchanged.
        changes_shape = stride != 1 or in_channels != out_channels
        if changes_shape:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        return self.relu(main + self.shortcut(x))
| 166 | ||
| 167 | ||
class SimpleResNet(nn.Module):
    """Reduced ResNet: a 3x3 stem, three residual stages (16/32/64 channels),
    global average pooling and a linear classifier.

    Args:
        num_blocks: Number of residual blocks in each of the three stages.
        num_classes: Number of output scores per sample.
    """
    def __init__(self, num_blocks=[2, 2, 2], num_classes=10):
        super().__init__()
        # Tracks the channel count feeding the next residual block
        self.in_channels = 16

        # Stem
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        # (output channels, stride of the first block) for each stage
        stage_specs = [(16, 1), (32, 2), (64, 2)]
        stages = [
            self._make_layer(width, num_blocks[index], stride=first_stride)
            for index, (width, first_stride) in enumerate(stage_specs)
        ]
        self.layer1, self.layer2, self.layer3 = stages

        # Head
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(ResidualBlock(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``."""
        x = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        pooled = self.avg_pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
| 0 | import torch | |
| 1 | import torch.nn as nn | |
| 2 | import torch.nn.functional as F | |
| 3 | ||
class SimpleMLP(nn.Module):
    """MLP with a single hidden layer.

    Args:
        input_dim: Number of features after flattening one sample.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output scores per sample.
    """
    def __init__(self, input_dim=3*32*32, hidden_dim=512, output_dim=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Flatten each sample, apply the hidden layer with ReLU, return the scores."""
        hidden = self.relu(self.fc1(self.flatten(x)))
        return self.fc2(hidden)
| 18 | ||
| 19 | ||
class DeepMLP(nn.Module):
    """Deep MLP (1024 -> 512 -> 256 -> 10) with optional batch norm and dropout.

    Args:
        input_dim: Number of features after flattening one sample.
        dropout_rate: Drop probability of the shared dropout layer.
        use_bn: When False the batch norm layers are replaced by ``nn.Identity``.
        use_dropout: When False the dropout layer is replaced by ``nn.Identity``.
    """
    def __init__(self, input_dim=3*32*32, dropout_rate=0.5, use_bn=True, use_dropout=True):
        super().__init__()
        self.flatten = nn.Flatten()
        self.use_bn = use_bn
        self.use_dropout = use_dropout

        def norm(width):
            # Batch norm, or a pass-through placeholder when disabled
            return nn.BatchNorm1d(width) if use_bn else nn.Identity()

        # Hidden layer 1
        self.fc1 = nn.Linear(input_dim, 1024)
        self.bn1 = norm(1024)

        # Hidden layer 2
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = norm(512)

        # Hidden layer 3
        self.fc3 = nn.Linear(512, 256)
        self.bn3 = norm(256)

        # Output layer
        self.fc4 = nn.Linear(256, 10)

        # Activation and dropout shared by all hidden layers
        self.relu = nn.ReLU()
        if use_dropout:
            self.dropout = nn.Dropout(dropout_rate)
        else:
            self.dropout = nn.Identity()

    def forward(self, x):
        """Return the 10 class scores for each sample in ``x``."""
        x = self.flatten(x)

        # Every hidden layer is linear -> norm -> ReLU -> dropout
        hidden_layers = ((self.fc1, self.bn1), (self.fc2, self.bn2), (self.fc3, self.bn3))
        for linear, bn in hidden_layers:
            x = self.dropout(self.relu(bn(linear(x))))

        return self.fc4(x)
| 72 | ||
| 73 | ||
class ResidualBlock(nn.Module):
    """Residual block for MLPs: two linear+BN layers plus a skip connection.

    Args:
        input_dim: Feature width of the incoming tensor.
        output_dim: Feature width produced by the block.
        activation: Callable applied after the first norm and after the residual sum.
        dropout_rate: Drop probability applied after the first activation.
    """
    def __init__(self, input_dim, output_dim, activation, dropout_rate=0.5):
        super().__init__()

        self.linear1 = nn.Linear(input_dim, output_dim)
        self.bn1 = nn.BatchNorm1d(output_dim)
        self.linear2 = nn.Linear(output_dim, output_dim)
        self.bn2 = nn.BatchNorm1d(output_dim)

        self.activation = activation
        self.dropout = nn.Dropout(dropout_rate)

        # The skip path needs a linear projection only when the width changes
        if input_dim == output_dim:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Sequential(
                nn.Linear(input_dim, output_dim),
                nn.BatchNorm1d(output_dim)
            )

    def forward(self, x):
        """Return ``activation(main_path(x) + shortcut(x))``."""
        main = self.dropout(self.activation(self.bn1(self.linear1(x))))
        main = self.bn2(self.linear2(main))
        return self.activation(main + self.shortcut(x))
| 110 | ||
| 111 | ||
class ResidualMLP(nn.Module):
    """MLP built from an input layer, a chain of residual blocks and an output layer.

    Args:
        input_dim: Number of features after flattening one sample.
        hidden_dims: Widths of the hidden layers. The first entry is the input
            layer width; each following entry adds one ``ResidualBlock``.
        output_dim: Number of output scores per sample.
        dropout_rate: Drop probability used in the input layer and every block.
        activation: One of ``'relu'``, ``'leaky_relu'``, ``'gelu'``, ``'swish'``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    def __init__(self, input_dim=3*32*32, hidden_dims=[1024, 1024, 1024, 512, 512, 512], output_dim=10,
                 dropout_rate=0.5, activation='relu'):
        super(ResidualMLP, self).__init__()
        self.flatten = nn.Flatten()

        # Work on a private copy so the shared default list is never aliased
        hidden_dims = list(hidden_dims)

        # Select the activation. Every choice must be an nn.Module because it is
        # placed inside nn.Sequential below; the earlier swish lambda made the
        # constructor fail there with a TypeError.
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'leaky_relu':
            self.activation = nn.LeakyReLU(0.1)
        elif activation == 'gelu':
            self.activation = nn.GELU()
        elif activation == 'swish':
            # SiLU computes x * sigmoid(x), i.e. swish
            self.activation = nn.SiLU()
        else:
            raise ValueError(f"不支持的激活函数: {activation}")

        # Input layer
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dims[0]))
        layers.append(nn.BatchNorm1d(hidden_dims[0]))
        layers.append(self.activation)
        layers.append(nn.Dropout(dropout_rate))

        # Hidden layers with residual connections; the same activation module
        # instance is shared by all of them
        for i in range(1, len(hidden_dims)):
            layers.append(ResidualBlock(hidden_dims[i-1], hidden_dims[i], self.activation, dropout_rate))

        # Output layer
        layers.append(nn.Linear(hidden_dims[-1], output_dim))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten each sample and return its ``output_dim`` scores."""
        x = self.flatten(x)
        x = self.layers(x)
        return x
| 0 | { | |
| 1 | "cells": [ | |
| 2 | { | |
| 3 | "cell_type": "code", | |
| 4 | "execution_count": 1, | |
| 5 | "id": "4f3d7435", | |
| 6 | "metadata": {}, | |
| 7 | "outputs": [], | |
| 8 | "source": [ | |
| 9 | "import torch\n", | |
| 10 | "import torch.nn as nn\n", | |
| 11 | "import torch.optim as optim\n", | |
| 12 | "import os\n", | |
| 13 | "\n", | |
| 14 | "# 导入项目中的模块\n", | |
| 15 | "from models import SimpleMLP, DeepMLP, ResidualMLP, SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet\n", | |
| 16 | "from utils import (\n", | |
| 17 | " load_cifar10, \n", | |
| 18 | " set_seed, \n", | |
| 19 | " train_model, \n", | |
| 20 | " evaluate_model, \n", | |
| 21 | " plot_training_history,\n", | |
| 22 | " visualize_model_predictions,\n", | |
| 23 | " visualize_conv_filters,\n", | |
| 24 | " model_complexity\n", | |
| 25 | ")" | |
| 26 | ] | |
| 27 | }, | |
| 28 | { | |
| 29 | "cell_type": "code", | |
| 30 | "execution_count": 4, | |
| 31 | "id": "9c8a2cb3", | |
| 32 | "metadata": { | |
| 33 | "inputHidden": false | |
| 34 | }, | |
| 35 | "outputs": [ | |
| 36 | { | |
| 37 | "name": "stdout", | |
| 38 | "output_type": "stream", | |
| 39 | "text": [ | |
| 40 | "使用设备: cpu\n", | |
| 41 | "Files already downloaded and verified\n", | |
| 42 | "Files already downloaded and verified\n", | |
| 43 | "训练集大小: 45000\n", | |
| 44 | "验证集大小: 5000\n", | |
| 45 | "测试集大小: 10000\n", | |
| 46 | "使用模型: SimpleMLP\n" | |
| 47 | ] | |
| 48 | } | |
| 49 | ], | |
| 50 | "source": [ | |
| 51 | "# 设置参数\n", | |
| 52 | "model_type = 'simple_mlp' # 可选: 'simple_mlp', 'deep_mlp', 'residual_mlp', 'simple_cnn', 'medium_cnn', 'vgg_style', 'resnet'\n", | |
| 53 | "epochs = 20\n", | |
| 54 | "learning_rate = 0.001\n", | |
| 55 | "batch_size = 128\n", | |
| 56 | "use_data_augmentation = True # CNN通常受益于数据增强\n", | |
| 57 | "save_directory = './ck'\n", | |
| 58 | "visualize_filters = True # 是否可视化卷积核(仅对CNN有效)\n", | |
| 59 | "visualize_predictions = True # 是否可视化预测结果\n", | |
| 60 | "\n", | |
| 61 | "# 设置随机种子\n", | |
| 62 | "set_seed()\n", | |
| 63 | "\n", | |
| 64 | "#因为mo平台的提交任务机制,需要手动切换到该文件夹下。\n", | |
| 65 | "os.chdir(os.path.expanduser(\"~/work/Jianhai/lab5\"))\n", | |
| 66 | "\n", | |
| 67 | "# 检查是否有可用的GPU\n", | |
| 68 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n", | |
| 69 | "print(f\"使用设备: {device}\")\n", | |
| 70 | "\n", | |
| 71 | "# 加载数据\n", | |
| 72 | "train_loader, valid_loader, test_loader, classes = load_cifar10(\n", | |
| 73 | " use_augmentation=use_data_augmentation, \n", | |
| 74 | " batch_size=batch_size\n", | |
| 75 | ")\n", | |
| 76 | "\n", | |
| 77 | "# 初始化选择的模型\n", | |
| 78 | "if model_type == 'simple_mlp':\n", | |
| 79 | " model = SimpleMLP()\n", | |
| 80 | " model_name = \"SimpleMLP\"\n", | |
| 81 | "elif model_type == 'deep_mlp':\n", | |
| 82 | " model = DeepMLP(dropout_rate=0.5, use_bn=True, use_dropout=True)\n", | |
| 83 | " model_name = \"DeepMLP\"\n", | |
| 84 | "elif model_type == 'residual_mlp':\n", | |
| 85 | " model = ResidualMLP(activation='relu')\n", | |
| 86 | " model_name = \"ResidualMLP\"\n", | |
| 87 | "elif model_type == 'simple_cnn':\n", | |
| 88 | " model = SimpleCNN()\n", | |
| 89 | " model_name = \"SimpleCNN\"\n", | |
| 90 | "elif model_type == 'medium_cnn':\n", | |
| 91 | " model = MediumCNN(use_bn=True)\n", | |
| 92 | " model_name = \"MediumCNN\"\n", | |
| 93 | "elif model_type == 'vgg_style':\n", | |
| 94 | " model = VGGStyleNet()\n", | |
| 95 | " model_name = \"VGGStyleNet\"\n", | |
| 96 | "else: # resnet\n", | |
| 97 | " model = SimpleResNet(num_blocks=[2, 2, 2])\n", | |
| 98 | " model_name = \"SimpleResNet\"\n", | |
| 99 | "\n", | |
| 100 | "print(f\"使用模型: {model_name}\")" | |
| 101 | ] | |
| 102 | }, | |
| 103 | { | |
| 104 | "cell_type": "code", | |
| 105 | "execution_count": 5, | |
| 106 | "id": "51f4362c", | |
| 107 | "metadata": {}, | |
| 108 | "outputs": [ | |
| 109 | { | |
| 110 | "name": "stdout", | |
| 111 | "output_type": "stream", | |
| 112 | "text": [ | |
| 113 | "\n", | |
| 114 | "分析模型复杂度:\n", | |
| 115 | "参数量: 1,578,506\n", | |
| 116 | "每批次(128个样本)推理时间: 8.96ms\n", | |
| 117 | "Epoch 1/20\n" | |
| 118 | ] | |
| 119 | }, | |
| 120 | { | |
| 121 | "ename": "KeyboardInterrupt", | |
| 122 | "evalue": "", | |
| 123 | "output_type": "error", | |
| 124 | "traceback": [ | |
| 125 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| 126 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| 127 | "\u001b[0;32m/tmp/ipykernel_246/3850660409.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m trained_model, history = train_model(\n\u001b[1;32m 17\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalid_loader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscheduler\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mnum_epochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msave_dir\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msave_directory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m )\n\u001b[1;32m 20\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| 128 | "\u001b[0;32m~/work/Jianhai/lab5/utils/train_utils.py\u001b[0m in \u001b[0;36mtrain_model\u001b[0;34m(model, train_loader, valid_loader, criterion, optimizer, scheduler, num_epochs, device, save_dir)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;31m# 反向传播和优化\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 65\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 66\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| 129 | "\u001b[0;32m~/.virtualenvs/basenv/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| 130 | "\u001b[0;32m~/.virtualenvs/basenv/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 145\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| 131 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| 132 | ] | |
| 133 | } | |
| 134 | ], | |
| 135 | "source": [ | |
| 136 | "# 计算模型复杂度\n", | |
| 137 | "print(\"\\n分析模型复杂度:\")\n", | |
| 138 | "model_complexity(model, device=device)\n", | |
| 139 | "\n", | |
| 140 | "# 定义损失函数和优化器\n", | |
| 141 | "criterion = nn.CrossEntropyLoss()\n", | |
| 142 | "optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n", | |
| 143 | "\n", | |
| 144 | "# 可以添加学习率调度器\n", | |
| 145 | "scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)\n", | |
| 146 | "\n", | |
| 147 | "# 确保checkpoints目录存在\n", | |
| 148 | "os.makedirs(save_directory, exist_ok=True)\n", | |
| 149 | "\n", | |
| 150 | "# 训练模型\n", | |
| 151 | "trained_model, history = train_model(\n", | |
| 152 | " model, train_loader, valid_loader, criterion, optimizer, scheduler,\n", | |
| 153 | " num_epochs=epochs, device=device, save_dir=save_directory\n", | |
| 154 | ")\n", | |
| 155 | "\n", | |
| 156 | "# 绘制训练历史\n", | |
| 157 | "plot_training_history(history, title=f\"{model_name} Training History\")\n", | |
| 158 | "\n", | |
| 159 | "# 在测试集上评估模型\n", | |
| 160 | "print(\"\\n在测试集上评估模型:\")\n", | |
| 161 | "test_loss, test_acc = evaluate_model(trained_model, test_loader, criterion, device, classes)\n", | |
| 162 | "\n", | |
| 163 | "print(f\"{model_name} 最终测试准确率: {test_acc:.4f}\")\n", | |
| 164 | "\n", | |
| 165 | "# 如果是CNN模型并且需要可视化卷积核\n", | |
| 166 | "if visualize_filters and model_type in ['simple_cnn', 'medium_cnn', 'vgg_style', 'resnet']:\n", | |
| 167 | " print(\"\\n可视化卷积核:\")\n", | |
| 168 | " if model_type == 'simple_cnn':\n", | |
| 169 | " visualize_conv_filters(trained_model, 'conv1')\n", | |
| 170 | " elif model_type == 'medium_cnn':\n", | |
| 171 | " visualize_conv_filters(trained_model, 'conv1')\n", | |
| 172 | " elif model_type == 'vgg_style':\n", | |
| 173 | " visualize_conv_filters(trained_model, 'features.0')\n", | |
| 174 | " else: # resnet\n", | |
| 175 | " visualize_conv_filters(trained_model, 'conv1')\n", | |
| 176 | "\n", | |
| 177 | "# 如果需要可视化模型预测\n", | |
| 178 | "if visualize_predictions:\n", | |
| 179 | " print(\"\\n可视化模型预测:\")\n", | |
| 180 | " visualize_model_predictions(trained_model, test_loader, classes, device)\n", | |
| 181 | "\n", | |
| 182 | "print(f\"\\n{model_name}的训练和评估已完成!\")" | |
| 183 | ] | |
| 184 | }, | |
| 185 | { | |
| 186 | "cell_type": "code", | |
| 187 | "execution_count": null, | |
| 188 | "id": "d9379a62", | |
| 189 | "metadata": {}, | |
| 190 | "outputs": [], | |
| 191 | "source": [] | |
| 192 | }, | |
| 193 | { | |
| 194 | "cell_type": "code", | |
| 195 | "execution_count": null, | |
| 196 | "id": "554f08d9", | |
| 197 | "metadata": {}, | |
| 198 | "outputs": [], | |
| 199 | "source": [] | |
| 200 | } | |
| 201 | ], | |
| 202 | "metadata": { | |
| 203 | "kernelspec": { | |
| 204 | "display_name": "Python 3", | |
| 205 | "language": "python", | |
| 206 | "name": "python3" | |
| 207 | }, | |
| 208 | "language_info": { | |
| 209 | "codemirror_mode": { | |
| 210 | "name": "ipython", | |
| 211 | "version": 3 | |
| 212 | }, | |
| 213 | "file_extension": ".py", | |
| 214 | "mimetype": "text/x-python", | |
| 215 | "name": "python", | |
| 216 | "nbconvert_exporter": "python", | |
| 217 | "pygments_lexer": "ipython3", | |
| 218 | "version": "3.7.5" | |
| 219 | } | |
| 220 | }, | |
| 221 | "nbformat": 4, | |
| 222 | "nbformat_minor": 5 | |
| 223 | } |
| 0 | from .data_loader import load_cifar10, visualize_samples, set_seed | |
| 1 | from .train_utils import ( | |
| 2 | train_model, | |
| 3 | evaluate_model, | |
| 4 | plot_training_history, | |
| 5 | visualize_model_predictions, | |
| 6 | visualize_conv_filters, | |
| 7 | model_complexity | |
| 8 | )⏎ |
| 0 | import torch | |
| 1 | import torch.nn as nn | |
| 2 | import torch.optim as optim | |
| 3 | import matplotlib.pyplot as plt | |
| 4 | import numpy as np | |
| 5 | import time | |
| 6 | import os | |
| 7 | ||
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """
    Run a single pass over ``loader``.

    The model is trained when ``optimizer`` is given; otherwise it is only
    evaluated (eval mode, gradients disabled).

    Returns:
        (mean_loss, accuracy), both averaged over the samples actually processed.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by the batch size so a smaller final batch
            # does not skew the epoch average.
            loss_sum += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            num_seen += labels.size(0)
            num_correct += (predicted == labels).sum().item()

    # Divide by the samples that were really seen rather than len(loader.sampler):
    # the two differ when the loader drops the last batch or uses a batch sampler.
    return loss_sum / num_seen, num_correct / num_seen


def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler=None,
                num_epochs=10, device=None, save_dir='./checkpoints'):
    """
    Train a model and record per-epoch performance metrics.

    Args:
        model: Model to train.
        train_loader, valid_loader: Data loaders for training and validation.
        criterion: Loss function (assumed to return the batch-mean loss, since
            the value is re-weighted by the batch size — confirm for custom losses).
        optimizer: Optimizer.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of training epochs.
        device: Device to use; defaults to cuda:0 when available, otherwise CPU.
        save_dir: Directory in which the best checkpoint is stored.

    Returns:
        (model, history): ``model`` carries the weights of the LAST epoch (the
        best weights by validation accuracy are only written to ``save_dir``);
        ``history`` is a dict with the keys 'train_loss', 'train_acc',
        'val_loss', 'val_acc' and 'epoch_times', one entry per epoch.
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    start_time = time.time()
    model = model.to(device)

    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [],
        'epoch_times': []
    }

    best_val_acc = 0.0

    # Make sure the checkpoint directory exists.
    os.makedirs(save_dir, exist_ok=True)
    checkpoint_path = f"{save_dir}/{model.__class__.__name__}_best.pth"

    for epoch in range(num_epochs):
        epoch_start = time.time()
        print(f"Epoch {epoch+1}/{num_epochs}")

        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, valid_loader, criterion, device)

        # Explicit None check: do not rely on the truthiness of a scheduler object.
        if scheduler is not None:
            scheduler.step()

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # The epoch time covers training, validation and the scheduler step,
        # but not the checkpoint write below.
        epoch_time = time.time() - epoch_start
        history['epoch_times'].append(epoch_time)

        # Keep only the weights with the best validation accuracy so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Model saved to {checkpoint_path}")

        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Epoch Time: {epoch_time:.2f}s")
        print("-" * 50)

    total_time = time.time() - start_time
    print(f"Total Training Time: {total_time:.2f}s")

    return model, history
| 132 | ||
def evaluate_model(model, test_loader, criterion, device=None, classes=None):
    """
    Evaluate a model on a test set.

    Args:
        model: Model to evaluate.
        test_loader: Data loader yielding (inputs, labels) batches.
        criterion: Loss function (assumed to return the batch-mean loss — confirm
            for custom losses).
        device: Device to use; defaults to cuda:0 when available, otherwise CPU.
        classes: Optional sequence of class names. When given, a confusion matrix
            is plotted and a classification report is printed (needs sklearn and
            seaborn; a warning is printed if either is missing).

    Returns:
        test_loss: Mean loss over the evaluated samples.
        test_acc: Accuracy over the evaluated samples.
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    model.eval()

    test_loss = 0.0
    test_correct = 0
    test_total = 0

    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

            # Collect ground truth and predictions for the optional report.
            y_true.extend(labels.cpu().tolist())
            y_pred.extend(predicted.cpu().tolist())

    # Normalise by the number of samples actually evaluated. len(loader.dataset)
    # over-counts whenever the loader draws only a subset through a sampler.
    test_loss = test_loss / test_total
    test_acc = test_correct / test_total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

    # Explicit length check so array-like `classes` do not hit ambiguous truthiness.
    if classes is not None and len(classes) > 0:
        try:
            from sklearn.metrics import confusion_matrix, classification_report
            import seaborn as sns

            # Pin the label set: without it, a class absent from both y_true and
            # y_pred shrinks the matrix and misaligns it with the class names.
            class_ids = list(range(len(classes)))

            cm = confusion_matrix(y_true, y_pred, labels=class_ids)
            plt.figure(figsize=(10, 8))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
            plt.xlabel('Predicted')
            plt.ylabel('True')
            plt.title('Confusion Matrix')
            plt.show()

            print("Classification Report:")
            print(classification_report(y_true, y_pred, labels=class_ids, target_names=classes))
        except ImportError:
            print("Warning: sklearn or seaborn not installed, cannot generate confusion matrix and classification report")

    return test_loss, test_acc
| 206 | ||
def plot_training_history(history, title="Training History"):
    """
    Plot loss and accuracy curves side by side, save the figure and show it.

    Args:
        history: Dict with the keys 'train_loss', 'val_loss', 'train_acc' and
            'val_acc', each a per-epoch sequence.
        title: Figure title; also used (spaces replaced by underscores) as the
            name of the PNG file written to the current working directory.
    """
    # (metric name, training-series key, validation-series key) per panel
    panels = (
        ('Loss', 'train_loss', 'val_loss'),
        ('Accuracy', 'train_acc', 'val_acc'),
    )

    plt.figure(figsize=(12, 5))

    for position, (metric, train_key, val_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history[train_key], label=f'Training {metric}')
        plt.plot(history[val_key], label=f'Validation {metric}')
        plt.xlabel('Epoch')
        plt.ylabel(metric)
        plt.title(f'{metric} Curves')
        plt.legend()

    plt.suptitle(title)
    plt.tight_layout()
    output_name = title.replace(' ', '_')
    plt.savefig(f"{output_name}.png")
    plt.show()
| 239 | ||
def visualize_model_predictions(model, test_loader, classes, device=None, num_images=25):
    """
    Show the model's predictions for the first batch of a data loader.

    Each image is titled with its predicted and true class; the title is green
    for a correct prediction and red otherwise.

    Args:
        model: Model to evaluate.
        test_loader: Data loader; only its first batch is used.
        classes: Sequence of class names indexed by label id.
        device: Device to use; defaults to cuda:0 when available, otherwise CPU.
        num_images: Maximum number of images to show (capped by the batch size).
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    model.eval()

    # Fetch a single batch.
    images, labels = next(iter(test_loader))

    with torch.no_grad():
        outputs = model(images.to(device))
        _, preds = torch.max(outputs, 1)

    preds = preds.cpu().numpy()
    labels = labels.cpu().numpy()

    # Size the grid for what will really be drawn; max(1, ...) avoids a 0x0 grid.
    num_shown = min(num_images, len(preds))
    grid_size = max(1, int(np.ceil(np.sqrt(num_shown))))
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so `.flat` works.
    fig, axes = plt.subplots(grid_size, grid_size, figsize=(15, 15), squeeze=False)

    # Undo the normalisation; these are the constants that data_loader's
    # transforms pass to Normalize (assumes the loader used those transforms).
    mean = np.array([0.4914, 0.4822, 0.4465])
    std = np.array([0.2023, 0.1994, 0.2010])

    for i, ax in enumerate(axes.flat):
        # Hide ticks and frame on every cell, including unused trailing ones.
        ax.axis('off')
        if i >= num_shown:
            continue

        img = images[i].cpu().numpy().transpose((1, 2, 0))
        img = np.clip(std * img + mean, 0, 1)

        ax.imshow(img)
        color = "green" if preds[i] == labels[i] else "red"
        ax.set_title(f"Predicted: {classes[preds[i]]}\nTrue: {classes[labels[i]]}", color=color)

    plt.tight_layout()
    plt.show()
| 288 | ||
def visualize_conv_filters(model, layer_name='conv1'):
    """
    Plot up to 16 filters of a convolutional layer in a 4x4 grid.

    Args:
        model: Model containing the layer.
        layer_name: Qualified module name (as reported by ``named_modules``) of
            the ``nn.Conv2d`` layer to visualise.

    Returns:
        None. Prints a message and returns early when no Conv2d layer with that
        name exists.
    """
    model.eval()

    # Look up the weights of the requested layer.
    for name, module in model.named_modules():
        if name == layer_name and isinstance(module, nn.Conv2d):
            weights = module.weight.data.clone().cpu()
            break
    else:
        print(f"Conv layer '{layer_name}' not found")
        return

    # Rescale all filters jointly to [0, 1] for display. When every weight is
    # identical the range is zero; skip the division instead of producing NaNs.
    weights = weights - weights.min()
    value_range = weights.max()
    if value_range > 0:
        weights = weights / value_range

    num_filters = min(16, weights.size(0))
    fig, axes = plt.subplots(4, 4, figsize=(10, 10))
    fig.suptitle(f'Filters from {layer_name} layer')

    for i, ax in enumerate(axes.flat):
        if i < num_filters:
            if weights.size(1) == 3:
                # Three input channels: render the filter as an RGB image.
                ax.imshow(weights[i].permute(1, 2, 0).numpy())
            else:
                # Otherwise show only the first input channel.
                ax.imshow(weights[i, 0].numpy(), cmap='viridis')
        # Hide ticks on every cell, including the unused ones.
        ax.axis('off')

    plt.tight_layout()
    plt.show()
| 329 | ||
def model_complexity(model, input_size=(3, 32, 32), batch_size=128, device=None,
                     warmup_runs=10, timed_runs=100):
    """
    Count trainable parameters and measure the forward-pass time per batch.

    Args:
        model: Model to measure (moved to ``device`` and put into eval mode).
        input_size: Shape of one input sample, without the batch dimension.
        batch_size: Batch size of the random input used for timing.
        device: Device to use (torch.device or string); defaults to cuda:0 when
            available, otherwise CPU.
        warmup_runs: Untimed forward passes executed before the measurement.
        timed_runs: Forward passes averaged for the measurement (must be >= 1).

    Returns:
        num_params: Number of parameters with ``requires_grad=True``.
        inference_time: Mean seconds per forward pass of one batch.

    Raises:
        ValueError: If ``timed_runs`` is smaller than 1.
    """
    if timed_runs < 1:
        raise ValueError("timed_runs must be at least 1")

    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    device = torch.device(device)  # also accept plain strings such as 'cpu'

    model = model.to(device)
    model.eval()

    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    dummy_input = torch.randn(batch_size, *input_size).to(device)

    def _wait_for_device():
        # CUDA kernels are launched asynchronously; without a synchronisation
        # the wall clock would measure launch overhead, not execution time.
        if device.type == 'cuda':
            torch.cuda.synchronize(device)

    with torch.no_grad():
        # Warm-up passes, excluded from the measurement.
        for _ in range(warmup_runs):
            model(dummy_input)

        _wait_for_device()
        start_time = time.perf_counter()
        for _ in range(timed_runs):
            model(dummy_input)
        _wait_for_device()
        end_time = time.perf_counter()

    inference_time = (end_time - start_time) / timed_runs

    print(f"Parameters: {num_params:,}")
    print(f"Inference time per batch ({batch_size} samples): {inference_time*1000:.2f}ms")

    return num_params, inference_time
| 0 | from .data_loader import load_cifar10, visualize_samples, set_seed | |
| 1 | from .train_utils import ( | |
| 2 | train_model, | |
| 3 | evaluate_model, | |
| 4 | plot_training_history, | |
| 5 | visualize_model_predictions, | |
| 6 | visualize_conv_filters, | |
| 7 | model_complexity | |
| 8 | )⏎ |
| 0 | import numpy as np | |
| 1 | import matplotlib.pyplot as plt | |
| 2 | import torch | |
| 3 | from torch.utils.data import DataLoader, SubsetRandomSampler | |
| 4 | from torchvision import datasets, transforms | |
| 5 | ||
# Seed every random number generator in use so experiments are repeatable.
def set_seed(seed=42):
    """
    Seed Python's, NumPy's and PyTorch's random number generators.

    When CUDA is available, all CUDA generators are seeded as well and cuDNN is
    switched to deterministic mode with benchmarking disabled.

    Args:
        seed: Random seed.
    """
    # Local import: this module's header does not import `random`.
    import random

    # Python's own generator was previously left unseeded, so any code drawing
    # from `random` was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # manual_seed_all seeds every CUDA device, including the current one.
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
| 21 | ||
# Per-channel mean and standard deviation passed to Normalize by both pipelines.
_CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
_CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Basic transform: tensor conversion and normalisation only.
basic_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_CIFAR10_MEAN, _CIFAR10_STD),
])

# Transform with data augmentation: random 32x32 crop after 4-pixel padding and
# a random horizontal flip, followed by the same conversion and normalisation.
augmented_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_CIFAR10_MEAN, _CIFAR10_STD),
])
| 35 | ||
def load_cifar10(use_augmentation=False, valid_size=0.1, batch_size=128, num_workers=2,
                 data_dir='./data'):
    """
    Load CIFAR-10 and split a validation set off the training set.

    Args:
        use_augmentation: Whether to apply data augmentation to the TRAINING
            samples. Validation and test samples always use the basic transform.
        valid_size: Fraction of the training set held out for validation.
        batch_size: Batch size of all three loaders.
        num_workers: Number of worker processes per data loader.
        data_dir: Directory in which the dataset is stored / downloaded.

    Returns:
        train_loader, valid_loader, test_loader: Data loaders.
        classes: Tuple of class names.
    """
    train_transform = augmented_transform if use_augmentation else basic_transform

    # Training data (downloaded on first use).
    train_dataset = datasets.CIFAR10(
        root=data_dir,
        train=True,
        download=True,
        transform=train_transform
    )

    # The validation split is drawn from the same images but must not be randomly
    # cropped or flipped, otherwise validation metrics are noisy and not
    # comparable with test metrics. A second, un-augmented view is only needed
    # when augmentation is on; the files already exist after the call above.
    if use_augmentation:
        valid_dataset = datasets.CIFAR10(
            root=data_dir,
            train=True,
            download=False,
            transform=basic_transform
        )
    else:
        valid_dataset = train_dataset

    # Test data.
    test_dataset = datasets.CIFAR10(
        root=data_dir,
        train=False,
        download=True,
        transform=basic_transform
    )

    # Shuffle the indices with NumPy's global RNG (call set_seed first for a
    # reproducible split) and hold out the first `split` of them.
    num_train = len(train_dataset)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(valid_size * num_train)
    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers
    )
    valid_loader = DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers
    )
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    print(f"训练集大小: {len(train_idx)}")
    print(f"验证集大小: {len(valid_idx)}")
    print(f"测试集大小: {len(test_dataset)}")

    # Class names indexed by label id.
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    return train_loader, valid_loader, test_loader, classes
| 99 | ||
def visualize_samples(dataloader, classes, num_samples=5):
    """
    Show example images per class, taken from the first batch of a loader.

    The figure has one row per class and ``num_samples`` columns. A class that
    occurs fewer than ``num_samples`` times in the batch leaves blank cells.

    Args:
        dataloader: Data loader; only its first batch is used.
        classes: Sequence of class names indexed by label id.
        num_samples: Maximum number of samples shown per class.
    """
    # Fetch a single batch.
    images, labels = next(iter(dataloader))
    num_classes = len(classes)

    # For every class, remember the batch positions of its first `num_samples`
    # occurrences (in batch order).
    positions_by_class = {class_idx: [] for class_idx in range(num_classes)}
    for position, label in enumerate(labels.tolist()):
        bucket = positions_by_class[label]
        if len(bucket) < num_samples:
            bucket.append(position)

    # One row per class (not a fixed 10); squeeze=False keeps `axes` 2-D even
    # when there is a single row or column.
    fig, axes = plt.subplots(num_classes, num_samples, figsize=(15, 20), squeeze=False)
    fig.subplots_adjust(hspace=0.5)

    # Hide ticks on every cell up front so unfilled cells stay blank.
    for ax in axes.flat:
        ax.axis('off')

    # Constants for undoing the normalisation applied by this module's transforms.
    mean = np.array([0.4914, 0.4822, 0.4465])
    std = np.array([0.2023, 0.1994, 0.2010])

    for class_idx, positions in positions_by_class.items():
        for column, position in enumerate(positions):
            img = images[position].numpy().transpose((1, 2, 0))
            img = np.clip(std * img + mean, 0, 1)

            ax = axes[class_idx, column]
            ax.imshow(img)
            ax.set_title(classes[class_idx])

    plt.tight_layout()
    plt.show()
| 155 | ||
if __name__ == "__main__":
    # Quick visual check of the data pipeline when the module is run directly.

    # Fix the seeds first so the split and the displayed batch are repeatable.
    set_seed()

    # Report the available device (informational only; nothing below uses it).
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(f"使用设备: {device}")

    # Build the loaders without augmentation and show a few training samples.
    train_loader, valid_loader, test_loader, classes = load_cifar10(use_augmentation=False)
    visualize_samples(train_loader, classes, num_samples=5)
| 0 | import torch | |
| 1 | import torch.nn as nn | |
| 2 | import torch.optim as optim | |
| 3 | import matplotlib.pyplot as plt | |
| 4 | import numpy as np | |
| 5 | import time | |
| 6 | import os | |
| 7 | ||
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """
    Run a single pass over ``loader``.

    The model is trained when ``optimizer`` is given; otherwise it is only
    evaluated (eval mode, gradients disabled).

    Returns:
        (mean_loss, accuracy), both averaged over the samples actually processed.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by the batch size so a smaller final batch
            # does not skew the epoch average.
            loss_sum += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            num_seen += labels.size(0)
            num_correct += (predicted == labels).sum().item()

    # Divide by the samples that were really seen rather than len(loader.sampler):
    # the two differ when the loader drops the last batch or uses a batch sampler.
    return loss_sum / num_seen, num_correct / num_seen


def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler=None,
                num_epochs=10, device=None, save_dir='./checkpoints'):
    """
    Train a model and record per-epoch performance metrics.

    Args:
        model: Model to train.
        train_loader, valid_loader: Data loaders for training and validation.
        criterion: Loss function (assumed to return the batch-mean loss, since
            the value is re-weighted by the batch size — confirm for custom losses).
        optimizer: Optimizer.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of training epochs.
        device: Device to use; defaults to cuda:0 when available, otherwise CPU.
        save_dir: Directory in which the best checkpoint is stored.

    Returns:
        (model, history): ``model`` carries the weights of the LAST epoch (the
        best weights by validation accuracy are only written to ``save_dir``);
        ``history`` is a dict with the keys 'train_loss', 'train_acc',
        'val_loss', 'val_acc' and 'epoch_times', one entry per epoch.
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    start_time = time.time()
    model = model.to(device)

    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [],
        'epoch_times': []
    }

    best_val_acc = 0.0

    # Make sure the checkpoint directory exists.
    os.makedirs(save_dir, exist_ok=True)
    checkpoint_path = f"{save_dir}/{model.__class__.__name__}_best.pth"

    for epoch in range(num_epochs):
        epoch_start = time.time()
        print(f"Epoch {epoch+1}/{num_epochs}")

        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, valid_loader, criterion, device)

        # Explicit None check: do not rely on the truthiness of a scheduler object.
        if scheduler is not None:
            scheduler.step()

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # The epoch time covers training, validation and the scheduler step,
        # but not the checkpoint write below.
        epoch_time = time.time() - epoch_start
        history['epoch_times'].append(epoch_time)

        # Keep only the weights with the best validation accuracy so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"模型已保存到 {checkpoint_path}")

        print(f"训练损失: {train_loss:.4f}, 训练准确率: {train_acc:.4f}")
        print(f"验证损失: {val_loss:.4f}, 验证准确率: {val_acc:.4f}")
        print(f"本轮用时: {epoch_time:.2f}s")
        print("-" * 50)

    total_time = time.time() - start_time
    print(f"总训练时间: {total_time:.2f}s")

    return model, history
| 132 | ||
def evaluate_model(model, test_loader, criterion, device=None, classes=None):
    """
    Evaluate a model on a test set.

    Args:
        model: Model to evaluate.
        test_loader: Data loader yielding (inputs, labels) batches.
        criterion: Loss function (assumed to return the batch-mean loss — confirm
            for custom losses).
        device: Device to use; defaults to cuda:0 when available, otherwise CPU.
        classes: Optional sequence of class names. When given, a confusion matrix
            is plotted and a classification report is printed (needs sklearn and
            seaborn; a warning is printed if either is missing).

    Returns:
        test_loss: Mean loss over the evaluated samples.
        test_acc: Accuracy over the evaluated samples.
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    model.eval()

    test_loss = 0.0
    test_correct = 0
    test_total = 0

    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

            # Collect ground truth and predictions for the optional report.
            y_true.extend(labels.cpu().tolist())
            y_pred.extend(predicted.cpu().tolist())

    # Normalise by the number of samples actually evaluated. len(loader.dataset)
    # over-counts whenever the loader draws only a subset through a sampler.
    test_loss = test_loss / test_total
    test_acc = test_correct / test_total

    print(f"测试损失: {test_loss:.4f}, 测试准确率: {test_acc:.4f}")

    # Explicit length check so array-like `classes` do not hit ambiguous truthiness.
    if classes is not None and len(classes) > 0:
        try:
            from sklearn.metrics import confusion_matrix, classification_report
            import seaborn as sns

            # Pin the label set: without it, a class absent from both y_true and
            # y_pred shrinks the matrix and misaligns it with the class names.
            class_ids = list(range(len(classes)))

            cm = confusion_matrix(y_true, y_pred, labels=class_ids)
            plt.figure(figsize=(10, 8))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
            # Figure text is kept in English.
            plt.xlabel('Predicted')
            plt.ylabel('True')
            plt.title('Confusion Matrix')
            plt.show()

            print("分类报告:")
            print(classification_report(y_true, y_pred, labels=class_ids, target_names=classes))
        except ImportError:
            print("警告: 未安装sklearn或seaborn,无法生成混淆矩阵和分类报告")

    return test_loss, test_acc
| 206 | ||
def plot_training_history(history, title="Training History"):
    """
    Plot loss and accuracy curves side by side, save the figure and show it.

    Args:
        history: Dict with the keys 'train_loss', 'val_loss', 'train_acc' and
            'val_acc', each a per-epoch sequence.
        title: Figure title; also used (spaces replaced by underscores) as the
            name of the PNG file written to the current working directory.
    """
    # (metric name, training-series key, validation-series key) per panel;
    # all figure text is in English.
    panels = (
        ('Loss', 'train_loss', 'val_loss'),
        ('Accuracy', 'train_acc', 'val_acc'),
    )

    plt.figure(figsize=(12, 5))

    for position, (metric, train_key, val_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history[train_key], label=f'Training {metric}')
        plt.plot(history[val_key], label=f'Validation {metric}')
        plt.xlabel('Epochs')
        plt.ylabel(metric)
        plt.title(f'{metric} Curves')
        plt.legend()

    plt.suptitle(title)
    plt.tight_layout()
    output_name = title.replace(' ', '_')
    plt.savefig(f"{output_name}.png")
    plt.show()
| 239 | ||
def visualize_model_predictions(model, test_loader, classes, device=None, num_images=25):
    """
    Show the model's predictions for the first batch of a data loader.

    Each image is titled with its predicted and true class; the title is green
    for a correct prediction and red otherwise.

    Args:
        model: Model to evaluate.
        test_loader: Data loader; only its first batch is used.
        classes: Sequence of class names indexed by label id.
        device: Device to use; defaults to cuda:0 when available, otherwise CPU.
        num_images: Maximum number of images to show (capped by the batch size).
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    model.eval()

    # Fetch a single batch.
    images, labels = next(iter(test_loader))

    with torch.no_grad():
        outputs = model(images.to(device))
        _, preds = torch.max(outputs, 1)

    preds = preds.cpu().numpy()
    labels = labels.cpu().numpy()

    # Size the grid for what will really be drawn; max(1, ...) avoids a 0x0 grid.
    num_shown = min(num_images, len(preds))
    grid_size = max(1, int(np.ceil(np.sqrt(num_shown))))
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so `.flat` works.
    fig, axes = plt.subplots(grid_size, grid_size, figsize=(15, 15), squeeze=False)

    # Undo the normalisation; these are the constants that data_loader's
    # transforms pass to Normalize (assumes the loader used those transforms).
    mean = np.array([0.4914, 0.4822, 0.4465])
    std = np.array([0.2023, 0.1994, 0.2010])

    for i, ax in enumerate(axes.flat):
        # Hide ticks and frame on every cell, including unused trailing ones.
        ax.axis('off')
        if i >= num_shown:
            continue

        img = images[i].cpu().numpy().transpose((1, 2, 0))
        img = np.clip(std * img + mean, 0, 1)

        ax.imshow(img)
        color = "green" if preds[i] == labels[i] else "red"
        # Title text is kept in English.
        ax.set_title(f"Predicted: {classes[preds[i]]}\nTrue: {classes[labels[i]]}", color=color)

    plt.tight_layout()
    plt.show()
| 288 | ||
def visualize_conv_filters(model, layer_name='conv1'):
    """
    Plot up to 16 filters of a convolutional layer in a 4x4 grid.

    Args:
        model: Model containing the layer.
        layer_name: Qualified module name (as reported by ``named_modules``) of
            the ``nn.Conv2d`` layer to visualise.

    Returns:
        None. Prints a message and returns early when no Conv2d layer with that
        name exists.
    """
    model.eval()

    # Look up the weights of the requested layer.
    for name, module in model.named_modules():
        if name == layer_name and isinstance(module, nn.Conv2d):
            weights = module.weight.data.clone().cpu()
            break
    else:
        print(f"未找到名为 {layer_name} 的卷积层")
        return

    # Rescale all filters jointly to [0, 1] for display. When every weight is
    # identical the range is zero; skip the division instead of producing NaNs.
    weights = weights - weights.min()
    value_range = weights.max()
    if value_range > 0:
        weights = weights / value_range

    num_filters = min(16, weights.size(0))
    fig, axes = plt.subplots(4, 4, figsize=(10, 10))
    # Figure title is kept in English.
    fig.suptitle(f'Conv Layer {layer_name} Filters')

    for i, ax in enumerate(axes.flat):
        if i < num_filters:
            if weights.size(1) == 3:
                # Three input channels: render the filter as an RGB image.
                ax.imshow(weights[i].permute(1, 2, 0).numpy())
            else:
                # Otherwise show only the first input channel.
                ax.imshow(weights[i, 0].numpy(), cmap='viridis')
        # Hide ticks on every cell, including the unused ones.
        ax.axis('off')

    plt.tight_layout()
    plt.show()
| 329 | ||
def model_complexity(model, input_size=(3, 32, 32), batch_size=128, device=None,
                     warmup_runs=10, timed_runs=100):
    """
    Count trainable parameters and measure the forward-pass time per batch.

    Args:
        model: Model to measure (moved to ``device`` and put into eval mode).
        input_size: Shape of one input sample, without the batch dimension.
        batch_size: Batch size of the random input used for timing.
        device: Device to use (torch.device or string); defaults to cuda:0 when
            available, otherwise CPU.
        warmup_runs: Untimed forward passes executed before the measurement.
        timed_runs: Forward passes averaged for the measurement (must be >= 1).

    Returns:
        num_params: Number of parameters with ``requires_grad=True``.
        inference_time: Mean seconds per forward pass of one batch.

    Raises:
        ValueError: If ``timed_runs`` is smaller than 1.
    """
    if timed_runs < 1:
        raise ValueError("timed_runs must be at least 1")

    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    device = torch.device(device)  # also accept plain strings such as 'cpu'

    model = model.to(device)
    model.eval()

    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    dummy_input = torch.randn(batch_size, *input_size).to(device)

    def _wait_for_device():
        # CUDA kernels are launched asynchronously; without a synchronisation
        # the wall clock would measure launch overhead, not execution time.
        if device.type == 'cuda':
            torch.cuda.synchronize(device)

    with torch.no_grad():
        # Warm-up passes, excluded from the measurement.
        for _ in range(warmup_runs):
            model(dummy_input)

        _wait_for_device()
        start_time = time.perf_counter()
        for _ in range(timed_runs):
            model(dummy_input)
        _wait_for_device()
        end_time = time.perf_counter()

    inference_time = (end_time - start_time) / timed_runs

    print(f"参数量: {num_params:,}")
    print(f"每批次({batch_size}个样本)推理时间: {inference_time*1000:.2f}ms")

    return num_params, inference_time
| 0 | # 深度学习模型实验指导:MLP与CNN模型对比分析 | |
| 1 | ||
| 2 | ## 实验概述 | |
| 3 | ||
| 4 | 本实验旨在通过对多层感知机(MLP)和卷积神经网络(CNN)的实现、训练和评估,帮助学生深入理解两种模型的结构特点、性能差异以及适用场景。学生将从基础模型开始,逐步探索更复杂的网络架构,最终通过对比分析,掌握深度学习模型设计与评估的关键技能。 | |
| 5 | ||
| 6 | 本实验的代码已经可以稳定运行。作业内容包括补全两个模型定义代码(MLP与CNN)以及回答一系列问题。两个补全任务的代码仅需在实验报告中体现即可。 | |
| 7 | ||
| 8 | ||
| 9 | ||
| 10 | ## 实验目的 | |
| 11 | ||
| 12 | 1. 掌握MLP和CNN的基本原理和实现方法 | |
| 13 | 2. 了解不同网络结构对模型性能的影响 | |
| 14 | 3. 学习深度学习模型训练、评估和可视化的方法 | |
| 15 | 4. 通过对比实验,理解不同模型在图像分类任务中的优缺点 | |
| 16 | 5. 培养深度学习模型调优和问题解决的能力 | |
| 17 | ||
| 18 | ## 实验准备 | |
| 19 | ||
| 20 | ### 环境要求 | |
| 21 | ||
| 22 | - Python 3.6+ | |
| 23 | - PyTorch 1.7+ | |
| 24 | - NumPy, Matplotlib | |
| 25 | - scikit-learn (用于评估) | |
| 26 | - 建议使用GPU环境(可选) | |
| 27 | ||
| 28 | 实验环境已经在Mo平台中搭建好了,同学们无需自行配置。 | |
| 29 | ||
| 30 | ### 实验数据集 | |
| 31 | ||
| 32 | 本实验使用CIFAR-10数据集,包含10个类别的彩色图像,每类6000张,共60000张32×32的图像。 | |
| 33 | ||
| 34 | ### 项目结构 | |
| 35 | ||
| 36 | ``` | |
| 37 | 项目根目录/ | |
| 38 | ├── models/ | |
| 39 | │ ├── __init__.py | |
| 40 | │ ├── mlp.py # MLP模型定义 | |
| 41 | │ └── cnn.py # CNN模型定义 | |
| 42 | ├── utils/ | |
| 43 | │ ├── __init__.py | |
| 44 | │ ├── data_loader.py # 数据加载函数 | |
| 45 | │ └── train_utils.py # 训练和评估函数 | |
| 46 | ├── train_all_notebook.py # 统一训练脚本 | |
| 47 | └── compare_models.py # 模型比较脚本 | |
| 48 | ``` | |
| 49 | ||
| 50 | ## 实验原理 | |
| 51 | ||
| 52 | ### 多层感知机(MLP) | |
| 53 | ||
| 54 | 多层感知机是一种前馈神经网络,由输入层、一个或多个隐藏层和输出层组成。MLP的主要特点是: | |
| 55 | ||
| 56 | 1. 每层神经元与下一层全连接 | |
| 57 | 2. 使用非线性激活函数(如ReLU、Sigmoid等) | |
| 58 | 3. 通过反向传播算法进行训练 | |
| 59 | ||
| 60 | **思考问题1**: MLP在处理图像数据时面临哪些挑战?请从数据结构、参数量和特征提取能力三个角度分析。 | |
| 61 | ||
| 62 | ||
| 63 | ### 卷积神经网络(CNN) | |
| 64 | ||
| 65 | 卷积神经网络是为处理具有网格状拓扑结构的数据而设计的神经网络,主要包含卷积层、池化层和全连接层。CNN的主要特点是: | |
| 66 | ||
| 67 | 1. 局部连接:每个神经元只与输入数据的一个局部区域连接 | |
| 68 | 2. 权重共享:同一特征图的所有神经元共享相同的权重 | |
| 69 | 3. 多层次特征提取:低层检测边缘等简单特征,高层组合这些特征形成更复杂的表示 | |
| 70 | ||
| 71 | **思考问题2**: CNN相比MLP在处理图像时具有哪些优势?解释卷积操作如何保留图像的空间信息。 | |
| 72 | ||
| 73 | ||
| 74 | ## 实验内容 | |
| 75 | ||
| 76 | ### 第一部分:基础MLP模型 | |
| 77 | ||
| 78 | #### 1.1 了解MLP模型结构 | |
| 79 | ||
| 80 | 查看`models/mlp.py`文件,理解三种MLP模型的结构: | |
| 81 | - `SimpleMLP`: 单隐层MLP | |
| 82 | - `DeepMLP`: 多隐层MLP,带有BatchNorm和Dropout | |
| 83 | - `ResidualMLP`: 带有残差连接的MLP | |
| 84 | ||
| 85 | **任务1**: 在下面的代码块中,实现一个具有两个隐藏层的MLP模型。第一隐藏层有128个神经元,第二隐藏层有64个神经元,输出层对应10个类别。使用ReLU激活函数,并添加BatchNorm和Dropout(0.3)。 | |
| 86 | ||
| 87 | ```python | |
| 88 | import torch.nn as nn | |
| 89 | ||
| 90 | class TwoLayerMLP(nn.Module): | |
| 91 | def __init__(self, input_dim=3*32*32): | |
| 92 | super(TwoLayerMLP, self).__init__() | |
| 93 | self.flatten = nn.Flatten() | |
| 94 | # 使用nn.Linear, nn.BatchNorm1d, nn.ReLU和nn.Dropout实现两个隐藏层 | |
| 95 | ||
| 96 | def forward(self, x): | |
| 97 | x = self.flatten(x) | |
| 98 | # 实现前向传播 | |
| 99 | return x | |
| 100 | ``` | |
| 101 | ||
| 102 | #### 1.2 训练和评估MLP模型 | |
| 103 | ||
| 104 | 1. 在 `train.ipynb` 中训练SimpleMLP模型,确保将`model_type`设置为`'simple_mlp'`。 | |
| 105 | ||
| 106 | 2. 观察训练过程中的损失和准确率变化,以及最终在测试集上的性能。 | |
| 107 | ||
| 108 | **分析问题1**: 训练过程中,损失和准确率曲线表现如何?是否出现过拟合或欠拟合?简要分析可能的原因。 | |
| 109 | ||
| 110 | ||
| 111 | 3. 修改参数尝试训练DeepMLP模型,将`model_type`设置为`'deep_mlp'`。 | |
| 112 | ||
| 113 | **分析问题2**: 对比SimpleMLP和DeepMLP的性能,增加网络深度对性能有何影响? | |
| 114 | ||
| 115 | ||
| 116 | ### 第二部分:基础CNN模型 | |
| 117 | ||
| 118 | #### 2.1 了解CNN模型结构 | |
| 119 | ||
| 120 | 查看`models/cnn.py`文件,理解不同CNN模型的结构: | |
| 121 | - `SimpleCNN`: 简单的CNN,包含两个卷积层 | |
| 122 | - `MediumCNN`: 中等复杂度的CNN,带有BatchNorm和Dropout | |
| 123 | - `VGGStyleNet`: VGG风格的CNN,使用连续的3x3卷积 | |
| 124 | - `SimpleResNet`: 简化的ResNet,包含残差连接 | |
| 125 | ||
| 126 | **任务2**: 修改下面的`SimpleCNN`代码,添加一个额外的卷积层和BatchNorm。新的卷积层应该在第二个池化层之后,卷积核数量为64,卷积核大小为3x3。 | |
| 127 | ||
| 128 | ```python | |
| 129 | class EnhancedCNN(nn.Module): | |
| 130 | def __init__(self): | |
| 131 | super(EnhancedCNN, self).__init__() | |
| 132 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) | |
| 133 | self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1) | |
| 134 | # 在这里添加一个新的卷积层、BatchNorm和相应的池化层 | |
| 135 | self.pool = nn.MaxPool2d(2, 2) | |
| 136 | self.flatten = nn.Flatten() | |
| 137 | # 修改全连接层以适应新的特征图尺寸 | |
| 138 | self.relu = nn.ReLU() | |
| 139 | def forward(self, x): | |
| 140 | # 实现包含新卷积层的前向传播 | |
| 141 | return x | |
| 142 | ``` | |
| 143 | ||
| 144 | #### 2.2 训练和评估CNN模型 | |
| 145 | ||
| 146 | 1. 在 `train.ipynb` 中训练SimpleCNN模型,确保将`model_type`设置为`'simple_cnn'`,并将`use_data_augmentation`设置为`True`。 | |
| 147 | ||
| 148 | 2. 观察训练过程和卷积核可视化结果。 | |
| 149 | ||
| 150 | **分析问题3**: 卷积核可视化显示了什么模式?这些模式与图像中的哪些特征可能对应? | |
| 151 | ||
| 152 | ||
| 153 | 3. 继续训练MediumCNN模型,将`model_type`设置为`'medium_cnn'`。 | |
| 154 | ||
| 155 | **分析问题4**: CNN模型相比MLP在CIFAR-10上的性能有何不同?为什么会有这样的差异? | |
| 156 | ||
| 157 | ||
| 158 | ||
| 159 | ### 第三部分:高级CNN架构探索 | |
| 160 | ||
| 161 | #### 3.1 VGG风格和ResNet风格网络架构 | |
| 162 | ||
| 163 | 在本部分中,我们将探索两种影响深远的CNN架构:VGG和ResNet。通过理解这些经典架构的设计理念和特点,可以帮助我们设计更高效的神经网络。 | |
| 164 | ||
| 165 | ##### 3.1.1 VGG架构特点 | |
| 166 | VGG网络(由Visual Geometry Group开发)是一种非常简洁而有效的CNN架构,在2014年ImageNet挑战赛中取得了优异成绩。其主要特点包括: | |
| 167 | ||
| 168 | 1. **简单统一的设计**:使用小尺寸(3×3)卷积核和2×2最大池化层 | |
| 169 | 2. **深度堆叠**:通过堆叠多个相同配置的卷积层增加网络深度 | |
| 170 | 3. **结构规整**:遵循"卷积层组-池化层"的模式,随着网络深入,特征图尺寸减小而通道数增加 | |
| 171 | ||
| 172 | 在我们的实现中,`VGGStyleNet`采用了简化版的VGG设计理念,包含三个卷积块,每个块包含两个卷积层和一个池化层。 | |
| 173 | ||
| 174 | 1. 在 `train.ipynb` 中训练VGGStyleNet模型,确保将`model_type`设置为`'vgg_style'`,并将`use_data_augmentation`设置为`True`。 | |
| 175 | ||
| 176 | 2. 观察网络的训练过程和性能。特别注意其收敛速度和最终准确率。 | |
| 177 | ||
| 178 | ##### 3.1.2 ResNet架构及残差连接 | |
| 179 | ||
| 180 | ResNet(残差网络)由微软研究院的He等人在2015年提出,是解决"深度退化问题"的突破性架构。其核心创新是引入了残差连接(skip connection): | |
| 181 | ||
| 182 | 1. **残差连接**:通过快捷连接(shortcut connection)将输入直接加到输出上,形成恒等映射路径 | |
| 183 | 2. **残差学习**:网络不再直接学习输入到输出的目标映射H(x),而是学习残差F(x)=H(x)-x,最终输出为F(x)+x | |
| 184 | 3. **深度扩展**:残差连接有效缓解了梯度消失问题,使得训练非常深的网络成为可能 | |
| 185 | ||
| 186 | 在我们的实现中,`SimpleResNet`使用了基本的残差块,每个残差块包含两个3×3的卷积层和一个跳跃连接。 | |
| 187 | ||
| 188 | 1. 在 `train.ipynb` 中训练SimpleResNet模型,确保将`model_type`设置为`'resnet'`,并将`use_data_augmentation`设置为`True`。 | |
| 189 | ||
| 190 | 2. 观察网络的训练过程和性能,特别是深度对训练稳定性的影响。 | |
| 191 | ||
| 192 | ##### 3.1.3 Bottleneck结构 | |
| 193 | ||
| 194 | 在更深的ResNet变体中,常使用"瓶颈"(Bottleneck)结构来降低计算复杂度: | |
| 195 | ||
| 196 | - 使用1×1卷积降低通道数(降维) | |
| 197 | - 使用3×3卷积进行特征提取 | |
| 198 | - 再使用1×1卷积恢复通道数(升维) | |
| 199 | ||
| 200 | 这种设计大幅减少参数量和计算量,同时保持或提高性能。 | |
| 201 | ||
| 202 | **思考问题3**: 分析Bottleneck结构的优势。为什么1×1卷积在深度CNN中如此重要?它如何帮助控制网络的参数量和计算复杂度? | |
| 203 | ||
| 204 | ||
| 205 | **探索问题1**: 查看`models/cnn.py`中的`SimpleResNet`实现,分析残差连接是如何实现的。如果输入和输出通道数不匹配,代码是如何处理的? | |
| 206 | ||
| 207 | ||
| 208 | ||
| 209 | #### 3.2 模型复杂度分析 | |
| 210 | ||
| 211 | 不同CNN架构在性能和效率之间存在权衡。现在我们将通过分析不同模型的参数量和推理时间来理解这种权衡。 | |
| 212 | ||
| 213 | 1. 运行以下代码来分析各个模型的复杂度: | |
| 214 | ```python | |
| 215 | from models import SimpleMLP, DeepMLP, ResidualMLP, SimpleCNN, MediumCNN, VGGStyleNet, SimpleResNet | |
| 216 | from utils import model_complexity | |
| 217 | import torch | |
| 218 | ||
| 219 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') | |
| 220 | ||
| 221 | models = { | |
| 222 | 'SimpleMLP': SimpleMLP(), | |
| 223 | 'DeepMLP': DeepMLP(), | |
| 224 | 'SimpleCNN': SimpleCNN(), | |
| 225 | 'MediumCNN': MediumCNN(), | |
| 226 | 'VGGStyleNet': VGGStyleNet(), | |
| 227 | 'SimpleResNet': SimpleResNet() | |
| 228 | } | |
| 229 | ||
| 230 | results = {} | |
| 231 | for name, model in models.items(): | |
| 232 | print(f"\n分析{name}复杂度:") | |
| 233 | params, time = model_complexity(model, device=device) | |
| 234 | results[name] = {'params': params, 'time': time} | |
| 235 | ``` | |
| 236 | ||
| 237 | 2. 记录并比较各个模型的参数量和推理时间。 | |
| 238 | ||
| 239 | **分析问题5**: 比较VGG风格和ResNet风格网络的性能。残差连接带来了哪些优势? | |
| 240 | ||
| 241 | **分析问题6**: 参数量和推理时间如何影响模型的实用性?如何在性能和效率之间找到平衡? | |
| 242 | ||
| 243 | ||
| 244 | #### 3.3 理解高级CNN设计理念 | |
| 245 | ||
| 246 | 随着深度学习的发展,CNN架构设计也变得更加精细和高效。以下是一些重要的设计理念: | |
| 247 | ||
| 248 | 1. **网络深度与宽度平衡**:更深的网络能学习更抽象的特征,但也更难训练;更宽的网络(更多通道)能捕获更多特征,但参数量增加 | |
| 249 | 2. **跳跃连接**:除了ResNet的残差连接,还有DenseNet的密集连接、U-Net的跨层连接等 | |
| 250 | 3. **特征增强**:注意力机制(如SENet的通道注意力)、特征融合等 | |
| 251 | 4. **高效卷积设计**:深度可分离卷积(MobileNet)、组卷积(ShuffleNet)等 | |
| 252 | ||
| 253 | **探索问题2**: 如果你要为移动设备设计一个CNN模型,应该考虑哪些因素来权衡性能和效率?请提出至少三条具体的设计原则。 | |
| 254 | ||
| 255 | ||
| 256 | ### 第四部分:模型比较与分析 | |
| 257 | ||
| 258 | 运行 `compare_models.py` 来对比不同模型的性能: | |
| 259 | ||
| 260 | **综合分析**: 根据比较结果,分析不同类型模型(MLP和CNN)以及不同复杂度模型的性能差异。考虑以下几点: | |
| 261 | 1. 测试准确率 | |
| 262 | 2. 参数量 | |
| 263 | 3. 推理时间 | |
| 264 | 4. 训练收敛速度 | |
| 265 | 5. 过拟合/欠拟合情况 | |
| 266 | ||
| 267 | ||
| 268 | ## 创新探索任务(选做) | |
| 269 | ||
| 270 | 选择下列一项或多项任务完成: | |
| 271 | ||
| 272 | 1. **模型改进**:对任一模型进行修改和改进,提高其在CIFAR-10上的性能。 | |
| 273 | 2. **可视化分析**:设计更好的可视化方法来解释模型的决策过程。 | |
| 274 | 3. **迁移学习**:探索如何利用预训练模型提高CIFAR-10的分类性能。 | |
| 275 | 4. **对抗性样本**:生成对抗性样本,并研究不同模型对对抗性样本的鲁棒性。 | |
| 276 | 5. **自监督学习**:实现一个简单的自监督学习方法,并评估其效果。 | |
| 277 | ||
| 278 | ## 实验报告要求 | |
| 279 | ||
| 280 | 实验报告应包含以下内容: | |
| 281 | ||
| 282 | 1. 实验目的和背景介绍 | |
| 283 | 2. 实验原理简述 | |
| 284 | 3. 实验过程描述 | |
| 285 | 4. 实现的代码(关键部分,包含详细注释) | |
| 286 | 5. 实验结果和分析(包括填写的所有分析问题和任务) | |
| 287 | 6. 创新探索任务的设计、实现和结果(如果选做) | |
| 288 | 7. 结论和思考 | |
| 289 | 8. 参考文献 | |
| 290 | ||
| 291 | ## 评分标准 | |
| 292 | ||
| 293 | - 基础任务完成度:60% | |
| 294 | - 分析问题深度和准确性:35% | |
| 295 | - 创新探索任务:15% (bonus) | |
| 296 | - 报告质量和表达清晰度:5% | |
| 297 | ||
| 298 | ## 参考资料 | |
| 299 | ||
| 300 | 1. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436-444. | |
| 301 | 2. He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. CVPR. | |
| 302 | 3. Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556. | |
| 303 | 4. PyTorch文档:https://pytorch.org/docs/stable/index.html | |
| 304 | 5. CS231n: Convolutional Neural Networks for Visual Recognition:https://cs231n.github.io/⏎ |
| 0 | ## 介绍 (Introduction) | |
| 1 | ||
| 2 | 添加该项目的功能、使用场景和输入输出参数等相关信息。 | |
| 3 | ||
| 4 | You can describe the function, usage and parameters of the project. |
| 134 | 134 | "name": "python", |
| 135 | 135 | "nbconvert_exporter": "python", |
| 136 | 136 | "pygments_lexer": "ipython3", |
| 137 | "version": "3.5.2" | |
| 137 | "version": "3.7.5" | |
| 138 | 138 | }, |
| 139 | 139 | "pycharm": { |
| 140 | 140 | "stem_cell": { |
Binary diff not shown
Binary diff not shown
| 0 | { | |
| 1 | "cells": [ | |
| 2 | { | |
| 3 | "cell_type": "code", | |
| 4 | "execution_count": null, | |
| 5 | "metadata": {}, | |
| 6 | "outputs": [], | |
| 7 | "source": [ | |
| 8 | "print('Hello Mo!')" | |
| 9 | ] | |
| 10 | } | |
| 11 | ], | |
| 12 | "metadata": { | |
| 13 | "kernelspec": { | |
| 14 | "display_name": "Python 3", | |
| 15 | "language": "python", | |
| 16 | "name": "python3" | |
| 17 | }, | |
| 18 | "language_info": { | |
| 19 | "codemirror_mode": { | |
| 20 | "name": "ipython", | |
| 21 | "version": 3 | |
| 22 | }, | |
| 23 | "file_extension": ".py", | |
| 24 | "mimetype": "text/x-python", | |
| 25 | "name": "python", | |
| 26 | "nbconvert_exporter": "python", | |
| 27 | "pygments_lexer": "ipython3", | |
| 28 | "version": "3.7.5" | |
| 29 | } | |
| 30 | }, | |
| 31 | "nbformat": 4, | |
| 32 | "nbformat_minor": 2 | |
| 33 | } |
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
| 0 | <meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html"> |
Binary diff not shown
Binary diff not shown
Binary diff not shown